HADOOP-13079. Add -q option to Ls to print ? instead of non-printable characters. Contributed by John Zhuge.
This commit is contained in:
parent
28b66ae919
commit
0accc3306d
|
@ -48,6 +48,7 @@ class Ls extends FsCommand {
|
|||
private static final String OPTION_PATHONLY = "C";
|
||||
private static final String OPTION_DIRECTORY = "d";
|
||||
private static final String OPTION_HUMAN = "h";
|
||||
private static final String OPTION_HIDENONPRINTABLE = "q";
|
||||
private static final String OPTION_RECURSIVE = "R";
|
||||
private static final String OPTION_REVERSE = "r";
|
||||
private static final String OPTION_MTIME = "t";
|
||||
|
@ -55,10 +56,11 @@ class Ls extends FsCommand {
|
|||
private static final String OPTION_SIZE = "S";
|
||||
|
||||
public static final String NAME = "ls";
|
||||
public static final String USAGE = "[-" + OPTION_PATHONLY + "] [-"
|
||||
+ OPTION_DIRECTORY + "] [-" + OPTION_HUMAN + "] [-" + OPTION_RECURSIVE
|
||||
+ "] [-" + OPTION_MTIME + "] [-" + OPTION_SIZE + "] [-" + OPTION_REVERSE
|
||||
+ "] [-" + OPTION_ATIME + "] [<path> ...]";
|
||||
public static final String USAGE = "[-" + OPTION_PATHONLY + "] [-" +
|
||||
OPTION_DIRECTORY + "] [-" + OPTION_HUMAN + "] [-" +
|
||||
OPTION_HIDENONPRINTABLE + "] [-" + OPTION_RECURSIVE + "] [-" +
|
||||
OPTION_MTIME + "] [-" + OPTION_SIZE + "] [-" + OPTION_REVERSE + "] [-" +
|
||||
OPTION_ATIME + "] [<path> ...]";
|
||||
|
||||
public static final String DESCRIPTION =
|
||||
"List the contents that match the specified file pattern. If " +
|
||||
|
@ -77,6 +79,8 @@ class Ls extends FsCommand {
|
|||
" -" + OPTION_HUMAN +
|
||||
" Formats the sizes of files in a human-readable fashion\n" +
|
||||
" rather than a number of bytes.\n" +
|
||||
" -" + OPTION_HIDENONPRINTABLE +
|
||||
" Print ? instead of non-printable characters.\n" +
|
||||
" -" + OPTION_RECURSIVE +
|
||||
" Recursively list the contents of directories.\n" +
|
||||
" -" + OPTION_MTIME +
|
||||
|
@ -104,6 +108,9 @@ class Ls extends FsCommand {
|
|||
|
||||
protected boolean humanReadable = false;
|
||||
|
||||
/** Whether to print ? instead of non-printable characters. */
|
||||
private boolean hideNonPrintable = false;
|
||||
|
||||
protected Ls() {}
|
||||
|
||||
protected Ls(Configuration conf) {
|
||||
|
@ -119,14 +126,16 @@ class Ls extends FsCommand {
|
|||
@Override
|
||||
protected void processOptions(LinkedList<String> args)
|
||||
throws IOException {
|
||||
CommandFormat cf = new CommandFormat(0, Integer.MAX_VALUE, OPTION_PATHONLY,
|
||||
OPTION_DIRECTORY, OPTION_HUMAN, OPTION_RECURSIVE, OPTION_REVERSE,
|
||||
CommandFormat cf = new CommandFormat(0, Integer.MAX_VALUE,
|
||||
OPTION_PATHONLY, OPTION_DIRECTORY, OPTION_HUMAN,
|
||||
OPTION_HIDENONPRINTABLE, OPTION_RECURSIVE, OPTION_REVERSE,
|
||||
OPTION_MTIME, OPTION_SIZE, OPTION_ATIME);
|
||||
cf.parse(args);
|
||||
pathOnly = cf.getOpt(OPTION_PATHONLY);
|
||||
dirRecurse = !cf.getOpt(OPTION_DIRECTORY);
|
||||
setRecursive(cf.getOpt(OPTION_RECURSIVE) && dirRecurse);
|
||||
humanReadable = cf.getOpt(OPTION_HUMAN);
|
||||
hideNonPrintable = cf.getOpt(OPTION_HIDENONPRINTABLE);
|
||||
orderReverse = cf.getOpt(OPTION_REVERSE);
|
||||
orderTime = cf.getOpt(OPTION_MTIME);
|
||||
orderSize = !orderTime && cf.getOpt(OPTION_SIZE);
|
||||
|
@ -163,6 +172,11 @@ class Ls extends FsCommand {
|
|||
return this.humanReadable;
|
||||
}
|
||||
|
||||
@InterfaceAudience.Private
|
||||
private boolean isHideNonPrintable() {
|
||||
return hideNonPrintable;
|
||||
}
|
||||
|
||||
/**
|
||||
* Should directory contents be displayed in reverse order
|
||||
* @return true reverse order, false default order
|
||||
|
@ -241,7 +255,7 @@ class Ls extends FsCommand {
|
|||
dateFormat.format(new Date(isUseAtime()
|
||||
? stat.getAccessTime()
|
||||
: stat.getModificationTime())),
|
||||
item);
|
||||
isHideNonPrintable() ? new PrintableString(item.toString()) : item);
|
||||
out.println(line);
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,72 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.fs.shell;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.classification.InterfaceStability;
|
||||
|
||||
/**
|
||||
* The {@code PrintableString} class converts any string to a printable string
|
||||
* by replacing non-printable characters with ?.
|
||||
*
|
||||
* Categories of Unicode non-printable characters:
|
||||
* <ul>
|
||||
* <li> Control characters (Cc)
|
||||
* <li> Formatting Unicode (Cf)
|
||||
* <li> Private use Unicode (Co)
|
||||
* <li> Unassigned Unicode (Cn)
|
||||
* <li> Standalone surrogate (Unfortunately no matching Unicode category)
|
||||
* </ul>
|
||||
*
|
||||
* @see Character
|
||||
* @see <a href="http://www.unicode.org/">The Unicode Consortium</a>
|
||||
*/
|
||||
@InterfaceAudience.Public
|
||||
@InterfaceStability.Evolving
|
||||
class PrintableString {
  /** Character substituted for every non-printable code point. */
  private static final char REPLACEMENT_CHAR = '?';

  /** The sanitized form of the raw string, computed once at construction. */
  private final String printableString;

  /**
   * Builds the printable form of {@code rawString}.
   *
   * The string is walked by Unicode code point (not by {@code char}) so that
   * a valid surrogate pair is classified as the single supplementary
   * character it encodes, while an unpaired surrogate is seen on its own and
   * replaced.
   *
   * @param rawString the string to sanitize; must not be null
   */
  PrintableString(String rawString) {
    StringBuilder stringBuilder = new StringBuilder(rawString.length());
    for (int offset = 0; offset < rawString.length();) {
      int codePoint = rawString.codePointAt(offset);
      // Advance by 1 or 2 chars depending on whether this is a BMP or a
      // supplementary code point.
      offset += Character.charCount(codePoint);

      switch (Character.getType(codePoint)) {
      case Character.CONTROL:     // Cc
      case Character.FORMAT:      // Cf
      case Character.PRIVATE_USE: // Co
      case Character.SURROGATE:   // Cs
      case Character.UNASSIGNED:  // Cn
        stringBuilder.append(REPLACEMENT_CHAR);
        break;
      default:
        // appendCodePoint writes the 1 or 2 chars directly, avoiding the
        // temporary array that Character.toChars would allocate.
        stringBuilder.appendCodePoint(codePoint);
        break;
      }
    }
    printableString = stringBuilder.toString();
  }

  /**
   * @return the string with every non-printable code point replaced by ?
   */
  @Override
  public String toString() {
    return printableString;
  }
}
|
|
@ -384,13 +384,14 @@ Return usage output.
|
|||
ls
|
||||
----
|
||||
|
||||
Usage: `hadoop fs -ls [-C] [-d] [-h] [-R] [-t] [-S] [-r] [-u] <args> `
|
||||
Usage: `hadoop fs -ls [-C] [-d] [-h] [-q] [-R] [-t] [-S] [-r] [-u] <args> `
|
||||
|
||||
Options:
|
||||
|
||||
* -C: Display the paths of files and directories only.
|
||||
* -d: Directories are listed as plain files.
|
||||
* -h: Format file sizes in a human-readable fashion (e.g. 64.0m instead of 67108864).
|
||||
* -q: Print ? instead of non-printable characters.
|
||||
* -R: Recursively list subdirectories encountered.
|
||||
* -t: Sort output by modification time (most recent first).
|
||||
* -S: Sort output by file size.
|
||||
|
|
|
@ -0,0 +1,78 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.fs;
|
||||
|
||||
import static org.hamcrest.core.Is.is;
|
||||
import static org.junit.Assert.assertThat;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* Test FsShell -ls command.
|
||||
*/
|
||||
public class TestFsShellList {
|
||||
private static Configuration conf;
|
||||
private static FsShell shell;
|
||||
private static LocalFileSystem lfs;
|
||||
private static Path testRootDir;
|
||||
|
||||
@BeforeClass
|
||||
public static void setup() throws Exception {
|
||||
conf = new Configuration();
|
||||
shell = new FsShell(conf);
|
||||
lfs = FileSystem.getLocal(conf);
|
||||
lfs.setVerifyChecksum(true);
|
||||
lfs.setWriteChecksum(true);
|
||||
|
||||
String root = System.getProperty("test.build.data", "test/build/data");
|
||||
testRootDir = lfs.makeQualified(new Path(root, "testFsShellList"));
|
||||
assertThat(lfs.mkdirs(testRootDir), is(true));
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
public static void teardown() throws Exception {
|
||||
lfs.delete(testRootDir, true);
|
||||
}
|
||||
|
||||
private void createFile(Path filePath) throws Exception {
|
||||
FSDataOutputStream out = lfs.create(filePath);
|
||||
out.writeChars("I am " + filePath);
|
||||
out.close();
|
||||
assertThat(lfs.exists(lfs.getChecksumFile(filePath)), is(true));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testList() throws Exception {
|
||||
createFile(new Path(testRootDir, "abc"));
|
||||
String[] lsArgv = new String[]{"-ls", testRootDir.toString()};
|
||||
assertThat(shell.run(lsArgv), is(0));
|
||||
|
||||
createFile(new Path(testRootDir, "abc\bd\tef"));
|
||||
createFile(new Path(testRootDir, "ghi"));
|
||||
createFile(new Path(testRootDir, "qq\r123"));
|
||||
lsArgv = new String[]{"-ls", testRootDir.toString()};
|
||||
assertThat(shell.run(lsArgv), is(0));
|
||||
|
||||
lsArgv = new String[]{"-ls", "-q", testRootDir.toString()};
|
||||
assertThat(shell.run(lsArgv), is(0));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,87 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.fs.shell;
|
||||
|
||||
import static org.hamcrest.CoreMatchers.is;
|
||||
import static org.junit.Assert.assertThat;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* Test {@code PrintableString} class.
|
||||
*/
|
||||
public class TestPrintableString {
|
||||
|
||||
private void expect(String reason, String raw, String expected) {
|
||||
assertThat(reason, new PrintableString(raw).toString(), is(expected));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test printable characters.
|
||||
*/
|
||||
@Test
|
||||
public void testPrintableCharacters() throws Exception {
|
||||
// ASCII
|
||||
expect("Should keep ASCII letter", "abcdef237", "abcdef237");
|
||||
expect("Should keep ASCII symbol", " !\"|}~", " !\"|}~");
|
||||
|
||||
// Unicode BMP
|
||||
expect("Should keep Georgian U+1050 and Box Drawing U+2533",
|
||||
"\u1050\u2533--", "\u1050\u2533--");
|
||||
|
||||
// Unicode SMP
|
||||
expect("Should keep Linear B U+10000 and Phoenician U+10900",
|
||||
"\uD800\uDC00'''\uD802\uDD00", "\uD800\uDC00'''\uD802\uDD00");
|
||||
}
|
||||
|
||||
/**
|
||||
* Test non-printable characters.
|
||||
*/
|
||||
@Test
|
||||
public void testNonPrintableCharacters() throws Exception {
|
||||
// Control characters
|
||||
expect("Should replace single control character", "abc\rdef", "abc?def");
|
||||
expect("Should replace multiple control characters",
|
||||
"\babc\tdef", "?abc?def");
|
||||
expect("Should replace all control characters", "\f\f\b\n", "????");
|
||||
expect("Should replace mixed characters starting with a control",
|
||||
"\027ab\0", "?ab?");
|
||||
|
||||
// Formatting Unicode
|
||||
expect("Should replace Byte Order Mark", "-\uFEFF--", "-?--");
|
||||
expect("Should replace Invisible Separator", "\u2063\t", "??");
|
||||
|
||||
// Private use Unicode
|
||||
expect("Should replace private use U+E000", "\uE000", "?");
|
||||
expect("Should replace private use U+E123 and U+F432",
|
||||
"\uE123abc\uF432", "?abc?");
|
||||
expect("Should replace private use in Plane 15 and 16: U+F0000 and " +
|
||||
"U+10FFFD, but keep U+1050",
|
||||
"x\uDB80\uDC00y\uDBFF\uDFFDz\u1050", "x?y?z\u1050");
|
||||
|
||||
// Unassigned Unicode
|
||||
expect("Should replace unassigned U+30000 and U+DFFFF",
|
||||
"-\uD880\uDC00-\uDB3F\uDFFF-", "-?-?-");
|
||||
|
||||
// Standalone surrogate character (not in a pair)
|
||||
expect("Should replace standalone surrogate U+DB80", "x\uDB80yz", "x?yz");
|
||||
expect("Should replace standalone surrogate mixed with valid pair",
|
||||
"x\uDB80\uD802\uDD00yz", "x?\uD802\uDD00yz");
|
||||
}
|
||||
}
|
|
@ -0,0 +1,26 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Package for {@code org.apache.hadoop.fs.shell} test classes.
|
||||
*/
|
||||
@InterfaceAudience.Private
|
||||
@InterfaceStability.Unstable
|
||||
package org.apache.hadoop.fs.shell;
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.classification.InterfaceStability;
|
|
@ -54,7 +54,7 @@
|
|||
<comparators>
|
||||
<comparator>
|
||||
<type>RegexpComparator</type>
|
||||
<expected-output>^-ls \[-C\] \[-d\] \[-h\] \[-R\] \[-t\] \[-S\] \[-r\] \[-u\] \[<path> \.\.\.\] :( |\t)*</expected-output>
|
||||
<expected-output>^-ls \[-C\] \[-d\] \[-h\] \[-q\] \[-R\] \[-t\] \[-S\] \[-r\] \[-u\] \[<path> \.\.\.\] :( |\t)*</expected-output>
|
||||
</comparator>
|
||||
<comparator>
|
||||
<type>RegexpComparator</type>
|
||||
|
@ -104,6 +104,10 @@
|
|||
<type>RegexpComparator</type>
|
||||
<expected-output>^\s*-h\s+Formats the sizes of files in a human-readable fashion( )*</expected-output>
|
||||
</comparator>
|
||||
<comparator>
|
||||
<type>RegexpComparator</type>
|
||||
<expected-output>^\s*-q\s+Print \? instead of non-printable characters\.( )*</expected-output>
|
||||
</comparator>
|
||||
<comparator>
|
||||
<type>RegexpComparator</type>
|
||||
<expected-output>^\s*rather than a number of bytes\.( )*</expected-output>
|
||||
|
|
Loading…
Reference in New Issue