HADOOP-13079. Add -q option to Ls to print ? instead of non-printable characters. Contributed by John Zhuge.

This commit is contained in:
Andrew Wang 2016-06-13 11:43:46 -07:00
parent 28b66ae919
commit 0accc3306d
7 changed files with 291 additions and 9 deletions

View File

@ -48,6 +48,7 @@ class Ls extends FsCommand {
private static final String OPTION_PATHONLY = "C";
private static final String OPTION_DIRECTORY = "d";
private static final String OPTION_HUMAN = "h";
private static final String OPTION_HIDENONPRINTABLE = "q";
private static final String OPTION_RECURSIVE = "R";
private static final String OPTION_REVERSE = "r";
private static final String OPTION_MTIME = "t";
@ -55,10 +56,11 @@ class Ls extends FsCommand {
private static final String OPTION_SIZE = "S";
public static final String NAME = "ls";
public static final String USAGE = "[-" + OPTION_PATHONLY + "] [-"
+ OPTION_DIRECTORY + "] [-" + OPTION_HUMAN + "] [-" + OPTION_RECURSIVE
+ "] [-" + OPTION_MTIME + "] [-" + OPTION_SIZE + "] [-" + OPTION_REVERSE
+ "] [-" + OPTION_ATIME + "] [<path> ...]";
public static final String USAGE = "[-" + OPTION_PATHONLY + "] [-" +
OPTION_DIRECTORY + "] [-" + OPTION_HUMAN + "] [-" +
OPTION_HIDENONPRINTABLE + "] [-" + OPTION_RECURSIVE + "] [-" +
OPTION_MTIME + "] [-" + OPTION_SIZE + "] [-" + OPTION_REVERSE + "] [-" +
OPTION_ATIME + "] [<path> ...]";
public static final String DESCRIPTION =
"List the contents that match the specified file pattern. If " +
@ -77,6 +79,8 @@ class Ls extends FsCommand {
" -" + OPTION_HUMAN +
" Formats the sizes of files in a human-readable fashion\n" +
" rather than a number of bytes.\n" +
" -" + OPTION_HIDENONPRINTABLE +
" Print ? instead of non-printable characters.\n" +
" -" + OPTION_RECURSIVE +
" Recursively list the contents of directories.\n" +
" -" + OPTION_MTIME +
@ -104,6 +108,9 @@ class Ls extends FsCommand {
protected boolean humanReadable = false;
/** Whether to print ? instead of non-printable characters. */
private boolean hideNonPrintable = false;
protected Ls() {}
protected Ls(Configuration conf) {
@ -119,14 +126,16 @@ class Ls extends FsCommand {
@Override
protected void processOptions(LinkedList<String> args)
throws IOException {
CommandFormat cf = new CommandFormat(0, Integer.MAX_VALUE, OPTION_PATHONLY,
OPTION_DIRECTORY, OPTION_HUMAN, OPTION_RECURSIVE, OPTION_REVERSE,
CommandFormat cf = new CommandFormat(0, Integer.MAX_VALUE,
OPTION_PATHONLY, OPTION_DIRECTORY, OPTION_HUMAN,
OPTION_HIDENONPRINTABLE, OPTION_RECURSIVE, OPTION_REVERSE,
OPTION_MTIME, OPTION_SIZE, OPTION_ATIME);
cf.parse(args);
pathOnly = cf.getOpt(OPTION_PATHONLY);
dirRecurse = !cf.getOpt(OPTION_DIRECTORY);
setRecursive(cf.getOpt(OPTION_RECURSIVE) && dirRecurse);
humanReadable = cf.getOpt(OPTION_HUMAN);
hideNonPrintable = cf.getOpt(OPTION_HIDENONPRINTABLE);
orderReverse = cf.getOpt(OPTION_REVERSE);
orderTime = cf.getOpt(OPTION_MTIME);
orderSize = !orderTime && cf.getOpt(OPTION_SIZE);
@ -163,6 +172,11 @@ class Ls extends FsCommand {
return this.humanReadable;
}
/**
 * Should non-printable characters be shown as ? in the listing.
 * @return the current value of the hide-non-printable flag
 */
@InterfaceAudience.Private
private boolean isHideNonPrintable() {
  return this.hideNonPrintable;
}
/**
* Should directory contents be displayed in reverse order
* @return true reverse order, false default order
@ -241,7 +255,7 @@ class Ls extends FsCommand {
dateFormat.format(new Date(isUseAtime()
? stat.getAccessTime()
: stat.getModificationTime())),
item);
isHideNonPrintable() ? new PrintableString(item.toString()) : item);
out.println(line);
}

View File

@ -0,0 +1,72 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.shell;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
/**
* The {@code PrintableString} class converts any string to a printable string
* by replacing non-printable characters with ?.
*
* Categories of Unicode non-printable characters:
* <ul>
* <li> Control characters (Cc)
* <li> Formatting Unicode (Cf)
* <li> Private use Unicode (Co)
* <li> Unassigned Unicode (Cn)
* <li> Standalone surrogate (Unfortunately no matching Unicode category)
* </ul>
*
* @see Character
* @see <a href="http://www.unicode.org/">The Unicode Consortium</a>
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
class PrintableString {
  /** Character written in place of every non-printable code point. */
  private static final char REPLACEMENT_CHAR = '?';

  /** The sanitized text, computed once in the constructor. */
  private final String printableString;

  /**
   * Builds the printable form of {@code rawString}.
   *
   * The input is walked by Unicode code point rather than by {@code char},
   * so a valid surrogate pair is classified as one supplementary character,
   * while an unpaired surrogate is seen on its own and replaced.
   *
   * @param rawString the text to sanitize; passing {@code null} results in a
   *                  {@link NullPointerException}
   */
  PrintableString(String rawString) {
    StringBuilder stringBuilder = new StringBuilder(rawString.length());
    for (int offset = 0; offset < rawString.length();) {
      int codePoint = rawString.codePointAt(offset);
      // Advance by one or two chars depending on the code point's width.
      offset += Character.charCount(codePoint);
      if (isNonPrintable(codePoint)) {
        stringBuilder.append(REPLACEMENT_CHAR);
      } else {
        // appendCodePoint writes the one or two chars directly, avoiding the
        // temporary array that Character.toChars would allocate.
        stringBuilder.appendCodePoint(codePoint);
      }
    }
    printableString = stringBuilder.toString();
  }

  /**
   * Tells whether a code point belongs to one of the general categories
   * that are replaced: Cc, Cf, Co, Cs or Cn.
   *
   * @param codePoint the Unicode code point to classify
   * @return true if the code point must be replaced, false if it is kept
   */
  private static boolean isNonPrintable(int codePoint) {
    switch (Character.getType(codePoint)) {
    case Character.CONTROL:     // Cc
    case Character.FORMAT:      // Cf
    case Character.PRIVATE_USE: // Co
    case Character.SURROGATE:   // Cs
    case Character.UNASSIGNED:  // Cn
      return true;
    default:
      return false;
    }
  }

  /**
   * @return the input string with every non-printable code point replaced
   *         by {@code ?}
   */
  @Override
  public String toString() {
    return printableString;
  }
}

View File

@ -384,13 +384,14 @@ Return usage output.
ls
----
Usage: `hadoop fs -ls [-C] [-d] [-h] [-R] [-t] [-S] [-r] [-u] <args> `
Usage: `hadoop fs -ls [-C] [-d] [-h] [-q] [-R] [-t] [-S] [-r] [-u] <args> `
Options:
* -C: Display the paths of files and directories only.
* -d: Directories are listed as plain files.
* -h: Format file sizes in a human-readable fashion (e.g. 64.0m instead of 67108864).
* -q: Print ? instead of non-printable characters.
* -R: Recursively list subdirectories encountered.
* -t: Sort output by modification time (most recent first).
* -S: Sort output by file size.

View File

@ -0,0 +1,78 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs;
import static org.hamcrest.core.Is.is;
import static org.junit.Assert.assertThat;
import org.apache.hadoop.conf.Configuration;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
/**
* Test FsShell -ls command.
*/
public class TestFsShellList {
  private static Configuration conf;
  private static FsShell shell;
  private static LocalFileSystem lfs;
  private static Path testRootDir;

  /**
   * Creates the shell and a checksum-enabled local file system, then makes
   * the test root directory under the {@code test.build.data} location
   * (falling back to {@code test/build/data}).
   */
  @BeforeClass
  public static void setup() throws Exception {
    conf = new Configuration();
    shell = new FsShell(conf);
    lfs = FileSystem.getLocal(conf);
    lfs.setVerifyChecksum(true);
    lfs.setWriteChecksum(true);

    String root = System.getProperty("test.build.data", "test/build/data");
    testRootDir = lfs.makeQualified(new Path(root, "testFsShellList"));
    assertThat(lfs.mkdirs(testRootDir), is(true));
  }

  /**
   * Recursively removes the test root directory.
   */
  @AfterClass
  public static void teardown() throws Exception {
    lfs.delete(testRootDir, true);
  }

  /**
   * Writes a small file at {@code filePath} and checks that its checksum
   * file was created alongside it.
   *
   * @param filePath location of the file to create
   */
  private void createFile(Path filePath) throws Exception {
    FSDataOutputStream out = lfs.create(filePath);
    try {
      out.writeChars("I am " + filePath);
    } finally {
      // Close even when the write fails so the stream is never leaked.
      out.close();
    }
    assertThat(lfs.exists(lfs.getChecksumFile(filePath)), is(true));
  }

  /**
   * Runs {@code -ls} on the test directory, with and without {@code -q},
   * and expects a zero exit code each time. Some of the listed file names
   * contain backspace, tab and carriage-return characters.
   */
  @Test
  public void testList() throws Exception {
    createFile(new Path(testRootDir, "abc"));
    String[] lsArgv = new String[]{"-ls", testRootDir.toString()};
    assertThat(shell.run(lsArgv), is(0));

    createFile(new Path(testRootDir, "abc\bd\tef"));
    createFile(new Path(testRootDir, "ghi"));
    createFile(new Path(testRootDir, "qq\r123"));
    lsArgv = new String[]{"-ls", testRootDir.toString()};
    assertThat(shell.run(lsArgv), is(0));

    lsArgv = new String[]{"-ls", "-q", testRootDir.toString()};
    assertThat(shell.run(lsArgv), is(0));
  }
}

View File

@ -0,0 +1,87 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.shell;
import static org.hamcrest.CoreMatchers.is;
import static org.junit.Assert.assertThat;
import org.junit.Test;
/**
* Test {@code PrintableString} class.
*/
public class TestPrintableString {

  /**
   * Asserts that converting {@code raw} through {@code PrintableString}
   * yields {@code expected}.
   *
   * @param reason   message reported when the assertion fails
   * @param raw      input string
   * @param expected expected printable form of {@code raw}
   */
  private void expect(String reason, String raw, String expected) {
    assertThat(reason, new PrintableString(raw).toString(), is(expected));
  }

  /**
   * Test printable characters.
   */
  @Test
  public void testPrintableCharacters() throws Exception {
    // ASCII
    expect("Should keep ASCII letter", "abcdef237", "abcdef237");
    expect("Should keep ASCII symbol", " !\"|}~", " !\"|}~");

    // Unicode BMP (U+1050 lies in the Myanmar block, not Georgian)
    expect("Should keep Myanmar U+1050 and Box Drawing U+2533",
        "\u1050\u2533--", "\u1050\u2533--");

    // Unicode SMP
    expect("Should keep Linear B U+10000 and Phoenician U+10900",
        "\uD800\uDC00'''\uD802\uDD00", "\uD800\uDC00'''\uD802\uDD00");
  }

  /**
   * Test non-printable characters.
   */
  @Test
  public void testNonPrintableCharacters() throws Exception {
    // Control characters
    expect("Should replace single control character", "abc\rdef", "abc?def");
    expect("Should replace multiple control characters",
        "\babc\tdef", "?abc?def");
    expect("Should replace all control characters", "\f\f\b\n", "????");
    expect("Should replace mixed characters starting with a control",
        "\027ab\0", "?ab?");

    // Formatting Unicode
    expect("Should replace Byte Order Mark", "-\uFEFF--", "-?--");
    expect("Should replace Invisible Separator", "\u2063\t", "??");

    // Private use Unicode
    expect("Should replace private use U+E000", "\uE000", "?");
    expect("Should replace private use U+E123 and U+F432",
        "\uE123abc\uF432", "?abc?");
    expect("Should replace private use in Plane 15 and 16: U+F0000 and " +
        "U+10FFFD, but keep U+1050",
        "x\uDB80\uDC00y\uDBFF\uDFFDz\u1050", "x?y?z\u1050");

    // Unassigned Unicode. Both code points are noncharacters, which are
    // never assigned; U+30000 was previously used here but Unicode 13
    // assigned it (CJK Extension G), so it is printable on newer JVMs.
    expect("Should replace unassigned U+2FFFE and U+DFFFF",
        "-\uD87F\uDFFE-\uDB3F\uDFFF-", "-?-?-");

    // Standalone surrogate character (not in a pair)
    expect("Should replace standalone surrogate U+DB80", "x\uDB80yz", "x?yz");
    expect("Should replace standalone surrogate mixed with valid pair",
        "x\uDB80\uD802\uDD00yz", "x?\uD802\uDD00yz");
  }
}

View File

@ -0,0 +1,26 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Package for {@code org.apache.hadoop.fs.shell} test classes.
*/
@InterfaceAudience.Private
@InterfaceStability.Unstable
package org.apache.hadoop.fs.shell;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;

View File

@ -54,7 +54,7 @@
<comparators>
<comparator>
<type>RegexpComparator</type>
<expected-output>^-ls \[-C\] \[-d\] \[-h\] \[-R\] \[-t\] \[-S\] \[-r\] \[-u\] \[&lt;path&gt; \.\.\.\] :( |\t)*</expected-output>
<expected-output>^-ls \[-C\] \[-d\] \[-h\] \[-q\] \[-R\] \[-t\] \[-S\] \[-r\] \[-u\] \[&lt;path&gt; \.\.\.\] :( |\t)*</expected-output>
</comparator>
<comparator>
<type>RegexpComparator</type>
@ -104,6 +104,10 @@
<type>RegexpComparator</type>
<expected-output>^\s*-h\s+Formats the sizes of files in a human-readable fashion( )*</expected-output>
</comparator>
<comparator>
<type>RegexpComparator</type>
<expected-output>^\s*-q\s+Print \? instead of non-printable characters\.( )*</expected-output>
</comparator>
<comparator>
<type>RegexpComparator</type>
<expected-output>^\s*rather than a number of bytes\.( )*</expected-output>