HADOOP-12657. Add an option to skip newline on empty files with getMerge -nl. Contributed by Kanaka Kumar Avvaru.
(cherry picked from commit 061c05cc05)
This commit is contained in:
parent
e06c291245
commit
af49823499
|
@ -55,6 +55,9 @@ Release 2.8.0 - UNRELEASED
|
||||||
|
|
||||||
HADOOP-12366. expose calculated paths (aw)
|
HADOOP-12366. expose calculated paths (aw)
|
||||||
|
|
||||||
|
HADOOP-12657. Add an option to skip newline on empty files with getMerge -nl.
|
||||||
|
(Kanaka Kumar Avvaru via aajisaka)
|
||||||
|
|
||||||
IMPROVEMENTS
|
IMPROVEMENTS
|
||||||
|
|
||||||
HADOOP-12458. Retries is typoed to spell Retires in parts of
|
HADOOP-12458. Retries is typoed to spell Retires in parts of
|
||||||
|
|
|
@ -53,24 +53,29 @@ class CopyCommands {
|
||||||
/** merge multiple files together */
|
/** merge multiple files together */
|
||||||
public static class Merge extends FsCommand {
|
public static class Merge extends FsCommand {
|
||||||
public static final String NAME = "getmerge";
|
public static final String NAME = "getmerge";
|
||||||
public static final String USAGE = "[-nl] <src> <localdst>";
|
public static final String USAGE = "[-nl] [-skip-empty-file] "
|
||||||
|
+ "<src> <localdst>";
|
||||||
public static final String DESCRIPTION =
|
public static final String DESCRIPTION =
|
||||||
"Get all the files in the directories that " +
|
"Get all the files in the directories that "
|
||||||
"match the source file pattern and merge and sort them to only " +
|
+ "match the source file pattern and merge and sort them to only "
|
||||||
"one file on local fs. <src> is kept.\n" +
|
+ "one file on local fs. <src> is kept.\n"
|
||||||
"-nl: Add a newline character at the end of each file.";
|
+ "-nl: Add a newline character at the end of each file.\n"
|
||||||
|
+ "-skip-empty-file: Do not add new line character for empty file.";
|
||||||
|
|
||||||
protected PathData dst = null;
|
protected PathData dst = null;
|
||||||
protected String delimiter = null;
|
protected String delimiter = null;
|
||||||
|
private boolean skipEmptyFileDelimiter;
|
||||||
protected List<PathData> srcs = null;
|
protected List<PathData> srcs = null;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void processOptions(LinkedList<String> args) throws IOException {
|
protected void processOptions(LinkedList<String> args) throws IOException {
|
||||||
try {
|
try {
|
||||||
CommandFormat cf = new CommandFormat(2, Integer.MAX_VALUE, "nl");
|
CommandFormat cf = new CommandFormat(2, Integer.MAX_VALUE, "nl",
|
||||||
|
"skip-empty-file");
|
||||||
cf.parse(args);
|
cf.parse(args);
|
||||||
|
|
||||||
delimiter = cf.getOpt("nl") ? "\n" : null;
|
delimiter = cf.getOpt("nl") ? "\n" : null;
|
||||||
|
skipEmptyFileDelimiter = cf.getOpt("skip-empty-file");
|
||||||
|
|
||||||
dst = new PathData(new URI(args.removeLast()), getConf());
|
dst = new PathData(new URI(args.removeLast()), getConf());
|
||||||
if (dst.exists && dst.stat.isDirectory()) {
|
if (dst.exists && dst.stat.isDirectory()) {
|
||||||
|
@ -92,14 +97,13 @@ class CopyCommands {
|
||||||
FSDataOutputStream out = dst.fs.create(dst.path);
|
FSDataOutputStream out = dst.fs.create(dst.path);
|
||||||
try {
|
try {
|
||||||
for (PathData src : srcs) {
|
for (PathData src : srcs) {
|
||||||
FSDataInputStream in = src.fs.open(src.path);
|
if (src.stat.getLen() != 0) {
|
||||||
try {
|
try (FSDataInputStream in = src.fs.open(src.path)) {
|
||||||
IOUtils.copyBytes(in, out, getConf(), false);
|
IOUtils.copyBytes(in, out, getConf(), false);
|
||||||
if (delimiter != null) {
|
writeDelimiter(out);
|
||||||
out.write(delimiter.getBytes("UTF-8"));
|
|
||||||
}
|
}
|
||||||
} finally {
|
} else if (!skipEmptyFileDelimiter) {
|
||||||
in.close();
|
writeDelimiter(out);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
|
@ -107,6 +111,12 @@ class CopyCommands {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void writeDelimiter(FSDataOutputStream out) throws IOException {
|
||||||
|
if (delimiter != null) {
|
||||||
|
out.write(delimiter.getBytes("UTF-8"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void processNonexistentPath(PathData item) throws IOException {
|
protected void processNonexistentPath(PathData item) throws IOException {
|
||||||
exitCode = 1; // flag that a path is bad
|
exitCode = 1; // flag that a path is bad
|
||||||
|
|
|
@ -375,6 +375,7 @@ getmerge
|
||||||
Usage: `hadoop fs -getmerge [-nl] <src> <localdst>`
|
Usage: `hadoop fs -getmerge [-nl] <src> <localdst>`
|
||||||
|
|
||||||
Takes a source directory and a destination file as input and concatenates files in src into the destination local file. Optionally -nl can be set to enable adding a newline character (LF) at the end of each file.
|
Takes a source directory and a destination file as input and concatenates files in src into the destination local file. Optionally -nl can be set to enable adding a newline character (LF) at the end of each file.
|
||||||
|
When -nl is set, -skip-empty-file can be used to avoid unwanted newline characters in case of empty files.
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
|
|
||||||
|
|
|
@ -318,6 +318,7 @@ public class TestFsShellCopy {
|
||||||
Path f1 = new Path(root, "f1");
|
Path f1 = new Path(root, "f1");
|
||||||
Path f2 = new Path(root, "f2");
|
Path f2 = new Path(root, "f2");
|
||||||
Path f3 = new Path(root, "f3");
|
Path f3 = new Path(root, "f3");
|
||||||
|
Path empty = new Path(root, "empty");
|
||||||
Path fnf = new Path(root, "fnf");
|
Path fnf = new Path(root, "fnf");
|
||||||
Path d = new Path(root, "dir");
|
Path d = new Path(root, "dir");
|
||||||
Path df1 = new Path(d, "df1");
|
Path df1 = new Path(d, "df1");
|
||||||
|
@ -325,6 +326,7 @@ public class TestFsShellCopy {
|
||||||
Path df3 = new Path(d, "df3");
|
Path df3 = new Path(d, "df3");
|
||||||
|
|
||||||
createFile(f1, f2, f3, df1, df2, df3);
|
createFile(f1, f2, f3, df1, df2, df3);
|
||||||
|
createEmptyFile(empty);
|
||||||
|
|
||||||
int exit;
|
int exit;
|
||||||
// one file, kind of silly
|
// one file, kind of silly
|
||||||
|
@ -366,6 +368,13 @@ public class TestFsShellCopy {
|
||||||
assertEquals(0, exit);
|
assertEquals(0, exit);
|
||||||
assertEquals("f1\nf2\n", readFile("out"));
|
assertEquals("f1\nf2\n", readFile("out"));
|
||||||
|
|
||||||
|
exit = shell.run(new String[]{
|
||||||
|
"-getmerge", "-nl", "-skip-empty-file",
|
||||||
|
f1.toString(), f2.toString(), empty.toString(),
|
||||||
|
"out" });
|
||||||
|
assertEquals(0, exit);
|
||||||
|
assertEquals("f1\nf2\n", readFile("out"));
|
||||||
|
|
||||||
// glob three files
|
// glob three files
|
||||||
shell.run(new String[]{
|
shell.run(new String[]{
|
||||||
"-getmerge", "-nl",
|
"-getmerge", "-nl",
|
||||||
|
@ -374,13 +383,13 @@ public class TestFsShellCopy {
|
||||||
assertEquals(0, exit);
|
assertEquals(0, exit);
|
||||||
assertEquals("f1\nf2\nf3\n", readFile("out"));
|
assertEquals("f1\nf2\nf3\n", readFile("out"));
|
||||||
|
|
||||||
// directory with 3 files, should skip subdir
|
// directory with 1 empty + 3 non empty files, should skip subdir
|
||||||
shell.run(new String[]{
|
shell.run(new String[]{
|
||||||
"-getmerge", "-nl",
|
"-getmerge", "-nl",
|
||||||
root.toString(),
|
root.toString(),
|
||||||
"out" });
|
"out" });
|
||||||
assertEquals(0, exit);
|
assertEquals(0, exit);
|
||||||
assertEquals("f1\nf2\nf3\n", readFile("out"));
|
assertEquals("\nf1\nf2\nf3\n", readFile("out"));
|
||||||
|
|
||||||
// subdir
|
// subdir
|
||||||
shell.run(new String[]{
|
shell.run(new String[]{
|
||||||
|
@ -539,6 +548,13 @@ public class TestFsShellCopy {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void createEmptyFile(Path ... paths) throws IOException {
|
||||||
|
for (Path path : paths) {
|
||||||
|
FSDataOutputStream out = lfs.create(path);
|
||||||
|
out.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private String readFile(String out) throws IOException {
|
private String readFile(String out) throws IOException {
|
||||||
Path path = new Path(out);
|
Path path = new Path(out);
|
||||||
FileStatus stat = lfs.getFileStatus(path);
|
FileStatus stat = lfs.getFileStatus(path);
|
||||||
|
|
|
@ -601,7 +601,7 @@
|
||||||
<comparators>
|
<comparators>
|
||||||
<comparator>
|
<comparator>
|
||||||
<type>RegexpComparator</type>
|
<type>RegexpComparator</type>
|
||||||
<expected-output>^-getmerge \[-nl\] <src> <localdst> :\s*</expected-output>
|
<expected-output>^-getmerge \[-nl\] \[-skip-empty-file\] <src> <localdst> :\s*</expected-output>
|
||||||
</comparator>
|
</comparator>
|
||||||
<comparator>
|
<comparator>
|
||||||
<type>RegexpComparator</type>
|
<type>RegexpComparator</type>
|
||||||
|
@ -615,6 +615,11 @@
|
||||||
<type>RegexpComparator</type>
|
<type>RegexpComparator</type>
|
||||||
<expected-output>^( |\t)*-nl\s+Add a newline character at the end of each file.( )*</expected-output>
|
<expected-output>^( |\t)*-nl\s+Add a newline character at the end of each file.( )*</expected-output>
|
||||||
</comparator>
|
</comparator>
|
||||||
|
<comparator>
|
||||||
|
<type>RegexpComparator</type>
|
||||||
|
<expected-output>^( |\t)*-skip-empty-file\s+Do not add new line character for empty file.( )*</expected-output>
|
||||||
|
</comparator>
|
||||||
|
|
||||||
</comparators>
|
</comparators>
|
||||||
</test>
|
</test>
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue