HADOOP-8140. dfs -getmerge should process its argments better (Daryn Sharp via bobby)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1297771 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Joseph Evans 2012-03-06 22:44:26 +00:00
parent 4ce5f6553f
commit bb86825d49
4 changed files with 183 additions and 5 deletions

View File

@ -345,6 +345,9 @@ Release 0.23.2 - UNRELEASED
HADOOP-8064. Remove unnecessary dependency on w3c.org in document processing
(Khiwal Lee via bobby)
HADOOP-8140. dfs -getmerge should process its argments better (Daryn Sharp
via bobby)
Release 0.23.1 - 2012-02-17
INCOMPATIBLE CHANGES

View File

@ -25,7 +25,10 @@
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.shell.PathExceptions.PathIsDirectoryException;
import org.apache.hadoop.io.IOUtils;
/** Various commands for copy files */
@InterfaceAudience.Private
@ -53,21 +56,69 @@ public static class Merge extends FsCommand {
protected PathData dst = null;
protected String delimiter = null;
protected List<PathData> srcs = null;
@Override
protected void processOptions(LinkedList<String> args) throws IOException {
CommandFormat cf = new CommandFormat(2, 3, "nl");
CommandFormat cf = new CommandFormat(2, Integer.MAX_VALUE, "nl");
cf.parse(args);
delimiter = cf.getOpt("nl") ? "\n" : null;
dst = new PathData(new File(args.removeLast()), getConf());
if (dst.exists && dst.stat.isDirectory()) {
throw new PathIsDirectoryException(dst.toString());
}
srcs = new LinkedList<PathData>();
}
@Override
protected void processArguments(LinkedList<PathData> items)
throws IOException {
super.processArguments(items);
if (exitCode != 0) { // check for error collecting paths
return;
}
FSDataOutputStream out = dst.fs.create(dst.path);
try {
FSDataInputStream in = null;
for (PathData src : srcs) {
try {
in = src.fs.open(src.path);
IOUtils.copyBytes(in, out, getConf(), false);
if (delimiter != null) {
out.write(delimiter.getBytes("UTF-8"));
}
} finally {
in.close();
}
}
} finally {
out.close();
}
}
@Override
protected void processNonexistentPath(PathData item) throws IOException {
exitCode = 1; // flag that a path is bad
super.processNonexistentPath(item);
}
// this command is handled a bit differently than others. the paths
// are batched up instead of actually being processed. this avoids
// unnecessarily streaming into the merge file and then encountering
// a path error that should abort the merge
@Override
protected void processPath(PathData src) throws IOException {
FileUtil.copyMerge(src.fs, src.path,
dst.fs, dst.path, false, getConf(), delimiter);
// for directories, recurse one level to get its files, else skip it
if (src.stat.isDirectory()) {
if (getDepth() == 0) {
recursePath(src);
} // skip subdirs
} else {
srcs.add(src);
}
}
}

View File

@ -23,6 +23,7 @@
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Arrays;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
@ -35,6 +36,7 @@
import org.apache.hadoop.fs.shell.PathExceptions.PathIsDirectoryException;
import org.apache.hadoop.fs.shell.PathExceptions.PathIsNotDirectoryException;
import org.apache.hadoop.fs.shell.PathExceptions.PathNotFoundException;
import org.apache.hadoop.io.Writable;
/**
* Encapsulates a Path (path), its FileStatus (stat), and its FileSystem (fs).
@ -43,7 +45,7 @@
@InterfaceAudience.Private
@InterfaceStability.Unstable
public class PathData {
public class PathData implements Comparable<PathData> {
protected final URI uri;
public final FileSystem fs;
public final Path path;
@ -210,6 +212,7 @@ public PathData[] getDirectoryContents() throws IOException {
String child = getStringForChildPath(stats[i].getPath());
items[i] = new PathData(fs, child, stats[i]);
}
Arrays.sort(items);
return items;
}
@ -303,6 +306,7 @@ public static PathData[] expandAsGlob(String pattern, Configuration conf)
items[i++] = new PathData(fs, globMatch, stat);
}
}
Arrays.sort(items);
return items;
}
@ -446,4 +450,20 @@ private static URI stringToUri(String pathString) {
}
}
@Override
public int compareTo(PathData o) {
return path.compareTo(((PathData)o).path);
}
@Override
public boolean equals(Object o) {
return (o != null) &&
(o instanceof PathData) &&
path.equals(((PathData)o).path);
}
@Override
public int hashCode() {
return path.hashCode();
}
}

View File

@ -225,6 +225,110 @@ private void checkPut(int exitCode, Path src, Path dest) throws Exception {
assertEquals(exitCode, gotExit);
}
@Test
public void testCopyMerge() throws Exception {
Path root = new Path(testRootDir, "TestMerge");
Path f1 = new Path(root, "f1");
Path f2 = new Path(root, "f2");
Path f3 = new Path(root, "f3");
Path fnf = new Path(root, "fnf");
Path d = new Path(root, "dir");
Path df1 = new Path(d, "df1");
Path df2 = new Path(d, "df2");
Path df3 = new Path(d, "df3");
createFile(f1, f2, f3, df1, df2, df3);
int exit;
// one file, kind of silly
exit = shell.run(new String[]{
"-getmerge",
f1.toString(),
"out" });
assertEquals(0, exit);
assertEquals("f1", readFile("out"));
exit = shell.run(new String[]{
"-getmerge",
fnf.toString(),
"out" });
assertEquals(1, exit);
assertFalse(lfs.exists(new Path("out")));
// two files
exit = shell.run(new String[]{
"-getmerge",
f1.toString(), f2.toString(),
"out" });
assertEquals(0, exit);
assertEquals("f1f2", readFile("out"));
// two files, preserves order
exit = shell.run(new String[]{
"-getmerge",
f2.toString(), f1.toString(),
"out" });
assertEquals(0, exit);
assertEquals("f2f1", readFile("out"));
// two files
exit = shell.run(new String[]{
"-getmerge", "-nl",
f1.toString(), f2.toString(),
"out" });
assertEquals(0, exit);
assertEquals("f1\nf2\n", readFile("out"));
// glob three files
shell.run(new String[]{
"-getmerge", "-nl",
new Path(root, "f*").toString(),
"out" });
assertEquals(0, exit);
assertEquals("f1\nf2\nf3\n", readFile("out"));
// directory with 3 files, should skip subdir
shell.run(new String[]{
"-getmerge", "-nl",
root.toString(),
"out" });
assertEquals(0, exit);
assertEquals("f1\nf2\nf3\n", readFile("out"));
// subdir
shell.run(new String[]{
"-getmerge", "-nl",
d.toString(), "out"});
assertEquals(0, exit);
assertEquals("df1\ndf2\ndf3\n", readFile("out"));
// file, dir, file
shell.run(new String[]{
"-getmerge", "-nl",
f1.toString(), d.toString(), f2.toString(), "out" });
assertEquals(0, exit);
assertEquals("f1\ndf1\ndf2\ndf3\nf2\n", readFile("out"));
}
private void createFile(Path ... paths) throws IOException {
for (Path path : paths) {
FSDataOutputStream out = lfs.create(path);
out.write(path.getName().getBytes());
out.close();
}
}
private String readFile(String out) throws IOException {
Path path = new Path(out);
FileStatus stat = lfs.getFileStatus(path);
FSDataInputStream in = lfs.open(path);
byte[] buffer = new byte[(int)stat.getLen()];
in.readFully(buffer);
in.close();
lfs.delete(path, false);
return new String(buffer);
}
// path handles "." rather oddly
private String pathAsString(Path p) {
String s = (p == null) ? Path.CUR_DIR : p.toString();