HADOOP-18056. DistCp: Filter duplicates in the source paths. (#3825). Contributed by Ayush Saxena.
Reviewed-by: tomscut <litao@bigo.sg>
Reviewed-by: Steve Loughran <stevel@apache.org>
This commit is contained in:
parent
d7cacea07b
commit
46b1411189
|
@ -30,8 +30,10 @@ import org.apache.hadoop.conf.Configuration;
|
|||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.tools.util.DistCpUtils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.EnumSet;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.NoSuchElementException;
|
||||
import java.util.Set;
|
||||
|
@ -233,7 +235,18 @@ public final class DistCpOptions {
|
|||
|
||||
/**
 * Returns the configured source paths wrapped in an unmodifiable list, or
 * {@code null} when {@code sourcePaths} itself is {@code null}.
 *
 * NOTE(review): this text is a rendered diff whose add/remove markers were
 * stripped, so two tails of the return expression appear below. The first
 * one (wrapping sourcePaths directly) looks like the removed line and the
 * second one (wrapping getUniquePaths(sourcePaths)) looks like the added
 * line, which matches the commit title about filtering duplicate source
 * paths. Confirm against the repository before relying on either form.
 *
 * @return an unmodifiable list of source paths, or {@code null} if none
 *         were set
 */
public List<Path> getSourcePaths() {
|
||||
return sourcePaths == null ?
|
||||
null : Collections.unmodifiableList(sourcePaths);
|
||||
null :
|
||||
Collections.unmodifiableList(getUniquePaths(sourcePaths));
|
||||
}
|
||||
|
||||
private List<Path> getUniquePaths(List<Path> srcPaths) {
|
||||
Set<Path> uniquePaths = new LinkedHashSet<>();
|
||||
for (Path path : srcPaths) {
|
||||
if (!uniquePaths.add(path)) {
|
||||
LOG.info("Path: {} added multiple times, ignoring the redundant entry.", path);
|
||||
}
|
||||
}
|
||||
return new ArrayList<>(uniquePaths);
|
||||
}
|
||||
|
||||
public Path getTargetPath() {
|
||||
|
|
|
@ -167,6 +167,29 @@ public class TestCopyListing extends SimpleCopyListing {
|
|||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDuplicateSourcePaths() throws Exception {
|
||||
FileSystem fs = FileSystem.get(getConf());
|
||||
List<Path> srcPaths = new ArrayList<Path>();
|
||||
try {
|
||||
srcPaths.add(new Path("/tmp/in"));
|
||||
srcPaths.add(new Path("/tmp/in"));
|
||||
TestDistCpUtils.createFile(fs, "/tmp/in/src1/1.txt");
|
||||
TestDistCpUtils.createFile(fs, "/tmp/in/src2/1.txt");
|
||||
Path target = new Path("/tmp/out");
|
||||
Path listingFile = new Path("/tmp/list");
|
||||
final DistCpOptions options =
|
||||
new DistCpOptions.Builder(srcPaths, target).build();
|
||||
final DistCpContext context = new DistCpContext(options);
|
||||
CopyListing listing =
|
||||
CopyListing.getCopyListing(getConf(), CREDENTIALS, context);
|
||||
listing.buildListing(listingFile, context);
|
||||
Assert.assertTrue(fs.exists(listingFile));
|
||||
} finally {
|
||||
TestDistCpUtils.delete(fs, "/tmp");
|
||||
}
|
||||
}
|
||||
|
||||
@Test(timeout=10000)
|
||||
public void testBuildListing() {
|
||||
FileSystem fs = null;
|
||||
|
|
Loading…
Reference in New Issue