HADOOP-15384. distcp numListstatusThreads option doesn't get to -delete scan.

Contributed by Steve Loughran.
This commit is contained in:
Steve Loughran 2018-07-10 10:43:59 +01:00
parent 9bd5bef297
commit ca8b80bf59
No known key found for this signature in database
GPG Key ID: D22CF846DBB162A0
3 changed files with 16 additions and 4 deletions

View File

@@ -387,7 +387,10 @@ public void appendToConf(Configuration conf) {
       DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.TRACK_MISSING,
           String.valueOf(trackPath));
     }
+    if (numListstatusThreads > 0) {
+      DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.NUM_LISTSTATUS_THREADS,
+          Integer.toString(numListstatusThreads));
+    }
   }
   /**

View File

@@ -392,6 +392,9 @@ private void deleteMissing(Configuration conf) throws IOException {
     Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
     FileSystem clusterFS = sourceListing.getFileSystem(conf);
     Path sortedSourceListing = DistCpUtils.sortListing(conf, sourceListing);
+    long sourceListingCompleted = System.currentTimeMillis();
+    LOG.info("Source listing completed in {}",
+        formatDuration(sourceListingCompleted - listingStart));
     // Similarly, create the listing of target-files. Sort alphabetically.
     Path targetListing = new Path(sourceListing.getParent(), "targetListing.seq");
@@ -409,8 +412,8 @@ private void deleteMissing(Configuration conf) throws IOException {
     // Walk both source and target file listings.
     // Delete all from target that doesn't also exist on source.
     long deletionStart = System.currentTimeMillis();
-    LOG.info("Listing completed in {}",
-        formatDuration(deletionStart - listingStart));
+    LOG.info("Destination listing completed in {}",
+        formatDuration(deletionStart - sourceListingCompleted));
     long deletedEntries = 0;
     long filesDeleted = 0;
@@ -545,9 +548,15 @@ private Path listTargetFiles(final Configuration conf,
     // Set up options to be the same from the CopyListing.buildListing's
     // perspective, so to collect similar listings as when doing the copy
     //
+    // thread count is picked up from the job
+    int threads = conf.getInt(DistCpConstants.CONF_LABEL_LISTSTATUS_THREADS,
+        DistCpConstants.DEFAULT_LISTSTATUS_THREADS);
+    LOG.info("Scanning destination directory {} with thread count: {}",
+        targetFinalPath, threads);
     DistCpOptions options = new DistCpOptions.Builder(targets, resultNonePath)
         .withOverwrite(overwrite)
         .withSyncFolder(syncFolder)
+        .withNumListstatusThreads(threads)
         .build();
     DistCpContext distCpContext = new DistCpContext(options);
     distCpContext.setTargetPathExists(targetPathExists);

View File

@@ -572,7 +572,7 @@ private Job runDistCp(final DistCpOptions options) throws Exception {
   private DistCpOptions buildWithStandardOptions(
       DistCpOptions.Builder builder) {
     return builder
-        .withNumListstatusThreads(8)
+        .withNumListstatusThreads(DistCpOptions.MAX_NUM_LISTSTATUS_THREADS)
         .build();
   }