diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 261ec545bbd..b8a6b8f340b 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -44,6 +44,9 @@ Release 2.8.0 - UNRELEASED OPTIMIZATIONS + HADOOP-11785. Reduce the number of listStatus operation in distcp + buildListing (Zoran Dimitrijevic via Colin P. McCabe) + BUG FIXES HADOOP-11568. Description on usage of classpath in hadoop command is diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java index 6dc827a92e3..e8a23aa6ff0 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java @@ -193,12 +193,12 @@ public void doBuildListing(SequenceFile.Writer fileListWriter, writeToFileListing(fileListWriter, sourceCopyListingStatus, sourcePathRoot, options); - if (isDirectoryAndNotEmpty(sourceFS, sourceStatus)) { + if (sourceStatus.isDirectory()) { if (LOG.isDebugEnabled()) { - LOG.debug("Traversing non-empty source dir: " + sourceStatus.getPath()); + LOG.debug("Traversing source dir: " + sourceStatus.getPath()); } - traverseNonEmptyDirectory(fileListWriter, sourceStatus, sourcePathRoot, - options); + traverseDirectory(fileListWriter, sourceFS, sourceStatus, + sourcePathRoot, options); } } } @@ -275,22 +275,17 @@ private SequenceFile.Writer getWriter(Path pathToListFile) throws IOException { SequenceFile.Writer.compression(SequenceFile.CompressionType.NONE)); } - private static boolean isDirectoryAndNotEmpty(FileSystem fileSystem, - FileStatus fileStatus) throws IOException { - return fileStatus.isDirectory() && getChildren(fileSystem, fileStatus).length > 0; - } - private static FileStatus[] getChildren(FileSystem fileSystem, FileStatus parent) throws IOException { return fileSystem.listStatus(parent.getPath()); } - private void traverseNonEmptyDirectory(SequenceFile.Writer fileListWriter, - FileStatus sourceStatus, - Path sourcePathRoot, - DistCpOptions options) - throws IOException { - FileSystem sourceFS = sourcePathRoot.getFileSystem(getConf()); + private void traverseDirectory(SequenceFile.Writer fileListWriter, + FileSystem sourceFS, + FileStatus sourceStatus, + Path sourcePathRoot, + DistCpOptions options) + throws IOException { final boolean preserveAcls = options.shouldPreserve(FileAttribute.ACL); final boolean preserveXAttrs = options.shouldPreserve(FileAttribute.XATTR); final boolean preserveRawXattrs = options.shouldPreserveRawXattrs(); @@ -299,9 +294,9 @@ private void traverseNonEmptyDirectory(SequenceFile.Writer fileListWriter, while (!pathStack.isEmpty()) { for (FileStatus child: getChildren(sourceFS, pathStack.pop())) { - if (LOG.isDebugEnabled()) - LOG.debug("Recording source-path: " - + sourceStatus.getPath() + " for copy."); + if (LOG.isDebugEnabled()) { + LOG.debug("Recording source-path: " + child.getPath() + " for copy."); + } CopyListingFileStatus childCopyListingStatus = DistCpUtils.toCopyListingFileStatus(sourceFS, child, preserveAcls && child.isDirectory(), @@ -309,16 +304,16 @@ private void traverseNonEmptyDirectory(SequenceFile.Writer fileListWriter, preserveRawXattrs && child.isDirectory()); writeToFileListing(fileListWriter, childCopyListingStatus, sourcePathRoot, options); - if (isDirectoryAndNotEmpty(sourceFS, child)) { - if (LOG.isDebugEnabled()) - LOG.debug("Traversing non-empty source dir: " - + sourceStatus.getPath()); + if (child.isDirectory()) { + if (LOG.isDebugEnabled()) { + LOG.debug("Traversing into source dir: " + child.getPath()); + } pathStack.push(child); } } } } - + private void writeToFileListingRoot(SequenceFile.Writer fileListWriter, CopyListingFileStatus fileStatus, Path sourcePathRoot, DistCpOptions options) throws IOException {