From faba3591d32f2e4808c2faeb9472348d52619c8a Mon Sep 17 00:00:00 2001
From: Andrew Olson
Date: Fri, 22 Mar 2019 10:35:30 +0000
Subject: [PATCH] HADOOP-16147. Allow CopyListing sequence file keys and values to be more easily customized.

Author: Andrew Olson
---
 .../org/apache/hadoop/tools/CopyListing.java  | 23 +++++++++++++++++++
 .../hadoop/tools/SimpleCopyListing.java       |  4 ++--
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListing.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListing.java
index e018b0b9573..6f8aa34b295 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListing.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListing.java
@@ -248,6 +248,29 @@ protected Credentials getCredentials() {
     return credentials;
   }
 
+  /**
+   * Returns the key for an entry in the copy listing sequence file.
+   * @param sourcePathRoot the root source path for determining the relative
+   *                       target path
+   * @param fileStatus the copy listing file status
+   * @return the key for the sequence file entry
+   */
+  protected Text getFileListingKey(Path sourcePathRoot,
+      CopyListingFileStatus fileStatus) {
+    return new Text(DistCpUtils.getRelativePath(sourcePathRoot,
+        fileStatus.getPath()));
+  }
+
+  /**
+   * Returns the value for an entry in the copy listing sequence file.
+   * @param fileStatus the copy listing file status
+   * @return the value for the sequence file entry
+   */
+  protected CopyListingFileStatus getFileListingValue(
+      CopyListingFileStatus fileStatus) {
+    return fileStatus;
+  }
+
   /**
    * Public Factory method with which the appropriate CopyListing implementation may be retrieved.
    * @param configuration The input configuration.
diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java
index a908e1223ae..7e5a26a36ab 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java
@@ -718,8 +718,8 @@ private void writeToFileListing(SequenceFile.Writer fileListWriter,
       return;
     }
 
-    fileListWriter.append(new Text(DistCpUtils.getRelativePath(sourcePathRoot,
-        fileStatus.getPath())), fileStatus);
+    fileListWriter.append(getFileListingKey(sourcePathRoot, fileStatus),
+        getFileListingValue(fileStatus));
     fileListWriter.sync();
 
     if (!fileStatus.isDirectory()) {