HADOOP-16147. Allow CopyListing sequence file keys and values to be more easily customized.

Author:    Andrew Olson
This commit is contained in:
Andrew Olson 2019-03-22 10:35:30 +00:00 committed by Steve Loughran
parent d18d0859eb
commit faba3591d3
No known key found for this signature in database
GPG Key ID: D22CF846DBB162A0
2 changed files with 25 additions and 2 deletions

View File

@ -248,6 +248,29 @@ public abstract class CopyListing extends Configured {
return credentials;
}
/**
 * Builds the key under which an entry is written to the copy listing
 * sequence file. This implementation wraps, in a {@code Text}, the result
 * of {@code DistCpUtils.getRelativePath} applied to the source root and
 * the entry's own path. The method is protected and non-final, so a
 * subclass can supply a different key.
 *
 * @param sourcePathRoot the root source path against which the entry's
 *                       path is made relative
 * @param fileStatus the copy listing file status of the entry
 * @return the sequence file key for the entry
 */
protected Text getFileListingKey(Path sourcePathRoot,
    CopyListingFileStatus fileStatus) {
  // Name the entry's path first so the key construction reads in one line.
  final Path entryPath = fileStatus.getPath();
  return new Text(DistCpUtils.getRelativePath(sourcePathRoot, entryPath));
}
/**
 * Supplies the value written to the copy listing sequence file for an
 * entry. This implementation hands the given status object back unchanged.
 * The method is protected and non-final, so a subclass can substitute a
 * different value.
 *
 * @param fileStatus the copy listing file status of the entry
 * @return the sequence file value for the entry; here, {@code fileStatus}
 *         itself
 */
protected CopyListingFileStatus getFileListingValue(
    CopyListingFileStatus fileStatus) {
  return fileStatus;
}
/**
* Public Factory method with which the appropriate CopyListing implementation may be retrieved.
* @param configuration The input configuration.

View File

@ -718,8 +718,8 @@ public class SimpleCopyListing extends CopyListing {
return;
}
fileListWriter.append(new Text(DistCpUtils.getRelativePath(sourcePathRoot,
fileStatus.getPath())), fileStatus);
fileListWriter.append(getFileListingKey(sourcePathRoot, fileStatus),
getFileListingValue(fileStatus));
fileListWriter.sync();
if (!fileStatus.isDirectory()) {