HBASE-18843 Add DistCp support to incremental backup with bulk loading - revert due to missing credit
This commit is contained in:
@ -18,6 +18,7 @@
package org.apache.hadoop.hbase.backup.impl;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
@ -32,15 +33,17 @@ import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.BackupCopyJob;
import org.apache.hadoop.hbase.backup.BackupInfo;
import org.apache.hadoop.hbase.backup.BackupInfo.BackupPhase;
import org.apache.hadoop.hbase.backup.BackupRequest;
import org.apache.hadoop.hbase.backup.BackupRestoreFactory;
import org.apache.hadoop.hbase.backup.BackupType;
import org.apache.hadoop.hbase.backup.util.BackupUtils;
import org.apache.hadoop.hbase.backup.util.FixedRelativePathCopyListing;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.mapreduce.WALPlayer;
@ -49,9 +52,7 @@ import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.HFileArchiveUtil;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
import org.apache.hadoop.tools.DistCpConstants;
import org.apache.hadoop.util.Tool;
import org.apache.yetus.audience.InterfaceAudience;
* Incremental backup implementation.
@ -111,8 +112,6 @@ public class IncrementalTableBackupClient extends TableBackupClient {
protected Map<byte[], List<Path>>[] handleBulkLoad(List<TableName> sTableList) throws IOException {
Map<byte[], List<Path>>[] mapForSrc = new Map[sTableList.size()];
List<String> activeFiles = new ArrayList<String>();
List<String> archiveFiles = new ArrayList<String>();
Pair<Map<TableName, Map<String, Map<String, List<Pair<String, Boolean>>>>>, List<byte[]>> pair =
Map<TableName, Map<String, Map<String, List<Pair<String, Boolean>>>>> map = pair.getFirst();
@ -128,7 +127,6 @@ public class IncrementalTableBackupClient extends TableBackupClient {
for (Map.Entry<TableName, Map<String, Map<String, List<Pair<String, Boolean>>>>> tblEntry :
map.entrySet()) {
TableName srcTable = tblEntry.getKey();
int srcIdx = getIndex(srcTable, sTableList);
if (srcIdx < 0) {
LOG.warn("Couldn't find " + srcTable + " in source table List");
@ -164,6 +162,7 @@ public class IncrementalTableBackupClient extends TableBackupClient {
for (Pair<String, Boolean> fileWithState : famEntry.getValue()) {
String file = fileWithState.getFirst();
boolean raw = fileWithState.getSecond();
int idx = file.lastIndexOf("/");
String filename = file;
if (idx > 0) {
@ -176,55 +175,37 @@ public class IncrementalTableBackupClient extends TableBackupClient {
if (LOG.isTraceEnabled()) {
LOG.trace("found bulk hfile " + file + " in " + famDir + " for " + tblName);
try {
if (LOG.isTraceEnabled()) {
LOG.trace("copying " + p + " to " + tgt);
} else if (fs.exists(archive)){
FileUtil.copy(fs, p, tgtFs, tgt, false,conf);
} catch (FileNotFoundException e) {
LOG.debug("copying archive " + archive + " to " + tgt);
try {
FileUtil.copy(fs, archive, tgtFs, tgt, false, conf);
} catch (FileNotFoundException fnfe) {
if (!raw) throw fnfe;
} else {
LOG.debug("copying archive " + archive + " to " + tgt);
try {
FileUtil.copy(fs, archive, tgtFs, tgt, false, conf);
} catch (FileNotFoundException fnfe) {
if (!raw) throw fnfe;
copyBulkLoadedFiles(activeFiles, archiveFiles);
backupManager.writeBulkLoadedFiles(sTableList, mapForSrc);
backupManager.removeBulkLoadedRows(sTableList, pair.getSecond());
return mapForSrc;
private void copyBulkLoadedFiles(List<String> activeFiles, List<String> archiveFiles)
throws IOException
try {
conf.setInt(FixedRelativePathCopyListing.NUMBER_OF_LEVELS_TO_PRESERVE_KEY, 5);
// Copy active files
String tgtDest = backupInfo.getBackupRootDir() + Path.SEPARATOR + backupInfo.getBackupId();
if (activeFiles.size() > 0) {
String[] toCopy = new String[activeFiles.size()];
incrementalCopyHFiles(toCopy, tgtDest);
if (archiveFiles.size() > 0) {
String[] toCopy = new String[archiveFiles.size()];
incrementalCopyHFiles(toCopy, tgtDest);
} finally {
public void execute() throws IOException {
@ -248,8 +229,8 @@ public class IncrementalTableBackupClient extends TableBackupClient {
// copy out the table and region info files for each table
BackupUtils.copyTableRegionInfo(conn, backupInfo, conf);
// convert WAL to HFiles and copy them to .tmp under BACKUP_ROOT
incrementalCopyHFiles(new String[] {getBulkOutputDir().toString()}, backupInfo.getBackupRootDir());
// Save list of WAL files copied
} catch (Exception e) {
@ -288,25 +269,27 @@ public class IncrementalTableBackupClient extends TableBackupClient {
protected void incrementalCopyHFiles(String[] files, String backupDest) throws IOException {
protected void incrementalCopyHFiles(BackupInfo backupInfo) throws Exception {
try {
LOG.debug("Incremental copy HFiles is starting. dest="+backupDest);
LOG.debug("Incremental copy HFiles is starting.");
// set overall backup phase: incremental_copy
// get incremental backup file list and prepare parameters for DistCp
String[] strArr = new String[files.length + 1];
System.arraycopy(files, 0, strArr, 0, files.length);
strArr[strArr.length - 1] = backupDest;
List<String> incrBackupFileList = new ArrayList<String>();
// Add Bulk output
String[] strArr = incrBackupFileList.toArray(new String[incrBackupFileList.size() + 1]);
strArr[strArr.length - 1] = backupInfo.getBackupRootDir();
BackupCopyJob copyService = BackupRestoreFactory.getBackupCopyJob(conf);
int res = copyService.copy(backupInfo, backupManager, conf, BackupType.INCREMENTAL, strArr);
if (res != 0) {
LOG.error("Copy incremental HFile files failed with return code: " + res + ".");
throw new IOException("Failed copy from " + StringUtils.join(files, ',')
+ " to " + backupDest);
throw new IOException("Failed copy from " + StringUtils.join(incrBackupFileList, ',')
+ " to " + backupInfo.getHLogTargetDir());
LOG.debug("Incremental copy HFiles from " + StringUtils.join(files, ',')
+ " to " + backupDest + " finished.");
LOG.debug("Incremental copy HFiles from " + StringUtils.join(incrBackupFileList, ',')
+ " to " + backupInfo.getBackupRootDir() + " finished.");
} finally {
@ -323,7 +306,7 @@ public class IncrementalTableBackupClient extends TableBackupClient {
protected void convertWALsToHFiles() throws IOException {
protected void convertWALsToHFiles(BackupInfo backupInfo) throws IOException {
// get incremental backup file list and prepare parameters for DistCp
List<String> incrBackupFileList = backupInfo.getIncrBackupFileList();
// Get list of tables in incremental backup set
@ -142,7 +142,6 @@ public class MapReduceBackupCopyJob implements BackupCopyJob {
* Only the argument "src1, [src2, [...]] dst" is supported,
* no more DistCp options.
class BackupDistCp extends DistCp {
private BackupInfo backupInfo;
@ -155,7 +154,6 @@ public class MapReduceBackupCopyJob implements BackupCopyJob {
this.backupManager = backupManager;
public Job execute() throws Exception {
@ -251,7 +249,7 @@ public class MapReduceBackupCopyJob implements BackupCopyJob {
LOG.debug("Backup progress data updated to backup system table: \"Progress: "
+ newProgressStr + " - " + bytesCopied + " bytes copied.\"");
} catch (Throwable t) {
LOG.error("distcp " + job == null ? "" : job.getJobID() + " encountered error", t);
throw t;
} finally {
if (!fieldSubmitted.getBoolean(this)) {
@ -1,288 +0,0 @@
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* See the License for the specific language governing permissions and
* limitations under the License.
package org.apache.hadoop.hbase.backup.util;
import java.io.IOException;
import java.util.Stack;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.tools.CopyListingFileStatus;
import org.apache.hadoop.tools.DistCpOptions;
import org.apache.hadoop.tools.DistCpOptions.FileAttribute;
import org.apache.hadoop.tools.SimpleCopyListing;
import org.apache.hadoop.tools.util.DistCpUtils;
import org.apache.yetus.audience.InterfaceAudience;
import com.google.common.annotations.VisibleForTesting;
* The FixedRelativePathCopyListing is responsible for making the exhaustive list of
* all files/directories under its specified list of input-paths.
* These are written into the specified copy-listing file.
* This CopyListing implementation allows preserving fixed-level structures
* between source and destination paths for every file being copied.
* Note: The FixedRelativePathCopyListing doesn't handle wild-cards in the input-paths.
public class FixedRelativePathCopyListing extends SimpleCopyListing {
public static final String NUMBER_OF_LEVELS_TO_PRESERVE_KEY = "num.levels.preserve";
private static final Log LOG = LogFactory.getLog(FixedRelativePathCopyListing.class);
private long totalPaths = 0;
private long totalBytesToCopy = 0;
private int numLevelsToPreserve = 1;
* Public constructor, to initialize configuration.
* @param configuration The input configuration, with which the source/target FileSystems may be accessed.
* @param credentials - Credentials object on which the FS delegation tokens are cached. If null,
* delegation token caching is skipped
public FixedRelativePathCopyListing(Configuration configuration, Credentials credentials) {
super(configuration, credentials);
this.numLevelsToPreserve = configuration.getInt(NUMBER_OF_LEVELS_TO_PRESERVE_KEY, 1);
/** {@inheritDoc} */
public void doBuildListing(Path pathToListingFile, DistCpOptions options) throws IOException {
doBuildListing(getWriter(pathToListingFile), options);
* Collect the list of
* {@literal <sourceRelativePath, sourceFileStatus>}
* to be copied and write to the sequence file. In essence, any file or
* directory that needs to be copied or synced is written as an entry to the
* sequence file, with the possible exception of the source root:
* when either -update (sync) or -overwrite switch is specified, and if
* the source root is a directory, then the source root entry is not
* written to the sequence file, because only the contents of the source
* directory need to be copied in this case.
* See {@link org.apache.hadoop.tools.util.DistCpUtils#getRelativePath} for
* how relative path is computed.
* See computeSourceRootPath method for how the root path of the source is
* computed.
* @param fileListWriter
* @param options
* @throws IOException
public void doBuildListing(SequenceFile.Writer fileListWriter,
DistCpOptions options) throws IOException {
try {
for (Path path: options.getSourcePaths()) {
FileSystem sourceFS = path.getFileSystem(getConf());
final boolean preserveAcls = options.shouldPreserve(FileAttribute.ACL);
final boolean preserveXAttrs = options.shouldPreserve(FileAttribute.XATTR);
final boolean preserveRawXAttrs = options.shouldPreserveRawXattrs();
path = makeQualified(path);
FileStatus rootStatus = sourceFS.getFileStatus(path);
Path sourcePathRoot = computeSourceRootPath(rootStatus, options);
FileStatus[] sourceFiles = sourceFS.listStatus(path);
boolean explore = (sourceFiles != null && sourceFiles.length > 0);
if (!explore || rootStatus.isDirectory()) {
CopyListingFileStatus rootCopyListingStatus =
DistCpUtils.toCopyListingFileStatus(sourceFS, rootStatus,
preserveAcls, preserveXAttrs, preserveRawXAttrs);
writeToFileListingRoot(fileListWriter, rootCopyListingStatus,
sourcePathRoot, options);
if (explore) {
for (FileStatus sourceStatus: sourceFiles) {
if (LOG.isDebugEnabled()) {
LOG.debug("Recording source-path: " + sourceStatus.getPath() + " for copy.");
CopyListingFileStatus sourceCopyListingStatus =
DistCpUtils.toCopyListingFileStatus(sourceFS, sourceStatus,
preserveAcls && sourceStatus.isDirectory(),
preserveXAttrs && sourceStatus.isDirectory(),
preserveRawXAttrs && sourceStatus.isDirectory());
writeToFileListing(fileListWriter, sourceCopyListingStatus,
sourcePathRoot, options);
if (isDirectoryAndNotEmpty(sourceFS, sourceStatus)) {
if (LOG.isDebugEnabled()) {
LOG.debug("Traversing non-empty source dir: " + sourceStatus.getPath());
traverseNonEmptyDirectory(fileListWriter, sourceStatus, sourcePathRoot,
fileListWriter = null;
} finally {
IOUtils.cleanup(LOG, fileListWriter);
private Path computeSourceRootPath(FileStatus sourceStatus,
DistCpOptions options) throws IOException {
Path path = sourceStatus.getPath();
int level = 0;
while (level++ < numLevelsToPreserve) {
path = path.getParent();
return path;
* Provides an option to skip copying a path; allows for exclusion
* of files such as {@link org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter#SUCCEEDED_FILE_NAME}
* @param path - Path being considered for copy while building the file listing
* @param options - Input options passed during DistCp invocation
* @return - True if the path should be considered for copy, false otherwise
protected boolean shouldCopy(Path path, DistCpOptions options) {
return true;
/** {@inheritDoc} */
protected long getBytesToCopy() {
return totalBytesToCopy;
/** {@inheritDoc} */
protected long getNumberOfPaths() {
return totalPaths;
private Path makeQualified(Path path) throws IOException {
final FileSystem fs = path.getFileSystem(getConf());
return path.makeQualified(fs.getUri(), fs.getWorkingDirectory());
private SequenceFile.Writer getWriter(Path pathToListFile) throws IOException {
FileSystem fs = pathToListFile.getFileSystem(getConf());
if (fs.exists(pathToListFile)) {
fs.delete(pathToListFile, false);
return SequenceFile.createWriter(getConf(),
private static boolean isDirectoryAndNotEmpty(FileSystem fileSystem,
FileStatus fileStatus) throws IOException {
return fileStatus.isDirectory() && getChildren(fileSystem, fileStatus).length > 0;
private static FileStatus[] getChildren(FileSystem fileSystem,
FileStatus parent) throws IOException {
return fileSystem.listStatus(parent.getPath());
private void traverseNonEmptyDirectory(SequenceFile.Writer fileListWriter,
FileStatus sourceStatus,
Path sourcePathRoot,
DistCpOptions options)
throws IOException {
FileSystem sourceFS = sourcePathRoot.getFileSystem(getConf());
final boolean preserveAcls = options.shouldPreserve(FileAttribute.ACL);
final boolean preserveXAttrs = options.shouldPreserve(FileAttribute.XATTR);
final boolean preserveRawXattrs = options.shouldPreserveRawXattrs();
Stack<FileStatus> pathStack = new Stack<FileStatus>();
while (!pathStack.isEmpty()) {
for (FileStatus child: getChildren(sourceFS, pathStack.pop())) {
if (LOG.isDebugEnabled())
LOG.debug("Recording source-path: "
+ sourceStatus.getPath() + " for copy.");
CopyListingFileStatus childCopyListingStatus =
DistCpUtils.toCopyListingFileStatus(sourceFS, child,
preserveAcls && child.isDirectory(),
preserveXAttrs && child.isDirectory(),
preserveRawXattrs && child.isDirectory());
writeToFileListing(fileListWriter, childCopyListingStatus,
sourcePathRoot, options);
if (isDirectoryAndNotEmpty(sourceFS, child)) {
if (LOG.isDebugEnabled())
LOG.debug("Traversing non-empty source dir: "
+ sourceStatus.getPath());
private void writeToFileListingRoot(SequenceFile.Writer fileListWriter,
CopyListingFileStatus fileStatus, Path sourcePathRoot,
DistCpOptions options) throws IOException {
boolean syncOrOverwrite = options.shouldSyncFolder() ||
if (fileStatus.getPath().equals(sourcePathRoot) &&
fileStatus.isDirectory() && syncOrOverwrite) {
// Skip the root-paths when syncOrOverwrite
if (LOG.isDebugEnabled()) {
LOG.debug("Skip " + fileStatus.getPath());
writeToFileListing(fileListWriter, fileStatus, sourcePathRoot, options);
private void writeToFileListing(SequenceFile.Writer fileListWriter,
CopyListingFileStatus fileStatus,
Path sourcePathRoot,
DistCpOptions options) throws IOException {
if (LOG.isDebugEnabled()) {
LOG.debug("REL PATH: " + DistCpUtils.getRelativePath(sourcePathRoot,
fileStatus.getPath()) + ", FULL PATH: " + fileStatus.getPath());
FileStatus status = fileStatus;
if (!shouldCopy(fileStatus.getPath(), options)) {
fileListWriter.append(new Text(DistCpUtils.getRelativePath(sourcePathRoot,
fileStatus.getPath())), status);
if (!fileStatus.isDirectory()) {
totalBytesToCopy += fileStatus.getLen();
@ -136,9 +136,8 @@ public class TestBackupBase {
// copy out the table and region info files for each table
BackupUtils.copyTableRegionInfo(conn, backupInfo, conf);
// convert WAL to HFiles and copy them to .tmp under BACKUP_ROOT
incrementalCopyHFiles(new String[] {getBulkOutputDir().toString()},
// Save list of WAL files copied
Reference in New Issue
Block a user