HBASE-18843 Add DistCp support to incremental backup with bulk loading (Vladimir Rodionov)

This commit is contained in:
tedyu 2017-09-26 20:59:12 -07:00
parent 7e9534746c
commit 447b591b08
4 changed files with 346 additions and 38 deletions

View File

@ -18,7 +18,6 @@
package org.apache.hadoop.hbase.backup.impl;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
@ -33,17 +32,15 @@ import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.BackupCopyJob;
import org.apache.hadoop.hbase.backup.BackupInfo;
import org.apache.hadoop.hbase.backup.BackupInfo.BackupPhase;
import org.apache.hadoop.hbase.backup.BackupRequest;
import org.apache.hadoop.hbase.backup.BackupRestoreFactory;
import org.apache.hadoop.hbase.backup.BackupType;
import org.apache.hadoop.hbase.backup.util.BackupUtils;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.backup.util.FixedRelativePathCopyListing;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.mapreduce.WALPlayer;
@ -52,7 +49,9 @@ import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.HFileArchiveUtil;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
import org.apache.hadoop.tools.DistCpConstants;
import org.apache.hadoop.util.Tool;
import org.apache.yetus.audience.InterfaceAudience;
/**
* Incremental backup implementation.
@ -112,6 +111,8 @@ public class IncrementalTableBackupClient extends TableBackupClient {
*/
protected Map<byte[], List<Path>>[] handleBulkLoad(List<TableName> sTableList) throws IOException {
Map<byte[], List<Path>>[] mapForSrc = new Map[sTableList.size()];
List<String> activeFiles = new ArrayList<String>();
List<String> archiveFiles = new ArrayList<String>();
Pair<Map<TableName, Map<String, Map<String, List<Pair<String, Boolean>>>>>, List<byte[]>> pair =
backupManager.readBulkloadRows(sTableList);
Map<TableName, Map<String, Map<String, List<Pair<String, Boolean>>>>> map = pair.getFirst();
@ -127,6 +128,7 @@ public class IncrementalTableBackupClient extends TableBackupClient {
for (Map.Entry<TableName, Map<String, Map<String, List<Pair<String, Boolean>>>>> tblEntry :
map.entrySet()) {
TableName srcTable = tblEntry.getKey();
int srcIdx = getIndex(srcTable, sTableList);
if (srcIdx < 0) {
LOG.warn("Couldn't find " + srcTable + " in source table List");
@ -162,7 +164,6 @@ public class IncrementalTableBackupClient extends TableBackupClient {
}
for (Pair<String, Boolean> fileWithState : famEntry.getValue()) {
String file = fileWithState.getFirst();
boolean raw = fileWithState.getSecond();
int idx = file.lastIndexOf("/");
String filename = file;
if (idx > 0) {
@ -175,37 +176,55 @@ public class IncrementalTableBackupClient extends TableBackupClient {
if (LOG.isTraceEnabled()) {
LOG.trace("found bulk hfile " + file + " in " + famDir + " for " + tblName);
}
try {
if (LOG.isTraceEnabled()) {
LOG.trace("copying " + p + " to " + tgt);
}
FileUtil.copy(fs, p, tgtFs, tgt, false,conf);
} catch (FileNotFoundException e) {
activeFiles.add(p.toString());
} else if (fs.exists(archive)){
LOG.debug("copying archive " + archive + " to " + tgt);
try {
FileUtil.copy(fs, archive, tgtFs, tgt, false, conf);
} catch (FileNotFoundException fnfe) {
if (!raw) throw fnfe;
}
}
} else {
LOG.debug("copying archive " + archive + " to " + tgt);
try {
FileUtil.copy(fs, archive, tgtFs, tgt, false, conf);
} catch (FileNotFoundException fnfe) {
if (!raw) throw fnfe;
}
archiveFiles.add(archive.toString());
}
files.add(tgt);
}
}
}
}
copyBulkLoadedFiles(activeFiles, archiveFiles);
backupManager.writeBulkLoadedFiles(sTableList, mapForSrc);
backupManager.removeBulkLoadedRows(sTableList, pair.getSecond());
return mapForSrc;
}
/**
 * Copies the bulk loaded files collected by the caller into
 * {@code <backupRootDir>/<backupId>} by running the incremental copy job once for the active
 * files and once for the archived files.
 * <p>
 * While the copies run, the configuration is switched to {@link FixedRelativePathCopyListing}
 * with 5 levels to preserve. Whatever values the two configuration keys held before this call
 * are restored afterwards (or the keys are unset if they were not set), so a previously
 * configured copy listing is not lost.
 * @param activeFiles paths of bulk loaded files to copy from their active location
 * @param archiveFiles paths of bulk loaded files to copy from the archive
 * @throws IOException if one of the copy jobs fails
 */
private void copyBulkLoadedFiles(List<String> activeFiles, List<String> archiveFiles)
    throws IOException {
  final String listingKey = DistCpConstants.CONF_LABEL_COPY_LISTING_CLASS;
  final String levelsKey = FixedRelativePathCopyListing.NUMBER_OF_LEVELS_TO_PRESERVE_KEY;
  // Remember the caller's settings: conf is shared with the rest of the backup session.
  final String previousListing = conf.get(listingKey);
  final String previousLevels = conf.get(levelsKey);
  try {
    conf.set(listingKey, FixedRelativePathCopyListing.class.getName());
    // NOTE(review): 5 presumably covers namespace/table/region/family/file - confirm against
    // the layout of the paths collected by the caller.
    conf.setInt(levelsKey, 5);
    String tgtDest = backupInfo.getBackupRootDir() + Path.SEPARATOR + backupInfo.getBackupId();
    // Copy active files
    if (!activeFiles.isEmpty()) {
      incrementalCopyHFiles(activeFiles.toArray(new String[0]), tgtDest);
    }
    // Copy archived files
    if (!archiveFiles.isEmpty()) {
      incrementalCopyHFiles(archiveFiles.toArray(new String[0]), tgtDest);
    }
  } finally {
    if (previousListing != null) {
      conf.set(listingKey, previousListing);
    } else {
      conf.unset(listingKey);
    }
    if (previousLevels != null) {
      conf.set(levelsKey, previousLevels);
    } else {
      conf.unset(levelsKey);
    }
  }
}
@Override
public void execute() throws IOException {
@ -229,8 +248,8 @@ public class IncrementalTableBackupClient extends TableBackupClient {
// copy out the table and region info files for each table
BackupUtils.copyTableRegionInfo(conn, backupInfo, conf);
// convert WAL to HFiles and copy them to .tmp under BACKUP_ROOT
convertWALsToHFiles(backupInfo);
incrementalCopyHFiles(backupInfo);
convertWALsToHFiles();
incrementalCopyHFiles(new String[] {getBulkOutputDir().toString()}, backupInfo.getBackupRootDir());
// Save list of WAL files copied
backupManager.recordWALFiles(backupInfo.getIncrBackupFileList());
} catch (Exception e) {
@ -269,27 +288,25 @@ public class IncrementalTableBackupClient extends TableBackupClient {
}
}
protected void incrementalCopyHFiles(BackupInfo backupInfo) throws Exception {
protected void incrementalCopyHFiles(String[] files, String backupDest) throws IOException {
try {
LOG.debug("Incremental copy HFiles is starting.");
LOG.debug("Incremental copy HFiles is starting. dest="+backupDest);
// set overall backup phase: incremental_copy
backupInfo.setPhase(BackupPhase.INCREMENTAL_COPY);
// get incremental backup file list and prepare parms for DistCp
List<String> incrBackupFileList = new ArrayList<String>();
// Add Bulk output
incrBackupFileList.add(getBulkOutputDir().toString());
String[] strArr = incrBackupFileList.toArray(new String[incrBackupFileList.size() + 1]);
strArr[strArr.length - 1] = backupInfo.getBackupRootDir();
String[] strArr = new String[files.length + 1];
System.arraycopy(files, 0, strArr, 0, files.length);
strArr[strArr.length - 1] = backupDest;
BackupCopyJob copyService = BackupRestoreFactory.getBackupCopyJob(conf);
int res = copyService.copy(backupInfo, backupManager, conf, BackupType.INCREMENTAL, strArr);
if (res != 0) {
LOG.error("Copy incremental HFile files failed with return code: " + res + ".");
throw new IOException("Failed copy from " + StringUtils.join(incrBackupFileList, ',')
+ " to " + backupInfo.getHLogTargetDir());
throw new IOException("Failed copy from " + StringUtils.join(files, ',')
+ " to " + backupDest);
}
LOG.debug("Incremental copy HFiles from " + StringUtils.join(incrBackupFileList, ',')
+ " to " + backupInfo.getBackupRootDir() + " finished.");
LOG.debug("Incremental copy HFiles from " + StringUtils.join(files, ',')
+ " to " + backupDest + " finished.");
} finally {
deleteBulkLoadDirectory();
}
@ -306,7 +323,7 @@ public class IncrementalTableBackupClient extends TableBackupClient {
}
protected void convertWALsToHFiles(BackupInfo backupInfo) throws IOException {
protected void convertWALsToHFiles() throws IOException {
// get incremental backup file list and prepare parameters for DistCp
List<String> incrBackupFileList = backupInfo.getIncrBackupFileList();
// Get list of tables in incremental backup set

View File

@ -142,6 +142,7 @@ public class MapReduceBackupCopyJob implements BackupCopyJob {
* Only the argument "src1, [src2, [...]] dst" is supported,
* no more DistCp options.
*/
class BackupDistCp extends DistCp {
private BackupInfo backupInfo;
@ -154,6 +155,7 @@ public class MapReduceBackupCopyJob implements BackupCopyJob {
this.backupManager = backupManager;
}
@Override
public Job execute() throws Exception {
@ -249,7 +251,7 @@ public class MapReduceBackupCopyJob implements BackupCopyJob {
LOG.debug("Backup progress data updated to backup system table: \"Progress: "
+ newProgressStr + " - " + bytesCopied + " bytes copied.\"");
} catch (Throwable t) {
LOG.error("distcp " + job == null ? "" : job.getJobID() + " encountered error", t);
LOG.error(t);
throw t;
} finally {
if (!fieldSubmitted.getBoolean(this)) {

View File

@ -0,0 +1,288 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.backup.util;
import java.io.IOException;
import java.util.Stack;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.tools.CopyListingFileStatus;
import org.apache.hadoop.tools.DistCpOptions;
import org.apache.hadoop.tools.DistCpOptions.FileAttribute;
import org.apache.hadoop.tools.SimpleCopyListing;
import org.apache.hadoop.tools.util.DistCpUtils;
import org.apache.yetus.audience.InterfaceAudience;
import com.google.common.annotations.VisibleForTesting;
/**
 * The FixedRelativePathCopyListing is responsible for making the exhaustive list of
 * all files/directories under its specified list of input-paths.
 * These are written into the specified copy-listing file.
 * <p>
 * For every source path the root used to compute relative paths is the ancestor located
 * {@link #NUMBER_OF_LEVELS_TO_PRESERVE_KEY} levels above that source path, so a fixed number of
 * trailing path components is preserved between source and destination for every file
 * being copied.
 * <p>
 * Note: The FixedRelativePathCopyListing doesn't handle wild-cards in the input-paths.
 */
@InterfaceAudience.Private
public class FixedRelativePathCopyListing extends SimpleCopyListing {

  /**
   * Configuration key holding the number of parent levels to climb from each source path when
   * computing the root that relative paths are resolved against. Defaults to 1.
   */
  public static final String NUMBER_OF_LEVELS_TO_PRESERVE_KEY = "num.levels.preserve";

  private static final Log LOG = LogFactory.getLog(FixedRelativePathCopyListing.class);

  /** Number of entries appended to the listing so far. */
  private long totalPaths = 0;
  /** Sum of the lengths of all non-directory entries appended so far. */
  private long totalBytesToCopy = 0;
  /** Value of {@link #NUMBER_OF_LEVELS_TO_PRESERVE_KEY} read at construction time. */
  private final int numLevelsToPreserve;

  /**
   * Public constructor, to initialize configuration.
   *
   * @param configuration The input configuration, with which the source/target FileSystems may
   *          be accessed. {@link #NUMBER_OF_LEVELS_TO_PRESERVE_KEY} is read from it.
   * @param credentials - Credentials object on which the FS delegation tokens are cached. If
   *          null delegation token caching is skipped
   */
  public FixedRelativePathCopyListing(Configuration configuration, Credentials credentials) {
    super(configuration, credentials);
    this.numLevelsToPreserve = configuration.getInt(NUMBER_OF_LEVELS_TO_PRESERVE_KEY, 1);
  }

  /** {@inheritDoc} */
  @Override
  public void doBuildListing(Path pathToListingFile, DistCpOptions options) throws IOException {
    doBuildListing(getWriter(pathToListingFile), options);
  }

  /**
   * Collect the list of
   * {@literal <sourceRelativePath, sourceFileStatus>}
   * to be copied and write to the sequence file. Every file or directory found under the
   * source paths is written as an entry to the sequence file.
   * <p>
   * A source path itself is written when it is a directory or has no children, except that a
   * directory equal to the computed source root is skipped when -update (sync) or -overwrite
   * is specified.
   * See {@link org.apache.hadoop.tools.util.DistCpUtils#getRelativePath} for
   * how relative path is computed.
   * See computeSourceRootPath method for how the root path of the source is
   * computed.
   * <p>
   * The writer is always closed before this method returns.
   * @param fileListWriter writer of the copy-listing sequence file
   * @param options DistCp options supplying the source paths and preserve flags
   * @throws IOException if a source path cannot be listed, is shallower than the number of
   *           levels to preserve, or the listing cannot be written
   */
  @Override
  @VisibleForTesting
  public void doBuildListing(SequenceFile.Writer fileListWriter,
      DistCpOptions options) throws IOException {
    try {
      // The preserve flags depend only on the options, not on the individual source path.
      final boolean preserveAcls = options.shouldPreserve(FileAttribute.ACL);
      final boolean preserveXAttrs = options.shouldPreserve(FileAttribute.XATTR);
      final boolean preserveRawXAttrs = options.shouldPreserveRawXattrs();

      for (Path sourcePath : options.getSourcePaths()) {
        FileSystem sourceFS = sourcePath.getFileSystem(getConf());
        Path qualified = makeQualified(sourcePath);
        FileStatus rootStatus = sourceFS.getFileStatus(qualified);
        Path sourcePathRoot = computeSourceRootPath(rootStatus);

        FileStatus[] sourceFiles = sourceFS.listStatus(qualified);
        boolean explore = (sourceFiles != null && sourceFiles.length > 0);
        if (!explore || rootStatus.isDirectory()) {
          CopyListingFileStatus rootCopyListingStatus =
              DistCpUtils.toCopyListingFileStatus(sourceFS, rootStatus,
                preserveAcls, preserveXAttrs, preserveRawXAttrs);
          writeToFileListingRoot(fileListWriter, rootCopyListingStatus,
            sourcePathRoot, options);
        }
        if (!explore) {
          continue;
        }
        for (FileStatus sourceStatus : sourceFiles) {
          if (LOG.isDebugEnabled()) {
            LOG.debug("Recording source-path: " + sourceStatus.getPath() + " for copy.");
          }
          // ACLs/XAttrs are only requested for directories.
          final boolean isDir = sourceStatus.isDirectory();
          CopyListingFileStatus sourceCopyListingStatus =
              DistCpUtils.toCopyListingFileStatus(sourceFS, sourceStatus,
                preserveAcls && isDir,
                preserveXAttrs && isDir,
                preserveRawXAttrs && isDir);
          writeToFileListing(fileListWriter, sourceCopyListingStatus,
            sourcePathRoot, options);

          if (isDirectoryAndNotEmpty(sourceFS, sourceStatus)) {
            if (LOG.isDebugEnabled()) {
              LOG.debug("Traversing non-empty source dir: " + sourceStatus.getPath());
            }
            traverseNonEmptyDirectory(fileListWriter, sourceStatus, sourcePathRoot,
              options);
          }
        }
      }
      // Close explicitly so that a failure to close is reported; null the reference so the
      // cleanup below does not close it a second time.
      fileListWriter.close();
      fileListWriter = null;
    } finally {
      IOUtils.cleanup(LOG, fileListWriter);
    }
  }

  /**
   * Computes the root against which relative paths are resolved: the ancestor located
   * {@code numLevelsToPreserve} levels above the given source path.
   * @param sourceStatus status of the (qualified) source path
   * @return the ancestor path; the source path itself if no levels are to be preserved
   * @throws IOException if the source path has fewer ancestors than levels to preserve
   */
  private Path computeSourceRootPath(FileStatus sourceStatus) throws IOException {
    Path root = sourceStatus.getPath();
    for (int level = 0; level < numLevelsToPreserve; level++) {
      Path parent = root.getParent();
      if (parent == null) {
        // Fail with a meaningful message instead of a NullPointerException further down.
        throw new IOException("Cannot preserve " + numLevelsToPreserve
            + " path level(s) for " + sourceStatus.getPath() + ": only " + level
            + " ancestor level(s) available");
      }
      root = parent;
    }
    return root;
  }

  /**
   * Provide an option to skip copy of a path. This implementation does not exclude anything.
   * @param path - Path being considered for copy while building the file listing
   * @param options - Input options passed during DistCp invocation
   * @return - always true, i.e. every path is considered for copy
   */
  @Override
  protected boolean shouldCopy(Path path, DistCpOptions options) {
    return true;
  }

  /** {@inheritDoc} */
  @Override
  protected long getBytesToCopy() {
    return totalBytesToCopy;
  }

  /** {@inheritDoc} */
  @Override
  protected long getNumberOfPaths() {
    return totalPaths;
  }

  /** Qualifies the path with the URI and working directory of its own file system. */
  private Path makeQualified(Path path) throws IOException {
    final FileSystem fs = path.getFileSystem(getConf());
    return path.makeQualified(fs.getUri(), fs.getWorkingDirectory());
  }

  /**
   * Creates an uncompressed sequence file writer for the listing file, deleting a
   * pre-existing file at that location first.
   */
  private SequenceFile.Writer getWriter(Path pathToListFile) throws IOException {
    FileSystem fs = pathToListFile.getFileSystem(getConf());
    if (fs.exists(pathToListFile)) {
      fs.delete(pathToListFile, false);
    }
    return SequenceFile.createWriter(getConf(),
      SequenceFile.Writer.file(pathToListFile),
      SequenceFile.Writer.keyClass(Text.class),
      SequenceFile.Writer.valueClass(CopyListingFileStatus.class),
      SequenceFile.Writer.compression(SequenceFile.CompressionType.NONE));
  }

  /** @return true if the status denotes a directory that has at least one child */
  private static boolean isDirectoryAndNotEmpty(FileSystem fileSystem,
      FileStatus fileStatus) throws IOException {
    return fileStatus.isDirectory() && getChildren(fileSystem, fileStatus).length > 0;
  }

  /** Lists the direct children of the given parent. */
  private static FileStatus[] getChildren(FileSystem fileSystem,
      FileStatus parent) throws IOException {
    return fileSystem.listStatus(parent.getPath());
  }

  /**
   * Walks the tree below {@code sourceStatus} iteratively (explicit stack, no recursion) and
   * appends every descendant to the listing.
   * @param fileListWriter writer of the copy-listing sequence file
   * @param sourceStatus non-empty directory to descend into
   * @param sourcePathRoot root against which relative paths are resolved
   * @param options DistCp options supplying the preserve flags
   * @throws IOException if a directory cannot be listed or the listing cannot be written
   */
  private void traverseNonEmptyDirectory(SequenceFile.Writer fileListWriter,
      FileStatus sourceStatus,
      Path sourcePathRoot,
      DistCpOptions options)
      throws IOException {
    FileSystem sourceFS = sourcePathRoot.getFileSystem(getConf());
    final boolean preserveAcls = options.shouldPreserve(FileAttribute.ACL);
    final boolean preserveXAttrs = options.shouldPreserve(FileAttribute.XATTR);
    final boolean preserveRawXattrs = options.shouldPreserveRawXattrs();
    Stack<FileStatus> pathStack = new Stack<FileStatus>();
    pathStack.push(sourceStatus);

    while (!pathStack.isEmpty()) {
      for (FileStatus child : getChildren(sourceFS, pathStack.pop())) {
        if (LOG.isDebugEnabled()) {
          // Log the entry actually being recorded, not the directory the walk started from.
          LOG.debug("Recording source-path: " + child.getPath() + " for copy.");
        }
        final boolean isDir = child.isDirectory();
        CopyListingFileStatus childCopyListingStatus =
            DistCpUtils.toCopyListingFileStatus(sourceFS, child,
              preserveAcls && isDir,
              preserveXAttrs && isDir,
              preserveRawXattrs && isDir);
        writeToFileListing(fileListWriter, childCopyListingStatus,
          sourcePathRoot, options);
        if (isDirectoryAndNotEmpty(sourceFS, child)) {
          if (LOG.isDebugEnabled()) {
            LOG.debug("Traversing non-empty source dir: " + child.getPath());
          }
          pathStack.push(child);
        }
      }
    }
  }

  /**
   * Appends the entry of a source path itself, unless it is a directory equal to the source
   * root and -update (sync) or -overwrite was requested.
   */
  private void writeToFileListingRoot(SequenceFile.Writer fileListWriter,
      CopyListingFileStatus fileStatus, Path sourcePathRoot,
      DistCpOptions options) throws IOException {
    boolean syncOrOverwrite = options.shouldSyncFolder() ||
        options.shouldOverwrite();
    if (fileStatus.getPath().equals(sourcePathRoot) &&
        fileStatus.isDirectory() && syncOrOverwrite) {
      // Skip the root-paths when syncOrOverwrite
      if (LOG.isDebugEnabled()) {
        LOG.debug("Skip " + fileStatus.getPath());
      }
      return;
    }
    writeToFileListing(fileListWriter, fileStatus, sourcePathRoot, options);
  }

  /**
   * Appends one {@literal <relativePath, status>} entry to the listing and updates the path
   * and byte counters. Nothing is written for paths rejected by
   * {@link #shouldCopy(Path, DistCpOptions)}.
   */
  private void writeToFileListing(SequenceFile.Writer fileListWriter,
      CopyListingFileStatus fileStatus,
      Path sourcePathRoot,
      DistCpOptions options) throws IOException {
    final String relativePath =
        DistCpUtils.getRelativePath(sourcePathRoot, fileStatus.getPath());
    if (LOG.isDebugEnabled()) {
      LOG.debug("REL PATH: " + relativePath + ", FULL PATH: " + fileStatus.getPath());
    }

    if (!shouldCopy(fileStatus.getPath(), options)) {
      return;
    }

    fileListWriter.append(new Text(relativePath), fileStatus);
    fileListWriter.sync();

    if (!fileStatus.isDirectory()) {
      totalBytesToCopy += fileStatus.getLen();
    }
    totalPaths++;
  }
}

View File

@ -136,8 +136,9 @@ public class TestBackupBase {
// copy out the table and region info files for each table
BackupUtils.copyTableRegionInfo(conn, backupInfo, conf);
// convert WAL to HFiles and copy them to .tmp under BACKUP_ROOT
convertWALsToHFiles(backupInfo);
incrementalCopyHFiles(backupInfo);
convertWALsToHFiles();
incrementalCopyHFiles(new String[] {getBulkOutputDir().toString()},
backupInfo.getBackupRootDir());
failStageIf(Stage.stage_2);
// Save list of WAL files copied
backupManager.recordWALFiles(backupInfo.getIncrBackupFileList());