HBASE-18843 Add DistCp support to incremental backup with bulk loading
This commit is contained in:
parent
845b83b8b6
commit
5df2fe7063
|
@ -18,7 +18,6 @@
|
||||||
|
|
||||||
package org.apache.hadoop.hbase.backup.impl;
|
package org.apache.hadoop.hbase.backup.impl;
|
||||||
|
|
||||||
import java.io.FileNotFoundException;
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.URI;
|
import java.net.URI;
|
||||||
import java.net.URISyntaxException;
|
import java.net.URISyntaxException;
|
||||||
|
@ -33,17 +32,15 @@ import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.FileUtil;
|
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hadoop.hbase.TableName;
|
import org.apache.hadoop.hbase.TableName;
|
||||||
import org.apache.hadoop.hbase.backup.BackupCopyJob;
|
import org.apache.hadoop.hbase.backup.BackupCopyJob;
|
||||||
import org.apache.hadoop.hbase.backup.BackupInfo;
|
|
||||||
import org.apache.hadoop.hbase.backup.BackupInfo.BackupPhase;
|
import org.apache.hadoop.hbase.backup.BackupInfo.BackupPhase;
|
||||||
import org.apache.hadoop.hbase.backup.BackupRequest;
|
import org.apache.hadoop.hbase.backup.BackupRequest;
|
||||||
import org.apache.hadoop.hbase.backup.BackupRestoreFactory;
|
import org.apache.hadoop.hbase.backup.BackupRestoreFactory;
|
||||||
import org.apache.hadoop.hbase.backup.BackupType;
|
import org.apache.hadoop.hbase.backup.BackupType;
|
||||||
import org.apache.hadoop.hbase.backup.util.BackupUtils;
|
import org.apache.hadoop.hbase.backup.util.BackupUtils;
|
||||||
import org.apache.yetus.audience.InterfaceAudience;
|
import org.apache.hadoop.hbase.backup.util.FixedRelativePathCopyListing;
|
||||||
import org.apache.hadoop.hbase.client.Admin;
|
import org.apache.hadoop.hbase.client.Admin;
|
||||||
import org.apache.hadoop.hbase.client.Connection;
|
import org.apache.hadoop.hbase.client.Connection;
|
||||||
import org.apache.hadoop.hbase.mapreduce.WALPlayer;
|
import org.apache.hadoop.hbase.mapreduce.WALPlayer;
|
||||||
|
@ -52,7 +49,9 @@ import org.apache.hadoop.hbase.util.FSUtils;
|
||||||
import org.apache.hadoop.hbase.util.HFileArchiveUtil;
|
import org.apache.hadoop.hbase.util.HFileArchiveUtil;
|
||||||
import org.apache.hadoop.hbase.util.Pair;
|
import org.apache.hadoop.hbase.util.Pair;
|
||||||
import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
|
import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
|
||||||
|
import org.apache.hadoop.tools.DistCpConstants;
|
||||||
import org.apache.hadoop.util.Tool;
|
import org.apache.hadoop.util.Tool;
|
||||||
|
import org.apache.yetus.audience.InterfaceAudience;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Incremental backup implementation.
|
* Incremental backup implementation.
|
||||||
|
@ -112,6 +111,8 @@ public class IncrementalTableBackupClient extends TableBackupClient {
|
||||||
*/
|
*/
|
||||||
protected Map<byte[], List<Path>>[] handleBulkLoad(List<TableName> sTableList) throws IOException {
|
protected Map<byte[], List<Path>>[] handleBulkLoad(List<TableName> sTableList) throws IOException {
|
||||||
Map<byte[], List<Path>>[] mapForSrc = new Map[sTableList.size()];
|
Map<byte[], List<Path>>[] mapForSrc = new Map[sTableList.size()];
|
||||||
|
List<String> activeFiles = new ArrayList<String>();
|
||||||
|
List<String> archiveFiles = new ArrayList<String>();
|
||||||
Pair<Map<TableName, Map<String, Map<String, List<Pair<String, Boolean>>>>>, List<byte[]>> pair =
|
Pair<Map<TableName, Map<String, Map<String, List<Pair<String, Boolean>>>>>, List<byte[]>> pair =
|
||||||
backupManager.readBulkloadRows(sTableList);
|
backupManager.readBulkloadRows(sTableList);
|
||||||
Map<TableName, Map<String, Map<String, List<Pair<String, Boolean>>>>> map = pair.getFirst();
|
Map<TableName, Map<String, Map<String, List<Pair<String, Boolean>>>>> map = pair.getFirst();
|
||||||
|
@ -127,6 +128,7 @@ public class IncrementalTableBackupClient extends TableBackupClient {
|
||||||
for (Map.Entry<TableName, Map<String, Map<String, List<Pair<String, Boolean>>>>> tblEntry :
|
for (Map.Entry<TableName, Map<String, Map<String, List<Pair<String, Boolean>>>>> tblEntry :
|
||||||
map.entrySet()) {
|
map.entrySet()) {
|
||||||
TableName srcTable = tblEntry.getKey();
|
TableName srcTable = tblEntry.getKey();
|
||||||
|
|
||||||
int srcIdx = getIndex(srcTable, sTableList);
|
int srcIdx = getIndex(srcTable, sTableList);
|
||||||
if (srcIdx < 0) {
|
if (srcIdx < 0) {
|
||||||
LOG.warn("Couldn't find " + srcTable + " in source table List");
|
LOG.warn("Couldn't find " + srcTable + " in source table List");
|
||||||
|
@ -162,7 +164,6 @@ public class IncrementalTableBackupClient extends TableBackupClient {
|
||||||
}
|
}
|
||||||
for (Pair<String, Boolean> fileWithState : famEntry.getValue()) {
|
for (Pair<String, Boolean> fileWithState : famEntry.getValue()) {
|
||||||
String file = fileWithState.getFirst();
|
String file = fileWithState.getFirst();
|
||||||
boolean raw = fileWithState.getSecond();
|
|
||||||
int idx = file.lastIndexOf("/");
|
int idx = file.lastIndexOf("/");
|
||||||
String filename = file;
|
String filename = file;
|
||||||
if (idx > 0) {
|
if (idx > 0) {
|
||||||
|
@ -175,37 +176,55 @@ public class IncrementalTableBackupClient extends TableBackupClient {
|
||||||
if (LOG.isTraceEnabled()) {
|
if (LOG.isTraceEnabled()) {
|
||||||
LOG.trace("found bulk hfile " + file + " in " + famDir + " for " + tblName);
|
LOG.trace("found bulk hfile " + file + " in " + famDir + " for " + tblName);
|
||||||
}
|
}
|
||||||
try {
|
|
||||||
if (LOG.isTraceEnabled()) {
|
if (LOG.isTraceEnabled()) {
|
||||||
LOG.trace("copying " + p + " to " + tgt);
|
LOG.trace("copying " + p + " to " + tgt);
|
||||||
}
|
}
|
||||||
FileUtil.copy(fs, p, tgtFs, tgt, false,conf);
|
activeFiles.add(p.toString());
|
||||||
} catch (FileNotFoundException e) {
|
} else if (fs.exists(archive)){
|
||||||
LOG.debug("copying archive " + archive + " to " + tgt);
|
LOG.debug("copying archive " + archive + " to " + tgt);
|
||||||
try {
|
archiveFiles.add(archive.toString());
|
||||||
FileUtil.copy(fs, archive, tgtFs, tgt, false, conf);
|
|
||||||
} catch (FileNotFoundException fnfe) {
|
|
||||||
if (!raw) throw fnfe;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
LOG.debug("copying archive " + archive + " to " + tgt);
|
|
||||||
try {
|
|
||||||
FileUtil.copy(fs, archive, tgtFs, tgt, false, conf);
|
|
||||||
} catch (FileNotFoundException fnfe) {
|
|
||||||
if (!raw) throw fnfe;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
files.add(tgt);
|
files.add(tgt);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
copyBulkLoadedFiles(activeFiles, archiveFiles);
|
||||||
|
|
||||||
backupManager.writeBulkLoadedFiles(sTableList, mapForSrc);
|
backupManager.writeBulkLoadedFiles(sTableList, mapForSrc);
|
||||||
backupManager.removeBulkLoadedRows(sTableList, pair.getSecond());
|
backupManager.removeBulkLoadedRows(sTableList, pair.getSecond());
|
||||||
return mapForSrc;
|
return mapForSrc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void copyBulkLoadedFiles(List<String> activeFiles, List<String> archiveFiles)
|
||||||
|
throws IOException
|
||||||
|
{
|
||||||
|
|
||||||
|
try {
|
||||||
|
conf.set(DistCpConstants.CONF_LABEL_COPY_LISTING_CLASS,
|
||||||
|
FixedRelativePathCopyListing.class.getName());
|
||||||
|
conf.setInt(FixedRelativePathCopyListing.NUMBER_OF_LEVELS_TO_PRESERVE_KEY, 5);
|
||||||
|
|
||||||
|
// Copy active files
|
||||||
|
String tgtDest = backupInfo.getBackupRootDir() + Path.SEPARATOR + backupInfo.getBackupId();
|
||||||
|
if (activeFiles.size() > 0) {
|
||||||
|
String[] toCopy = new String[activeFiles.size()];
|
||||||
|
activeFiles.toArray(toCopy);
|
||||||
|
incrementalCopyHFiles(toCopy, tgtDest);
|
||||||
|
}
|
||||||
|
if (archiveFiles.size() > 0) {
|
||||||
|
String[] toCopy = new String[archiveFiles.size()];
|
||||||
|
archiveFiles.toArray(toCopy);
|
||||||
|
incrementalCopyHFiles(toCopy, tgtDest);
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
conf.unset(DistCpConstants.CONF_LABEL_COPY_LISTING_CLASS);
|
||||||
|
conf.unset(FixedRelativePathCopyListing.NUMBER_OF_LEVELS_TO_PRESERVE_KEY);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void execute() throws IOException {
|
public void execute() throws IOException {
|
||||||
|
|
||||||
|
@ -229,8 +248,8 @@ public class IncrementalTableBackupClient extends TableBackupClient {
|
||||||
// copy out the table and region info files for each table
|
// copy out the table and region info files for each table
|
||||||
BackupUtils.copyTableRegionInfo(conn, backupInfo, conf);
|
BackupUtils.copyTableRegionInfo(conn, backupInfo, conf);
|
||||||
// convert WAL to HFiles and copy them to .tmp under BACKUP_ROOT
|
// convert WAL to HFiles and copy them to .tmp under BACKUP_ROOT
|
||||||
convertWALsToHFiles(backupInfo);
|
convertWALsToHFiles();
|
||||||
incrementalCopyHFiles(backupInfo);
|
incrementalCopyHFiles(new String[] {getBulkOutputDir().toString()}, backupInfo.getBackupRootDir());
|
||||||
// Save list of WAL files copied
|
// Save list of WAL files copied
|
||||||
backupManager.recordWALFiles(backupInfo.getIncrBackupFileList());
|
backupManager.recordWALFiles(backupInfo.getIncrBackupFileList());
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
@ -269,27 +288,25 @@ public class IncrementalTableBackupClient extends TableBackupClient {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void incrementalCopyHFiles(BackupInfo backupInfo) throws Exception {
|
protected void incrementalCopyHFiles(String[] files, String backupDest) throws IOException {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
LOG.debug("Incremental copy HFiles is starting.");
|
LOG.debug("Incremental copy HFiles is starting. dest="+backupDest);
|
||||||
// set overall backup phase: incremental_copy
|
// set overall backup phase: incremental_copy
|
||||||
backupInfo.setPhase(BackupPhase.INCREMENTAL_COPY);
|
backupInfo.setPhase(BackupPhase.INCREMENTAL_COPY);
|
||||||
// get incremental backup file list and prepare parms for DistCp
|
// get incremental backup file list and prepare parms for DistCp
|
||||||
List<String> incrBackupFileList = new ArrayList<String>();
|
String[] strArr = new String[files.length + 1];
|
||||||
// Add Bulk output
|
System.arraycopy(files, 0, strArr, 0, files.length);
|
||||||
incrBackupFileList.add(getBulkOutputDir().toString());
|
strArr[strArr.length - 1] = backupDest;
|
||||||
String[] strArr = incrBackupFileList.toArray(new String[incrBackupFileList.size() + 1]);
|
|
||||||
strArr[strArr.length - 1] = backupInfo.getBackupRootDir();
|
|
||||||
BackupCopyJob copyService = BackupRestoreFactory.getBackupCopyJob(conf);
|
BackupCopyJob copyService = BackupRestoreFactory.getBackupCopyJob(conf);
|
||||||
int res = copyService.copy(backupInfo, backupManager, conf, BackupType.INCREMENTAL, strArr);
|
int res = copyService.copy(backupInfo, backupManager, conf, BackupType.INCREMENTAL, strArr);
|
||||||
if (res != 0) {
|
if (res != 0) {
|
||||||
LOG.error("Copy incremental HFile files failed with return code: " + res + ".");
|
LOG.error("Copy incremental HFile files failed with return code: " + res + ".");
|
||||||
throw new IOException("Failed copy from " + StringUtils.join(incrBackupFileList, ',')
|
throw new IOException("Failed copy from " + StringUtils.join(files, ',')
|
||||||
+ " to " + backupInfo.getHLogTargetDir());
|
+ " to " + backupDest);
|
||||||
}
|
}
|
||||||
LOG.debug("Incremental copy HFiles from " + StringUtils.join(incrBackupFileList, ',')
|
LOG.debug("Incremental copy HFiles from " + StringUtils.join(files, ',')
|
||||||
+ " to " + backupInfo.getBackupRootDir() + " finished.");
|
+ " to " + backupDest + " finished.");
|
||||||
} finally {
|
} finally {
|
||||||
deleteBulkLoadDirectory();
|
deleteBulkLoadDirectory();
|
||||||
}
|
}
|
||||||
|
@ -306,7 +323,7 @@ public class IncrementalTableBackupClient extends TableBackupClient {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void convertWALsToHFiles(BackupInfo backupInfo) throws IOException {
|
protected void convertWALsToHFiles() throws IOException {
|
||||||
// get incremental backup file list and prepare parameters for DistCp
|
// get incremental backup file list and prepare parameters for DistCp
|
||||||
List<String> incrBackupFileList = backupInfo.getIncrBackupFileList();
|
List<String> incrBackupFileList = backupInfo.getIncrBackupFileList();
|
||||||
// Get list of tables in incremental backup set
|
// Get list of tables in incremental backup set
|
||||||
|
|
|
@ -142,6 +142,7 @@ public class MapReduceBackupCopyJob implements BackupCopyJob {
|
||||||
* Only the argument "src1, [src2, [...]] dst" is supported,
|
* Only the argument "src1, [src2, [...]] dst" is supported,
|
||||||
* no more DistCp options.
|
* no more DistCp options.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
class BackupDistCp extends DistCp {
|
class BackupDistCp extends DistCp {
|
||||||
|
|
||||||
private BackupInfo backupInfo;
|
private BackupInfo backupInfo;
|
||||||
|
@ -154,6 +155,7 @@ public class MapReduceBackupCopyJob implements BackupCopyJob {
|
||||||
this.backupManager = backupManager;
|
this.backupManager = backupManager;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Job execute() throws Exception {
|
public Job execute() throws Exception {
|
||||||
|
|
||||||
|
@ -249,7 +251,7 @@ public class MapReduceBackupCopyJob implements BackupCopyJob {
|
||||||
LOG.debug("Backup progress data updated to backup system table: \"Progress: "
|
LOG.debug("Backup progress data updated to backup system table: \"Progress: "
|
||||||
+ newProgressStr + " - " + bytesCopied + " bytes copied.\"");
|
+ newProgressStr + " - " + bytesCopied + " bytes copied.\"");
|
||||||
} catch (Throwable t) {
|
} catch (Throwable t) {
|
||||||
LOG.error("distcp " + job == null ? "" : job.getJobID() + " encountered error", t);
|
LOG.error(t);
|
||||||
throw t;
|
throw t;
|
||||||
} finally {
|
} finally {
|
||||||
if (!fieldSubmitted.getBoolean(this)) {
|
if (!fieldSubmitted.getBoolean(this)) {
|
||||||
|
|
|
@ -0,0 +1,288 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.hbase.backup.util;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Stack;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FileStatus;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.io.IOUtils;
|
||||||
|
import org.apache.hadoop.io.SequenceFile;
|
||||||
|
import org.apache.hadoop.io.Text;
|
||||||
|
import org.apache.hadoop.security.Credentials;
|
||||||
|
import org.apache.hadoop.tools.CopyListingFileStatus;
|
||||||
|
import org.apache.hadoop.tools.DistCpOptions;
|
||||||
|
import org.apache.hadoop.tools.DistCpOptions.FileAttribute;
|
||||||
|
import org.apache.hadoop.tools.SimpleCopyListing;
|
||||||
|
import org.apache.hadoop.tools.util.DistCpUtils;
|
||||||
|
import org.apache.yetus.audience.InterfaceAudience;
|
||||||
|
|
||||||
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The FixedRelativePathCopyListing is responsible for making the exhaustive list of
|
||||||
|
* all files/directories under its specified list of input-paths.
|
||||||
|
* These are written into the specified copy-listing file.
|
||||||
|
* This CopyListing implementation allows to preserve fixed -level structures
|
||||||
|
* between source and destination paths for every file being copied.
|
||||||
|
* Note: The FixedRelativePathCopyListing doesn't handle wild-cards in the input-paths.
|
||||||
|
*/
|
||||||
|
@InterfaceAudience.Private
|
||||||
|
public class FixedRelativePathCopyListing extends SimpleCopyListing {
|
||||||
|
public static final String NUMBER_OF_LEVELS_TO_PRESERVE_KEY = "num.levels.preserve";
|
||||||
|
private static final Log LOG = LogFactory.getLog(FixedRelativePathCopyListing.class);
|
||||||
|
|
||||||
|
private long totalPaths = 0;
|
||||||
|
private long totalBytesToCopy = 0;
|
||||||
|
private int numLevelsToPreserve = 1;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Public constructor, to initialize configuration.
|
||||||
|
*
|
||||||
|
* @param configuration The input configuration, with which the source/target FileSystems may be accessed.
|
||||||
|
* @param credentials - Credentials object on which the FS delegation tokens are cached. If null
|
||||||
|
* delegation token caching is skipped
|
||||||
|
*/
|
||||||
|
public FixedRelativePathCopyListing(Configuration configuration, Credentials credentials) {
|
||||||
|
super(configuration, credentials);
|
||||||
|
this.numLevelsToPreserve = configuration.getInt(NUMBER_OF_LEVELS_TO_PRESERVE_KEY, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** {@inheritDoc} */
|
||||||
|
@Override
|
||||||
|
public void doBuildListing(Path pathToListingFile, DistCpOptions options) throws IOException {
|
||||||
|
doBuildListing(getWriter(pathToListingFile), options);
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Collect the list of
|
||||||
|
* {@literal <sourceRelativePath, sourceFileStatus>}
|
||||||
|
* to be copied and write to the sequence file. In essence, any file or
|
||||||
|
* directory that need to be copied or sync-ed is written as an entry to the
|
||||||
|
* sequence file, with the possible exception of the source root:
|
||||||
|
* when either -update (sync) or -overwrite switch is specified, and if
|
||||||
|
* the the source root is a directory, then the source root entry is not
|
||||||
|
* written to the sequence file, because only the contents of the source
|
||||||
|
* directory need to be copied in this case.
|
||||||
|
* See {@link org.apache.hadoop.tools.util.DistCpUtils#getRelativePath} for
|
||||||
|
* how relative path is computed.
|
||||||
|
* See computeSourceRootPath method for how the root path of the source is
|
||||||
|
* computed.
|
||||||
|
* @param fileListWriter
|
||||||
|
* @param options
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
@VisibleForTesting
|
||||||
|
public void doBuildListing(SequenceFile.Writer fileListWriter,
|
||||||
|
DistCpOptions options) throws IOException {
|
||||||
|
try {
|
||||||
|
for (Path path: options.getSourcePaths()) {
|
||||||
|
FileSystem sourceFS = path.getFileSystem(getConf());
|
||||||
|
final boolean preserveAcls = options.shouldPreserve(FileAttribute.ACL);
|
||||||
|
final boolean preserveXAttrs = options.shouldPreserve(FileAttribute.XATTR);
|
||||||
|
final boolean preserveRawXAttrs = options.shouldPreserveRawXattrs();
|
||||||
|
path = makeQualified(path);
|
||||||
|
|
||||||
|
FileStatus rootStatus = sourceFS.getFileStatus(path);
|
||||||
|
Path sourcePathRoot = computeSourceRootPath(rootStatus, options);
|
||||||
|
|
||||||
|
FileStatus[] sourceFiles = sourceFS.listStatus(path);
|
||||||
|
boolean explore = (sourceFiles != null && sourceFiles.length > 0);
|
||||||
|
if (!explore || rootStatus.isDirectory()) {
|
||||||
|
CopyListingFileStatus rootCopyListingStatus =
|
||||||
|
DistCpUtils.toCopyListingFileStatus(sourceFS, rootStatus,
|
||||||
|
preserveAcls, preserveXAttrs, preserveRawXAttrs);
|
||||||
|
writeToFileListingRoot(fileListWriter, rootCopyListingStatus,
|
||||||
|
sourcePathRoot, options);
|
||||||
|
}
|
||||||
|
if (explore) {
|
||||||
|
for (FileStatus sourceStatus: sourceFiles) {
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("Recording source-path: " + sourceStatus.getPath() + " for copy.");
|
||||||
|
}
|
||||||
|
CopyListingFileStatus sourceCopyListingStatus =
|
||||||
|
DistCpUtils.toCopyListingFileStatus(sourceFS, sourceStatus,
|
||||||
|
preserveAcls && sourceStatus.isDirectory(),
|
||||||
|
preserveXAttrs && sourceStatus.isDirectory(),
|
||||||
|
preserveRawXAttrs && sourceStatus.isDirectory());
|
||||||
|
writeToFileListing(fileListWriter, sourceCopyListingStatus,
|
||||||
|
sourcePathRoot, options);
|
||||||
|
|
||||||
|
if (isDirectoryAndNotEmpty(sourceFS, sourceStatus)) {
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("Traversing non-empty source dir: " + sourceStatus.getPath());
|
||||||
|
}
|
||||||
|
traverseNonEmptyDirectory(fileListWriter, sourceStatus, sourcePathRoot,
|
||||||
|
options);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fileListWriter.close();
|
||||||
|
fileListWriter = null;
|
||||||
|
} finally {
|
||||||
|
IOUtils.cleanup(LOG, fileListWriter);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private Path computeSourceRootPath(FileStatus sourceStatus,
|
||||||
|
DistCpOptions options) throws IOException {
|
||||||
|
|
||||||
|
Path path = sourceStatus.getPath();
|
||||||
|
int level = 0;
|
||||||
|
while (level++ < numLevelsToPreserve) {
|
||||||
|
path = path.getParent();
|
||||||
|
}
|
||||||
|
return path;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Provide an option to skip copy of a path, Allows for exclusion
|
||||||
|
* of files such as {@link org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter#SUCCEEDED_FILE_NAME}
|
||||||
|
* @param path - Path being considered for copy while building the file listing
|
||||||
|
* @param options - Input options passed during DistCp invocation
|
||||||
|
* @return - True if the path should be considered for copy, false otherwise
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
protected boolean shouldCopy(Path path, DistCpOptions options) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** {@inheritDoc} */
|
||||||
|
@Override
|
||||||
|
protected long getBytesToCopy() {
|
||||||
|
return totalBytesToCopy;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** {@inheritDoc} */
|
||||||
|
@Override
|
||||||
|
protected long getNumberOfPaths() {
|
||||||
|
return totalPaths;
|
||||||
|
}
|
||||||
|
|
||||||
|
private Path makeQualified(Path path) throws IOException {
|
||||||
|
final FileSystem fs = path.getFileSystem(getConf());
|
||||||
|
return path.makeQualified(fs.getUri(), fs.getWorkingDirectory());
|
||||||
|
}
|
||||||
|
|
||||||
|
private SequenceFile.Writer getWriter(Path pathToListFile) throws IOException {
|
||||||
|
FileSystem fs = pathToListFile.getFileSystem(getConf());
|
||||||
|
if (fs.exists(pathToListFile)) {
|
||||||
|
fs.delete(pathToListFile, false);
|
||||||
|
}
|
||||||
|
return SequenceFile.createWriter(getConf(),
|
||||||
|
SequenceFile.Writer.file(pathToListFile),
|
||||||
|
SequenceFile.Writer.keyClass(Text.class),
|
||||||
|
SequenceFile.Writer.valueClass(CopyListingFileStatus.class),
|
||||||
|
SequenceFile.Writer.compression(SequenceFile.CompressionType.NONE));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean isDirectoryAndNotEmpty(FileSystem fileSystem,
|
||||||
|
FileStatus fileStatus) throws IOException {
|
||||||
|
return fileStatus.isDirectory() && getChildren(fileSystem, fileStatus).length > 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static FileStatus[] getChildren(FileSystem fileSystem,
|
||||||
|
FileStatus parent) throws IOException {
|
||||||
|
return fileSystem.listStatus(parent.getPath());
|
||||||
|
}
|
||||||
|
|
||||||
|
private void traverseNonEmptyDirectory(SequenceFile.Writer fileListWriter,
|
||||||
|
FileStatus sourceStatus,
|
||||||
|
Path sourcePathRoot,
|
||||||
|
DistCpOptions options)
|
||||||
|
throws IOException {
|
||||||
|
FileSystem sourceFS = sourcePathRoot.getFileSystem(getConf());
|
||||||
|
final boolean preserveAcls = options.shouldPreserve(FileAttribute.ACL);
|
||||||
|
final boolean preserveXAttrs = options.shouldPreserve(FileAttribute.XATTR);
|
||||||
|
final boolean preserveRawXattrs = options.shouldPreserveRawXattrs();
|
||||||
|
Stack<FileStatus> pathStack = new Stack<FileStatus>();
|
||||||
|
pathStack.push(sourceStatus);
|
||||||
|
|
||||||
|
while (!pathStack.isEmpty()) {
|
||||||
|
for (FileStatus child: getChildren(sourceFS, pathStack.pop())) {
|
||||||
|
if (LOG.isDebugEnabled())
|
||||||
|
LOG.debug("Recording source-path: "
|
||||||
|
+ sourceStatus.getPath() + " for copy.");
|
||||||
|
CopyListingFileStatus childCopyListingStatus =
|
||||||
|
DistCpUtils.toCopyListingFileStatus(sourceFS, child,
|
||||||
|
preserveAcls && child.isDirectory(),
|
||||||
|
preserveXAttrs && child.isDirectory(),
|
||||||
|
preserveRawXattrs && child.isDirectory());
|
||||||
|
writeToFileListing(fileListWriter, childCopyListingStatus,
|
||||||
|
sourcePathRoot, options);
|
||||||
|
if (isDirectoryAndNotEmpty(sourceFS, child)) {
|
||||||
|
if (LOG.isDebugEnabled())
|
||||||
|
LOG.debug("Traversing non-empty source dir: "
|
||||||
|
+ sourceStatus.getPath());
|
||||||
|
pathStack.push(child);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void writeToFileListingRoot(SequenceFile.Writer fileListWriter,
|
||||||
|
CopyListingFileStatus fileStatus, Path sourcePathRoot,
|
||||||
|
DistCpOptions options) throws IOException {
|
||||||
|
boolean syncOrOverwrite = options.shouldSyncFolder() ||
|
||||||
|
options.shouldOverwrite();
|
||||||
|
if (fileStatus.getPath().equals(sourcePathRoot) &&
|
||||||
|
fileStatus.isDirectory() && syncOrOverwrite) {
|
||||||
|
// Skip the root-paths when syncOrOverwrite
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("Skip " + fileStatus.getPath());
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
writeToFileListing(fileListWriter, fileStatus, sourcePathRoot, options);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void writeToFileListing(SequenceFile.Writer fileListWriter,
|
||||||
|
CopyListingFileStatus fileStatus,
|
||||||
|
Path sourcePathRoot,
|
||||||
|
DistCpOptions options) throws IOException {
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("REL PATH: " + DistCpUtils.getRelativePath(sourcePathRoot,
|
||||||
|
fileStatus.getPath()) + ", FULL PATH: " + fileStatus.getPath());
|
||||||
|
}
|
||||||
|
|
||||||
|
FileStatus status = fileStatus;
|
||||||
|
|
||||||
|
if (!shouldCopy(fileStatus.getPath(), options)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
fileListWriter.append(new Text(DistCpUtils.getRelativePath(sourcePathRoot,
|
||||||
|
fileStatus.getPath())), status);
|
||||||
|
fileListWriter.sync();
|
||||||
|
|
||||||
|
if (!fileStatus.isDirectory()) {
|
||||||
|
totalBytesToCopy += fileStatus.getLen();
|
||||||
|
}
|
||||||
|
totalPaths++;
|
||||||
|
}
|
||||||
|
}
|
|
@ -136,8 +136,9 @@ public class TestBackupBase {
|
||||||
// copy out the table and region info files for each table
|
// copy out the table and region info files for each table
|
||||||
BackupUtils.copyTableRegionInfo(conn, backupInfo, conf);
|
BackupUtils.copyTableRegionInfo(conn, backupInfo, conf);
|
||||||
// convert WAL to HFiles and copy them to .tmp under BACKUP_ROOT
|
// convert WAL to HFiles and copy them to .tmp under BACKUP_ROOT
|
||||||
convertWALsToHFiles(backupInfo);
|
convertWALsToHFiles();
|
||||||
incrementalCopyHFiles(backupInfo);
|
incrementalCopyHFiles(new String[] {getBulkOutputDir().toString()},
|
||||||
|
backupInfo.getBackupRootDir());
|
||||||
failStageIf(Stage.stage_2);
|
failStageIf(Stage.stage_2);
|
||||||
// Save list of WAL files copied
|
// Save list of WAL files copied
|
||||||
backupManager.recordWALFiles(backupInfo.getIncrBackupFileList());
|
backupManager.recordWALFiles(backupInfo.getIncrBackupFileList());
|
||||||
|
|
Loading…
Reference in New Issue