HBASE-27541 Backups should be able to be restored to a separate filesystem (#4933)

Signed-off-by: Bryan Beaudreault <bbeaudreault@apache.org>
This commit is contained in:
Jarryd Lee 2023-01-17 17:09:01 -08:00 committed by Bryan Beaudreault
parent 140e0de748
commit cfab38a510
7 changed files with 123 additions and 26 deletions

View File

@ -34,10 +34,11 @@ public interface RestoreJob extends Configurable {
* Run restore operation
* @param dirPaths path array of WAL log directories
* @param fromTables from tables
* @param restoreRootDir root directory under which the restore output is written
* @param toTables to tables
* @param fullBackupRestore full backup restore
* @throws IOException if running the job fails
*/
void run(Path[] dirPaths, TableName[] fromTables, TableName[] toTables, boolean fullBackupRestore)
throws IOException;
void run(Path[] dirPaths, TableName[] fromTables, Path restoreRootDir, TableName[] toTables,
boolean fullBackupRestore) throws IOException;
}

View File

@ -37,6 +37,11 @@ public final class RestoreRequest {
return this;
}
/**
* Sets the restore root directory on the request being built.
* @param restoreRootDir restore output root directory, given as a path string
* @return this builder, to allow call chaining
*/
public Builder withRestoreRootDir(String restoreRootDir) {
request.setRestoreRootDir(restoreRootDir);
return this;
}
public Builder withBackupId(String backupId) {
request.setBackupId(backupId);
return this;
@ -68,6 +73,7 @@ public final class RestoreRequest {
}
private String backupRootDir;
private String restoreRootDir;
private String backupId;
private boolean check = false;
private TableName[] fromTables;
@ -86,6 +92,15 @@ public final class RestoreRequest {
return this;
}
/**
* Returns the restore root directory configured for this request.
* @return the restore root directory as a path string, or {@code null} if none was set
*/
public String getRestoreRootDir() {
return restoreRootDir;
}
/**
* Stores the restore root directory on this request.
* @param restoreRootDir restore output root directory, given as a path string
* @return this request
*/
private RestoreRequest setRestoreRootDir(String restoreRootDir) {
this.restoreRootDir = restoreRootDir;
return this;
}
public String getBackupId() {
return backupId;
}

View File

@ -35,6 +35,7 @@ import org.apache.hadoop.hbase.backup.BackupType;
import org.apache.hadoop.hbase.backup.HBackupFileSystem;
import org.apache.hadoop.hbase.backup.RestoreRequest;
import org.apache.hadoop.hbase.backup.impl.BackupManifest.BackupImage;
import org.apache.hadoop.hbase.backup.util.BackupUtils;
import org.apache.hadoop.hbase.backup.util.RestoreTool;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
@ -55,11 +56,12 @@ public class RestoreTablesClient {
private String backupId;
private TableName[] sTableArray;
private TableName[] tTableArray;
private String targetRootDir;
private String backupRootDir;
private Path restoreRootDir;
private boolean isOverwrite;
public RestoreTablesClient(Connection conn, RestoreRequest request) {
this.targetRootDir = request.getBackupRootDir();
public RestoreTablesClient(Connection conn, RestoreRequest request) throws IOException {
this.backupRootDir = request.getBackupRootDir();
this.backupId = request.getBackupId();
this.sTableArray = request.getFromTables();
this.tTableArray = request.getToTables();
@ -69,6 +71,12 @@ public class RestoreTablesClient {
this.isOverwrite = request.isOverwrite();
this.conn = conn;
this.conf = conn.getConfiguration();
if (request.getRestoreRootDir() != null) {
restoreRootDir = new Path(request.getRestoreRootDir());
} else {
FileSystem fs = FileSystem.get(conf);
this.restoreRootDir = BackupUtils.getTmpRestoreOutputDir(fs, conf);
}
}
/**
@ -131,7 +139,7 @@ public class RestoreTablesClient {
String rootDir = image.getRootDir();
String backupId = image.getBackupId();
Path backupRoot = new Path(rootDir);
RestoreTool restoreTool = new RestoreTool(conf, backupRoot, backupId);
RestoreTool restoreTool = new RestoreTool(conf, backupRoot, restoreRootDir, backupId);
Path tableBackupPath = HBackupFileSystem.getTableBackupPath(sTable, backupRoot, backupId);
String lastIncrBackupId = images.length == 1 ? null : images[images.length - 1].getBackupId();
// We need hFS only for full restore (see the code)
@ -249,7 +257,7 @@ public class RestoreTablesClient {
// case RESTORE_IMAGES:
HashMap<TableName, BackupManifest> backupManifestMap = new HashMap<>();
// check and load backup image manifest for the tables
Path rootPath = new Path(targetRootDir);
Path rootPath = new Path(backupRootDir);
HBackupFileSystem.checkImageManifestExist(backupManifestMap, sTableArray, conf, rootPath,
backupId);

View File

@ -50,8 +50,8 @@ public class MapReduceRestoreJob implements RestoreJob {
}
@Override
public void run(Path[] dirPaths, TableName[] tableNames, TableName[] newTableNames,
boolean fullBackupRestore) throws IOException {
public void run(Path[] dirPaths, TableName[] tableNames, Path restoreRootDir,
TableName[] newTableNames, boolean fullBackupRestore) throws IOException {
String bulkOutputConfKey;
player = new MapReduceHFileSplitterJob();
@ -70,9 +70,8 @@ public class MapReduceRestoreJob implements RestoreJob {
for (int i = 0; i < tableNames.length; i++) {
LOG.info("Restore " + tableNames[i] + " into " + newTableNames[i]);
Path bulkOutputPath = BackupUtils
.getBulkOutputDir(BackupUtils.getFileNameCompatibleString(newTableNames[i]), getConf());
Path bulkOutputPath = BackupUtils.getBulkOutputDir(restoreRootDir,
BackupUtils.getFileNameCompatibleString(newTableNames[i]), getConf());
Configuration conf = getConf();
conf.set(bulkOutputConfKey, bulkOutputPath.toString());
String[] playerArgs = { dirs,

View File

@ -689,21 +689,38 @@ public final class BackupUtils {
return isValid;
}
public static Path getBulkOutputDir(String tableName, Configuration conf, boolean deleteOnExit)
throws IOException {
FileSystem fs = FileSystem.get(conf);
String tmp =
conf.get(HConstants.TEMPORARY_FS_DIRECTORY_KEY, fs.getHomeDirectory() + "/hbase-staging");
Path path = new Path(tmp + Path.SEPARATOR + "bulk_output-" + tableName + "-"
+ EnvironmentEdgeManager.currentTime());
/**
 * Builds a timestamped bulk output path for a table underneath the given restore root.
 * @param restoreRootDir parent directory of the bulk output path; its file system is the one
 *                       the path is registered with for delete-on-exit
 * @param tableName      table name embedded in the directory name
 * @param conf           configuration used to resolve the file system of restoreRootDir
 * @param deleteOnExit   whether to register the returned path for deletion on exit
 * @return the bulk output path; this method does not create it
 * @throws IOException if the file system cannot be resolved or the registration fails
 */
public static Path getBulkOutputDir(Path restoreRootDir, String tableName, Configuration conf,
  boolean deleteOnExit) throws IOException {
  // Resolve the file system first, even when deleteOnExit is false, so resolution failures
  // surface for every caller.
  FileSystem outputFs = restoreRootDir.getFileSystem(conf);
  String dirName = "bulk_output-" + tableName + "-" + EnvironmentEdgeManager.currentTime();
  Path bulkOutputDir = new Path(restoreRootDir, dirName);
  if (!deleteOnExit) {
    return bulkOutputDir;
  }
  outputFs.deleteOnExit(bulkOutputDir);
  return bulkOutputDir;
}
public static Path getBulkOutputDir(String tableName, Configuration conf) throws IOException {
return getBulkOutputDir(tableName, conf, true);
/**
* Builds a bulk output path under the given restore root, registered for deletion on exit.
* Delegates to the four-argument overload with {@code deleteOnExit} set to {@code true}.
* @param restoreRootDir parent directory of the bulk output path
* @param tableName table name embedded in the directory name
* @param conf configuration used to resolve the file system of restoreRootDir
* @return the bulk output path
* @throws IOException if the delegate fails
*/
public static Path getBulkOutputDir(Path restoreRootDir, String tableName, Configuration conf)
throws IOException {
return getBulkOutputDir(restoreRootDir, tableName, conf, true);
}
/**
 * Builds a bulk output path under the default temporary restore output directory.
 * @param tableName    table name embedded in the directory name
 * @param conf         configuration used to obtain the file system and the temporary directory
 * @param deleteOnExit whether to register the returned path for deletion on exit
 * @return the bulk output path
 * @throws IOException if the file system cannot be obtained or the delegate fails
 */
public static Path getBulkOutputDir(String tableName, Configuration conf, boolean deleteOnExit)
  throws IOException {
  Path defaultRestoreRoot = getTmpRestoreOutputDir(FileSystem.get(conf), conf);
  return getBulkOutputDir(defaultRestoreRoot, tableName, conf, deleteOnExit);
}
/**
 * Resolves the temporary restore output directory.
 * <p>
 * Uses the value configured under {@link HConstants#TEMPORARY_FS_DIRECTORY_KEY}; when that key
 * is not set, falls back to {@code hbase-staging} under the home directory of {@code fs}.
 * @param fs   file system whose home directory backs the fallback location
 * @param conf configuration to read the temporary directory setting from
 * @return the temporary restore output path
 */
public static Path getTmpRestoreOutputDir(FileSystem fs, Configuration conf) {
  // The fallback is computed up front, so fs.getHomeDirectory() is consulted even when the
  // key is configured.
  String fallbackDir = fs.getHomeDirectory() + "/hbase-staging";
  String stagingDir = conf.get(HConstants.TEMPORARY_FS_DIRECTORY_KEY, fallbackDir);
  return new Path(stagingDir);
}
public static String getFileNameCompatibleString(TableName table) {

View File

@ -67,18 +67,20 @@ public class RestoreTool {
private final String[] ignoreDirs = { HConstants.RECOVERED_EDITS_DIR };
protected Configuration conf;
protected Path backupRootPath;
protected Path restoreRootDir;
protected String backupId;
protected FileSystem fs;
// store table name and snapshot dir mapping
private final HashMap<TableName, Path> snapshotMap = new HashMap<>();
public RestoreTool(Configuration conf, final Path backupRootPath, final String backupId)
throws IOException {
/**
* Creates a restore tool bound to a backup root, a restore output root and a backup id.
* @param conf configuration, also used to resolve the backup file system
* @param backupRootPath root path of the backup
* @param restoreRootDir restore output root directory, stored as given
* @param backupId id of the backup
* @throws IOException if the file system of backupRootPath cannot be obtained
*/
public RestoreTool(Configuration conf, final Path backupRootPath, final Path restoreRootDir,
final String backupId) throws IOException {
this.conf = conf;
this.backupRootPath = backupRootPath;
this.backupId = backupId;
// fs is the file system of the backup root, not of the restore output root.
this.fs = backupRootPath.getFileSystem(conf);
this.restoreRootDir = restoreRootDir;
}
/**
@ -200,7 +202,7 @@ public class RestoreTool {
}
RestoreJob restoreService = BackupRestoreFactory.getRestoreJob(conf);
restoreService.run(logDirs, tableNames, newTableNames, false);
restoreService.run(logDirs, tableNames, restoreRootDir, newTableNames, false);
}
}
@ -350,8 +352,8 @@ public class RestoreTool {
RestoreJob restoreService = BackupRestoreFactory.getRestoreJob(conf);
Path[] paths = new Path[regionPathList.size()];
regionPathList.toArray(paths);
restoreService.run(paths, new TableName[] { tableName }, new TableName[] { newTableName },
true);
restoreService.run(paths, new TableName[] { tableName }, restoreRootDir,
new TableName[] { newTableName }, true);
} catch (Exception e) {
LOG.error(e.toString(), e);

View File

@ -17,13 +17,21 @@
*/
package org.apache.hadoop.hbase.backup;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.impl.BackupAdminImpl;
import org.apache.hadoop.hbase.backup.mapreduce.MapReduceHFileSplitterJob;
import org.apache.hadoop.hbase.backup.util.BackupUtils;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.junit.BeforeClass;
import org.junit.ClassRule;
@ -72,4 +80,51 @@ public class TestRemoteRestore extends TestBackupBase {
TEST_UTIL.deleteTable(table1_restore);
hba.close();
}
/**
 * Verify that restore jobs can be run on a standalone mapreduce cluster. Ensures hfiles output
 * via {@link MapReduceHFileSplitterJob} exist on correct filesystem.
 * <p>
 * The restore is driven through a connection whose configuration points at a separate mini
 * DFS/MapReduce cluster, while the restore root dir is {@code BACKUP_ROOT_DIR}. The connection
 * is closed after the restore, and the extra mini clusters are shut down when the test ends,
 * whether it passes or fails.
 * @throws Exception if doing the backup or an operation on the tables fails
 */
@Test
public void testFullRestoreRemoteWithAlternateRestoreOutputDir() throws Exception {
  LOG.info("test remote full backup on a single table with alternate restore output dir");
  String backupId =
    backupTables(BackupType.FULL, toList(table1.getNameAsString()), BACKUP_REMOTE_ROOT_DIR);
  LOG.info("backup complete");
  TableName[] tableset = new TableName[] { table1 };
  TableName[] tablemap = new TableName[] { table1_restore };

  HBaseTestingUtility mrTestUtil = new HBaseTestingUtility();
  mrTestUtil.setZkCluster(TEST_UTIL.getZkCluster());
  try {
    mrTestUtil.startMiniDFSCluster(3);
    mrTestUtil.startMiniMapReduceCluster();

    // Start from the MR cluster's configuration, but address the HBase cluster under test.
    Configuration testUtilConf = TEST_UTIL.getConnection().getConfiguration();
    Configuration conf = new Configuration(mrTestUtil.getConfiguration());
    conf.set(HConstants.ZOOKEEPER_ZNODE_PARENT,
      testUtilConf.get(HConstants.ZOOKEEPER_ZNODE_PARENT));
    conf.set(HConstants.MASTER_ADDRS_KEY, testUtilConf.get(HConstants.MASTER_ADDRS_KEY));

    // The connection is created by this test, so this test has to close it. The type is
    // fully qualified so this method does not depend on an extra import.
    try (org.apache.hadoop.hbase.client.Connection restoreConn =
      ConnectionFactory.createConnection(conf)) {
      new BackupAdminImpl(restoreConn)
        .restore(new RestoreRequest.Builder().withBackupRootDir(BACKUP_REMOTE_ROOT_DIR)
          .withRestoreRootDir(BACKUP_ROOT_DIR).withBackupId(backupId).withCheck(false)
          .withFromTables(tableset).withToTables(tablemap).withOvewrite(false).build());
    }

    // Strip scheme and authority so the same path can be probed on both file systems.
    Path hfileOutputPath = new Path(
      new Path(conf.get(MapReduceHFileSplitterJob.BULK_OUTPUT_CONF_KEY)).toUri().getPath());

    // files exist on hbase cluster
    FileSystem fileSystem = FileSystem.get(TEST_UTIL.getConfiguration());
    assertTrue(fileSystem.exists(hfileOutputPath));

    // files don't exist on MR cluster
    fileSystem = FileSystem.get(conf);
    assertFalse(fileSystem.exists(hfileOutputPath));

    Admin hba = TEST_UTIL.getAdmin();
    assertTrue(hba.tableExists(table1_restore));
    TEST_UTIL.deleteTable(table1_restore);
    hba.close();
  } finally {
    // Stop the clusters started only for this test; the shared ZK cluster is left running.
    mrTestUtil.shutdownMiniMapReduceCluster();
    mrTestUtil.shutdownMiniDFSCluster();
  }
}
}