From d799c3c3c3d53e5c53fa1120f13aac77d868725b Mon Sep 17 00:00:00 2001
From: Jarryd Lee
Date: Tue, 17 Jan 2023 17:09:01 -0800
Subject: [PATCH] HBASE-27541 Backups should be able to be restored to a
 separate filesystem (#4933)

Signed-off-by: Bryan Beaudreault
---
 .../hadoop/hbase/backup/RestoreJob.java       |  5 +-
 .../hadoop/hbase/backup/RestoreRequest.java   | 15 +++++
 .../backup/impl/RestoreTablesClient.java      | 18 ++++--
 .../backup/mapreduce/MapReduceRestoreJob.java |  9 ++-
 .../hadoop/hbase/backup/util/BackupUtils.java | 35 +++++++++---
 .../hadoop/hbase/backup/util/RestoreTool.java | 12 ++--
 .../hbase/backup/TestRemoteRestore.java       | 55 +++++++++++++++++++
 7 files changed, 123 insertions(+), 26 deletions(-)

diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/RestoreJob.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/RestoreJob.java
index b014e6693bb..831e097cb92 100644
--- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/RestoreJob.java
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/RestoreJob.java
@@ -34,10 +34,11 @@ public interface RestoreJob extends Configurable {
    * Run restore operation
    * @param dirPaths          path array of WAL log directories
    * @param fromTables        from tables
+   * @param restoreRootDir    output file system
    * @param toTables          to tables
    * @param fullBackupRestore full backup restore
    * @throws IOException if running the job fails
    */
-  void run(Path[] dirPaths, TableName[] fromTables, TableName[] toTables, boolean fullBackupRestore)
-    throws IOException;
+  void run(Path[] dirPaths, TableName[] fromTables, Path restoreRootDir, TableName[] toTables,
+    boolean fullBackupRestore) throws IOException;
 }
diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/RestoreRequest.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/RestoreRequest.java
index eb4786f5786..f7f1d848d95 100644
--- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/RestoreRequest.java
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/RestoreRequest.java
@@ -37,6 +37,11 @@ public final class RestoreRequest {
       return this;
     }
 
+    public Builder withRestoreRootDir(String restoreRootDir) {
+      request.setRestoreRootDir(restoreRootDir);
+      return this;
+    }
+
     public Builder withBackupId(String backupId) {
       request.setBackupId(backupId);
       return this;
@@ -68,6 +73,7 @@ public final class RestoreRequest {
   }
 
   private String backupRootDir;
+  private String restoreRootDir;
   private String backupId;
   private boolean check = false;
   private TableName[] fromTables;
@@ -86,6 +92,15 @@ public final class RestoreRequest {
     return this;
   }
 
+  public String getRestoreRootDir() {
+    return restoreRootDir;
+  }
+
+  private RestoreRequest setRestoreRootDir(String restoreRootDir) {
+    this.restoreRootDir = restoreRootDir;
+    return this;
+  }
+
   public String getBackupId() {
     return backupId;
   }
diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/RestoreTablesClient.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/RestoreTablesClient.java
index 9ec2442a3d9..05685c8e091 100644
--- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/RestoreTablesClient.java
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/RestoreTablesClient.java
@@ -35,6 +35,7 @@ import org.apache.hadoop.hbase.backup.BackupType;
 import org.apache.hadoop.hbase.backup.HBackupFileSystem;
 import org.apache.hadoop.hbase.backup.RestoreRequest;
 import org.apache.hadoop.hbase.backup.impl.BackupManifest.BackupImage;
+import org.apache.hadoop.hbase.backup.util.BackupUtils;
 import org.apache.hadoop.hbase.backup.util.RestoreTool;
 import org.apache.hadoop.hbase.client.Admin;
 import org.apache.hadoop.hbase.client.Connection;
@@ -55,11 +56,12 @@ public class RestoreTablesClient {
   private String backupId;
   private TableName[] sTableArray;
   private TableName[] tTableArray;
-  private String targetRootDir;
+  private String backupRootDir;
+  private Path restoreRootDir;
   private boolean isOverwrite;
 
-  public RestoreTablesClient(Connection conn, RestoreRequest request) {
-    this.targetRootDir = request.getBackupRootDir();
+  public RestoreTablesClient(Connection conn, RestoreRequest request) throws IOException {
+    this.backupRootDir = request.getBackupRootDir();
     this.backupId = request.getBackupId();
     this.sTableArray = request.getFromTables();
     this.tTableArray = request.getToTables();
@@ -69,6 +71,12 @@ public class RestoreTablesClient {
     this.isOverwrite = request.isOverwrite();
     this.conn = conn;
     this.conf = conn.getConfiguration();
+    if (request.getRestoreRootDir() != null) {
+      restoreRootDir = new Path(request.getRestoreRootDir());
+    } else {
+      FileSystem fs = FileSystem.get(conf);
+      this.restoreRootDir = BackupUtils.getTmpRestoreOutputDir(fs, conf);
+    }
   }
 
   /**
@@ -131,7 +139,7 @@ public class RestoreTablesClient {
     String rootDir = image.getRootDir();
     String backupId = image.getBackupId();
     Path backupRoot = new Path(rootDir);
-    RestoreTool restoreTool = new RestoreTool(conf, backupRoot, backupId);
+    RestoreTool restoreTool = new RestoreTool(conf, backupRoot, restoreRootDir, backupId);
     Path tableBackupPath = HBackupFileSystem.getTableBackupPath(sTable, backupRoot, backupId);
     String lastIncrBackupId = images.length == 1 ? null : images[images.length - 1].getBackupId();
     // We need hFS only for full restore (see the code)
@@ -249,7 +257,7 @@ public class RestoreTablesClient {
     // case RESTORE_IMAGES:
     HashMap<TableName, BackupManifest> backupManifestMap = new HashMap<>();
     // check and load backup image manifest for the tables
-    Path rootPath = new Path(targetRootDir);
+    Path rootPath = new Path(backupRootDir);
     HBackupFileSystem.checkImageManifestExist(backupManifestMap, sTableArray, conf, rootPath,
       backupId);
 
diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/mapreduce/MapReduceRestoreJob.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/mapreduce/MapReduceRestoreJob.java
index e6046bf5fb9..55f6bff04cb 100644
--- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/mapreduce/MapReduceRestoreJob.java
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/mapreduce/MapReduceRestoreJob.java
@@ -50,8 +50,8 @@ public class MapReduceRestoreJob implements RestoreJob {
   }
 
   @Override
-  public void run(Path[] dirPaths, TableName[] tableNames, TableName[] newTableNames,
-    boolean fullBackupRestore) throws IOException {
+  public void run(Path[] dirPaths, TableName[] tableNames, Path restoreRootDir,
+    TableName[] newTableNames, boolean fullBackupRestore) throws IOException {
     String bulkOutputConfKey;
 
     player = new MapReduceHFileSplitterJob();
@@ -70,9 +70,8 @@ public class MapReduceRestoreJob implements RestoreJob {
 
     for (int i = 0; i < tableNames.length; i++) {
       LOG.info("Restore " + tableNames[i] + " into " + newTableNames[i]);
-
-      Path bulkOutputPath = BackupUtils
-        .getBulkOutputDir(BackupUtils.getFileNameCompatibleString(newTableNames[i]), getConf());
+      Path bulkOutputPath = BackupUtils.getBulkOutputDir(restoreRootDir,
+        BackupUtils.getFileNameCompatibleString(newTableNames[i]), getConf());
       Configuration conf = getConf();
       conf.set(bulkOutputConfKey, bulkOutputPath.toString());
       String[] playerArgs = { dirs,
diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/BackupUtils.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/BackupUtils.java
index 7afc7a840f7..d4e849f610a 100644
--- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/BackupUtils.java
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/BackupUtils.java
@@ -690,21 +690,38 @@ public final class BackupUtils {
     return isValid;
   }
 
-  public static Path getBulkOutputDir(String tableName, Configuration conf, boolean deleteOnExit)
-    throws IOException {
-    FileSystem fs = FileSystem.get(conf);
-    String tmp =
-      conf.get(HConstants.TEMPORARY_FS_DIRECTORY_KEY, fs.getHomeDirectory() + "/hbase-staging");
-    Path path = new Path(tmp + Path.SEPARATOR + "bulk_output-" + tableName + "-"
-      + EnvironmentEdgeManager.currentTime());
+  public static Path getBulkOutputDir(Path restoreRootDir, String tableName, Configuration conf,
+    boolean deleteOnExit) throws IOException {
+    FileSystem fs = restoreRootDir.getFileSystem(conf);
+    Path path = new Path(restoreRootDir,
+      "bulk_output-" + tableName + "-" + EnvironmentEdgeManager.currentTime());
     if (deleteOnExit) {
       fs.deleteOnExit(path);
     }
     return path;
   }
 
-  public static Path getBulkOutputDir(String tableName, Configuration conf) throws IOException {
-    return getBulkOutputDir(tableName, conf, true);
+  public static Path getBulkOutputDir(Path restoreRootDir, String tableName, Configuration conf)
+    throws IOException {
+    return getBulkOutputDir(restoreRootDir, tableName, conf, true);
+  }
+
+  public static Path getBulkOutputDir(String tableName, Configuration conf, boolean deleteOnExit)
+    throws IOException {
+    FileSystem fs = FileSystem.get(conf);
+    return getBulkOutputDir(getTmpRestoreOutputDir(fs, conf), tableName, conf, deleteOnExit);
+  }
+
+  /**
+   * Build temporary output path
+   * @param fs   filesystem for default output dir
+   * @param conf configuration
+   * @return output path
+   */
+  public static Path getTmpRestoreOutputDir(FileSystem fs, Configuration conf) {
+    String tmp =
+      conf.get(HConstants.TEMPORARY_FS_DIRECTORY_KEY, fs.getHomeDirectory() + "/hbase-staging");
+    return new Path(tmp);
   }
 
   public static String getFileNameCompatibleString(TableName table) {
diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/RestoreTool.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/RestoreTool.java
index bf2aa14046d..8ca80d1301f 100644
--- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/RestoreTool.java
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/RestoreTool.java
@@ -67,18 +67,20 @@ public class RestoreTool {
   private final String[] ignoreDirs = { HConstants.RECOVERED_EDITS_DIR };
   protected Configuration conf;
   protected Path backupRootPath;
+  protected Path restoreRootDir;
   protected String backupId;
   protected FileSystem fs;
 
   // store table name and snapshot dir mapping
   private final HashMap<TableName, Path> snapshotMap = new HashMap<>();
 
-  public RestoreTool(Configuration conf, final Path backupRootPath, final String backupId)
-    throws IOException {
+  public RestoreTool(Configuration conf, final Path backupRootPath, final Path restoreRootDir,
+    final String backupId) throws IOException {
     this.conf = conf;
     this.backupRootPath = backupRootPath;
     this.backupId = backupId;
     this.fs = backupRootPath.getFileSystem(conf);
+    this.restoreRootDir = restoreRootDir;
   }
 
   /**
@@ -200,7 +202,7 @@ public class RestoreTool {
       }
 
       RestoreJob restoreService = BackupRestoreFactory.getRestoreJob(conf);
-      restoreService.run(logDirs, tableNames, newTableNames, false);
+      restoreService.run(logDirs, tableNames, restoreRootDir, newTableNames, false);
     }
   }
 
@@ -350,8 +352,8 @@ public class RestoreTool {
         RestoreJob restoreService = BackupRestoreFactory.getRestoreJob(conf);
         Path[] paths = new Path[regionPathList.size()];
         regionPathList.toArray(paths);
-        restoreService.run(paths, new TableName[] { tableName }, new TableName[] { newTableName },
-          true);
+        restoreService.run(paths, new TableName[] { tableName }, restoreRootDir,
+          new TableName[] { newTableName }, true);
 
       } catch (Exception e) {
         LOG.error(e.toString(), e);
diff --git a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestRemoteRestore.java b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestRemoteRestore.java
index ce8c6497c9e..b3a2872c709 100644
--- a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestRemoteRestore.java
+++ b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestRemoteRestore.java
@@ -17,13 +17,21 @@
  */
 package org.apache.hadoop.hbase.backup;
 
+import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HBaseClassTestRule;
 import org.apache.hadoop.hbase.HBaseTestingUtil;
+import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.backup.impl.BackupAdminImpl;
+import org.apache.hadoop.hbase.backup.mapreduce.MapReduceHFileSplitterJob;
 import org.apache.hadoop.hbase.backup.util.BackupUtils;
 import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org.junit.BeforeClass;
 import org.junit.ClassRule;
@@ -72,4 +80,51 @@ public class TestRemoteRestore extends TestBackupBase {
     TEST_UTIL.deleteTable(table1_restore);
     hba.close();
   }
+
+  /**
+   * Verify that restore jobs can be run on a standalone mapreduce cluster. Ensures hfiles output
+   * via {@link MapReduceHFileSplitterJob} exist on correct filesystem.
+   * @throws Exception if doing the backup or an operation on the tables fails
+   */
+  @Test
+  public void testFullRestoreRemoteWithAlternateRestoreOutputDir() throws Exception {
+    LOG.info("test remote full backup on a single table with alternate restore output dir");
+    String backupId =
+      backupTables(BackupType.FULL, toList(table1.getNameAsString()), BACKUP_REMOTE_ROOT_DIR);
+    LOG.info("backup complete");
+    TableName[] tableset = new TableName[] { table1 };
+    TableName[] tablemap = new TableName[] { table1_restore };
+
+    HBaseTestingUtil mrTestUtil = new HBaseTestingUtil();
+    mrTestUtil.setZkCluster(TEST_UTIL.getZkCluster());
+    mrTestUtil.startMiniDFSCluster(3);
+    mrTestUtil.startMiniMapReduceCluster();
+
+    Configuration testUtilConf = TEST_UTIL.getConnection().getConfiguration();
+    Configuration conf = new Configuration(mrTestUtil.getConfiguration());
+    conf.set(HConstants.ZOOKEEPER_ZNODE_PARENT,
+      testUtilConf.get(HConstants.ZOOKEEPER_ZNODE_PARENT));
+    conf.set(HConstants.MASTER_ADDRS_KEY, testUtilConf.get(HConstants.MASTER_ADDRS_KEY));
+
+    new BackupAdminImpl(ConnectionFactory.createConnection(conf))
+      .restore(new RestoreRequest.Builder().withBackupRootDir(BACKUP_REMOTE_ROOT_DIR)
+        .withRestoreRootDir(BACKUP_ROOT_DIR).withBackupId(backupId).withCheck(false)
+        .withFromTables(tableset).withToTables(tablemap).withOvewrite(false).build());
+
+    Path hfileOutputPath = new Path(
+      new Path(conf.get(MapReduceHFileSplitterJob.BULK_OUTPUT_CONF_KEY)).toUri().getPath());
+
+    // files exist on hbase cluster
+    FileSystem fileSystem = FileSystem.get(TEST_UTIL.getConfiguration());
+    assertTrue(fileSystem.exists(hfileOutputPath));
+
+    // files don't exist on MR cluster
+    fileSystem = FileSystem.get(conf);
+    assertFalse(fileSystem.exists(hfileOutputPath));
+
+    Admin hba = TEST_UTIL.getAdmin();
+    assertTrue(hba.tableExists(table1_restore));
+    TEST_UTIL.deleteTable(table1_restore);
+    hba.close();
+  }
 }
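
A minimal usage sketch of the new option, for context only (not part of the patch): the
connection setup, filesystem URIs, table names, and backup id below are hypothetical. It uses
the builder methods visible in the diff, including the pre-existing withOvewrite spelling on
RestoreRequest.Builder.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.RestoreRequest;
import org.apache.hadoop.hbase.backup.impl.BackupAdminImpl;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

public class RestoreToSeparateFilesystemExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    try (Connection conn = ConnectionFactory.createConnection(conf)) {
      RestoreRequest request = new RestoreRequest.Builder()
        // Filesystem holding the backup images (hypothetical URI).
        .withBackupRootDir("hdfs://backup-nn:8020/backups")
        // New in this patch: root dir for bulk-load staging output, which
        // may live on a different filesystem than the backup images.
        .withRestoreRootDir("hdfs://prod-nn:8020/tmp/restore-staging")
        .withBackupId("backup_1673998141234") // hypothetical backup id
        .withCheck(false)
        .withFromTables(new TableName[] { TableName.valueOf("t1") })
        .withToTables(new TableName[] { TableName.valueOf("t1_restore") })
        .withOvewrite(false) // spelling matches the existing builder method
        .build();
      new BackupAdminImpl(conn).restore(request);
    }
  }
}

When withRestoreRootDir is left unset, RestoreTablesClient falls back to
BackupUtils.getTmpRestoreOutputDir(), i.e. hbase.fs.tmp.dir (defaulting to
<user home>/hbase-staging) on the default filesystem, preserving the previous behavior.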