From eacbe002e51ed9ebac53313a9bdb9392a5b7c4fd Mon Sep 17 00:00:00 2001 From: Pankaj Kumar Date: Mon, 9 Apr 2018 07:48:34 -0700 Subject: [PATCH] HBASE-19343 Restore snapshot makes split parent region online --- .../hbase/snapshot/RestoreSnapshotHelper.java | 4 +- .../client/TestRestoreSnapshotFromClient.java | 68 ++++++++++++++++++- 2 files changed, 69 insertions(+), 3 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/RestoreSnapshotHelper.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/RestoreSnapshotHelper.java index fb535dec536..4ff89857d2c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/RestoreSnapshotHelper.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/RestoreSnapshotHelper.java @@ -203,7 +203,9 @@ public class RestoreSnapshotHelper { if (regionNames.contains(regionName)) { LOG.info("region to restore: " + regionName); regionNames.remove(regionName); - metaChanges.addRegionToRestore(regionInfo); + // Add the regionInfo from snapshot manifest, so that will not miss parent region details + metaChanges.addRegionToRestore( + HRegionInfo.convert(regionManifests.get(regionName).getRegionInfo())); } else { LOG.info("region to remove: " + regionName); metaChanges.addRegionToRemove(regionInfo); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestRestoreSnapshotFromClient.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestRestoreSnapshotFromClient.java index 6f566096761..d5ef6e2e5c3 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestRestoreSnapshotFromClient.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestRestoreSnapshotFromClient.java @@ -35,6 +35,7 @@ import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.MetaTableAccessor; import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver; import org.apache.hadoop.hbase.coprocessor.ObserverContext; import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment; @@ -43,10 +44,14 @@ import org.apache.hadoop.hbase.regionserver.ScanType; import org.apache.hadoop.hbase.regionserver.Store; import org.apache.hadoop.hbase.testclassification.LargeTests; import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.Waiter; +import org.apache.hadoop.hbase.master.AssignmentManager; import org.apache.hadoop.hbase.master.MasterFileSystem; +import org.apache.hadoop.hbase.master.RegionState; import org.apache.hadoop.hbase.master.snapshot.SnapshotManager; import org.apache.hadoop.hbase.regionserver.NoSuchColumnFamilyException; import org.apache.hadoop.hbase.snapshot.CorruptedSnapshotException; +import org.apache.hadoop.hbase.snapshot.SnapshotDoesNotExistException; import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.FSUtils; @@ -90,8 +95,9 @@ public class TestRestoreSnapshotFromClient { TEST_UTIL.getConfiguration().setInt("hbase.regionserver.msginterval", 100); TEST_UTIL.getConfiguration().setInt("hbase.client.pause", 250); TEST_UTIL.getConfiguration().setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 6); - TEST_UTIL.getConfiguration().setBoolean( - "hbase.master.enabletable.roundrobin", true); + TEST_UTIL.getConfiguration().setBoolean("hbase.master.enabletable.roundrobin", true); + // Setting bigger value to avoid catalog janitor execution (parent region cleanup) + TEST_UTIL.getConfiguration().setLong("hbase.catalogjanitor.interval", 1800000); TEST_UTIL.startMiniCluster(3); } @@ -182,6 +188,64 @@ public class TestRestoreSnapshotFromClient { SnapshotTestingUtils.verifyReplicasCameOnline(tableName, admin, getNumReplicas()); } + @Test(timeout = 300000) + public void testRestoreSnapshotAfterSplit() throws Exception { + Admin admin = null; + try { + admin = TEST_UTIL.getHBaseAdmin(); + final int regionReplication = admin.getTableDescriptor(tableName).getRegionReplication(); + // Region count before split + final int primaryRegionCountBeforeSplit = MetaTableAccessor + .getTableRegions(TEST_UTIL.getZooKeeperWatcher(), TEST_UTIL.getConnection(), tableName) + .size() / regionReplication; + + admin.split(tableName, "m".getBytes()); + final AssignmentManager am = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager(); + // Wait for replica region to become online + TEST_UTIL.waitFor(60000, 500, new Waiter.Predicate() { + @Override + public boolean evaluate() throws IOException { + return am.getRegionStates().getRegionByStateOfTable(tableName).get(RegionState.State.OPEN) + .size() == ((primaryRegionCountBeforeSplit + 1) * regionReplication); + } + }); + + int regionCountAfterSplit = MetaTableAccessor + .getTableRegions(TEST_UTIL.getZooKeeperWatcher(), TEST_UTIL.getConnection(), tableName) + .size() / regionReplication; + // regionCountAfterSplit will contain parent region, so primaryregionCountBeforeSplit + 2 + assertEquals(primaryRegionCountBeforeSplit + 2, regionCountAfterSplit); + + String snapshotName = "testRestoreSnapshotAfterSplit-snap"; + // Create snapshot after table split + admin.snapshot(snapshotName, tableName); + assertEquals(1, + admin.listTableSnapshots("testtb-.*", "testRestoreSnapshotAfterSplit-snap*").size()); + // Restore snapshot + admin.disableTable(tableName); + admin.restoreSnapshot(snapshotName); + + int regionCountAfterRestoreSnapshot = MetaTableAccessor + .getTableRegions(TEST_UTIL.getZooKeeperWatcher(), TEST_UTIL.getConnection(), tableName) + .size(); + assertEquals(primaryRegionCountBeforeSplit + 2, regionCountAfterRestoreSnapshot); + + // Enable the table + admin.enableTable(tableName); + assertEquals((primaryRegionCountBeforeSplit + 1) * regionReplication, + am.getRegionStates().getRegionByStateOfTable(tableName).get(RegionState.State.OPEN).size()); + + } finally { + if (admin != null) { + try { + admin.deleteTableSnapshots("testtb-.*", "testRestoreSnapshotAfterSplit-snap*"); + } catch (SnapshotDoesNotExistException ignore) { + } + admin.close(); + } + } + } + protected int getNumReplicas() { return 1; }