From f7e6143f4977b92ed21bd08803bc8dac71648258 Mon Sep 17 00:00:00 2001 From: Toshihiro Suzuki Date: Sat, 24 Oct 2020 23:49:14 +0900 Subject: [PATCH] HBASE-25206 Data loss can happen if a cloned table loses original split region(delete table) (#2569) Signed-off-by: Duo Zhang --- .../hbase/master/assignment/RegionStates.java | 12 +++++++ .../TransitRegionStateProcedure.java | 1 + .../procedure/DeleteTableProcedure.java | 3 +- ...romClientAfterSplittingRegionTestBase.java | 36 +++++++++++++++++++ 4 files changed, 51 insertions(+), 1 deletion(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java index 84f32fc0276..54765402fbf 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java @@ -344,6 +344,18 @@ public class RegionStates { regionNode -> !regionNode.isInState(State.SPLIT) && !regionNode.getRegionInfo().isSplit()); } + /** + * Get the regions for deleting a table. + *

+ * Here we need to return all the regions irrespective of the states in order to archive them + * all. This is because if we don't archive OFFLINE/SPLIT regions and if a snapshot or a cloned + * table references to the regions, we will lose the data of the regions. + */ + public List getRegionsOfTableForDeleting(TableName table) { + return getTableRegionStateNodes(table).stream().map(RegionStateNode::getRegionInfo) + .collect(Collectors.toList()); + } + /** * @return Return the regions of the table and filter them. */ diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java index b0a697deaa9..63bb345cffe 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java @@ -348,6 +348,7 @@ public class TransitRegionStateProcedure LOG.error( "Cannot assign replica region {} because its primary region {} does not exist.", regionNode.getRegionInfo(), defaultRI); + regionNode.unsetProcedure(this); return Flow.NO_MORE_STATE; } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java index 9cfce0ce363..80dddc7ccda 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java @@ -99,7 +99,8 @@ public class DeleteTableProcedure // TODO: Move out... in the acquireLock() LOG.debug("Waiting for RIT for {}", this); - regions = env.getAssignmentManager().getRegionStates().getRegionsOfTable(getTableName()); + regions = env.getAssignmentManager().getRegionStates() + .getRegionsOfTableForDeleting(getTableName()); assert regions != null && !regions.isEmpty() : "unexpected 0 regions"; ProcedureSyncWait.waitRegionInTransition(env, regions); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/CloneSnapshotFromClientAfterSplittingRegionTestBase.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/CloneSnapshotFromClientAfterSplittingRegionTestBase.java index 5ed100f6d29..e8c01677728 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/CloneSnapshotFromClientAfterSplittingRegionTestBase.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/CloneSnapshotFromClientAfterSplittingRegionTestBase.java @@ -80,4 +80,40 @@ public class CloneSnapshotFromClientAfterSplittingRegionTestBase admin.catalogJanitorSwitch(true); } } + + @Test + public void testCloneSnapshotBeforeSplittingRegionAndDroppingTable() + throws IOException, InterruptedException { + // Turn off the CatalogJanitor + admin.catalogJanitorSwitch(false); + + try { + // Take a snapshot + admin.snapshot(snapshotName2, tableName); + + // Clone the snapshot to another table + TableName clonedTableName = + TableName.valueOf(getValidMethodName() + "-" + System.currentTimeMillis()); + admin.cloneSnapshot(snapshotName2, clonedTableName); + SnapshotTestingUtils.waitForTableToBeOnline(TEST_UTIL, clonedTableName); + + // Split the first region of the original table + List regionInfos = admin.getRegions(tableName); + RegionReplicaUtil.removeNonDefaultRegions(regionInfos); + splitRegion(regionInfos.get(0)); + + // Drop the original table + admin.disableTable(tableName); + admin.deleteTable(tableName); + + // Disable and enable the cloned table. This should be successful + admin.disableTable(clonedTableName); + admin.enableTable(clonedTableName); + SnapshotTestingUtils.waitForTableToBeOnline(TEST_UTIL, clonedTableName); + + verifyRowCount(TEST_UTIL, clonedTableName, snapshot1Rows); + } finally { + admin.catalogJanitorSwitch(true); + } + } }