HBASE-25206 Data loss can happen if a cloned table loses original split region(delete table) (#2569)

Signed-off-by: Duo Zhang <zhangduo@apache.org>
This commit is contained in:
Toshihiro Suzuki 2020-10-24 23:49:14 +09:00 committed by Duo Zhang
parent 3862dfc930
commit f7e6143f49
4 changed files with 51 additions and 1 deletions

View File

@ -344,6 +344,18 @@ public class RegionStates {
regionNode -> !regionNode.isInState(State.SPLIT) && !regionNode.getRegionInfo().isSplit());
}
/**
 * Collects every region of the given table so that a table deletion can archive all of them.
 * <p>
 * No state filtering is applied here: regions are returned irrespective of their state,
 * OFFLINE and SPLIT regions included. If those regions were left unarchived while a snapshot
 * or a cloned table still references them, the data of those regions would be lost.
 * @param table the table that is being deleted
 * @return the {@link RegionInfo} of every region state node found for {@code table}
 */
public List<RegionInfo> getRegionsOfTableForDeleting(TableName table) {
  List<RegionInfo> allRegions = getTableRegionStateNodes(table).stream()
    .map(node -> node.getRegionInfo())
    .collect(Collectors.toList());
  return allRegions;
}
/**
* @return the regions of the table, with filtering applied.
*/

View File

@ -348,6 +348,7 @@ public class TransitRegionStateProcedure
LOG.error(
"Cannot assign replica region {} because its primary region {} does not exist.",
regionNode.getRegionInfo(), defaultRI);
regionNode.unsetProcedure(this);
return Flow.NO_MORE_STATE;
}
}

View File

@ -99,7 +99,8 @@ public class DeleteTableProcedure
// TODO: Move out... in the acquireLock()
LOG.debug("Waiting for RIT for {}", this);
regions = env.getAssignmentManager().getRegionStates().getRegionsOfTable(getTableName());
regions = env.getAssignmentManager().getRegionStates()
.getRegionsOfTableForDeleting(getTableName());
assert regions != null && !regions.isEmpty() : "unexpected 0 regions";
ProcedureSyncWait.waitRegionInTransition(env, regions);

View File

@ -80,4 +80,40 @@ public class CloneSnapshotFromClientAfterSplittingRegionTestBase
admin.catalogJanitorSwitch(true);
}
}
/**
 * Clones a snapshot of the original table, then splits a region of the original table and
 * deletes that table. The clone must still be able to go through a disable/enable cycle and
 * must still contain the expected number of rows afterwards.
 */
@Test
public void testCloneSnapshotBeforeSplittingRegionAndDroppingTable()
  throws IOException, InterruptedException {
  // Keep the CatalogJanitor switched off while the test runs; it is re-enabled in finally.
  admin.catalogJanitorSwitch(false);
  try {
    // Snapshot the original table first, i.e. before any split happens.
    admin.snapshot(snapshotName2, tableName);

    // Clone that snapshot into a table with a unique, timestamp-suffixed name.
    String cloneName = getValidMethodName() + "-" + System.currentTimeMillis();
    TableName clonedTable = TableName.valueOf(cloneName);
    admin.cloneSnapshot(snapshotName2, clonedTable);
    SnapshotTestingUtils.waitForTableToBeOnline(TEST_UTIL, clonedTable);

    // Split the first region of the original table, ignoring non-default regions.
    List<RegionInfo> originalRegions = admin.getRegions(tableName);
    RegionReplicaUtil.removeNonDefaultRegions(originalRegions);
    RegionInfo regionToSplit = originalRegions.get(0);
    splitRegion(regionToSplit);

    // Delete the original table (it has to be disabled first).
    admin.disableTable(tableName);
    admin.deleteTable(tableName);

    // Cycle the cloned table through disable/enable; this is expected to succeed.
    admin.disableTable(clonedTable);
    admin.enableTable(clonedTable);
    SnapshotTestingUtils.waitForTableToBeOnline(TEST_UTIL, clonedTable);

    // The clone must still hold the expected number of rows.
    verifyRowCount(TEST_UTIL, clonedTable, snapshot1Rows);
  } finally {
    admin.catalogJanitorSwitch(true);
  }
}
}