HBASE-25206 Data loss can happen if a cloned table loses the original split region (when the original table is deleted) (#2569)
Signed-off-by: Duo Zhang <zhangduo@apache.org>
This commit is contained in:
parent
5e3ffb1db6
commit
881c92b892
|
@ -362,6 +362,18 @@ public class RegionStates {
|
|||
regionNode -> !regionNode.isInState(State.SPLIT) && !regionNode.getRegionInfo().isSplit());
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the regions for deleting a table.
|
||||
* <p/>
|
||||
* Here we need to return all the regions irrespective of the states in order to archive them
|
||||
* all. This is because if we don't archive OFFLINE/SPLIT regions and if a snapshot or a cloned
|
||||
* table references to the regions, we will lose the data of the regions.
|
||||
*/
|
||||
public List<RegionInfo> getRegionsOfTableForDeleting(TableName table) {
|
||||
return getTableRegionStateNodes(table).stream().map(RegionStateNode::getRegionInfo)
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Return the regions of the table and filter them.
|
||||
*/
|
||||
|
|
|
@ -348,6 +348,7 @@ public class TransitRegionStateProcedure
|
|||
LOG.error(
|
||||
"Cannot assign replica region {} because its primary region {} does not exist.",
|
||||
regionNode.getRegionInfo(), defaultRI);
|
||||
regionNode.unsetProcedure(this);
|
||||
return Flow.NO_MORE_STATE;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -99,7 +99,8 @@ public class DeleteTableProcedure
|
|||
|
||||
// TODO: Move out... in the acquireLock()
|
||||
LOG.debug("Waiting for RIT for {}", this);
|
||||
regions = env.getAssignmentManager().getRegionStates().getRegionsOfTable(getTableName());
|
||||
regions = env.getAssignmentManager().getRegionStates()
|
||||
.getRegionsOfTableForDeleting(getTableName());
|
||||
assert regions != null && !regions.isEmpty() : "unexpected 0 regions";
|
||||
ProcedureSyncWait.waitRegionInTransition(env, regions);
|
||||
|
||||
|
|
|
@ -80,4 +80,40 @@ public class CloneSnapshotFromClientAfterSplittingRegionTestBase
|
|||
admin.catalogJanitorSwitch(true);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCloneSnapshotBeforeSplittingRegionAndDroppingTable()
|
||||
throws IOException, InterruptedException {
|
||||
// Turn off the CatalogJanitor
|
||||
admin.catalogJanitorSwitch(false);
|
||||
|
||||
try {
|
||||
// Take a snapshot
|
||||
admin.snapshot(snapshotName2, tableName);
|
||||
|
||||
// Clone the snapshot to another table
|
||||
TableName clonedTableName =
|
||||
TableName.valueOf(getValidMethodName() + "-" + System.currentTimeMillis());
|
||||
admin.cloneSnapshot(snapshotName2, clonedTableName);
|
||||
SnapshotTestingUtils.waitForTableToBeOnline(TEST_UTIL, clonedTableName);
|
||||
|
||||
// Split the first region of the original table
|
||||
List<RegionInfo> regionInfos = admin.getRegions(tableName);
|
||||
RegionReplicaUtil.removeNonDefaultRegions(regionInfos);
|
||||
splitRegion(regionInfos.get(0));
|
||||
|
||||
// Drop the original table
|
||||
admin.disableTable(tableName);
|
||||
admin.deleteTable(tableName);
|
||||
|
||||
// Disable and enable the cloned table. This should be successful
|
||||
admin.disableTable(clonedTableName);
|
||||
admin.enableTable(clonedTableName);
|
||||
SnapshotTestingUtils.waitForTableToBeOnline(TEST_UTIL, clonedTableName);
|
||||
|
||||
verifyRowCount(TEST_UTIL, clonedTableName, snapshot1Rows);
|
||||
} finally {
|
||||
admin.catalogJanitorSwitch(true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue