diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java index 2b8c5210622..f7cfc4cc047 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java @@ -1661,6 +1661,19 @@ public class AssignmentManager extends ZooKeeperListener { regionOffline(regionInfo, null); } + /** + * Marks the region as offline and, when {@code force} is set, also removes it from the + * replicas and from the master in-memory server holding map. + *

+ * @param regionInfo - the region to mark offline. + * @param force - set to true to force this region to be removed from replicas and the master + * in-memory server holding map, so that this region is not re-opened on any other region + * server. The only use case is hbck for now. + */ + public void regionOffline(final HRegionInfo regionInfo, boolean force) { + regionOffline(regionInfo, null, force); + } + public void offlineDisabledRegion(HRegionInfo regionInfo) { if (useZKForAssignment) { // Disabling so should not be reassigned, just delete the CLOSED node @@ -4551,13 +4564,20 @@ public class AssignmentManager extends ZooKeeperListener { public Map getFailedOpenTracker() {return failedOpenTracker;} + private void regionOffline(final HRegionInfo regionInfo, final State state) { + regionOffline(regionInfo, state, false); + } + /** * A region is offline. The new state should be the specified one, * if not null. If the specified state is null, the new state is Offline. * The specified state can be Split/Merged/Offline/null only. + * + * If the region offline is initiated by an RPC call from admin, we force it offline. + */ - private void regionOffline(final HRegionInfo regionInfo, final State state) { - regionStates.regionOffline(regionInfo, state); + private void regionOffline(final HRegionInfo regionInfo, final State state, + final boolean force) { + regionStates.regionOffline(regionInfo, state, force); removeClosedRegion(regionInfo); // remove the region plan as well just in case. 
clearRegionPlan(regionInfo); @@ -4566,7 +4586,7 @@ public class AssignmentManager extends ZooKeeperListener { // Tell our listeners that a region was closed sendRegionClosedNotification(regionInfo); // also note that all the replicas of the primary should be closed - if (state != null && state.equals(State.SPLIT)) { + if (force || (state != null && state.equals(State.SPLIT))) { Collection c = new ArrayList(1); c.add(regionInfo); Map> map = regionStates.getRegionAssignments(c); @@ -4575,7 +4595,7 @@ public class AssignmentManager extends ZooKeeperListener { replicasToClose.addAll(list); } } - else if (state != null && state.equals(State.MERGED)) { + else if (force || (state != null && state.equals(State.MERGED))) { /* NOTE(review): 'force ||' looks redundant here - the preceding if already matches whenever force is true; confirm */ Collection c = new ArrayList(1); c.add(regionInfo); Map> map = regionStates.getRegionAssignments(c); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java index 0b82613f082..e87f664b507 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java @@ -1401,7 +1401,7 @@ public class MasterRpcServices extends RSRpcServices master.cpHost.preRegionOffline(hri); } LOG.info(master.getClientIdAuditPrefix() + " offline " + hri.getRegionNameAsString()); - master.assignmentManager.regionOffline(hri); + master.assignmentManager.regionOffline(hri, true); if (master.cpHost != null) { master.cpHost.postRegionOffline(hri); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java index 0d7904b73ff..4fc4c58ceea 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java @@ -669,7 +669,7 @@ public class RegionStates 
{ * A region is offline, won't be in transition any more. */ public void regionOffline(final HRegionInfo hri) { - regionOffline(hri, null); + regionOffline(hri, null, false); } /** @@ -678,7 +678,7 @@ public class RegionStates { * Split/Merged/Offline/null(=Offline)/SplittingNew/MergingNew. */ public void regionOffline( - final HRegionInfo hri, final State expectedState) { + final HRegionInfo hri, final State expectedState, final boolean force) { Preconditions.checkArgument(expectedState == null || RegionState.isUnassignable(expectedState), "Offlined region should not be " + expectedState); @@ -713,9 +713,9 @@ public class RegionStates { regionsInTransition.remove(encodedName); ServerName oldServerName = regionAssignments.remove(hri); if (oldServerName != null && serverHoldings.containsKey(oldServerName)) { - if (newState == State.MERGED || newState == State.SPLIT + if (force || (newState == State.MERGED || newState == State.SPLIT || hri.isMetaRegion() || tableStateManager.isTableState(hri.getTable(), - TableState.State.DISABLED, TableState.State.DISABLING)) { + TableState.State.DISABLED, TableState.State.DISABLING))) { // Offline the region only if it's merged/split, or the table is disabled/disabling. // Otherwise, offline it from this server only when it is online on a different server. 
LOG.info("Offlined " + hri.getShortNameToLog() + " from " + oldServerName); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java index 7b6a4b367cb..28d355693e0 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java @@ -882,6 +882,76 @@ public class TestHBaseFsck { assertNoErrors(hbck2); assertEquals(0, hbck2.getOverlapGroups(table).size()); assertEquals(ROWKEYS.length, countRows()); + + MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); + long totalRegions = cluster.countServedRegions(); + + // stop a region server and run fsck again + cluster.stopRegionServer(server); + cluster.waitForRegionServerToStop(server, 60); + + // wait for all regions to come online. + while (cluster.countServedRegions() < totalRegions) { + Thread.sleep(100); + } + + // check again after stopping a region server. + HBaseFsck hbck3 = doFsck(conf,false); + assertNoErrors(hbck3); + } finally { + cleanupTable(table); + } + } + + /** + * This creates and then fixes a bad table that has overlapping regions. 
+ */ + @Test(timeout=180000) + public void testOverlapRegions() throws Exception { + MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); + TableName table = + TableName.valueOf("tableOverlapRegions"); + HRegionInfo hri; + ServerName server; + try { + setupTable(table); + assertNoErrors(doFsck(conf, false)); + assertEquals(ROWKEYS.length, countRows()); + + // Now let's mess it up, by adding a region which overlaps with others + hri = createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B2")); + TEST_UTIL.assignRegion(hri); + server = regionStates.getRegionServerOfRegion(hri); + TEST_UTIL.assertRegionOnServer(hri, server, REGION_ONLINE_TIMEOUT); + + HBaseFsck hbck = doFsck(conf, false); + assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.OVERLAP_IN_REGION_CHAIN, + ERROR_CODE.OVERLAP_IN_REGION_CHAIN }); + assertEquals(3, hbck.getOverlapGroups(table).size()); + assertEquals(ROWKEYS.length, countRows()); + + // fix the overlap regions. + doFsck(conf, true); + + // check that the overlap regions are gone and no data loss + HBaseFsck hbck2 = doFsck(conf,false); + assertNoErrors(hbck2); + assertEquals(0, hbck2.getOverlapGroups(table).size()); + assertEquals(ROWKEYS.length, countRows()); + + long totalRegions = cluster.countServedRegions(); + + // stop a region server and run fsck again + cluster.stopRegionServer(server); + cluster.waitForRegionServerToStop(server, 60); + + // wait for all regions to come online. + while (cluster.countServedRegions() < totalRegions) { + Thread.sleep(100); + } + + HBaseFsck hbck3 = doFsck(conf,false); + assertNoErrors(hbck3); } finally { cleanupTable(table); }