diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
index 2b8c5210622..f7cfc4cc047 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
@@ -1661,6 +1661,19 @@ public class AssignmentManager extends ZooKeeperListener {
regionOffline(regionInfo, null);
}
+ /**
+ * Marks the region as offline and, when {@code force} is set, additionally removes it from
+ * the replicas and the master's in-memory server holdings map.
+ *
+ * @param regionInfo - region info.
+ * @param force - set to true to force this region to be removed from the replicas and the
+ * master's in-memory server holdings map, so that the region is not re-opened on any other
+ * region server. The only use case is hbck for now.
+ */
+ public void regionOffline(final HRegionInfo regionInfo, boolean force) {
+ regionOffline(regionInfo, null, force);
+ }
+
public void offlineDisabledRegion(HRegionInfo regionInfo) {
if (useZKForAssignment) {
// Disabling so should not be reassigned, just delete the CLOSED node
@@ -4551,13 +4564,20 @@ public class AssignmentManager extends ZooKeeperListener {
public Map getFailedOpenTracker() {return failedOpenTracker;}
+ private void regionOffline(final HRegionInfo regionInfo, final State state) {
+ regionOffline(regionInfo, state, false);
+ }
+
/**
* A region is offline. The new state should be the specified one,
* if not null. If the specified state is null, the new state is Offline.
* The specified state can be Split/Merged/Offline/null only.
+ *
+ * If the region offline is initiated by an RPC call from admin, the region is force-offlined.
*/
- private void regionOffline(final HRegionInfo regionInfo, final State state) {
- regionStates.regionOffline(regionInfo, state);
+ private void regionOffline(final HRegionInfo regionInfo, final State state,
+ final boolean force) {
+ regionStates.regionOffline(regionInfo, state, force);
removeClosedRegion(regionInfo);
// remove the region plan as well just in case.
clearRegionPlan(regionInfo);
@@ -4566,7 +4586,7 @@ public class AssignmentManager extends ZooKeeperListener {
// Tell our listeners that a region was closed
sendRegionClosedNotification(regionInfo);
// also note that all the replicas of the primary should be closed
- if (state != null && state.equals(State.SPLIT)) {
+ if (force || (state != null && state.equals(State.SPLIT))) {
Collection c = new ArrayList(1);
c.add(regionInfo);
Map> map = regionStates.getRegionAssignments(c);
@@ -4575,7 +4595,7 @@ public class AssignmentManager extends ZooKeeperListener {
replicasToClose.addAll(list);
}
}
- else if (state != null && state.equals(State.MERGED)) {
+ else if (force || (state != null && state.equals(State.MERGED))) {
Collection c = new ArrayList(1);
c.add(regionInfo);
Map> map = regionStates.getRegionAssignments(c);
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
index 0b82613f082..e87f664b507 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
@@ -1401,7 +1401,7 @@ public class MasterRpcServices extends RSRpcServices
master.cpHost.preRegionOffline(hri);
}
LOG.info(master.getClientIdAuditPrefix() + " offline " + hri.getRegionNameAsString());
- master.assignmentManager.regionOffline(hri);
+ master.assignmentManager.regionOffline(hri, true);
if (master.cpHost != null) {
master.cpHost.postRegionOffline(hri);
}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
index 0d7904b73ff..4fc4c58ceea 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
@@ -669,7 +669,7 @@ public class RegionStates {
* A region is offline, won't be in transition any more.
*/
public void regionOffline(final HRegionInfo hri) {
- regionOffline(hri, null);
+ regionOffline(hri, null, false);
}
/**
@@ -678,7 +678,7 @@ public class RegionStates {
* Split/Merged/Offline/null(=Offline)/SplittingNew/MergingNew.
*/
public void regionOffline(
- final HRegionInfo hri, final State expectedState) {
+ final HRegionInfo hri, final State expectedState, final boolean force) {
Preconditions.checkArgument(expectedState == null
|| RegionState.isUnassignable(expectedState),
"Offlined region should not be " + expectedState);
@@ -713,9 +713,9 @@ public class RegionStates {
regionsInTransition.remove(encodedName);
ServerName oldServerName = regionAssignments.remove(hri);
if (oldServerName != null && serverHoldings.containsKey(oldServerName)) {
- if (newState == State.MERGED || newState == State.SPLIT
+ if (force || (newState == State.MERGED || newState == State.SPLIT
|| hri.isMetaRegion() || tableStateManager.isTableState(hri.getTable(),
- TableState.State.DISABLED, TableState.State.DISABLING)) {
+ TableState.State.DISABLED, TableState.State.DISABLING))) {
// Offline the region only if it's merged/split, or the table is disabled/disabling.
// Otherwise, offline it from this server only when it is online on a different server.
LOG.info("Offlined " + hri.getShortNameToLog() + " from " + oldServerName);
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
index 7b6a4b367cb..28d355693e0 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
@@ -882,6 +882,76 @@ public class TestHBaseFsck {
assertNoErrors(hbck2);
assertEquals(0, hbck2.getOverlapGroups(table).size());
assertEquals(ROWKEYS.length, countRows());
+
+ MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
+ long totalRegions = cluster.countServedRegions();
+
+ // stop a region server and run fsck again
+ cluster.stopRegionServer(server);
+ cluster.waitForRegionServerToStop(server, 60);
+
+ // wait for all regions to come online.
+ while (cluster.countServedRegions() < totalRegions) {
+ Thread.sleep(100);
+ }
+
+ // check again after stopping a region server.
+ HBaseFsck hbck3 = doFsck(conf,false);
+ assertNoErrors(hbck3);
+ } finally {
+ cleanupTable(table);
+ }
+ }
+
+ /**
+ * This creates and fixes a bad table that has overlapping regions.
+ */
+ @Test(timeout=180000)
+ public void testOverlapRegions() throws Exception {
+ MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
+ TableName table =
+ TableName.valueOf("tableOverlapRegions");
+ HRegionInfo hri;
+ ServerName server;
+ try {
+ setupTable(table);
+ assertNoErrors(doFsck(conf, false));
+ assertEquals(ROWKEYS.length, countRows());
+
+ // Now let's mess it up, by adding a region which overlaps with others
+ hri = createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B2"));
+ TEST_UTIL.assignRegion(hri);
+ server = regionStates.getRegionServerOfRegion(hri);
+ TEST_UTIL.assertRegionOnServer(hri, server, REGION_ONLINE_TIMEOUT);
+
+ HBaseFsck hbck = doFsck(conf, false);
+ assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
+ ERROR_CODE.OVERLAP_IN_REGION_CHAIN });
+ assertEquals(3, hbck.getOverlapGroups(table).size());
+ assertEquals(ROWKEYS.length, countRows());
+
+ // fix the overlap regions.
+ doFsck(conf, true);
+
+ // check that the overlap regions are gone and no data loss
+ HBaseFsck hbck2 = doFsck(conf,false);
+ assertNoErrors(hbck2);
+ assertEquals(0, hbck2.getOverlapGroups(table).size());
+ assertEquals(ROWKEYS.length, countRows());
+
+ long totalRegions = cluster.countServedRegions();
+
+ // stop a region server and run fsck again
+ cluster.stopRegionServer(server);
+ cluster.waitForRegionServerToStop(server, 60);
+
+ // wait for all regions to come online.
+ while (cluster.countServedRegions() < totalRegions) {
+ Thread.sleep(100);
+ }
+
+ HBaseFsck hbck3 = doFsck(conf,false);
+ assertNoErrors(hbck3);
} finally {
cleanupTable(table);
}