diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/master/RegionState.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/master/RegionState.java index 62b4e9cfbfb..517b1324f12 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/master/RegionState.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/master/RegionState.java @@ -126,6 +126,10 @@ public class RegionState implements org.apache.hadoop.io.Writable { return state == State.SPLITTING; } + public boolean isSplit() { + return state == State.SPLIT; + } + public boolean isFailedOpen() { return state == State.FAILED_OPEN; } @@ -138,6 +142,10 @@ public class RegionState implements org.apache.hadoop.io.Writable { return state == State.MERGING; } + public boolean isMerged() { + return state == State.MERGED; + } + public boolean isOpenOrMergingOnServer(final ServerName sn) { return isOnServer(sn) && (isOpened() || isMerging()); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java index 51c0709a437..9fe9da606c7 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java @@ -62,6 +62,7 @@ import org.apache.hadoop.hbase.exceptions.TableNotFoundException; import org.apache.hadoop.hbase.executor.EventHandler; import org.apache.hadoop.hbase.executor.EventType; import org.apache.hadoop.hbase.executor.ExecutorService; +import org.apache.hadoop.hbase.master.RegionState.State; import org.apache.hadoop.hbase.master.balancer.FavoredNodeAssignmentHelper; import org.apache.hadoop.hbase.master.balancer.FavoredNodeLoadBalancer; import org.apache.hadoop.hbase.master.handler.ClosedRegionHandler; @@ -1415,10 +1416,7 @@ public class AssignmentManager extends ZooKeeperListener { * @param regionInfo */ public void regionOffline(final HRegionInfo regionInfo) { - regionStates.regionOffline(regionInfo); - removeClosedRegion(regionInfo); - // remove the region plan as well just in case. - clearRegionPlan(regionInfo); + regionOffline(regionInfo, null); } public void offlineDisabledRegion(HRegionInfo regionInfo) { @@ -2355,7 +2353,7 @@ public class AssignmentManager extends ZooKeeperListener { public boolean waitForAssignment(HRegionInfo regionInfo) throws InterruptedException { while (!regionStates.isRegionAssigned(regionInfo)) { - if (regionStates.isRegionFailedToOpen(regionInfo) + if (regionStates.isRegionInState(regionInfo, State.FAILED_OPEN) || this.server.isStopped()) { return false; } @@ -3102,7 +3100,7 @@ public class AssignmentManager extends ZooKeeperListener { */ public void handleSplitReport(final ServerName sn, final HRegionInfo parent, final HRegionInfo a, final HRegionInfo b) { - regionOffline(parent); + regionOffline(parent, State.SPLIT); regionOnline(a, sn); regionOnline(b, sn); @@ -3126,8 +3124,8 @@ public class AssignmentManager extends ZooKeeperListener { */ public void handleRegionsMergeReport(final ServerName sn, final HRegionInfo merged, final HRegionInfo a, final HRegionInfo b) { - regionOffline(a); - regionOffline(b); + regionOffline(a, State.MERGED); + regionOffline(b, State.MERGED); regionOnline(merged, sn); // There's a possibility that the region was merging while a user asked @@ -3238,4 +3236,16 @@ public class AssignmentManager extends ZooKeeperListener { regionStates.updateRegionState(merging_b, RegionState.State.MERGING); return true; } + + /** + * A region is offline. The new state should be the specified one, + * if not null. If the specified state is null, the new state is Offline. + * The specified state can be Split/Merged/Offline/null only. + */ + private void regionOffline(final HRegionInfo regionInfo, final State state) { + regionStates.regionOffline(regionInfo, state); + removeClosedRegion(regionInfo); + // remove the region plan as well just in case. + clearRegionPlan(regionInfo); + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java index 258ea441b42..d7ed0644b66 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java @@ -42,6 +42,8 @@ import org.apache.hadoop.hbase.zookeeper.ZKAssign; import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; import org.apache.zookeeper.KeeperException; +import com.google.common.base.Preconditions; + /** * Region state accountant. It holds the states of all regions in the memory. * In normal scenario, it should match the meta table and the true region states. @@ -136,21 +138,13 @@ public class RegionStates { } /** - * @return True if specified region failed to open. + * @return True if specified region is in specified state */ - public synchronized boolean isRegionFailedToOpen(final HRegionInfo hri) { - RegionState regionState = getRegionTransitionState(hri); - State state = regionState != null ? regionState.getState() : null; - return state == State.FAILED_OPEN; - } - - /** - * @return True if specified region failed to close. - */ - public synchronized boolean isRegionFailedToClose(final HRegionInfo hri) { - RegionState regionState = getRegionTransitionState(hri); - State state = regionState != null ? regionState.getState() : null; - return state == State.FAILED_CLOSE; + public synchronized boolean isRegionInState( + final HRegionInfo hri, final State state) { + RegionState regionState = getRegionState(hri); + State s = regionState != null ? regionState.getState() : null; + return s == state; } /** @@ -315,20 +309,42 @@ public class RegionStates { /** * A region is offline, won't be in transition any more. */ - public synchronized void regionOffline(final HRegionInfo hri) { + public void regionOffline(final HRegionInfo hri) { + regionOffline(hri, null); + } + + /** + * A region is offline, won't be in transition any more. + * Its state should be the specified expected state, which + * can be Split/Merged/Offline/null(=Offline) only. + */ + public synchronized void regionOffline( + final HRegionInfo hri, final State expectedState) { + Preconditions.checkArgument(expectedState == null + || expectedState == State.OFFLINE || expectedState == State.SPLIT + || expectedState == State.MERGED, "Offlined region should be in state" + + " OFFLINE/SPLIT/MERGED instead of " + expectedState); String regionName = hri.getEncodedName(); RegionState oldState = regionStates.get(regionName); if (oldState == null) { LOG.warn("Offline a region not in RegionStates: " + hri.getShortNameToLog()); - } else { + } else if (LOG.isDebugEnabled()) { State state = oldState.getState(); ServerName sn = oldState.getServerName(); - if (state != State.OFFLINE || sn != null) { - LOG.debug("Offline a region " + hri.getShortNameToLog() + " with current state=" + state + - ", expected state=OFFLINE" + ", assigned to server: " + sn + ", expected null"); + if (state != State.OFFLINE + && state != State.SPLITTING && state != State.MERGING) { + LOG.debug("Offline a region " + hri.getShortNameToLog() + " with current state=" + + state + ", expected state=OFFLINE/SPLITTING/MERGING"); + } + if (sn != null && state == State.OFFLINE) { + LOG.debug("Offline a region " + hri.getShortNameToLog() + + " with current state=OFFLINE, assigned to server: " + + sn + ", expected null"); } } - updateRegionState(hri, State.OFFLINE); + State newState = expectedState; + if (newState == null) newState = State.OFFLINE; + updateRegionState(hri, newState); regionsInTransition.remove(regionName); ServerName oldServerName = regionAssignments.remove(hri); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DeleteTableHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DeleteTableHandler.java index 5c64ed3a00d..8bebfcbada8 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DeleteTableHandler.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DeleteTableHandler.java @@ -37,6 +37,7 @@ import org.apache.hadoop.hbase.master.MasterCoprocessorHost; import org.apache.hadoop.hbase.master.MasterFileSystem; import org.apache.hadoop.hbase.master.MasterServices; import org.apache.hadoop.hbase.master.RegionStates; +import org.apache.hadoop.hbase.master.RegionState.State; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Threads; import org.apache.zookeeper.KeeperException; @@ -73,7 +74,7 @@ public class DeleteTableHandler extends TableEventHandler { for (HRegionInfo region : regions) { long done = System.currentTimeMillis() + waitTime; while (System.currentTimeMillis() < done) { - if (states.isRegionFailedToOpen(region)) { + if (states.isRegionInState(region, State.FAILED_OPEN)) { am.regionOffline(region); } if (!states.isRegionInTransition(region)) break; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DisableTableHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DisableTableHandler.java index cecafbc9bf4..94aab98a7b0 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DisableTableHandler.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DisableTableHandler.java @@ -39,6 +39,7 @@ import org.apache.hadoop.hbase.master.HMaster; import org.apache.hadoop.hbase.master.MasterCoprocessorHost; import org.apache.hadoop.hbase.master.RegionStates; import org.apache.hadoop.hbase.master.TableLockManager; +import org.apache.hadoop.hbase.master.RegionState.State; import org.apache.hadoop.hbase.master.TableLockManager.TableLock; import org.apache.hadoop.hbase.util.Bytes; import org.apache.zookeeper.KeeperException; @@ -203,7 +204,9 @@ public class DisableTableHandler extends EventHandler { RegionStates regionStates = assignmentManager.getRegionStates(); for (HRegionInfo region: regions) { if (regionStates.isRegionInTransition(region) - && !regionStates.isRegionFailedToClose(region)) continue; + && !regionStates.isRegionInState(region, State.FAILED_CLOSE)) { + continue; + } final HRegionInfo hri = region; pool.execute(Trace.wrap(new Runnable() { public void run() { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java index cd778539d87..532f1220f4d 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java @@ -380,10 +380,12 @@ public class TestAssignmentManager { RegionPlan plan = new RegionPlan(REGIONINFO, SERVERNAME_A, SERVERNAME_B); am.balance(plan); + RegionStates regionStates = am.getRegionStates(); // Must be failed to close since the server is fake - assertTrue(am.getRegionStates().isRegionFailedToClose(REGIONINFO)); + assertTrue(regionStates.isRegionInTransition(REGIONINFO) + && regionStates.isRegionInState(REGIONINFO, State.FAILED_CLOSE)); // Move it back to pending_close - am.getRegionStates().updateRegionState(REGIONINFO, State.PENDING_CLOSE); + regionStates.updateRegionState(REGIONINFO, State.PENDING_CLOSE); // Now fake the region closing successfully over on the regionserver; the // regionserver will have set the region in CLOSED state. This will @@ -413,7 +415,7 @@ public class TestAssignmentManager { ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO, SERVERNAME_B, versionid); assertNotSame(-1, versionid); // Wait on the handler removing the OPENED znode. - while(am.getRegionStates().isRegionInTransition(REGIONINFO)) Threads.sleep(1); + while(regionStates.isRegionInTransition(REGIONINFO)) Threads.sleep(1); } finally { executor.shutdown(); am.shutdown(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionMergeTransactionOnCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionMergeTransactionOnCluster.java index 49d77873052..a285f0c7e6a 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionMergeTransactionOnCluster.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionMergeTransactionOnCluster.java @@ -26,6 +26,7 @@ import static org.junit.Assert.fail; import java.io.IOException; import java.util.List; +import org.apache.commons.lang.math.RandomUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.FileSystem; @@ -46,10 +47,14 @@ import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.exceptions.MergeRegionException; import org.apache.hadoop.hbase.exceptions.UnknownRegionException; +import org.apache.hadoop.hbase.master.AssignmentManager; import org.apache.hadoop.hbase.master.HMaster; import org.apache.hadoop.hbase.master.RegionStates; +import org.apache.hadoop.hbase.master.RegionState.State; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.util.Pair; +import org.apache.hadoop.hbase.util.PairOfSameType; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; @@ -98,7 +103,7 @@ public class TestRegionMergeTransactionOnCluster { public static void afterAllTests() throws Exception { TEST_UTIL.shutdownMiniCluster(); } - + @Test public void testWholesomeMerge() throws Exception { LOG.info("Starting testWholesomeMerge"); @@ -111,11 +116,31 @@ public class TestRegionMergeTransactionOnCluster { INITIAL_REGION_NUM - 1); // Merge 2nd and 3th region - mergeRegionsAndVerifyRegionNum(master, tableName, 1, 2, + PairOfSameType mergedRegions = + mergeRegionsAndVerifyRegionNum(master, tableName, 1, 2, INITIAL_REGION_NUM - 2); verifyRowCount(table, ROWSIZE); + // Randomly choose one of the two merged regions + HRegionInfo hri = RandomUtils.nextBoolean() ? + mergedRegions.getFirst() : mergedRegions.getSecond(); + MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); + AssignmentManager am = cluster.getMaster().getAssignmentManager(); + RegionStates regionStates = am.getRegionStates(); + long start = EnvironmentEdgeManager.currentTimeMillis(); + while (!regionStates.isRegionInState(hri, State.MERGED)) { + assertFalse("Timed out in waiting one merged region to be in state MERGED", + EnvironmentEdgeManager.currentTimeMillis() - start > 60000); + Thread.sleep(500); + } + + // We should not be able to assign it again + am.assign(hri, true, true); + assertFalse("Merged region should not be in transition again", + regionStates.isRegionInTransition(hri) + && regionStates.isRegionInState(hri, State.MERGED)); + table.close(); } @@ -246,20 +271,27 @@ public class TestRegionMergeTransactionOnCluster { } } - private void mergeRegionsAndVerifyRegionNum(HMaster master, byte[] tablename, + private PairOfSameType mergeRegionsAndVerifyRegionNum( + HMaster master, byte[] tablename, int regionAnum, int regionBnum, int expectedRegionNum) throws Exception { - requestMergeRegion(master, tablename, regionAnum, regionBnum); + PairOfSameType mergedRegions = + requestMergeRegion(master, tablename, regionAnum, regionBnum); waitAndVerifyRegionNum(master, tablename, expectedRegionNum); + return mergedRegions; } - private void requestMergeRegion(HMaster master, byte[] tablename, + private PairOfSameType requestMergeRegion( + HMaster master, byte[] tablename, int regionAnum, int regionBnum) throws Exception { List> tableRegions = MetaReader .getTableRegionsAndLocations(master.getCatalogTracker(), Bytes.toString(tablename)); + HRegionInfo regionA = tableRegions.get(regionAnum).getFirst(); + HRegionInfo regionB = tableRegions.get(regionBnum).getFirst(); TEST_UTIL.getHBaseAdmin().mergeRegions( - tableRegions.get(regionAnum).getFirst().getEncodedNameAsBytes(), - tableRegions.get(regionBnum).getFirst().getEncodedNameAsBytes(), false); + regionA.getEncodedNameAsBytes(), + regionB.getEncodedNameAsBytes(), false); + return new PairOfSameType(regionA, regionB); } private void waitAndVerifyRegionNum(HMaster master, byte[] tablename, diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java index 4169892fedf..afa104d1104 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java @@ -61,12 +61,15 @@ import org.apache.hadoop.hbase.exceptions.MasterNotRunningException; import org.apache.hadoop.hbase.exceptions.UnknownRegionException; import org.apache.hadoop.hbase.exceptions.ZooKeeperConnectionException; import org.apache.hadoop.hbase.executor.EventType; +import org.apache.hadoop.hbase.master.AssignmentManager; import org.apache.hadoop.hbase.master.HMaster; import org.apache.hadoop.hbase.master.RegionState; import org.apache.hadoop.hbase.master.RegionStates; +import org.apache.hadoop.hbase.master.RegionState.State; import org.apache.hadoop.hbase.master.handler.SplitRegionHandler; import org.apache.hadoop.hbase.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.util.FSUtils; import org.apache.hadoop.hbase.util.HBaseFsck; import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread; @@ -811,6 +814,22 @@ public class TestSplitTransactionOnCluster { FSUtils.getTableStoreFilePathMap(null, fs, rootDir, tableName); assertEquals("Expected nothing but found " + storefilesAfter.toString(), storefilesAfter.size(), 0); + + hri = region.getRegionInfo(); // split parent + AssignmentManager am = cluster.getMaster().getAssignmentManager(); + RegionStates regionStates = am.getRegionStates(); + long start = EnvironmentEdgeManager.currentTimeMillis(); + while (!regionStates.isRegionInState(hri, State.SPLIT)) { + assertFalse("Timed out in waiting split parent to be in state SPLIT", + EnvironmentEdgeManager.currentTimeMillis() - start > 60000); + Thread.sleep(500); + } + + // We should not be able to assign it again + am.assign(hri, true, true); + assertFalse("Split region should not be in transition again", + regionStates.isRegionInTransition(hri) + && regionStates.isRegionInState(hri, State.SPLIT)); } finally { admin.setBalancerRunning(true, false); cluster.getMaster().setCatalogJanitorEnabled(true);