From b0e1395fc62b829658ebfa2b9b53297d7e7bfcb3 Mon Sep 17 00:00:00 2001 From: jxiang Date: Thu, 28 Mar 2013 17:02:00 +0000 Subject: [PATCH] HBASE-8144 Limit number of attempts to assign a region git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1462213 13f79535-47bb-0310-9956-ffa450edef68 --- .../hbase/master/AssignmentManager.java | 88 ++++++++--- .../hadoop/hbase/master/RegionStates.java | 4 + .../TestAssignmentManagerOnCluster.java | 141 +++++++++++++++--- 3 files changed, 194 insertions(+), 39 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java index 5641db40fdc..8c083da44c6 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java @@ -186,6 +186,16 @@ public class AssignmentManager extends ZooKeeperListener { /** Is the TimeOutManagement activated **/ private final boolean tomActivated; + /** + * A map to track the count a region fails to open in a row. + * So that we don't try to open a region forever if the failure is + * unrecoverable. We don't put this information in region states + * because we don't expect this to happen frequently; we don't + * want to copy this information over during each state transition either. + */ + private final ConcurrentHashMap + failedOpenTracker = new ConcurrentHashMap(); + /** * Constructs a new assignment manager. * @@ -541,7 +551,7 @@ public class AssignmentManager extends ZooKeeperListener { public void process() throws IOException { ReentrantLock lock = locker.acquireLock(regionInfo.getEncodedName()); try { - unassign(regionInfo, rs, expectedVersion, sn, true); + unassign(regionInfo, rs, expectedVersion, sn, true, null); } finally { lock.unlock(); } @@ -880,9 +890,25 @@ public class AssignmentManager extends ZooKeeperListener { // When there are more than one region server a new RS is selected as the // destination and the same is updated in the regionplan. (HBASE-5546) if (regionState != null) { - getRegionPlan(regionState.getRegion(), sn, true); - this.executorService.submit(new ClosedRegionHandler(server, - this, regionState.getRegion())); + AtomicInteger failedOpenCount = failedOpenTracker.get(encodedName); + if (failedOpenCount == null) { + failedOpenCount = new AtomicInteger(); + // No need to use putIfAbsent, or extra synchronization since + // this whole handleRegion block is locked on the encoded region + // name, and failedOpenTracker is updated only in this block + failedOpenTracker.put(encodedName, failedOpenCount); + } + if (failedOpenCount.incrementAndGet() >= maximumAttempts) { + regionStates.updateRegionState( + regionState.getRegion(), RegionState.State.FAILED_OPEN); + // remove the tracking info to save memory, also reset + // the count for next open initiative + failedOpenTracker.remove(encodedName); + } else { + getRegionPlan(regionState.getRegion(), sn, true); + this.executorService.submit(new ClosedRegionHandler(server, + this, regionState.getRegion())); + } } break; @@ -909,11 +935,16 @@ public class AssignmentManager extends ZooKeeperListener { + " from server " + sn + " but region was in the state " + regionState + " and not in expected PENDING_OPEN or OPENING states," + " or not on the expected server"); + // Close it without updating the internal region states, + // so as not to create double assignments in unlucky scenarios + // mentioned in OpenRegionHandler#process + unassign(regionState.getRegion(), null, -1, null, false, sn); return; } // Handle OPENED by removing from transition and deleted zk node regionState = regionStates.updateRegionState(rt, RegionState.State.OPEN); if (regionState != null) { + failedOpenTracker.remove(encodedName); // reset the count, if any this.executorService.submit(new OpenedRegionHandler( server, this, regionState.getRegion(), sn, expectedVersion)); } @@ -1571,12 +1602,23 @@ public class AssignmentManager extends ZooKeeperListener { } /** - * Send CLOSE RPC if the server is online, otherwise, offline the region + * Send CLOSE RPC if the server is online, otherwise, offline the region. + * + * The RPC will be sent only to the region sever found in the region state + * if it is passed in, otherwise, to the src server specified. If region + * state is not specified, we don't update region state at all, instead + * we just send the RPC call. This is useful for some cleanup without + * messing around the region states (see handleRegion, on region opened + * on an unexpected server scenario, for an example) */ private void unassign(final HRegionInfo region, final RegionState state, final int versionOfClosingNode, - final ServerName dest, final boolean transitionInZK) { - ServerName server = state.getServerName(); + final ServerName dest, final boolean transitionInZK, + final ServerName src) { + ServerName server = src; + if (state != null) { + server = state.getServerName(); + } for (int i = 1; i <= this.maximumAttempts; i++) { // ClosedRegionhandler can remove the server from this.regions if (!serverManager.isServerOnline(server)) { @@ -1584,7 +1626,9 @@ public class AssignmentManager extends ZooKeeperListener { // delete the node. if no node exists need not bother. deleteClosingOrClosedNode(region); } - regionOffline(region); + if (state != null) { + regionOffline(region); + } return; } try { @@ -1608,9 +1652,12 @@ public class AssignmentManager extends ZooKeeperListener { if (transitionInZK) { deleteClosingOrClosedNode(region); } - regionOffline(region); + if (state != null) { + regionOffline(region); + } return; - } else if (t instanceof RegionAlreadyInTransitionException) { + } else if (state != null + && t instanceof RegionAlreadyInTransitionException) { // RS is already processing this region, only need to update the timestamp LOG.debug("update " + state + " the timestamp."); state.updateTimestampToNow(); @@ -1622,7 +1669,7 @@ public class AssignmentManager extends ZooKeeperListener { } } // Run out of attempts - if (!tomActivated) { + if (!tomActivated && state != null) { regionStates.updateRegionState(region, RegionState.State.FAILED_CLOSE); } } @@ -1649,7 +1696,7 @@ public class AssignmentManager extends ZooKeeperListener { case CLOSING: case PENDING_CLOSE: case FAILED_CLOSE: - unassign(region, state, -1, null, false); + unassign(region, state, -1, null, false, null); state = regionStates.getRegionState(region); if (state.isOffline()) break; case FAILED_OPEN: @@ -1717,14 +1764,17 @@ public class AssignmentManager extends ZooKeeperListener { } } if (setOfflineInZK && versionOfOfflineNode == -1) { - LOG.warn("Unable to set offline in ZooKeeper to assign " + region); - if (!tomActivated) { - regionStates.updateRegionState(region, RegionState.State.FAILED_OPEN); + LOG.info("Unable to set offline in ZooKeeper to assign " + region); + // Setting offline in ZK must have been failed due to ZK racing or some + // exception which may make the server to abort. If it is ZK racing, + // we should retry since we already reset the region state, + // existing (re)assignment will fail anyway. + if (!server.isAborted()) { + continue; } - return; } - if (this.server.isStopped()) { - LOG.debug("Server stopped; skipping assign of " + region); + if (this.server.isStopped() || this.server.isAborted()) { + LOG.debug("Server stopped/aborted; skipping assign of " + region); return; } LOG.info("Assigning region " + region.getRegionNameAsString() + @@ -2144,7 +2194,7 @@ public class AssignmentManager extends ZooKeeperListener { return; } - unassign(region, state, versionOfClosingNode, dest, true); + unassign(region, state, versionOfClosingNode, dest, true, null); } finally { lock.unlock(); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java index 5c9c6bbaf30..4824ca686dc 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java @@ -253,6 +253,10 @@ public class RegionStates { newServerName = null; } + if (state == State.FAILED_CLOSE || state == State.FAILED_OPEN) { + LOG.warn("Failed to transition " + hri + " on " + serverName + ": " + state); + } + String regionName = hri.getEncodedName(); RegionState regionState = new RegionState( hri, state, System.currentTimeMillis(), newServerName); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java index 79696563192..033cb347de8 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java @@ -26,6 +26,8 @@ import java.io.IOException; import java.util.List; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HConstants; @@ -44,6 +46,7 @@ import org.apache.hadoop.hbase.coprocessor.RegionObserver; import org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer; import org.apache.hadoop.hbase.regionserver.HRegionServer; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.FSUtils; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; @@ -61,11 +64,13 @@ public class TestAssignmentManagerOnCluster { @BeforeClass public static void setUpBeforeClass() throws Exception { - // Using the test load balancer to control region plans + // Using the mock load balancer to control region plans conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS, - TestLoadBalancer.class, LoadBalancer.class); + MockLoadBalancer.class, LoadBalancer.class); conf.setClass(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY, - TestRegionObserver.class, RegionObserver.class); + MockRegionObserver.class, RegionObserver.class); + // Reduce the maximum attempts to speed up the test + conf.setInt("hbase.assignment.maximum.attempts", 3); TEST_UTIL.startMiniCluster(3); admin = TEST_UTIL.getHBaseAdmin(); @@ -141,7 +146,7 @@ public class TestAssignmentManagerOnCluster { /** * This tests moving a region */ - @Test + @Test (timeout=50000) public void testMoveRegion() throws Exception { String table = "testMoveRegion"; try { @@ -163,7 +168,7 @@ public class TestAssignmentManagerOnCluster { TEST_UTIL.getHBaseAdmin().move(hri.getEncodedNameAsBytes(), Bytes.toBytes(destServerName.getServerName())); - long timeoutTime = System.currentTimeMillis() + 800; + long timeoutTime = System.currentTimeMillis() + 30000; while (true) { ServerName sn = regionStates.getRegionServerOfRegion(hri); if (sn != null && sn.equals(destServerName)) { @@ -172,7 +177,8 @@ public class TestAssignmentManagerOnCluster { } long now = System.currentTimeMillis(); if (now > timeoutTime) { - fail("Failed to move the region in time"); + fail("Failed to move the region in time: " + + regionStates.getRegionState(hri)); } regionStates.waitForUpdate(50); } @@ -205,10 +211,60 @@ public class TestAssignmentManagerOnCluster { } } + /** + * This tests forcefully assign a region + * while it's closing and re-assigned. + * + * This test should not be flaky. If it is flaky, it means something + * wrong with AssignmentManager which should be reported and fixed + */ + @Test (timeout=60000) + public void testForceAssignWhileClosing() throws Exception { + String table = "testForceAssignWhileClosing"; + try { + HTableDescriptor desc = new HTableDescriptor(table); + desc.addFamily(new HColumnDescriptor(FAMILY)); + admin.createTable(desc); + + HTable meta = new HTable(conf, HConstants.META_TABLE_NAME); + HRegionInfo hri = new HRegionInfo( + desc.getName(), Bytes.toBytes("A"), Bytes.toBytes("Z")); + MetaEditor.addRegionToMeta(meta, hri); + + HMaster master = TEST_UTIL.getHBaseCluster().getMaster(); + master.assignRegion(hri); + AssignmentManager am = master.getAssignmentManager(); + assertTrue(am.waitForAssignment(hri)); + + MockRegionObserver.enabled = true; + am.unassign(hri); + RegionState state = am.getRegionStates().getRegionState(hri); + assertEquals(RegionState.State.FAILED_CLOSE, state.getState()); + + MockRegionObserver.enabled = false; + am.unassign(hri, true); + + // region is closing now, will be re-assigned automatically. + // now, let's forcefully assign it again. it should be + // assigned properly and no double-assignment + am.assign(hri, true, true); + + // region should be closed and re-assigned + assertTrue(am.waitForAssignment(hri)); + + ServerName serverName = master.getAssignmentManager(). + getRegionStates().getRegionServerOfRegion(hri); + TEST_UTIL.assertRegionOnServer(hri, serverName, 200); + } finally { + MockRegionObserver.enabled = false; + TEST_UTIL.deleteTable(Bytes.toBytes(table)); + } + } + /** * This tests region close failed */ - @Test + @Test (timeout=30000) public void testCloseFailed() throws Exception { String table = "testCloseFailed"; try { @@ -226,24 +282,25 @@ public class TestAssignmentManagerOnCluster { AssignmentManager am = master.getAssignmentManager(); assertTrue(am.waitForAssignment(hri)); - TestRegionObserver.enabled = true; + MockRegionObserver.enabled = true; am.unassign(hri); RegionState state = am.getRegionStates().getRegionState(hri); assertEquals(RegionState.State.FAILED_CLOSE, state.getState()); - TestRegionObserver.enabled = false; + MockRegionObserver.enabled = false; am.unassign(hri, true); - state = am.getRegionStates().getRegionState(hri); - assertTrue(RegionState.State.FAILED_CLOSE != state.getState()); - am.assign(hri, true, true); + // region may still be assigned now since it's closing, + // let's check if it's assigned after it's out of transition + am.waitOnRegionToClearRegionsInTransition(hri); + + // region should be closed and re-assigned assertTrue(am.waitForAssignment(hri)); - ServerName serverName = master.getAssignmentManager(). getRegionStates().getRegionServerOfRegion(hri); TEST_UTIL.assertRegionOnServer(hri, serverName, 200); } finally { - TestRegionObserver.enabled = false; + MockRegionObserver.enabled = false; TEST_UTIL.deleteTable(Bytes.toBytes(table)); } } @@ -251,7 +308,7 @@ public class TestAssignmentManagerOnCluster { /** * This tests region open failed */ - @Test + @Test (timeout=30000) public void testOpenFailed() throws Exception { String table = "testOpenFailed"; try { @@ -264,7 +321,7 @@ public class TestAssignmentManagerOnCluster { desc.getName(), Bytes.toBytes("A"), Bytes.toBytes("Z")); MetaEditor.addRegionToMeta(meta, hri); - TestLoadBalancer.controledRegion = hri.getEncodedName(); + MockLoadBalancer.controledRegion = hri.getEncodedName(); HMaster master = TEST_UTIL.getHBaseCluster().getMaster(); master.assignRegion(hri); @@ -274,7 +331,7 @@ public class TestAssignmentManagerOnCluster { RegionState state = am.getRegionStates().getRegionState(hri); assertEquals(RegionState.State.FAILED_OPEN, state.getState()); - TestLoadBalancer.controledRegion = null; + MockLoadBalancer.controledRegion = null; master.assignRegion(hri); assertTrue(am.waitForAssignment(hri)); @@ -282,12 +339,56 @@ public class TestAssignmentManagerOnCluster { getRegionStates().getRegionServerOfRegion(hri); TEST_UTIL.assertRegionOnServer(hri, serverName, 200); } finally { - TestLoadBalancer.controledRegion = null; + MockLoadBalancer.controledRegion = null; TEST_UTIL.deleteTable(Bytes.toBytes(table)); } } - static class TestLoadBalancer extends StochasticLoadBalancer { + /** + * This tests region open failure which is not recoverable + */ + @Test (timeout=30000) + public void testOpenFailedUnrecoverable() throws Exception { + String table = "testOpenFailedUnrecoverable"; + try { + HTableDescriptor desc = new HTableDescriptor(table); + desc.addFamily(new HColumnDescriptor(FAMILY)); + admin.createTable(desc); + + HTable meta = new HTable(conf, HConstants.META_TABLE_NAME); + HRegionInfo hri = new HRegionInfo( + desc.getName(), Bytes.toBytes("A"), Bytes.toBytes("Z")); + MetaEditor.addRegionToMeta(meta, hri); + + FileSystem fs = FileSystem.get(conf); + Path tableDir= FSUtils.getTablePath(FSUtils.getRootDir(conf), table); + Path regionDir = new Path(tableDir, hri.getEncodedName()); + // create a file named the same as the region dir to + // mess up with region opening + fs.create(regionDir, true); + + HMaster master = TEST_UTIL.getHBaseCluster().getMaster(); + master.assignRegion(hri); + AssignmentManager am = master.getAssignmentManager(); + assertFalse(am.waitForAssignment(hri)); + + RegionState state = am.getRegionStates().getRegionState(hri); + assertEquals(RegionState.State.FAILED_OPEN, state.getState()); + + // remove the blocking file, so that region can be opened + fs.delete(regionDir, true); + master.assignRegion(hri); + assertTrue(am.waitForAssignment(hri)); + + ServerName serverName = master.getAssignmentManager(). + getRegionStates().getRegionServerOfRegion(hri); + TEST_UTIL.assertRegionOnServer(hri, serverName, 200); + } finally { + TEST_UTIL.deleteTable(Bytes.toBytes(table)); + } + } + + static class MockLoadBalancer extends StochasticLoadBalancer { // For this region, if specified, always assign to nowhere static volatile String controledRegion = null; @@ -301,7 +402,7 @@ public class TestAssignmentManagerOnCluster { } } - public static class TestRegionObserver extends BaseRegionObserver { + public static class MockRegionObserver extends BaseRegionObserver { // If enabled, fail all preClose calls static volatile boolean enabled = false;