HBASE-9184 Ignore zk assign event if region is not known in transition

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1513294 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
jxiang 2013-08-12 23:35:38 +00:00
parent 7939a64b70
commit 7d49b07b9c
3 changed files with 82 additions and 28 deletions

View File

@ -751,7 +751,7 @@ public class AssignmentManager extends ZooKeeperListener {
* @param rt
* @param expectedVersion
*/
private void handleRegion(final RegionTransition rt, int expectedVersion) {
void handleRegion(final RegionTransition rt, int expectedVersion) {
if (rt == null) {
LOG.warn("Unexpected NULL input for RegionTransition rt");
return;
@ -892,12 +892,11 @@ public class AssignmentManager extends ZooKeeperListener {
case M_ZK_REGION_CLOSING:
// Should see CLOSING after we have asked it to CLOSE or additional
// times after already being in state of CLOSING
if (regionState != null
&& !regionState.isPendingCloseOrClosingOnServer(sn)) {
if (regionState == null
|| !regionState.isPendingCloseOrClosingOnServer(sn)) {
LOG.warn("Received CLOSING for " + prettyPrintedRegionName
+ " from server " + sn + " but region was in the state " + regionState
+ " and not in expected PENDING_CLOSE or CLOSING states,"
+ " or not on the expected server");
+ " from " + sn + " but the region isn't PENDING_CLOSE/CLOSING here: "
+ regionStates.getRegionState(encodedName));
return;
}
// Transition to CLOSING (or update stamp if already CLOSING)
@ -906,12 +905,11 @@ public class AssignmentManager extends ZooKeeperListener {
case RS_ZK_REGION_CLOSED:
// Should see CLOSED after CLOSING but possible after PENDING_CLOSE
if (regionState != null
&& !regionState.isPendingCloseOrClosingOnServer(sn)) {
if (regionState == null
|| !regionState.isPendingCloseOrClosingOnServer(sn)) {
LOG.warn("Received CLOSED for " + prettyPrintedRegionName
+ " from server " + sn + " but region was in the state " + regionState
+ " and not in expected PENDING_CLOSE or CLOSING states,"
+ " or not on the expected server");
+ " from " + sn + " but the region isn't PENDING_CLOSE/CLOSING here: "
+ regionStates.getRegionState(encodedName));
return;
}
// Handle CLOSED by assigning elsewhere or stopping if a disable
@ -926,12 +924,11 @@ public class AssignmentManager extends ZooKeeperListener {
break;
case RS_ZK_REGION_FAILED_OPEN:
if (regionState != null
&& !regionState.isPendingOpenOrOpeningOnServer(sn)) {
if (regionState == null
|| !regionState.isPendingOpenOrOpeningOnServer(sn)) {
LOG.warn("Received FAILED_OPEN for " + prettyPrintedRegionName
+ " from server " + sn + " but region was in the state " + regionState
+ " and not in expected PENDING_OPEN or OPENING states,"
+ " or not on the expected server");
+ " from " + sn + " but the region isn't PENDING_OPEN/OPENING here: "
+ regionStates.getRegionState(encodedName));
return;
}
AtomicInteger failedOpenCount = failedOpenTracker.get(encodedName);
@ -962,12 +959,11 @@ public class AssignmentManager extends ZooKeeperListener {
case RS_ZK_REGION_OPENING:
// Should see OPENING after we have asked it to OPEN or additional
// times after already being in state of OPENING
if (regionState != null
&& !regionState.isPendingOpenOrOpeningOnServer(sn)) {
if (regionState == null
|| !regionState.isPendingOpenOrOpeningOnServer(sn)) {
LOG.warn("Received OPENING for " + prettyPrintedRegionName
+ " from server " + sn + " but region was in the state " + regionState
+ " and not in expected PENDING_OPEN or OPENING states,"
+ " or not on the expected server");
+ " from " + sn + " but the region isn't PENDING_OPEN/OPENING here: "
+ regionStates.getRegionState(encodedName));
return;
}
// Transition to OPENING (or update stamp if already OPENING)
@ -975,13 +971,13 @@ public class AssignmentManager extends ZooKeeperListener {
break;
case RS_ZK_REGION_OPENED:
// Should see OPENED after OPENING but possible after PENDING_OPEN
if (regionState != null
&& !regionState.isPendingOpenOrOpeningOnServer(sn)) {
// Should see OPENED after OPENING but possible after PENDING_OPEN.
if (regionState == null
|| !regionState.isPendingOpenOrOpeningOnServer(sn)) {
LOG.warn("Received OPENED for " + prettyPrintedRegionName
+ " from server " + sn + " but region was in the state " + regionState
+ " and not in expected PENDING_OPEN or OPENING states,"
+ " or not on the expected server");
+ " from " + sn + " but the region isn't PENDING_OPEN/OPENING here: "
+ regionStates.getRegionState(encodedName));
// Close it without updating the internal region states,
// so as not to create double assignments in unlucky scenarios
// mentioned in OpenRegionHandler#process

View File

@ -32,7 +32,6 @@ import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.hadoop.hbase.CellScannable;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
@ -43,6 +42,7 @@ import org.apache.hadoop.hbase.RegionTransition;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.ServerLoad;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.catalog.CatalogTracker;
import org.apache.hadoop.hbase.catalog.MetaMockingUtil;
@ -69,6 +69,8 @@ import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ScanRequest;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ScanResponse;
import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.Table;
import org.apache.hadoop.hbase.regionserver.RegionOpeningState;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.zookeeper.RecoverableZooKeeper;
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
@ -1241,4 +1243,58 @@ public class TestAssignmentManager {
am.shutdown();
}
}
/**
 * Verifies that the AssignmentManager ignores an assignment-related ZK event
 * for a region it does not know to be in transition.
 *
 * Rationale: in normal operation every assignment is started by the AM
 * (split/merge aside), so an event for a region that is not in transition
 * can only be a very late one and is safe to drop. During master failover
 * the AM starts watching assignment znodes only after failover cleanup has
 * completed, so by the time an event arrives the AM should already have the
 * region in transition whenever ZK was used for the assignment (only hbck
 * assigns regions without ZK). Such events can therefore be ignored during
 * failover as well.
 */
@Test
public void testAssignmentEventIgnoredIfNotExpected() throws KeeperException, IOException {
  // Set once the AM has finished handling the OPENING event for our region.
  final AtomicBoolean openingEventHandled = new AtomicBoolean(false);
  // The region exercised by this test.
  final HRegionInfo metaRegion = HRegionInfo.FIRST_META_REGIONINFO;
  // The AM needs a catalog tracker; a mock is sufficient here.
  CatalogTracker catalogTracker = Mockito.mock(CatalogTracker.class);
  LoadBalancer loadBalancer = LoadBalancerFactory.getLoadBalancer(
    server.getConfiguration());
  // Build an AM whose handleRegion reports back when our event has been handled.
  AssignmentManager assignmentManager = new AssignmentManager(this.server,
      this.serverManager, catalogTracker, loadBalancer, null, null,
      master.getTableLockManager()) {
    @Override
    void handleRegion(final RegionTransition rt, int expectedVersion) {
      super.handleRegion(rt, expectedVersion);
      if (rt == null) {
        return;
      }
      if (!Bytes.equals(metaRegion.getRegionName(), rt.getRegionName())) {
        return;
      }
      if (rt.getEventType() == EventType.RS_ZK_REGION_OPENING) {
        openingEventHandled.set(true);
      }
    }
  };
  try {
    RegionStates states = assignmentManager.getRegionStates();
    // Start from a state where the region is definitely not in transition.
    states.regionOffline(metaRegion);
    // Clear the flag before faking the zk transition.
    openingEventHandled.set(false);
    this.watcher.registerListenerFirst(assignmentManager);
    assertFalse("The region should not be in transition",
      assignmentManager.getRegionStates().isRegionInTransition(metaRegion));
    ZKAssign.createNodeOffline(this.watcher, metaRegion, SERVERNAME_A);
    // Fire the OPENING transition the AM is expected to ignore.
    ZKAssign.transitionNodeOpening(this.watcher, metaRegion, SERVERNAME_A);
    // Poll until the overridden handler has seen the event, failing after 30 seconds.
    final long waitStart = EnvironmentEdgeManager.currentTimeMillis();
    while (!openingEventHandled.get()) {
      long elapsed = EnvironmentEdgeManager.currentTimeMillis() - waitStart;
      assertTrue("Timed out in waiting for ZK event to be processed",
        elapsed < 30000);
      Threads.sleepWithoutInterrupt(100);
    }
    // The event was handled, yet the region must still not be in transition.
    assertFalse(assignmentManager.getRegionStates().isRegionInTransition(metaRegion));
  } finally {
    assignmentManager.shutdown();
  }
}
}

View File

@ -49,6 +49,7 @@ import org.apache.hadoop.hbase.coprocessor.ObserverContext;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
import org.apache.hadoop.hbase.coprocessor.RegionObserver;
import org.apache.hadoop.hbase.executor.EventType;
import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.util.Bytes;
@ -485,6 +486,7 @@ public class TestAssignmentManagerOnCluster {
}
am.regionOffline(hri);
ZooKeeperWatcher zkw = TEST_UTIL.getHBaseCluster().getMaster().getZooKeeper();
am.getRegionStates().updateRegionState(hri, State.OFFLINE);
ZKAssign.createNodeOffline(zkw, hri, destServerName);
ZKAssign.transitionNodeOpening(zkw, hri, destServerName);