HBASE-9184 Ignore zk assign event if region is not known in transition
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1513294 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
7939a64b70
commit
7d49b07b9c
|
@ -751,7 +751,7 @@ public class AssignmentManager extends ZooKeeperListener {
|
|||
* @param rt
|
||||
* @param expectedVersion
|
||||
*/
|
||||
private void handleRegion(final RegionTransition rt, int expectedVersion) {
|
||||
void handleRegion(final RegionTransition rt, int expectedVersion) {
|
||||
if (rt == null) {
|
||||
LOG.warn("Unexpected NULL input for RegionTransition rt");
|
||||
return;
|
||||
|
@ -892,12 +892,11 @@ public class AssignmentManager extends ZooKeeperListener {
|
|||
case M_ZK_REGION_CLOSING:
|
||||
// Should see CLOSING after we have asked it to CLOSE or additional
|
||||
// times after already being in state of CLOSING
|
||||
if (regionState != null
|
||||
&& !regionState.isPendingCloseOrClosingOnServer(sn)) {
|
||||
if (regionState == null
|
||||
|| !regionState.isPendingCloseOrClosingOnServer(sn)) {
|
||||
LOG.warn("Received CLOSING for " + prettyPrintedRegionName
|
||||
+ " from server " + sn + " but region was in the state " + regionState
|
||||
+ " and not in expected PENDING_CLOSE or CLOSING states,"
|
||||
+ " or not on the expected server");
|
||||
+ " from " + sn + " but the region isn't PENDING_CLOSE/CLOSING here: "
|
||||
+ regionStates.getRegionState(encodedName));
|
||||
return;
|
||||
}
|
||||
// Transition to CLOSING (or update stamp if already CLOSING)
|
||||
|
@ -906,12 +905,11 @@ public class AssignmentManager extends ZooKeeperListener {
|
|||
|
||||
case RS_ZK_REGION_CLOSED:
|
||||
// Should see CLOSED after CLOSING but possible after PENDING_CLOSE
|
||||
if (regionState != null
|
||||
&& !regionState.isPendingCloseOrClosingOnServer(sn)) {
|
||||
if (regionState == null
|
||||
|| !regionState.isPendingCloseOrClosingOnServer(sn)) {
|
||||
LOG.warn("Received CLOSED for " + prettyPrintedRegionName
|
||||
+ " from server " + sn + " but region was in the state " + regionState
|
||||
+ " and not in expected PENDING_CLOSE or CLOSING states,"
|
||||
+ " or not on the expected server");
|
||||
+ " from " + sn + " but the region isn't PENDING_CLOSE/CLOSING here: "
|
||||
+ regionStates.getRegionState(encodedName));
|
||||
return;
|
||||
}
|
||||
// Handle CLOSED by assigning elsewhere or stopping if a disable
|
||||
|
@ -926,12 +924,11 @@ public class AssignmentManager extends ZooKeeperListener {
|
|||
break;
|
||||
|
||||
case RS_ZK_REGION_FAILED_OPEN:
|
||||
if (regionState != null
|
||||
&& !regionState.isPendingOpenOrOpeningOnServer(sn)) {
|
||||
if (regionState == null
|
||||
|| !regionState.isPendingOpenOrOpeningOnServer(sn)) {
|
||||
LOG.warn("Received FAILED_OPEN for " + prettyPrintedRegionName
|
||||
+ " from server " + sn + " but region was in the state " + regionState
|
||||
+ " and not in expected PENDING_OPEN or OPENING states,"
|
||||
+ " or not on the expected server");
|
||||
+ " from " + sn + " but the region isn't PENDING_OPEN/OPENING here: "
|
||||
+ regionStates.getRegionState(encodedName));
|
||||
return;
|
||||
}
|
||||
AtomicInteger failedOpenCount = failedOpenTracker.get(encodedName);
|
||||
|
@ -962,12 +959,11 @@ public class AssignmentManager extends ZooKeeperListener {
|
|||
case RS_ZK_REGION_OPENING:
|
||||
// Should see OPENING after we have asked it to OPEN or additional
|
||||
// times after already being in state of OPENING
|
||||
if (regionState != null
|
||||
&& !regionState.isPendingOpenOrOpeningOnServer(sn)) {
|
||||
if (regionState == null
|
||||
|| !regionState.isPendingOpenOrOpeningOnServer(sn)) {
|
||||
LOG.warn("Received OPENING for " + prettyPrintedRegionName
|
||||
+ " from server " + sn + " but region was in the state " + regionState
|
||||
+ " and not in expected PENDING_OPEN or OPENING states,"
|
||||
+ " or not on the expected server");
|
||||
+ " from " + sn + " but the region isn't PENDING_OPEN/OPENING here: "
|
||||
+ regionStates.getRegionState(encodedName));
|
||||
return;
|
||||
}
|
||||
// Transition to OPENING (or update stamp if already OPENING)
|
||||
|
@ -975,13 +971,13 @@ public class AssignmentManager extends ZooKeeperListener {
|
|||
break;
|
||||
|
||||
case RS_ZK_REGION_OPENED:
|
||||
// Should see OPENED after OPENING but possible after PENDING_OPEN
|
||||
if (regionState != null
|
||||
&& !regionState.isPendingOpenOrOpeningOnServer(sn)) {
|
||||
// Should see OPENED after OPENING but possible after PENDING_OPEN.
|
||||
if (regionState == null
|
||||
|| !regionState.isPendingOpenOrOpeningOnServer(sn)) {
|
||||
LOG.warn("Received OPENED for " + prettyPrintedRegionName
|
||||
+ " from server " + sn + " but region was in the state " + regionState
|
||||
+ " and not in expected PENDING_OPEN or OPENING states,"
|
||||
+ " or not on the expected server");
|
||||
+ " from " + sn + " but the region isn't PENDING_OPEN/OPENING here: "
|
||||
+ regionStates.getRegionState(encodedName));
|
||||
|
||||
// Close it without updating the internal region states,
|
||||
// so as not to create double assignments in unlucky scenarios
|
||||
// mentioned in OpenRegionHandler#process
|
||||
|
|
|
@ -32,7 +32,6 @@ import java.util.concurrent.atomic.AtomicBoolean;
|
|||
|
||||
import org.apache.hadoop.hbase.CellScannable;
|
||||
import org.apache.hadoop.hbase.CellUtil;
|
||||
import org.apache.hadoop.hbase.TableName;
|
||||
import org.apache.hadoop.hbase.HBaseConfiguration;
|
||||
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
||||
import org.apache.hadoop.hbase.HConstants;
|
||||
|
@ -43,6 +42,7 @@ import org.apache.hadoop.hbase.RegionTransition;
|
|||
import org.apache.hadoop.hbase.Server;
|
||||
import org.apache.hadoop.hbase.ServerLoad;
|
||||
import org.apache.hadoop.hbase.ServerName;
|
||||
import org.apache.hadoop.hbase.TableName;
|
||||
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
|
||||
import org.apache.hadoop.hbase.catalog.CatalogTracker;
|
||||
import org.apache.hadoop.hbase.catalog.MetaMockingUtil;
|
||||
|
@ -69,6 +69,8 @@ import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ScanRequest;
|
|||
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ScanResponse;
|
||||
import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.Table;
|
||||
import org.apache.hadoop.hbase.regionserver.RegionOpeningState;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
|
||||
import org.apache.hadoop.hbase.util.Threads;
|
||||
import org.apache.hadoop.hbase.zookeeper.RecoverableZooKeeper;
|
||||
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
|
||||
|
@ -1241,4 +1243,58 @@ public class TestAssignmentManager {
|
|||
am.shutdown();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests that assignment-related ZK events are ignored by AM if the region is not known
|
||||
* by AM to be in transition. During normal operation, all assignments are started
|
||||
* by AM (not considering split/merge), so if an event is received but the region
|
||||
* is not in transition, the event must be a very late one and can be ignored.
|
||||
* During master failover, since AM watches assignment znodes after failover cleanup
|
||||
* is completed, when an event comes in, AM should already have the region in transition
|
||||
* if ZK is used during the assignment action (only hbck doesn't use ZK for region
|
||||
* assignment). So during master failover, we can ignore such events too.
*
* The test fakes an OFFLINE znode and an OPENING transition for a region that AM
* does not have in transition, waits until AM's handleRegion has seen the OPENING
* event, and then checks that the region is still not in transition.
|
||||
*/
|
||||
@Test
|
||||
public void testAssignmentEventIgnoredIfNotExpected() throws KeeperException, IOException {
|
||||
// Region to use in test.
|
||||
final HRegionInfo hri = HRegionInfo.FIRST_META_REGIONINFO;
|
||||
// Need a mocked catalog tracker.
|
||||
CatalogTracker ct = Mockito.mock(CatalogTracker.class);
|
||||
LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(
|
||||
server.getConfiguration());
|
||||
// Set by the overridden handleRegion below once the OPENING event for hri has been handled.
final AtomicBoolean zkEventProcessed = new AtomicBoolean(false);
|
||||
// Create an AM.
|
||||
AssignmentManager am = new AssignmentManager(this.server,
|
||||
this.serverManager, ct, balancer, null, null, master.getTableLockManager()) {
|
||||
|
||||
// Delegate to the real handler first, then flag that the OPENING event for the
// test region has passed through it, so the test thread knows when to check state.
@Override
|
||||
void handleRegion(final RegionTransition rt, int expectedVersion) {
|
||||
super.handleRegion(rt, expectedVersion);
|
||||
if (rt != null && Bytes.equals(hri.getRegionName(),
|
||||
rt.getRegionName()) && rt.getEventType() == EventType.RS_ZK_REGION_OPENING) {
|
||||
zkEventProcessed.set(true);
|
||||
}
|
||||
}
|
||||
};
|
||||
try {
|
||||
// First make sure the region is not in transition
|
||||
am.getRegionStates().regionOffline(hri);
|
||||
zkEventProcessed.set(false); // Reset it before faking zk transition
|
||||
this.watcher.registerListenerFirst(am);
|
||||
assertFalse("The region should not be in transition",
|
||||
am.getRegionStates().isRegionInTransition(hri));
|
||||
ZKAssign.createNodeOffline(this.watcher, hri, SERVERNAME_A);
|
||||
// Trigger a transition event
|
||||
ZKAssign.transitionNodeOpening(this.watcher, hri, SERVERNAME_A);
|
||||
long startTime = EnvironmentEdgeManager.currentTimeMillis();
|
||||
// Poll every 100 ms until the overridden handleRegion has seen the OPENING event;
// fail the test if that has not happened within 30 seconds.
while (!zkEventProcessed.get()) {
|
||||
assertTrue("Timed out in waiting for ZK event to be processed",
|
||||
EnvironmentEdgeManager.currentTimeMillis() - startTime < 30000);
|
||||
Threads.sleepWithoutInterrupt(100);
|
||||
}
|
||||
// The event has been handled, yet the region must still not be in transition.
assertFalse(am.getRegionStates().isRegionInTransition(hri));
|
||||
} finally {
|
||||
am.shutdown();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -49,6 +49,7 @@ import org.apache.hadoop.hbase.coprocessor.ObserverContext;
|
|||
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
|
||||
import org.apache.hadoop.hbase.coprocessor.RegionObserver;
|
||||
import org.apache.hadoop.hbase.executor.EventType;
|
||||
import org.apache.hadoop.hbase.master.RegionState.State;
|
||||
import org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer;
|
||||
import org.apache.hadoop.hbase.regionserver.HRegionServer;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
|
@ -485,6 +486,7 @@ public class TestAssignmentManagerOnCluster {
|
|||
}
|
||||
am.regionOffline(hri);
|
||||
ZooKeeperWatcher zkw = TEST_UTIL.getHBaseCluster().getMaster().getZooKeeper();
|
||||
am.getRegionStates().updateRegionState(hri, State.OFFLINE);
|
||||
ZKAssign.createNodeOffline(zkw, hri, destServerName);
|
||||
ZKAssign.transitionNodeOpening(zkw, hri, destServerName);
|
||||
|
||||
|
|
Loading…
Reference in New Issue