HBASE-8920 TestZKBasedOpenCloseRegion.testReOpenRegion fails occasionally

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1502132 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2013-07-11 07:13:26 +00:00
parent e11ff8a5fd
commit f92be02b93
3 changed files with 49 additions and 39 deletions

View File

@ -257,7 +257,7 @@ public class AssignmentManager extends ZooKeeperListener {
this.balancer = balancer; this.balancer = balancer;
int maxThreads = conf.getInt("hbase.assignment.threads.max", 30); int maxThreads = conf.getInt("hbase.assignment.threads.max", 30);
this.threadPoolExecutorService = Threads.getBoundedCachedThreadPool( this.threadPoolExecutorService = Threads.getBoundedCachedThreadPool(
maxThreads, 60L, TimeUnit.SECONDS, Threads.newDaemonThreadFactory("hbase-am")); maxThreads, 60L, TimeUnit.SECONDS, Threads.newDaemonThreadFactory("AM."));
this.metricsMaster = metricsMaster;// can be null only with tests. this.metricsMaster = metricsMaster;// can be null only with tests.
this.regionStates = new RegionStates(server, serverManager); this.regionStates = new RegionStates(server, serverManager);
@ -267,7 +267,7 @@ public class AssignmentManager extends ZooKeeperListener {
this.bulkAssignThresholdServers = conf.getInt("hbase.bulk.assignment.threshold.servers", 3); this.bulkAssignThresholdServers = conf.getInt("hbase.bulk.assignment.threshold.servers", 3);
int workers = conf.getInt("hbase.assignment.zkevent.workers", 20); int workers = conf.getInt("hbase.assignment.zkevent.workers", 20);
ThreadFactory threadFactory = Threads.newDaemonThreadFactory("hbase-am-zkevent-worker"); ThreadFactory threadFactory = Threads.newDaemonThreadFactory("AM.ZK.Worker");
zkEventWorkers = Threads.getBoundedCachedThreadPool(workers, 60L, zkEventWorkers = Threads.getBoundedCachedThreadPool(workers, 60L,
TimeUnit.SECONDS, threadFactory); TimeUnit.SECONDS, threadFactory);
this.tableLockManager = tableLockManager; this.tableLockManager = tableLockManager;
@ -544,7 +544,7 @@ public class AssignmentManager extends ZooKeeperListener {
final ServerName sn = rt.getServerName(); final ServerName sn = rt.getServerName();
final String encodedRegionName = regionInfo.getEncodedName(); final String encodedRegionName = regionInfo.getEncodedName();
final String prettyPrintedRegionName = HRegionInfo.prettyPrint(encodedRegionName); final String prettyPrintedRegionName = HRegionInfo.prettyPrint(encodedRegionName);
LOG.info("Processing region " + regionInfo.getRegionNameAsString() + " in state " + et); LOG.info("Processing " + regionInfo.getRegionNameAsString() + " in state " + et);
if (regionStates.isRegionInTransition(encodedRegionName)) { if (regionStates.isRegionInTransition(encodedRegionName)) {
@ -640,16 +640,15 @@ public class AssignmentManager extends ZooKeeperListener {
" on a dead regionserver: " + sn + " doing nothing"); " on a dead regionserver: " + sn + " doing nothing");
} else { } else {
regionStates.updateRegionState(rt, RegionState.State.SPLITTING); regionStates.updateRegionState(rt, RegionState.State.SPLITTING);
LOG.info("Processed region " + prettyPrintedRegionName LOG.info("Processed " + prettyPrintedRegionName + " in state : " + et);
+ " in state : " + et);
} }
break; break;
case RS_ZK_REGION_SPLIT: case RS_ZK_REGION_SPLIT:
if (!serverManager.isServerOnline(sn)) { if (!serverManager.isServerOnline(sn)) {
forceOffline(regionInfo, rt); forceOffline(regionInfo, rt);
} else { } else {
LOG.info("Processed region " + prettyPrintedRegionName + " in state : " + LOG.info("Processed " + prettyPrintedRegionName + " in state : " + et +
et + " nothing to do."); " nothing to do.");
// We don't do anything. The regionserver is supposed to update the znode // We don't do anything. The regionserver is supposed to update the znode
// multiple times so if it's still up we will receive an update soon. // multiple times so if it's still up we will receive an update soon.
} }
@ -659,7 +658,7 @@ public class AssignmentManager extends ZooKeeperListener {
// The regionserver started the merge, but died before updating the status. // The regionserver started the merge, but died before updating the status.
// It means (hopefully) that the merge was not finished // It means (hopefully) that the merge was not finished
// This node should be gone soon since it is ephemeral. // This node should be gone soon since it is ephemeral.
LOG.warn("Processed region " + prettyPrintedRegionName + " in state : " + et + LOG.warn("Processed " + prettyPrintedRegionName + " in state : " + et +
" on a dead regionserver: " + sn + " doing nothing"); " on a dead regionserver: " + sn + " doing nothing");
} else { } else {
handleRegionMerging(rt, prettyPrintedRegionName, sn); handleRegionMerging(rt, prettyPrintedRegionName, sn);
@ -670,11 +669,11 @@ public class AssignmentManager extends ZooKeeperListener {
case RS_ZK_REGION_MERGED: case RS_ZK_REGION_MERGED:
if (!serverManager.isServerOnline(sn)) { if (!serverManager.isServerOnline(sn)) {
// ServerShutdownHandler would handle this region // ServerShutdownHandler would handle this region
LOG.warn("Processed region " + prettyPrintedRegionName LOG.warn("Processed " + prettyPrintedRegionName
+ " in state : " + et + " on a dead regionserver: " + sn + " in state : " + et + " on a dead regionserver: " + sn
+ " doing nothing"); + " doing nothing");
} else { } else {
LOG.info("Processed region " + prettyPrintedRegionName + " in state : " + LOG.info("Processed " + prettyPrintedRegionName + " in state : " +
et + " nothing to do."); et + " nothing to do.");
// We don't do anything. The regionserver is supposed to update the znode // We don't do anything. The regionserver is supposed to update the znode
// multiple times so if it's still up we will receive an update soon. // multiple times so if it's still up we will receive an update soon.
@ -881,7 +880,7 @@ public class AssignmentManager extends ZooKeeperListener {
// times after already being in state of CLOSING // times after already being in state of CLOSING
if (regionState != null if (regionState != null
&& !regionState.isPendingCloseOrClosingOnServer(sn)) { && !regionState.isPendingCloseOrClosingOnServer(sn)) {
LOG.warn("Received CLOSING for region " + prettyPrintedRegionName LOG.warn("Received CLOSING for " + prettyPrintedRegionName
+ " from server " + sn + " but region was in the state " + regionState + " from server " + sn + " but region was in the state " + regionState
+ " and not in expected PENDING_CLOSE or CLOSING states," + " and not in expected PENDING_CLOSE or CLOSING states,"
+ " or not on the expected server"); + " or not on the expected server");
@ -895,7 +894,7 @@ public class AssignmentManager extends ZooKeeperListener {
// Should see CLOSED after CLOSING but possible after PENDING_CLOSE // Should see CLOSED after CLOSING but possible after PENDING_CLOSE
if (regionState != null if (regionState != null
&& !regionState.isPendingCloseOrClosingOnServer(sn)) { && !regionState.isPendingCloseOrClosingOnServer(sn)) {
LOG.warn("Received CLOSED for region " + prettyPrintedRegionName LOG.warn("Received CLOSED for " + prettyPrintedRegionName
+ " from server " + sn + " but region was in the state " + regionState + " from server " + sn + " but region was in the state " + regionState
+ " and not in expected PENDING_CLOSE or CLOSING states," + " and not in expected PENDING_CLOSE or CLOSING states,"
+ " or not on the expected server"); + " or not on the expected server");
@ -915,7 +914,7 @@ public class AssignmentManager extends ZooKeeperListener {
case RS_ZK_REGION_FAILED_OPEN: case RS_ZK_REGION_FAILED_OPEN:
if (regionState != null if (regionState != null
&& !regionState.isPendingOpenOrOpeningOnServer(sn)) { && !regionState.isPendingOpenOrOpeningOnServer(sn)) {
LOG.warn("Received FAILED_OPEN for region " + prettyPrintedRegionName LOG.warn("Received FAILED_OPEN for " + prettyPrintedRegionName
+ " from server " + sn + " but region was in the state " + regionState + " from server " + sn + " but region was in the state " + regionState
+ " and not in expected PENDING_OPEN or OPENING states," + " and not in expected PENDING_OPEN or OPENING states,"
+ " or not on the expected server"); + " or not on the expected server");
@ -953,7 +952,7 @@ public class AssignmentManager extends ZooKeeperListener {
// times after already being in state of OPENING // times after already being in state of OPENING
if (regionState != null if (regionState != null
&& !regionState.isPendingOpenOrOpeningOnServer(sn)) { && !regionState.isPendingOpenOrOpeningOnServer(sn)) {
LOG.warn("Received OPENING for region " + prettyPrintedRegionName LOG.warn("Received OPENING for " + prettyPrintedRegionName
+ " from server " + sn + " but region was in the state " + regionState + " from server " + sn + " but region was in the state " + regionState
+ " and not in expected PENDING_OPEN or OPENING states," + " and not in expected PENDING_OPEN or OPENING states,"
+ " or not on the expected server"); + " or not on the expected server");
@ -967,7 +966,7 @@ public class AssignmentManager extends ZooKeeperListener {
// Should see OPENED after OPENING but possible after PENDING_OPEN // Should see OPENED after OPENING but possible after PENDING_OPEN
if (regionState != null if (regionState != null
&& !regionState.isPendingOpenOrOpeningOnServer(sn)) { && !regionState.isPendingOpenOrOpeningOnServer(sn)) {
LOG.warn("Received OPENED for region " + prettyPrintedRegionName LOG.warn("Received OPENED for " + prettyPrintedRegionName
+ " from server " + sn + " but region was in the state " + regionState + " from server " + sn + " but region was in the state " + regionState
+ " and not in expected PENDING_OPEN or OPENING states," + " and not in expected PENDING_OPEN or OPENING states,"
+ " or not on the expected server"); + " or not on the expected server");
@ -1216,22 +1215,22 @@ public class AssignmentManager extends ZooKeeperListener {
HRegionInfo regionInfo = rs.getRegion(); HRegionInfo regionInfo = rs.getRegion();
String regionNameStr = regionInfo.getRegionNameAsString(); String regionNameStr = regionInfo.getRegionNameAsString();
LOG.debug("The znode of region " + regionNameStr LOG.debug("The znode of " + regionNameStr
+ " has been deleted, region state: " + rs); + " has been deleted, region state: " + rs);
if (rs.isOpened()) { if (rs.isOpened()) {
ServerName serverName = rs.getServerName(); ServerName serverName = rs.getServerName();
regionOnline(regionInfo, serverName); regionOnline(regionInfo, serverName);
LOG.info("The master has opened the region " LOG.info("The master has opened "
+ regionNameStr + " that was online on " + serverName); + regionNameStr + " that was online on " + serverName);
boolean disabled = getZKTable().isDisablingOrDisabledTable( boolean disabled = getZKTable().isDisablingOrDisabledTable(
regionInfo.getTableNameAsString()); regionInfo.getTableNameAsString());
if (!serverManager.isServerOnline(serverName) && !disabled) { if (!serverManager.isServerOnline(serverName) && !disabled) {
LOG.info("Opened region " + regionNameStr LOG.info("Opened " + regionNameStr
+ "but the region server is offline, reassign the region"); + "but the region server is offline, reassign the region");
assign(regionInfo, true); assign(regionInfo, true);
} else if (disabled) { } else if (disabled) {
// if server is offline, no hurt to unassign again // if server is offline, no hurt to unassign again
LOG.info("Opened region " + regionNameStr LOG.info("Opened " + regionNameStr
+ "but this table is disabled, triggering close of region"); + "but this table is disabled, triggering close of region");
unassign(regionInfo); unassign(regionInfo);
} }
@ -1991,7 +1990,7 @@ public class AssignmentManager extends ZooKeeperListener {
// Remove region from in-memory transition and unassigned node from ZK // Remove region from in-memory transition and unassigned node from ZK
// While trying to enable the table the regions of the table were // While trying to enable the table the regions of the table were
// already enabled. // already enabled.
LOG.debug("ALREADY_OPENED region " + region.getRegionNameAsString() LOG.debug("ALREADY_OPENED " + region.getRegionNameAsString()
+ " to " + sn); + " to " + sn);
String encodedRegionName = region.getEncodedName(); String encodedRegionName = region.getEncodedName();
try { try {
@ -2081,7 +2080,7 @@ public class AssignmentManager extends ZooKeeperListener {
serverManager.createDestinationServersList(serverToExclude); serverManager.createDestinationServersList(serverToExclude);
if (destServers.isEmpty()){ if (destServers.isEmpty()){
LOG.warn("Can't move the region " + encodedName + LOG.warn("Can't move " + encodedName +
", there is no destination server available."); ", there is no destination server available.");
return null; return null;
} }
@ -2121,7 +2120,7 @@ public class AssignmentManager extends ZooKeeperListener {
if (newPlan) { if (newPlan) {
if (randomPlan.getDestination() == null) { if (randomPlan.getDestination() == null) {
LOG.warn("Can't find a destination for region" + encodedName); LOG.warn("Can't find a destination for " + encodedName);
return null; return null;
} }
LOG.debug("No previous transition plan was found (or we are ignoring " + LOG.debug("No previous transition plan was found (or we are ignoring " +
@ -2133,7 +2132,7 @@ public class AssignmentManager extends ZooKeeperListener {
", forceNewPlan=" + forceNewPlan); ", forceNewPlan=" + forceNewPlan);
return randomPlan; return randomPlan;
} }
LOG.debug("Using pre-existing plan for region " + LOG.debug("Using pre-existing plan for " +
region.getRegionNameAsString() + "; plan=" + existingPlan); region.getRegionNameAsString() + "; plan=" + existingPlan);
return existingPlan; return existingPlan;
} }
@ -2203,8 +2202,7 @@ public class AssignmentManager extends ZooKeeperListener {
*/ */
public void unassign(HRegionInfo region, boolean force, ServerName dest) { public void unassign(HRegionInfo region, boolean force, ServerName dest) {
// TODO: Method needs refactoring. Ugly buried returns throughout. Beware! // TODO: Method needs refactoring. Ugly buried returns throughout. Beware!
LOG.debug("Starting unassignment of region " + LOG.debug("Starting unassign of " + region.getRegionNameAsString() + " (offlining)");
region.getRegionNameAsString() + " (offlining)");
String encodedName = region.getEncodedName(); String encodedName = region.getEncodedName();
// Grab the state of this region and synchronize on it // Grab the state of this region and synchronize on it
@ -2227,7 +2225,7 @@ public class AssignmentManager extends ZooKeeperListener {
versionOfClosingNode = ZKAssign.createNodeClosing( versionOfClosingNode = ZKAssign.createNodeClosing(
watcher, region, state.getServerName()); watcher, region, state.getServerName());
if (versionOfClosingNode == -1) { if (versionOfClosingNode == -1) {
LOG.debug("Attempting to unassign region " + LOG.debug("Attempting to unassign " +
region.getRegionNameAsString() + " but ZK closing node " region.getRegionNameAsString() + " but ZK closing node "
+ "can't be created."); + "can't be created.");
return; return;
@ -2268,7 +2266,7 @@ public class AssignmentManager extends ZooKeeperListener {
return; return;
} else if (force && (state.isPendingClose() } else if (force && (state.isPendingClose()
|| state.isClosing() || state.isFailedClose())) { || state.isClosing() || state.isFailedClose())) {
LOG.debug("Attempting to unassign region " + region.getRegionNameAsString() + LOG.debug("Attempting to unassign " + region.getRegionNameAsString() +
" which is already " + state.getState() + " which is already " + state.getState() +
" but forcing to send a CLOSE RPC again "); " but forcing to send a CLOSE RPC again ");
if (state.isFailedClose()) { if (state.isFailedClose()) {
@ -2276,7 +2274,7 @@ public class AssignmentManager extends ZooKeeperListener {
} }
state.updateTimestampToNow(); state.updateTimestampToNow();
} else { } else {
LOG.debug("Attempting to unassign region " + LOG.debug("Attempting to unassign " +
region.getRegionNameAsString() + " but it is " + region.getRegionNameAsString() + " but it is " +
"already in transition (" + state.getState() + ", force=" + force + ")"); "already in transition (" + state.getState() + ", force=" + force + ")");
return; return;
@ -2305,12 +2303,12 @@ public class AssignmentManager extends ZooKeeperListener {
// TODO : We don't abort if the delete node returns false. Is there any // TODO : We don't abort if the delete node returns false. Is there any
// such corner case? // such corner case?
if (!deleteNode) { if (!deleteNode) {
LOG.error("The deletion of the CLOSED node for the region " LOG.error("The deletion of the CLOSED node for "
+ encodedName + " returned " + deleteNode); + encodedName + " returned " + deleteNode);
} }
} }
} catch (NoNodeException e) { } catch (NoNodeException e) {
LOG.debug("CLOSING/CLOSED node for the region " + encodedName LOG.debug("CLOSING/CLOSED node for " + encodedName
+ " already deleted"); + " already deleted");
} catch (KeeperException ke) { } catch (KeeperException ke) {
server.abort( server.abort(
@ -2796,7 +2794,7 @@ public class AssignmentManager extends ZooKeeperListener {
while (!this.server.isStopped() && regionStates.isRegionInTransition(hri)) { while (!this.server.isStopped() && regionStates.isRegionInTransition(hri)) {
regionStates.waitForUpdate(100); regionStates.waitForUpdate(100);
if (EnvironmentEdgeManager.currentTimeMillis() > end) { if (EnvironmentEdgeManager.currentTimeMillis() > end) {
LOG.info("Timed out on waiting for region:" + hri.getEncodedName() + " to be assigned."); LOG.info("Timed out on waiting for " + hri.getEncodedName() + " to be assigned.");
return false; return false;
} }
} }
@ -3069,7 +3067,7 @@ public class AssignmentManager extends ZooKeeperListener {
regionStates.getRegionTransitionState(encodedName); regionStates.getRegionTransitionState(encodedName);
if (regionState == null if (regionState == null
|| !regionState.isPendingOpenOrOpeningOnServer(sn)) { || !regionState.isPendingOpenOrOpeningOnServer(sn)) {
LOG.info("Skip region " + hri LOG.info("Skip " + hri
+ " since it is not opening on the dead server any more: " + sn); + " since it is not opening on the dead server any more: " + sn);
it.remove(); it.remove();
} else { } else {

View File

@ -262,7 +262,7 @@ public class RegionStates {
hri, state, System.currentTimeMillis(), newServerName); hri, state, System.currentTimeMillis(), newServerName);
RegionState oldState = regionStates.put(regionName, regionState); RegionState oldState = regionStates.put(regionName, regionState);
if (oldState == null || oldState.getState() != regionState.getState()) { if (oldState == null || oldState.getState() != regionState.getState()) {
LOG.info("Region transitioned from " + oldState + " to " + regionState); LOG.info("Transitioned from " + oldState + " to " + regionState);
} }
if (newServerName != null || ( if (newServerName != null || (
state != State.PENDING_CLOSE && state != State.CLOSING)) { state != State.PENDING_CLOSE && state != State.CLOSING)) {
@ -298,7 +298,7 @@ public class RegionStates {
ServerName oldServerName = regionAssignments.put(hri, serverName); ServerName oldServerName = regionAssignments.put(hri, serverName);
if (!serverName.equals(oldServerName)) { if (!serverName.equals(oldServerName)) {
LOG.info("Onlined region " + hri.getShortNameToLog() + " on " + serverName); LOG.info("Onlined " + hri.getShortNameToLog() + " on " + serverName);
Set<HRegionInfo> regions = serverHoldings.get(serverName); Set<HRegionInfo> regions = serverHoldings.get(serverName);
if (regions == null) { if (regions == null) {
regions = new HashSet<HRegionInfo>(); regions = new HashSet<HRegionInfo>();
@ -306,7 +306,7 @@ public class RegionStates {
} }
regions.add(hri); regions.add(hri);
if (oldServerName != null) { if (oldServerName != null) {
LOG.info("Offlined region " + hri.getShortNameToLog() + " from " + oldServerName); LOG.info("Offlined " + hri.getShortNameToLog() + " from " + oldServerName);
serverHoldings.get(oldServerName).remove(hri); serverHoldings.get(oldServerName).remove(hri);
} }
} }
@ -333,7 +333,7 @@ public class RegionStates {
ServerName oldServerName = regionAssignments.remove(hri); ServerName oldServerName = regionAssignments.remove(hri);
if (oldServerName != null) { if (oldServerName != null) {
LOG.info("Offlined region " + hri.getShortNameToLog() + " from " + oldServerName); LOG.info("Offlined " + hri.getShortNameToLog() + " from " + oldServerName);
serverHoldings.get(oldServerName).remove(hri); serverHoldings.get(oldServerName).remove(hri);
} }
} }
@ -361,7 +361,7 @@ public class RegionStates {
// Region is open on this region server, but in transition. // Region is open on this region server, but in transition.
// This region must be moving away from this server, or splitting/merging. // This region must be moving away from this server, or splitting/merging.
// SSH will handle it, either skip assigning, or re-assign. // SSH will handle it, either skip assigning, or re-assign.
LOG.info("Transitioning region " + state + " will be handled by SSH for " + sn); LOG.info("Transitioning " + state + " will be handled by SSH for " + sn);
if (state.isSplitting() || state.isMerging()) { if (state.isSplitting() || state.isMerging()) {
LOG.info("Offline splitting/merging region " + state); LOG.info("Offline splitting/merging region " + state);
try { try {

View File

@ -97,6 +97,7 @@ public class TestZKBasedOpenCloseRegion {
} }
waitUntilAllRegionsAssigned(); waitUntilAllRegionsAssigned();
waitOnRIT();
} }
/** /**
@ -317,6 +318,18 @@ public class TestZKBasedOpenCloseRegion {
LOG.info("Done with testCloseRegion"); LOG.info("Done with testCloseRegion");
} }
private void waitOnRIT() {
// Close worked but we are going to open the region elsewhere. Before going on, make sure
// this completes.
while (TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().
getRegionStates().isRegionsInTransition()) {
LOG.info("Waiting on regions in transition: " +
TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().
getRegionStates().getRegionsInTransition());
Threads.sleep(10);
}
}
/** /**
* If region open fails with IOException in openRegion() while doing tableDescriptors.get() * If region open fails with IOException in openRegion() while doing tableDescriptors.get()
* the region should not add into regionsInTransitionInRS map * the region should not add into regionsInTransitionInRS map
@ -343,8 +356,7 @@ public class TestZKBasedOpenCloseRegion {
private static void waitUntilAllRegionsAssigned() private static void waitUntilAllRegionsAssigned()
throws IOException { throws IOException {
HTable meta = new HTable(TEST_UTIL.getConfiguration(), HTable meta = new HTable(TEST_UTIL.getConfiguration(), HConstants.META_TABLE_NAME);
HConstants.META_TABLE_NAME);
while (true) { while (true) {
int rows = 0; int rows = 0;
Scan scan = new Scan(); Scan scan = new Scan();