HBASE-4580 Some invalid zk nodes were created when a clean cluster restarts
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1187010 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
0d9185a60d
commit
25385fe1e2
|
@ -384,6 +384,8 @@ Release 0.92.0 - Unreleased
|
||||||
HBASE-4510 Check and workaround usage of internal HDFS APIs in HBase
|
HBASE-4510 Check and workaround usage of internal HDFS APIs in HBase
|
||||||
(Harsh)
|
(Harsh)
|
||||||
HBASE-4595 HFilePrettyPrinter Scanned kv count always 0 (Matteo Bertozzi)
|
HBASE-4595 HFilePrettyPrinter Scanned kv count always 0 (Matteo Bertozzi)
|
||||||
|
HBASE-4580 Some invalid zk nodes were created when a clean cluster restarts
|
||||||
|
(Gaojinchao)
|
||||||
|
|
||||||
TESTS
|
TESTS
|
||||||
HBASE-4450 test for number of blocks read: to serve as baseline for expected
|
HBASE-4450 test for number of blocks read: to serve as baseline for expected
|
||||||
|
|
|
@ -314,11 +314,8 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
// Returns servers who have not checked in (assumed dead) and their regions
|
// Returns servers who have not checked in (assumed dead) and their regions
|
||||||
Map<ServerName,List<Pair<HRegionInfo,Result>>> deadServers =
|
Map<ServerName,List<Pair<HRegionInfo,Result>>> deadServers =
|
||||||
rebuildUserRegions();
|
rebuildUserRegions();
|
||||||
// Process list of dead servers; note this will add regions to the RIT.
|
|
||||||
// processRegionsInTransition will read them and assign them out.
|
processDeadServersAndRegionsInTransition(deadServers);
|
||||||
processDeadServers(deadServers);
|
|
||||||
// Check existing regions in transition
|
|
||||||
processRegionsInTransition(deadServers);
|
|
||||||
|
|
||||||
// Recover the tables that were not fully moved to DISABLED state.
|
// Recover the tables that were not fully moved to DISABLED state.
|
||||||
// These tables are in DISABLING state when the master restarted/switched.
|
// These tables are in DISABLING state when the master restarted/switched.
|
||||||
|
@ -333,21 +330,23 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
* @throws InterruptedException
|
* @throws InterruptedException
|
||||||
*/
|
*/
|
||||||
void processRegionsInTransition()
|
void processDeadServersAndRegionsInTransition()
|
||||||
throws KeeperException, IOException, InterruptedException {
|
throws KeeperException, IOException, InterruptedException {
|
||||||
// Pass null to signify no dead servers in this context.
|
// Pass null to signify no dead servers in this context.
|
||||||
processRegionsInTransition(null);
|
processDeadServersAndRegionsInTransition(null);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Process all regions that are in transition up in zookeeper. Used by
|
* Process all regions that are in transition in zookeeper and also
|
||||||
* master joining an already running cluster.
|
* processes the list of dead servers by scanning the META.
|
||||||
* @param deadServers Map of dead servers and their regions. Can be null.
|
* Used by master joining a cluster.
|
||||||
|
* @param deadServers
|
||||||
|
* Map of dead servers and their regions. Can be null.
|
||||||
* @throws KeeperException
|
* @throws KeeperException
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
* @throws InterruptedException
|
* @throws InterruptedException
|
||||||
*/
|
*/
|
||||||
void processRegionsInTransition(
|
void processDeadServersAndRegionsInTransition(
|
||||||
final Map<ServerName, List<Pair<HRegionInfo, Result>>> deadServers)
|
final Map<ServerName, List<Pair<HRegionInfo, Result>>> deadServers)
|
||||||
throws KeeperException, IOException, InterruptedException {
|
throws KeeperException, IOException, InterruptedException {
|
||||||
List<String> nodes = ZKUtil.listChildrenAndWatchForNewChildren(watcher,
|
List<String> nodes = ZKUtil.listChildrenAndWatchForNewChildren(watcher,
|
||||||
|
@ -374,11 +373,10 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
// If we found user regions out on cluster, it's a failover.
|
// If we found user regions out on cluster, it's a failover.
|
||||||
if (regionsToProcess) {
|
if (regionsToProcess) {
|
||||||
LOG.info("Found regions out on cluster or in RIT; failover");
|
LOG.info("Found regions out on cluster or in RIT; failover");
|
||||||
if (!nodes.isEmpty()) {
|
// Process list of dead servers and regions in RIT.
|
||||||
for (String encodedRegionName: nodes) {
|
// See HBASE-4580 for more information.
|
||||||
processRegionInTransition(encodedRegionName, null, deadServers);
|
processDeadServersAndRecoverLostRegions(deadServers, nodes);
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
// Fresh cluster startup.
|
// Fresh cluster startup.
|
||||||
LOG.info("Clean cluster startup. Assigning userregions");
|
LOG.info("Clean cluster startup. Assigning userregions");
|
||||||
|
@ -2222,54 +2220,69 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Processes list of dead servers from result of META scan.
|
* Processes list of dead servers from result of META scan and regions in RIT
|
||||||
* <p>
|
* <p>
|
||||||
* This is used as part of failover to handle RegionServers which failed
|
* This is used for failover to recover the lost regions that belonged to
|
||||||
* while there was no active master.
|
* RegionServers which failed while there was no active master or regions
|
||||||
|
* that were in RIT.
|
||||||
* <p>
|
* <p>
|
||||||
* Method stubs in-memory data to be as expected by the normal server shutdown
|
*
|
||||||
* handler.
|
|
||||||
*
|
|
||||||
* @param deadServers
|
* @param deadServers
|
||||||
|
* The list of dead servers which failed while there was no active
|
||||||
|
* master. Can be null.
|
||||||
|
* @param nodes
|
||||||
|
* The regions in RIT
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
* @throws KeeperException
|
* @throws KeeperException
|
||||||
*/
|
*/
|
||||||
private void processDeadServers(
|
private void processDeadServersAndRecoverLostRegions(
|
||||||
Map<ServerName, List<Pair<HRegionInfo, Result>>> deadServers)
|
Map<ServerName, List<Pair<HRegionInfo, Result>>> deadServers,
|
||||||
throws IOException, KeeperException {
|
List<String> nodes) throws IOException, KeeperException {
|
||||||
for (Map.Entry<ServerName, List<Pair<HRegionInfo,Result>>> deadServer:
|
if (null != deadServers) {
|
||||||
|
for (Map.Entry<ServerName, List<Pair<HRegionInfo, Result>>> deadServer :
|
||||||
deadServers.entrySet()) {
|
deadServers.entrySet()) {
|
||||||
List<Pair<HRegionInfo,Result>> regions = deadServer.getValue();
|
List<Pair<HRegionInfo, Result>> regions = deadServer.getValue();
|
||||||
for (Pair<HRegionInfo,Result> region : regions) {
|
for (Pair<HRegionInfo, Result> region : regions) {
|
||||||
HRegionInfo regionInfo = region.getFirst();
|
HRegionInfo regionInfo = region.getFirst();
|
||||||
Result result = region.getSecond();
|
Result result = region.getSecond();
|
||||||
// If region was in transition (was in zk) force it offline for reassign
|
// If region was in transition (was in zk) force it offline for
|
||||||
try {
|
// reassign
|
||||||
RegionTransitionData data = ZKAssign.getData(watcher,
|
try {
|
||||||
regionInfo.getEncodedName());
|
RegionTransitionData data = ZKAssign.getData(watcher,
|
||||||
|
regionInfo.getEncodedName());
|
||||||
|
|
||||||
// If zk node of this region has been updated by a live server,
|
// If zk node of this region has been updated by a live server,
|
||||||
// we consider that this region is being handled.
|
// we consider that this region is being handled.
|
||||||
// So we should skip it and process it in processRegionsInTransition.
|
// So we should skip it and process it in
|
||||||
if (data != null && data.getOrigin() != null &&
|
// processRegionsInTransition.
|
||||||
serverManager.isServerOnline(data.getOrigin())) {
|
if (data != null && data.getOrigin() != null &&
|
||||||
LOG.info("The region " + regionInfo.getEncodedName()
|
serverManager.isServerOnline(data.getOrigin())) {
|
||||||
+ "is being handled on " + data.getOrigin());
|
LOG.info("The region " + regionInfo.getEncodedName()
|
||||||
continue;
|
+ "is being handled on " + data.getOrigin());
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// Process with existing RS shutdown code
|
||||||
|
boolean assign = ServerShutdownHandler.processDeadRegion(
|
||||||
|
regionInfo, result, this, this.catalogTracker);
|
||||||
|
if (assign) {
|
||||||
|
ZKAssign.createOrForceNodeOffline(watcher, regionInfo,
|
||||||
|
master.getServerName());
|
||||||
|
if (!nodes.contains(regionInfo.getEncodedName())) {
|
||||||
|
nodes.add(regionInfo.getEncodedName());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (KeeperException.NoNodeException nne) {
|
||||||
|
// This is fine
|
||||||
}
|
}
|
||||||
// Process with existing RS shutdown code
|
|
||||||
boolean assign =
|
|
||||||
ServerShutdownHandler.processDeadRegion(regionInfo, result, this,
|
|
||||||
this.catalogTracker);
|
|
||||||
if (assign) {
|
|
||||||
ZKAssign.createOrForceNodeOffline(watcher, regionInfo,
|
|
||||||
master.getServerName());
|
|
||||||
}
|
|
||||||
} catch (KeeperException.NoNodeException nne) {
|
|
||||||
// This is fine
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!nodes.isEmpty()) {
|
||||||
|
for (String encodedRegionName : nodes) {
|
||||||
|
processRegionInTransition(encodedRegionName, null, deadServers);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -1201,7 +1201,7 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {
|
||||||
// process RIT if any
|
// process RIT if any
|
||||||
// TODO: Why does this not call AssignmentManager.joinCluster? Otherwise
|
// TODO: Why does this not call AssignmentManager.joinCluster? Otherwise
|
||||||
// we are not processing dead servers if any.
|
// we are not processing dead servers if any.
|
||||||
this.assignmentManager.processRegionsInTransition();
|
this.assignmentManager.processDeadServersAndRegionsInTransition();
|
||||||
return true;
|
return true;
|
||||||
} finally {
|
} finally {
|
||||||
status.cleanup();
|
status.cleanup();
|
||||||
|
|
Loading…
Reference in New Issue