HBASE-3995 HBASE-3946 broke TestMasterFailover
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1136345 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
0a585ffbc8
commit
e636646e91
|
@ -126,6 +126,7 @@ Release 0.91.0 - Unreleased
|
||||||
HBASE-3963 Schedule all log-splitting at startup all at once (mingjian)
|
HBASE-3963 Schedule all log-splitting at startup all at once (mingjian)
|
||||||
HBASE-3983 list command in shell seems broken
|
HBASE-3983 list command in shell seems broken
|
||||||
HBASE-3793 HBASE-3468 Broke checkAndPut with null value (Ming Ma)
|
HBASE-3793 HBASE-3468 Broke checkAndPut with null value (Ming Ma)
|
||||||
|
HBASE-3995 HBASE-3946 broke TestMasterFailover
|
||||||
|
|
||||||
IMPROVEMENTS
|
IMPROVEMENTS
|
||||||
HBASE-3290 Max Compaction Size (Nicolas Spiegelberg via Stack)
|
HBASE-3290 Max Compaction Size (Nicolas Spiegelberg via Stack)
|
||||||
|
|
|
@ -64,8 +64,8 @@ import org.apache.hadoop.hbase.master.handler.OpenedRegionHandler;
|
||||||
import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler;
|
import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler;
|
||||||
import org.apache.hadoop.hbase.master.handler.SplitRegionHandler;
|
import org.apache.hadoop.hbase.master.handler.SplitRegionHandler;
|
||||||
import org.apache.hadoop.hbase.util.Bytes;
|
import org.apache.hadoop.hbase.util.Bytes;
|
||||||
import org.apache.hadoop.hbase.util.Pair;
|
|
||||||
import org.apache.hadoop.hbase.util.FSUtils;
|
import org.apache.hadoop.hbase.util.FSUtils;
|
||||||
|
import org.apache.hadoop.hbase.util.Pair;
|
||||||
import org.apache.hadoop.hbase.util.Threads;
|
import org.apache.hadoop.hbase.util.Threads;
|
||||||
import org.apache.hadoop.hbase.util.Writables;
|
import org.apache.hadoop.hbase.util.Writables;
|
||||||
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
|
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
|
||||||
|
@ -79,7 +79,6 @@ import org.apache.zookeeper.AsyncCallback;
|
||||||
import org.apache.zookeeper.KeeperException;
|
import org.apache.zookeeper.KeeperException;
|
||||||
import org.apache.zookeeper.KeeperException.NoNodeException;
|
import org.apache.zookeeper.KeeperException.NoNodeException;
|
||||||
import org.apache.zookeeper.data.Stat;
|
import org.apache.zookeeper.data.Stat;
|
||||||
import org.apache.hadoop.hbase.client.Get;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Manages and performs region assignment.
|
* Manages and performs region assignment.
|
||||||
|
@ -238,13 +237,36 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
// Returns servers who have not checked in (assumed dead) and their regions
|
// Returns servers who have not checked in (assumed dead) and their regions
|
||||||
Map<ServerName,List<Pair<HRegionInfo,Result>>> deadServers =
|
Map<ServerName,List<Pair<HRegionInfo,Result>>> deadServers =
|
||||||
rebuildUserRegions();
|
rebuildUserRegions();
|
||||||
// Process list of dead servers
|
// Process list of dead servers; note this will add regions to the RIT.
|
||||||
|
// processRegionsInTransition will read them and assign them out.
|
||||||
processDeadServers(deadServers);
|
processDeadServers(deadServers);
|
||||||
// Check existing regions in transition
|
// Check existing regions in transition
|
||||||
processRegionsInTransition();
|
processRegionsInTransition(deadServers);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Process all regions that are in transition up in zookeeper. Used by
|
||||||
|
* master joining an already running cluster.
|
||||||
|
* @throws KeeperException
|
||||||
|
* @throws IOException
|
||||||
|
* @throws InterruptedException
|
||||||
|
*/
|
||||||
void processRegionsInTransition()
|
void processRegionsInTransition()
|
||||||
|
throws KeeperException, IOException, InterruptedException {
|
||||||
|
// Pass null to signify no dead servers in this context.
|
||||||
|
processRegionsInTransition(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Process all regions that are in transition up in zookeeper. Used by
|
||||||
|
* master joining an already running cluster.
|
||||||
|
* @param deadServers Map of dead servers and their regions. Can be null.
|
||||||
|
* @throws KeeperException
|
||||||
|
* @throws IOException
|
||||||
|
* @throws InterruptedException
|
||||||
|
*/
|
||||||
|
void processRegionsInTransition(
|
||||||
|
final Map<ServerName, List<Pair<HRegionInfo, Result>>> deadServers)
|
||||||
throws KeeperException, IOException, InterruptedException {
|
throws KeeperException, IOException, InterruptedException {
|
||||||
List<String> nodes = ZKUtil.listChildrenAndWatchForNewChildren(watcher,
|
List<String> nodes = ZKUtil.listChildrenAndWatchForNewChildren(watcher,
|
||||||
watcher.assignmentZNode);
|
watcher.assignmentZNode);
|
||||||
|
@ -260,7 +282,7 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (nodes.contains(e.getKey().getEncodedName())) {
|
if (nodes.contains(e.getKey().getEncodedName())) {
|
||||||
LOG.debug("Found " + e + " in RITs");
|
LOG.debug("Found " + e.getKey().getRegionNameAsString() + " in RITs");
|
||||||
// Could be a meta region.
|
// Could be a meta region.
|
||||||
regionsToProcess = true;
|
regionsToProcess = true;
|
||||||
break;
|
break;
|
||||||
|
@ -272,7 +294,7 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
LOG.info("Found regions out on cluster or in RIT; failover");
|
LOG.info("Found regions out on cluster or in RIT; failover");
|
||||||
if (!nodes.isEmpty()) {
|
if (!nodes.isEmpty()) {
|
||||||
for (String encodedRegionName: nodes) {
|
for (String encodedRegionName: nodes) {
|
||||||
processRegionInTransition(encodedRegionName, null);
|
processRegionInTransition(encodedRegionName, null, deadServers);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -296,7 +318,8 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
*/
|
*/
|
||||||
boolean processRegionInTransitionAndBlockUntilAssigned(final HRegionInfo hri)
|
boolean processRegionInTransitionAndBlockUntilAssigned(final HRegionInfo hri)
|
||||||
throws InterruptedException, KeeperException, IOException {
|
throws InterruptedException, KeeperException, IOException {
|
||||||
boolean intransistion = processRegionInTransition(hri.getEncodedName(), hri);
|
boolean intransistion =
|
||||||
|
processRegionInTransition(hri.getEncodedName(), hri, null);
|
||||||
if (!intransistion) return intransistion;
|
if (!intransistion) return intransistion;
|
||||||
synchronized(this.regionsInTransition) {
|
synchronized(this.regionsInTransition) {
|
||||||
while (!this.master.isStopped() &&
|
while (!this.master.isStopped() &&
|
||||||
|
@ -308,15 +331,18 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Process failover of <code>servername</code>. Look in RIT.
|
* Process failover of new master for region <code>encodedRegionName</code>
|
||||||
|
* up in zookeeper.
|
||||||
* @param encodedRegionName Region to process failover for.
|
* @param encodedRegionName Region to process failover for.
|
||||||
* @param regionInfo If null we'll go get it from meta table.
|
* @param regionInfo If null we'll go get it from meta table.
|
||||||
|
* @param deadServers Can be null
|
||||||
* @return True if we processed <code>regionInfo</code> as a RIT.
|
* @return True if we processed <code>regionInfo</code> as a RIT.
|
||||||
* @throws KeeperException
|
* @throws KeeperException
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
boolean processRegionInTransition(final String encodedRegionName,
|
boolean processRegionInTransition(final String encodedRegionName,
|
||||||
final HRegionInfo regionInfo)
|
final HRegionInfo regionInfo,
|
||||||
|
final Map<ServerName,List<Pair<HRegionInfo,Result>>> deadServers)
|
||||||
throws KeeperException, IOException {
|
throws KeeperException, IOException {
|
||||||
RegionTransitionData data = ZKAssign.getData(watcher, encodedRegionName);
|
RegionTransitionData data = ZKAssign.getData(watcher, encodedRegionName);
|
||||||
if (data == null) return false;
|
if (data == null) return false;
|
||||||
|
@ -327,12 +353,13 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
if (p == null) return false;
|
if (p == null) return false;
|
||||||
hri = p.getFirst();
|
hri = p.getFirst();
|
||||||
}
|
}
|
||||||
processRegionsInTransition(data, hri);
|
processRegionsInTransition(data, hri, deadServers);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void processRegionsInTransition(final RegionTransitionData data,
|
void processRegionsInTransition(final RegionTransitionData data,
|
||||||
final HRegionInfo regionInfo)
|
final HRegionInfo regionInfo,
|
||||||
|
final Map<ServerName,List<Pair<HRegionInfo,Result>>> deadServers)
|
||||||
throws KeeperException {
|
throws KeeperException {
|
||||||
String encodedRegionName = regionInfo.getEncodedName();
|
String encodedRegionName = regionInfo.getEncodedName();
|
||||||
LOG.info("Processing region " + regionInfo.getRegionNameAsString() +
|
LOG.info("Processing region " + regionInfo.getRegionNameAsString() +
|
||||||
|
@ -340,30 +367,33 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
synchronized (regionsInTransition) {
|
synchronized (regionsInTransition) {
|
||||||
switch (data.getEventType()) {
|
switch (data.getEventType()) {
|
||||||
case RS_ZK_REGION_CLOSING:
|
case RS_ZK_REGION_CLOSING:
|
||||||
// Just insert region into RIT.
|
if (isOnDeadServer(regionInfo, deadServers)) {
|
||||||
// If this never updates the timeout will trigger new assignment
|
// If it was on a dead server, it's closed now. Force to OFFLINE and this
|
||||||
regionsInTransition.put(encodedRegionName, new RegionState(
|
// will get it reassigned if appropriate
|
||||||
|
forceOffline(regionInfo, data);
|
||||||
|
} else {
|
||||||
|
// Just insert region into RIT.
|
||||||
|
// If this never updates the timeout will trigger new assignment
|
||||||
|
regionsInTransition.put(encodedRegionName, new RegionState(
|
||||||
regionInfo, RegionState.State.CLOSING,
|
regionInfo, RegionState.State.CLOSING,
|
||||||
data.getStamp(), data.getOrigin()));
|
data.getStamp(), data.getOrigin()));
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case RS_ZK_REGION_CLOSED:
|
case RS_ZK_REGION_CLOSED:
|
||||||
// Region is closed, insert into RIT and handle it
|
// Region is closed, insert into RIT and handle it
|
||||||
regionsInTransition.put(encodedRegionName, new RegionState(
|
addToRITandCallClose(regionInfo, RegionState.State.CLOSED, data);
|
||||||
regionInfo, RegionState.State.CLOSED,
|
|
||||||
data.getStamp(), data.getOrigin()));
|
|
||||||
new ClosedRegionHandler(master, this, regionInfo).process();
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case M_ZK_REGION_OFFLINE:
|
case M_ZK_REGION_OFFLINE:
|
||||||
// Region is offline, insert into RIT and handle it like a closed
|
// Region is offline, insert into RIT and handle it like a closed
|
||||||
regionsInTransition.put(encodedRegionName, new RegionState(
|
addToRITandCallClose(regionInfo, RegionState.State.OFFLINE, data);
|
||||||
regionInfo, RegionState.State.OFFLINE,
|
|
||||||
data.getStamp(), data.getOrigin()));
|
|
||||||
new ClosedRegionHandler(master, this, regionInfo).process();
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case RS_ZK_REGION_OPENING:
|
case RS_ZK_REGION_OPENING:
|
||||||
|
// TODO: Could check if it was on deadServers. If it was, then we could
|
||||||
|
// do what happens in TimeoutMonitor when it sees this condition.
|
||||||
|
|
||||||
// Just insert region into RIT
|
// Just insert region into RIT
|
||||||
// If this never updates the timeout will trigger new assignment
|
// If this never updates the timeout will trigger new assignment
|
||||||
regionsInTransition.put(encodedRegionName, new RegionState(
|
regionsInTransition.put(encodedRegionName, new RegionState(
|
||||||
|
@ -374,12 +404,11 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
case RS_ZK_REGION_OPENED:
|
case RS_ZK_REGION_OPENED:
|
||||||
// Region is opened, insert into RIT and handle it
|
// Region is opened, insert into RIT and handle it
|
||||||
regionsInTransition.put(encodedRegionName, new RegionState(
|
regionsInTransition.put(encodedRegionName, new RegionState(
|
||||||
regionInfo, RegionState.State.OPENING,
|
regionInfo, RegionState.State.OPEN,
|
||||||
data.getStamp(), data.getOrigin()));
|
data.getStamp(), data.getOrigin()));
|
||||||
ServerName sn =
|
ServerName sn = data.getOrigin() == null? null: data.getOrigin();
|
||||||
data.getOrigin() == null? null: data.getOrigin();
|
// sn could be null if this server is no longer online. If
|
||||||
// hsi could be null if this server is no longer online. If
|
// that is the case, just let this RIT timeout; it'll be assigned
|
||||||
// that the case, just let this RIT timeout; it'll be assigned
|
|
||||||
// to new server then.
|
// to new server then.
|
||||||
if (sn == null) {
|
if (sn == null) {
|
||||||
LOG.warn("Region in transition " + regionInfo.getEncodedName() +
|
LOG.warn("Region in transition " + regionInfo.getEncodedName() +
|
||||||
|
@ -387,12 +416,67 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
"assigned elsewhere");
|
"assigned elsewhere");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
new OpenedRegionHandler(master, this, regionInfo, sn).process();
|
if (isOnDeadServer(regionInfo, deadServers)) {
|
||||||
|
// If it was on a dead server, then it's not open any more; needs handling.
|
||||||
|
forceOffline(regionInfo, data);
|
||||||
|
} else {
|
||||||
|
new OpenedRegionHandler(master, this, regionInfo, sn).process();
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Put the region <code>hri</code> into an offline state up in zk.
|
||||||
|
* @param hri
|
||||||
|
* @param oldData
|
||||||
|
* @throws KeeperException
|
||||||
|
*/
|
||||||
|
private void forceOffline(final HRegionInfo hri,
|
||||||
|
final RegionTransitionData oldData)
|
||||||
|
throws KeeperException {
|
||||||
|
// If it was on a dead server, it's closed now. Force to OFFLINE and then
|
||||||
|
// handle it like a close; this will get it reassigned if appropriate
|
||||||
|
LOG.debug("RIT " + hri.getEncodedName() + " in state=" +
|
||||||
|
oldData.getEventType() + " was on deadserver; forcing offline");
|
||||||
|
ZKAssign.createOrForceNodeOffline(this.watcher, hri,
|
||||||
|
this.master.getServerName());
|
||||||
|
addToRITandCallClose(hri, RegionState.State.OFFLINE, oldData);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add to the in-memory copy of regions in transition and then call close
|
||||||
|
* handler on passed region <code>hri</code>
|
||||||
|
* @param hri
|
||||||
|
* @param state
|
||||||
|
* @param oldData
|
||||||
|
*/
|
||||||
|
private void addToRITandCallClose(final HRegionInfo hri,
|
||||||
|
final RegionState.State state, final RegionTransitionData oldData) {
|
||||||
|
this.regionsInTransition.put(hri.getEncodedName(),
|
||||||
|
new RegionState(hri, state, oldData.getStamp(), oldData.getOrigin()));
|
||||||
|
new ClosedRegionHandler(this.master, this, hri).process();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param regionInfo
|
||||||
|
* @param deadServers Map of deadServers and the regions they were carrying;
|
||||||
|
* can be null.
|
||||||
|
* @return True if the passed regionInfo is in the passed map of deadServers.
|
||||||
|
*/
|
||||||
|
private boolean isOnDeadServer(final HRegionInfo regionInfo,
|
||||||
|
final Map<ServerName, List<Pair<HRegionInfo, Result>>> deadServers) {
|
||||||
|
if (deadServers == null) return false;
|
||||||
|
for (Map.Entry<ServerName, List<Pair<HRegionInfo, Result>>> deadServer:
|
||||||
|
deadServers.entrySet()) {
|
||||||
|
for (Pair<HRegionInfo, Result> e: deadServer.getValue()) {
|
||||||
|
if (e.getFirst().equals(regionInfo)) return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Handles various states an unassigned node can be in.
|
* Handles various states an unassigned node can be in.
|
||||||
* <p>
|
* <p>
|
||||||
|
@ -418,7 +502,7 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
if (!serverManager.isServerOnline(sn) &&
|
if (!serverManager.isServerOnline(sn) &&
|
||||||
!this.master.getServerName().equals(sn)) {
|
!this.master.getServerName().equals(sn)) {
|
||||||
LOG.warn("Attempted to handle region transition for server but " +
|
LOG.warn("Attempted to handle region transition for server but " +
|
||||||
"server is not online: " + data.getRegionName());
|
"server is not online: " + Bytes.toString(data.getRegionName()));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
String encodedName = HRegionInfo.encodeRegionName(data.getRegionName());
|
String encodedName = HRegionInfo.encodeRegionName(data.getRegionName());
|
||||||
|
@ -1703,7 +1787,7 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
Map<ServerName, List<Pair<HRegionInfo, Result>>> deadServers)
|
Map<ServerName, List<Pair<HRegionInfo, Result>>> deadServers)
|
||||||
throws IOException, KeeperException {
|
throws IOException, KeeperException {
|
||||||
for (Map.Entry<ServerName, List<Pair<HRegionInfo,Result>>> deadServer:
|
for (Map.Entry<ServerName, List<Pair<HRegionInfo,Result>>> deadServer:
|
||||||
deadServers.entrySet()) {
|
deadServers.entrySet()) {
|
||||||
List<Pair<HRegionInfo,Result>> regions = deadServer.getValue();
|
List<Pair<HRegionInfo,Result>> regions = deadServer.getValue();
|
||||||
for (Pair<HRegionInfo,Result> region : regions) {
|
for (Pair<HRegionInfo,Result> region : regions) {
|
||||||
HRegionInfo regionInfo = region.getFirst();
|
HRegionInfo regionInfo = region.getFirst();
|
||||||
|
@ -1711,12 +1795,12 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
// If region was in transition (was in zk) force it offline for reassign
|
// If region was in transition (was in zk) force it offline for reassign
|
||||||
try {
|
try {
|
||||||
// Process with existing RS shutdown code
|
// Process with existing RS shutdown code
|
||||||
boolean isNotDisabledAndSplitted =
|
boolean assign =
|
||||||
ServerShutdownHandler.processDeadRegion(regionInfo, result, this,
|
ServerShutdownHandler.processDeadRegion(regionInfo, result, this,
|
||||||
this.catalogTracker);
|
this.catalogTracker);
|
||||||
if (isNotDisabledAndSplitted) {
|
if (assign) {
|
||||||
ZKAssign.createOrForceNodeOffline(watcher, regionInfo,
|
ZKAssign.createOrForceNodeOffline(watcher, regionInfo,
|
||||||
master.getServerName());
|
master.getServerName());
|
||||||
}
|
}
|
||||||
} catch (KeeperException.NoNodeException nne) {
|
} catch (KeeperException.NoNodeException nne) {
|
||||||
// This is fine
|
// This is fine
|
||||||
|
|
|
@ -1195,6 +1195,8 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {
|
||||||
// Update in-memory structures to reflect our earlier Root/Meta assignment.
|
// Update in-memory structures to reflect our earlier Root/Meta assignment.
|
||||||
assignRootAndMeta(status);
|
assignRootAndMeta(status);
|
||||||
// process RIT if any
|
// process RIT if any
|
||||||
|
// TODO: Why does this not call AssignmentManager.joinCluster? Otherwise
|
||||||
|
// we are not processing dead servers if any.
|
||||||
this.assignmentManager.processRegionsInTransition();
|
this.assignmentManager.processRegionsInTransition();
|
||||||
return true;
|
return true;
|
||||||
} finally {
|
} finally {
|
||||||
|
|
|
@ -390,9 +390,9 @@ public class ZKAssign {
|
||||||
throw KeeperException.create(Code.NONODE);
|
throw KeeperException.create(Code.NONODE);
|
||||||
}
|
}
|
||||||
RegionTransitionData data = RegionTransitionData.fromBytes(bytes);
|
RegionTransitionData data = RegionTransitionData.fromBytes(bytes);
|
||||||
if(!data.getEventType().equals(expectedState)) {
|
if (!data.getEventType().equals(expectedState)) {
|
||||||
LOG.warn(zkw.prefix("Attempting to delete unassigned " +
|
LOG.warn(zkw.prefix("Attempting to delete unassigned " +
|
||||||
"node in " + expectedState +
|
"node " + regionName + " in " + expectedState +
|
||||||
" state but node is in " + data.getEventType() + " state"));
|
" state but node is in " + data.getEventType() + " state"));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue