HBASE-2525 If one-RS only and it's restarted before its shutdown is completely processed, we'll never complete shutdown processing
git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@943732 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
7ba7a37639
commit
49d5f14dd6
|
@ -45,6 +45,8 @@ import java.util.Set;
|
|||
* serving, and the regions need to get reassigned.
|
||||
*/
|
||||
class ProcessServerShutdown extends RegionServerOperation {
|
||||
// Server name made of the concatenation of hostname, port and startcode
|
||||
// formatted as <code><hostname> ',' <port> ',' <startcode></code>
|
||||
private final String deadServer;
|
||||
private boolean isRootServer;
|
||||
private List<MetaRegion> metaRegions;
|
||||
|
@ -86,7 +88,7 @@ class ProcessServerShutdown extends RegionServerOperation {
|
|||
private void closeMetaRegions() {
|
||||
this.isRootServer =
|
||||
this.master.getRegionManager().isRootServer(this.deadServerAddress) ||
|
||||
this.master.getRegionManager().isRootServerCandidate (deadServer);
|
||||
this.master.getRegionManager().isRootInTransitionOnThisServer(deadServer);
|
||||
if (this.isRootServer) {
|
||||
this.master.getRegionManager().unsetRootRegion();
|
||||
}
|
||||
|
@ -156,7 +158,7 @@ class ProcessServerShutdown extends RegionServerOperation {
|
|||
// Check server name. If null, skip (We used to consider it was on
|
||||
// shutdown server but that would mean that we'd reassign regions that
|
||||
// were already out being assigned, ones that were product of a split
|
||||
// that happened while the shutdown was being processed.
|
||||
// that happened while the shutdown was being processed).
|
||||
String serverAddress = BaseScanner.getServerAddress(values);
|
||||
long startCode = BaseScanner.getStartCode(values);
|
||||
|
||||
|
@ -174,7 +176,6 @@ class ProcessServerShutdown extends RegionServerOperation {
|
|||
Bytes.toString(row));
|
||||
}
|
||||
|
||||
// HRegionInfo info = master.getHRegionInfo(row, values.rowResult());
|
||||
HRegionInfo info = master.getHRegionInfo(row, values);
|
||||
if (info == null) {
|
||||
emptyRows.add(row);
|
||||
|
@ -248,7 +249,7 @@ class ProcessServerShutdown extends RegionServerOperation {
|
|||
|
||||
public Boolean call() throws IOException {
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("process server shutdown scanning root region on " +
|
||||
LOG.debug("Process server shutdown scanning root region on " +
|
||||
master.getRegionManager().getRootRegionLocation().getBindAddress());
|
||||
}
|
||||
Scan scan = new Scan();
|
||||
|
@ -282,11 +283,9 @@ class ProcessServerShutdown extends RegionServerOperation {
|
|||
|
||||
@Override
|
||||
protected boolean process() throws IOException {
|
||||
LOG.info("process shutdown of server " + this.deadServer +
|
||||
": logSplit: " +
|
||||
logSplit + ", rootRescanned: " + rootRescanned +
|
||||
", numberOfMetaRegions: " +
|
||||
master.getRegionManager().numMetaRegions() +
|
||||
LOG.info("Process shutdown of server " + this.deadServer +
|
||||
": logSplit: " + logSplit + ", rootRescanned: " + rootRescanned +
|
||||
", numberOfMetaRegions: " + master.getRegionManager().numMetaRegions() +
|
||||
", onlineMetaRegions.size(): " +
|
||||
master.getRegionManager().numOnlineMetaRegions());
|
||||
if (!logSplit) {
|
||||
|
@ -305,13 +304,10 @@ class ProcessServerShutdown extends RegionServerOperation {
|
|||
}
|
||||
logSplit = true;
|
||||
}
|
||||
|
||||
LOG.info("Log split complete, meta reassignment and scanning:");
|
||||
|
||||
if (this.isRootServer) {
|
||||
LOG.info("ProcessServerShutdown reassigning ROOT region");
|
||||
master.getRegionManager().reassignRootRegion();
|
||||
|
||||
isRootServer = false; // prevent double reassignment... heh.
|
||||
}
|
||||
|
||||
|
@ -341,12 +337,13 @@ class ProcessServerShutdown extends RegionServerOperation {
|
|||
}
|
||||
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("process server shutdown scanning root region on " +
|
||||
LOG.debug("Process server shutdown scanning root region on " +
|
||||
master.getRegionManager().getRootRegionLocation().getBindAddress() +
|
||||
" finished " + Thread.currentThread().getName());
|
||||
}
|
||||
rootRescanned = true;
|
||||
}
|
||||
|
||||
if (!metaTableAvailable()) {
|
||||
// We can't proceed because not all meta regions are online.
|
||||
// metaTableAvailable() has put this request on the delayedToDoQueue
|
||||
|
@ -367,8 +364,6 @@ class ProcessServerShutdown extends RegionServerOperation {
|
|||
}
|
||||
|
||||
closeRegionsInTransition();
|
||||
|
||||
// Remove this server from dead servers list. Finished splitting logs.
|
||||
this.master.getServerManager().removeDeadServer(deadServer);
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Removed " + deadServer + " from deadservers Map");
|
||||
|
|
|
@ -857,7 +857,7 @@ public class RegionManager implements HConstants {
|
|||
* @param server Server
|
||||
* @return true if server is transitioning the ROOT table
|
||||
*/
|
||||
public boolean isRootServerCandidate(final String server) {
|
||||
public boolean isRootInTransitionOnThisServer(final String server) {
|
||||
synchronized (this.regionsInTransition) {
|
||||
for (RegionState s : regionsInTransition.values()) {
|
||||
if (s.getRegionInfo().isRootRegion()
|
||||
|
|
|
@ -31,6 +31,7 @@ import org.apache.hadoop.hbase.HServerAddress;
|
|||
import org.apache.hadoop.hbase.HServerInfo;
|
||||
import org.apache.hadoop.hbase.HServerLoad;
|
||||
import org.apache.hadoop.hbase.Leases;
|
||||
import org.apache.hadoop.hbase.Leases.LeaseStillHeldException;
|
||||
import org.apache.hadoop.hbase.client.Get;
|
||||
import org.apache.hadoop.hbase.client.Result;
|
||||
import org.apache.hadoop.hbase.ipc.HRegionInterface;
|
||||
|
@ -170,6 +171,10 @@ public class ServerManager implements HConstants {
|
|||
// Test for case where we get a region startup message from a regionserver
|
||||
// that has been quickly restarted but whose znode expiration handler has
|
||||
// not yet run, or from a server whose failure we are currently processing.
|
||||
// Test its host+port combo is present in serverAddressToServerInfo. If it
|
||||
// is, reject the server and trigger its expiration. The next time it comes
|
||||
// in, it should have been removed from serverAddressToServerInfo and queued
|
||||
// for processing by ProcessServerShutdown.
|
||||
HServerInfo info = new HServerInfo(serverInfo);
|
||||
String hostAndPort = info.getServerAddress().toString();
|
||||
HServerInfo existingServer =
|
||||
|
@ -183,20 +188,25 @@ public class ServerManager implements HConstants {
|
|||
}
|
||||
throw new Leases.LeaseStillHeldException(hostAndPort);
|
||||
}
|
||||
if (isDead(hostAndPort, true)) {
|
||||
LOG.debug("Server start rejected; currently processing " + hostAndPort +
|
||||
" failure");
|
||||
throw new Leases.LeaseStillHeldException(hostAndPort);
|
||||
}
|
||||
if (isDead(hostAndPort, true)) {
|
||||
LOG.debug("Server start rejected; currently processing " + hostAndPort +
|
||||
" failure");
|
||||
throw new Leases.LeaseStillHeldException(hostAndPort);
|
||||
}
|
||||
checkIsDead(info.getServerName(), "STARTUP");
|
||||
LOG.info("Received start message from: " + info.getServerName());
|
||||
recordNewServer(info);
|
||||
}
|
||||
|
||||
/*
|
||||
* If this server is on the dead list, reject it with a LeaseStillHeldException
|
||||
* @param serverName Server name formatted as host_port_startcode.
|
||||
* @param what STARTUP or REPORT
|
||||
* @throws LeaseStillHeldException
|
||||
*/
|
||||
private void checkIsDead(final String serverName, final String what)
|
||||
throws LeaseStillHeldException {
|
||||
if (!isDead(serverName)) return;
|
||||
LOG.debug("Server " + what + " rejected; currently processing " +
|
||||
serverName + " as dead server");
|
||||
throw new Leases.LeaseStillHeldException(serverName);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds the HSI to the RS list and creates an empty load
|
||||
* @param info The region server information
|
||||
|
@ -249,11 +259,7 @@ public class ServerManager implements HConstants {
|
|||
final HMsg msgs[], final HRegionInfo[] mostLoadedRegions)
|
||||
throws IOException {
|
||||
HServerInfo info = new HServerInfo(serverInfo);
|
||||
if (isDead(info.getServerName())) {
|
||||
LOG.info("Received report from region server " + info.getServerName() +
|
||||
" previously marked dead. Rejecting report.");
|
||||
throw new Leases.LeaseStillHeldException(info.getServerName());
|
||||
}
|
||||
checkIsDead(info.getServerName(), "REPORT");
|
||||
if (msgs.length > 0) {
|
||||
if (msgs[0].isType(HMsg.Type.MSG_REPORT_EXITING)) {
|
||||
processRegionServerExit(info, msgs);
|
||||
|
@ -665,7 +671,7 @@ public class ServerManager implements HConstants {
|
|||
this.master.getRegionManager().offlineMetaServer(info.getServerAddress());
|
||||
|
||||
//HBASE-1928: Check whether this server has been transitioning the ROOT table
|
||||
if (this.master.getRegionManager().isRootServerCandidate (serverName)) {
|
||||
if (this.master.getRegionManager().isRootInTransitionOnThisServer(serverName)) {
|
||||
this.master.getRegionManager().unsetRootRegion();
|
||||
this.master.getRegionManager().reassignRootRegion();
|
||||
}
|
||||
|
|
|
@ -600,8 +600,10 @@ public class HBaseTestingUtility {
|
|||
ZooKeeper zk = new ZooKeeper(quorumServers,
|
||||
sessionTimeout, EmptyWatcher.instance, sessionID, password);
|
||||
zk.close();
|
||||
final long sleep = sessionTimeout * 5L;
|
||||
LOG.info("ZK Closed; sleeping=" + sleep);
|
||||
|
||||
Thread.sleep(sessionTimeout * 5L);
|
||||
Thread.sleep(sleep);
|
||||
|
||||
new HTable(conf, HConstants.META_TABLE_NAME);
|
||||
}
|
||||
|
|
|
@ -19,6 +19,8 @@
|
|||
*/
|
||||
package org.apache.hadoop.hbase;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.hbase.client.*;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
|
@ -31,18 +33,19 @@ import static org.junit.Assert.fail;
|
|||
import java.io.IOException;
|
||||
|
||||
public class TestZooKeeper {
|
||||
private final Log LOG = LogFactory.getLog(this.getClass());
|
||||
|
||||
private final static HBaseTestingUtility
|
||||
TEST_UTIL = new HBaseTestingUtility();
|
||||
|
||||
private Configuration conf;
|
||||
private MiniHBaseCluster cluster;
|
||||
|
||||
/**
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@BeforeClass
|
||||
public static void setUpBeforeClass() throws Exception {
|
||||
TEST_UTIL.getConfiguration().setBoolean("dfs.support.append", true);
|
||||
TEST_UTIL.startMiniCluster(1);
|
||||
}
|
||||
|
||||
|
@ -60,7 +63,6 @@ public class TestZooKeeper {
|
|||
@Before
|
||||
public void setUp() throws Exception {
|
||||
conf = TEST_UTIL.getConfiguration();
|
||||
cluster = TEST_UTIL.getHBaseCluster();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -98,13 +100,16 @@ public class TestZooKeeper {
|
|||
}
|
||||
@Test
|
||||
public void testRegionServerSessionExpired() throws Exception{
|
||||
this.conf.setBoolean("hbase.regionserver.restart.on.zk.expire", true);
|
||||
LOG.info("Starting testRegionServerSessionExpired");
|
||||
new HTable(conf, HConstants.META_TABLE_NAME);
|
||||
TEST_UTIL.getMiniHBaseCluster().getRegionServer(0).getConfiguration().
|
||||
setBoolean("hbase.regionserver.restart.on.zk.expire", true);
|
||||
TEST_UTIL.expireRegionServerSession(0);
|
||||
testSanity();
|
||||
}
|
||||
@Test
|
||||
public void testMasterSessionExpired() throws Exception {
|
||||
LOG.info("Starting testRegionServerSessionExpired");
|
||||
new HTable(conf, HConstants.META_TABLE_NAME);
|
||||
TEST_UTIL.expireMasterSession();
|
||||
testSanity();
|
||||
|
@ -136,7 +141,7 @@ public class TestZooKeeper {
|
|||
public void testMultipleZK() {
|
||||
try {
|
||||
HTable localMeta = new HTable(conf, HConstants.META_TABLE_NAME);
|
||||
HBaseConfiguration otherConf = new HBaseConfiguration(conf);
|
||||
Configuration otherConf = HBaseConfiguration.create(conf);
|
||||
otherConf.set(HConstants.ZOOKEEPER_QUORUM, "127.0.0.1");
|
||||
HTable ipMeta = new HTable(conf, HConstants.META_TABLE_NAME);
|
||||
|
||||
|
|
Loading…
Reference in New Issue