HBASE-16367 Race between master and region server initialization may lead to premature server abort
This commit is contained in:
parent
e5f9df1e23
commit
50f3c9572c
|
@@ -40,6 +40,7 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Map.Entry;
|
import java.util.Map.Entry;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
import java.util.concurrent.CountDownLatch;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
import java.util.concurrent.atomic.AtomicReference;
|
import java.util.concurrent.atomic.AtomicReference;
|
||||||
|
@@ -451,6 +452,7 @@ public class HMaster extends HRegionServer implements MasterServices {
|
||||||
|
|
||||||
// Some unit tests don't need a cluster, so no zookeeper at all
|
// Some unit tests don't need a cluster, so no zookeeper at all
|
||||||
if (!conf.getBoolean("hbase.testing.nocluster", false)) {
|
if (!conf.getBoolean("hbase.testing.nocluster", false)) {
|
||||||
|
setInitLatch(new CountDownLatch(1));
|
||||||
activeMasterManager = new ActiveMasterManager(zooKeeper, this.serverName, this);
|
activeMasterManager = new ActiveMasterManager(zooKeeper, this.serverName, this);
|
||||||
int infoPort = putUpJettyServer();
|
int infoPort = putUpJettyServer();
|
||||||
startActiveMasterManager(infoPort);
|
startActiveMasterManager(infoPort);
|
||||||
|
@@ -693,6 +695,7 @@ public class HMaster extends HRegionServer implements MasterServices {
|
||||||
// publish cluster ID
|
// publish cluster ID
|
||||||
status.setStatus("Publishing Cluster ID in ZooKeeper");
|
status.setStatus("Publishing Cluster ID in ZooKeeper");
|
||||||
ZKClusterId.setClusterId(this.zooKeeper, fileSystemManager.getClusterId());
|
ZKClusterId.setClusterId(this.zooKeeper, fileSystemManager.getClusterId());
|
||||||
|
this.initLatch.countDown();
|
||||||
|
|
||||||
this.serverManager = createServerManager(this);
|
this.serverManager = createServerManager(this);
|
||||||
|
|
||||||
|
|
|
@@ -55,6 +55,8 @@ import java.util.TreeSet;
|
||||||
import java.util.concurrent.ConcurrentHashMap;
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
import java.util.concurrent.ConcurrentMap;
|
import java.util.concurrent.ConcurrentMap;
|
||||||
import java.util.concurrent.ConcurrentSkipListMap;
|
import java.util.concurrent.ConcurrentSkipListMap;
|
||||||
|
import java.util.concurrent.CountDownLatch;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
import java.util.concurrent.atomic.AtomicBoolean;
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
import java.util.concurrent.atomic.AtomicReference;
|
import java.util.concurrent.atomic.AtomicReference;
|
||||||
import java.util.concurrent.locks.ReentrantReadWriteLock;
|
import java.util.concurrent.locks.ReentrantReadWriteLock;
|
||||||
|
@@ -233,6 +235,7 @@ public class HRegionServer extends HasThread implements
|
||||||
protected MemStoreFlusher cacheFlusher;
|
protected MemStoreFlusher cacheFlusher;
|
||||||
|
|
||||||
protected HeapMemoryManager hMemManager;
|
protected HeapMemoryManager hMemManager;
|
||||||
|
protected CountDownLatch initLatch = null;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Cluster connection to be shared by services.
|
* Cluster connection to be shared by services.
|
||||||
|
@@ -655,6 +658,10 @@ public class HRegionServer extends HasThread implements
|
||||||
this.fs, this.rootDir, !canUpdateTableDescriptor(), false);
|
this.fs, this.rootDir, !canUpdateTableDescriptor(), false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected void setInitLatch(CountDownLatch latch) {
|
||||||
|
this.initLatch = latch;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Returns true if configured hostname should be used
|
* Returns true if configured hostname should be used
|
||||||
*/
|
*/
|
||||||
|
@@ -799,6 +806,8 @@ public class HRegionServer extends HasThread implements
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
* @throws InterruptedException
|
* @throws InterruptedException
|
||||||
*/
|
*/
|
||||||
|
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value="RV_RETURN_VALUE_IGNORED_BAD_PRACTICE",
|
||||||
|
justification="cluster Id znode read would give us correct response")
|
||||||
private void initializeZooKeeper() throws IOException, InterruptedException {
|
private void initializeZooKeeper() throws IOException, InterruptedException {
|
||||||
// Create the master address tracker, register with zk, and start it. Then
|
// Create the master address tracker, register with zk, and start it. Then
|
||||||
// block until a master is available. No point in starting up if no master
|
// block until a master is available. No point in starting up if no master
|
||||||
|
@@ -809,6 +818,9 @@ public class HRegionServer extends HasThread implements
|
||||||
// when ready.
|
// when ready.
|
||||||
blockAndCheckIfStopped(this.clusterStatusTracker);
|
blockAndCheckIfStopped(this.clusterStatusTracker);
|
||||||
|
|
||||||
|
if (this.initLatch != null) {
|
||||||
|
this.initLatch.await(50, TimeUnit.SECONDS);
|
||||||
|
}
|
||||||
// Retrieve clusterId
|
// Retrieve clusterId
|
||||||
// Since cluster status is now up
|
// Since cluster status is now up
|
||||||
// ID should have already been set by HMaster
|
// ID should have already been set by HMaster
|
||||||
|
|
Loading…
Reference in New Issue