HBASE-9746 RegionServer can't start when replication tries to replicate to an unknown host.
This commit is contained in:
parent
5ed89df4bb
commit
d79db4d221
|
@ -109,7 +109,6 @@ public class RecoverableZooKeeper {
|
||||||
Watcher watcher, int maxRetries, int retryIntervalMillis, String identifier)
|
Watcher watcher, int maxRetries, int retryIntervalMillis, String identifier)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
// TODO: Add support for zk 'chroot'; we don't add it to the quorumServers String as we should.
|
// TODO: Add support for zk 'chroot'; we don't add it to the quorumServers String as we should.
|
||||||
this.zk = new ZooKeeper(quorumServers, sessionTimeout, watcher);
|
|
||||||
this.retryCounterFactory =
|
this.retryCounterFactory =
|
||||||
new RetryCounterFactory(maxRetries+1, retryIntervalMillis);
|
new RetryCounterFactory(maxRetries+1, retryIntervalMillis);
|
||||||
|
|
||||||
|
@ -125,16 +124,36 @@ public class RecoverableZooKeeper {
|
||||||
this.watcher = watcher;
|
this.watcher = watcher;
|
||||||
this.sessionTimeout = sessionTimeout;
|
this.sessionTimeout = sessionTimeout;
|
||||||
this.quorumServers = quorumServers;
|
this.quorumServers = quorumServers;
|
||||||
|
try {checkZk();} catch (Exception x) {/* ignore */}
|
||||||
salter = new SecureRandom();
|
salter = new SecureRandom();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Try to create a Zookeeper connection. Turns any exception encountered into a
|
||||||
|
* {@link KeeperException.OperationTimeoutException} so it can be retried.
|
||||||
|
* @return The created Zookeeper connection object
|
||||||
|
* @throws KeeperException
|
||||||
|
*/
|
||||||
|
protected ZooKeeper checkZk() throws KeeperException {
|
||||||
|
if (this.zk == null) {
|
||||||
|
try {
|
||||||
|
this.zk = new ZooKeeper(quorumServers, sessionTimeout, watcher);
|
||||||
|
} catch (Exception uhe) {
|
||||||
|
LOG.warn("Unable to create ZooKeeper Connection", uhe);
|
||||||
|
throw new KeeperException.OperationTimeoutException();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return zk;
|
||||||
|
}
|
||||||
|
|
||||||
public void reconnectAfterExpiration()
|
public void reconnectAfterExpiration()
|
||||||
throws IOException, InterruptedException {
|
throws IOException, KeeperException, InterruptedException {
|
||||||
LOG.info("Closing dead ZooKeeper connection, session" +
|
if (zk != null) {
|
||||||
" was: 0x"+Long.toHexString(zk.getSessionId()));
|
LOG.info("Closing dead ZooKeeper connection, session" +
|
||||||
zk.close();
|
" was: 0x"+Long.toHexString(zk.getSessionId()));
|
||||||
this.zk = new ZooKeeper(this.quorumServers,
|
zk.close();
|
||||||
this.sessionTimeout, this.watcher);
|
}
|
||||||
|
checkZk();
|
||||||
LOG.info("Recreated a ZooKeeper, session" +
|
LOG.info("Recreated a ZooKeeper, session" +
|
||||||
" is: 0x"+Long.toHexString(zk.getSessionId()));
|
" is: 0x"+Long.toHexString(zk.getSessionId()));
|
||||||
}
|
}
|
||||||
|
@ -153,7 +172,7 @@ public class RecoverableZooKeeper {
|
||||||
boolean isRetry = false; // False for first attempt, true for all retries.
|
boolean isRetry = false; // False for first attempt, true for all retries.
|
||||||
while (true) {
|
while (true) {
|
||||||
try {
|
try {
|
||||||
zk.delete(path, version);
|
checkZk().delete(path, version);
|
||||||
return;
|
return;
|
||||||
} catch (KeeperException e) {
|
} catch (KeeperException e) {
|
||||||
switch (e.code()) {
|
switch (e.code()) {
|
||||||
|
@ -196,7 +215,7 @@ public class RecoverableZooKeeper {
|
||||||
RetryCounter retryCounter = retryCounterFactory.create();
|
RetryCounter retryCounter = retryCounterFactory.create();
|
||||||
while (true) {
|
while (true) {
|
||||||
try {
|
try {
|
||||||
return zk.exists(path, watcher);
|
return checkZk().exists(path, watcher);
|
||||||
} catch (KeeperException e) {
|
} catch (KeeperException e) {
|
||||||
switch (e.code()) {
|
switch (e.code()) {
|
||||||
case CONNECTIONLOSS:
|
case CONNECTIONLOSS:
|
||||||
|
@ -228,7 +247,7 @@ public class RecoverableZooKeeper {
|
||||||
RetryCounter retryCounter = retryCounterFactory.create();
|
RetryCounter retryCounter = retryCounterFactory.create();
|
||||||
while (true) {
|
while (true) {
|
||||||
try {
|
try {
|
||||||
return zk.exists(path, watch);
|
return checkZk().exists(path, watch);
|
||||||
} catch (KeeperException e) {
|
} catch (KeeperException e) {
|
||||||
switch (e.code()) {
|
switch (e.code()) {
|
||||||
case CONNECTIONLOSS:
|
case CONNECTIONLOSS:
|
||||||
|
@ -270,7 +289,7 @@ public class RecoverableZooKeeper {
|
||||||
RetryCounter retryCounter = retryCounterFactory.create();
|
RetryCounter retryCounter = retryCounterFactory.create();
|
||||||
while (true) {
|
while (true) {
|
||||||
try {
|
try {
|
||||||
return zk.getChildren(path, watcher);
|
return checkZk().getChildren(path, watcher);
|
||||||
} catch (KeeperException e) {
|
} catch (KeeperException e) {
|
||||||
switch (e.code()) {
|
switch (e.code()) {
|
||||||
case CONNECTIONLOSS:
|
case CONNECTIONLOSS:
|
||||||
|
@ -302,7 +321,7 @@ public class RecoverableZooKeeper {
|
||||||
RetryCounter retryCounter = retryCounterFactory.create();
|
RetryCounter retryCounter = retryCounterFactory.create();
|
||||||
while (true) {
|
while (true) {
|
||||||
try {
|
try {
|
||||||
return zk.getChildren(path, watch);
|
return checkZk().getChildren(path, watch);
|
||||||
} catch (KeeperException e) {
|
} catch (KeeperException e) {
|
||||||
switch (e.code()) {
|
switch (e.code()) {
|
||||||
case CONNECTIONLOSS:
|
case CONNECTIONLOSS:
|
||||||
|
@ -334,7 +353,7 @@ public class RecoverableZooKeeper {
|
||||||
RetryCounter retryCounter = retryCounterFactory.create();
|
RetryCounter retryCounter = retryCounterFactory.create();
|
||||||
while (true) {
|
while (true) {
|
||||||
try {
|
try {
|
||||||
byte[] revData = zk.getData(path, watcher, stat);
|
byte[] revData = checkZk().getData(path, watcher, stat);
|
||||||
return this.removeMetaData(revData);
|
return this.removeMetaData(revData);
|
||||||
} catch (KeeperException e) {
|
} catch (KeeperException e) {
|
||||||
switch (e.code()) {
|
switch (e.code()) {
|
||||||
|
@ -367,7 +386,7 @@ public class RecoverableZooKeeper {
|
||||||
RetryCounter retryCounter = retryCounterFactory.create();
|
RetryCounter retryCounter = retryCounterFactory.create();
|
||||||
while (true) {
|
while (true) {
|
||||||
try {
|
try {
|
||||||
byte[] revData = zk.getData(path, watch, stat);
|
byte[] revData = checkZk().getData(path, watch, stat);
|
||||||
return this.removeMetaData(revData);
|
return this.removeMetaData(revData);
|
||||||
} catch (KeeperException e) {
|
} catch (KeeperException e) {
|
||||||
switch (e.code()) {
|
switch (e.code()) {
|
||||||
|
@ -404,7 +423,7 @@ public class RecoverableZooKeeper {
|
||||||
boolean isRetry = false;
|
boolean isRetry = false;
|
||||||
while (true) {
|
while (true) {
|
||||||
try {
|
try {
|
||||||
return zk.setData(path, newData, version);
|
return checkZk().setData(path, newData, version);
|
||||||
} catch (KeeperException e) {
|
} catch (KeeperException e) {
|
||||||
switch (e.code()) {
|
switch (e.code()) {
|
||||||
case CONNECTIONLOSS:
|
case CONNECTIONLOSS:
|
||||||
|
@ -417,7 +436,7 @@ public class RecoverableZooKeeper {
|
||||||
// try to verify whether the previous setData success or not
|
// try to verify whether the previous setData success or not
|
||||||
try{
|
try{
|
||||||
Stat stat = new Stat();
|
Stat stat = new Stat();
|
||||||
byte[] revData = zk.getData(path, false, stat);
|
byte[] revData = checkZk().getData(path, false, stat);
|
||||||
if(Bytes.compareTo(revData, newData) == 0) {
|
if(Bytes.compareTo(revData, newData) == 0) {
|
||||||
// the bad version is caused by previous successful setData
|
// the bad version is caused by previous successful setData
|
||||||
return stat;
|
return stat;
|
||||||
|
@ -486,7 +505,7 @@ public class RecoverableZooKeeper {
|
||||||
boolean isRetry = false; // False for first attempt, true for all retries.
|
boolean isRetry = false; // False for first attempt, true for all retries.
|
||||||
while (true) {
|
while (true) {
|
||||||
try {
|
try {
|
||||||
return zk.create(path, data, acl, createMode);
|
return checkZk().create(path, data, acl, createMode);
|
||||||
} catch (KeeperException e) {
|
} catch (KeeperException e) {
|
||||||
switch (e.code()) {
|
switch (e.code()) {
|
||||||
case NODEEXISTS:
|
case NODEEXISTS:
|
||||||
|
@ -494,7 +513,7 @@ public class RecoverableZooKeeper {
|
||||||
// If the connection was lost, there is still a possibility that
|
// If the connection was lost, there is still a possibility that
|
||||||
// we have successfully created the node at our previous attempt,
|
// we have successfully created the node at our previous attempt,
|
||||||
// so we read the node and compare.
|
// so we read the node and compare.
|
||||||
byte[] currentData = zk.getData(path, false, null);
|
byte[] currentData = checkZk().getData(path, false, null);
|
||||||
if (currentData != null &&
|
if (currentData != null &&
|
||||||
Bytes.compareTo(currentData, data) == 0) {
|
Bytes.compareTo(currentData, data) == 0) {
|
||||||
// We successfully created a non-sequential node
|
// We successfully created a non-sequential node
|
||||||
|
@ -539,7 +558,7 @@ public class RecoverableZooKeeper {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
first = false;
|
first = false;
|
||||||
return zk.create(newPath, data, acl, createMode);
|
return checkZk().create(newPath, data, acl, createMode);
|
||||||
} catch (KeeperException e) {
|
} catch (KeeperException e) {
|
||||||
switch (e.code()) {
|
switch (e.code()) {
|
||||||
case CONNECTIONLOSS:
|
case CONNECTIONLOSS:
|
||||||
|
@ -595,7 +614,7 @@ public class RecoverableZooKeeper {
|
||||||
Iterable<Op> multiOps = prepareZKMulti(ops);
|
Iterable<Op> multiOps = prepareZKMulti(ops);
|
||||||
while (true) {
|
while (true) {
|
||||||
try {
|
try {
|
||||||
return zk.multi(multiOps);
|
return checkZk().multi(multiOps);
|
||||||
} catch (KeeperException e) {
|
} catch (KeeperException e) {
|
||||||
switch (e.code()) {
|
switch (e.code()) {
|
||||||
case CONNECTIONLOSS:
|
case CONNECTIONLOSS:
|
||||||
|
@ -622,11 +641,11 @@ public class RecoverableZooKeeper {
|
||||||
String parent = path.substring(0, lastSlashIdx);
|
String parent = path.substring(0, lastSlashIdx);
|
||||||
String nodePrefix = path.substring(lastSlashIdx+1);
|
String nodePrefix = path.substring(lastSlashIdx+1);
|
||||||
|
|
||||||
List<String> nodes = zk.getChildren(parent, false);
|
List<String> nodes = checkZk().getChildren(parent, false);
|
||||||
List<String> matching = filterByPrefix(nodes, nodePrefix);
|
List<String> matching = filterByPrefix(nodes, nodePrefix);
|
||||||
for (String node : matching) {
|
for (String node : matching) {
|
||||||
String nodePath = parent + "/" + node;
|
String nodePath = parent + "/" + node;
|
||||||
Stat stat = zk.exists(nodePath, false);
|
Stat stat = checkZk().exists(nodePath, false);
|
||||||
if (stat != null) {
|
if (stat != null) {
|
||||||
return nodePath;
|
return nodePath;
|
||||||
}
|
}
|
||||||
|
@ -670,15 +689,15 @@ public class RecoverableZooKeeper {
|
||||||
}
|
}
|
||||||
|
|
||||||
public long getSessionId() {
|
public long getSessionId() {
|
||||||
return zk.getSessionId();
|
// No connection yet: report -1 as "no session". A null branch here would be typed Long and
// unboxed on return from this long-valued method, throwing NullPointerException.
return zk == null ? -1 : zk.getSessionId();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void close() throws InterruptedException {
|
public void close() throws InterruptedException {
|
||||||
zk.close();
|
if (zk != null) zk.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
public States getState() {
|
public States getState() {
|
||||||
return zk.getState();
|
return zk == null ? null : zk.getState();
|
||||||
}
|
}
|
||||||
|
|
||||||
public ZooKeeper getZooKeeper() {
|
public ZooKeeper getZooKeeper() {
|
||||||
|
@ -686,11 +705,11 @@ public class RecoverableZooKeeper {
|
||||||
}
|
}
|
||||||
|
|
||||||
public byte[] getSessionPasswd() {
|
public byte[] getSessionPasswd() {
|
||||||
return zk.getSessionPasswd();
|
return zk == null ? null : zk.getSessionPasswd();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void sync(String path, AsyncCallback.VoidCallback cb, Object ctx) {
|
public void sync(String path, AsyncCallback.VoidCallback cb, Object ctx) throws KeeperException {
|
||||||
this.zk.sync(path, null, null);
|
checkZk().sync(path, null, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -214,13 +214,7 @@ public class ZKConfig {
|
||||||
else if (key.startsWith("server.")) {
|
else if (key.startsWith("server.")) {
|
||||||
String host = value.substring(0, value.indexOf(':'));
|
String host = value.substring(0, value.indexOf(':'));
|
||||||
servers.add(host);
|
servers.add(host);
|
||||||
try {
|
anyValid = true;
|
||||||
//noinspection ResultOfMethodCallIgnored
|
|
||||||
InetAddress.getByName(host);
|
|
||||||
anyValid = true;
|
|
||||||
} catch (UnknownHostException e) {
|
|
||||||
LOG.warn(StringUtils.stringifyException(e));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -280,7 +280,7 @@ public class ZooKeeperWatcher implements Watcher, Abortable, Closeable {
|
||||||
return recoverableZooKeeper;
|
return recoverableZooKeeper;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void reconnectAfterExpiration() throws IOException, InterruptedException {
|
public void reconnectAfterExpiration() throws IOException, KeeperException, InterruptedException {
|
||||||
recoverableZooKeeper.reconnectAfterExpiration();
|
recoverableZooKeeper.reconnectAfterExpiration();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -424,7 +424,7 @@ public class ZooKeeperWatcher implements Watcher, Abortable, Closeable {
|
||||||
* previously read version and data. We want to ensure that the version read
|
* previously read version and data. We want to ensure that the version read
|
||||||
* is up-to-date from when we begin the operation.
|
* is up-to-date from when we begin the operation.
|
||||||
*/
|
*/
|
||||||
public void sync(String path) {
|
public void sync(String path) throws KeeperException {
|
||||||
this.recoverableZooKeeper.sync(path, null, null);
|
this.recoverableZooKeeper.sync(path, null, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue