Retry when looking up the shard leader in ZooKeeper instead of failing on the first error

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1294347 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Mark Robert Miller 2012-02-27 21:43:50 +00:00
parent 493bd8b42f
commit a766b67aed
1 changed file with 31 additions and 14 deletions

View File

@ -200,9 +200,13 @@ public final class ZkController {
for (CoreDescriptor descriptor : descriptors) {
final String coreZkNodeName = getNodeName() + "_"
+ descriptor.getName();
publishAsDown(getBaseUrl(), descriptor, coreZkNodeName,
descriptor.getName());
waitForLeaderToSeeDownState(descriptor, coreZkNodeName);
try {
publishAsDown(getBaseUrl(), descriptor, coreZkNodeName,
descriptor.getName());
waitForLeaderToSeeDownState(descriptor, coreZkNodeName);
} catch (Exception e) {
SolrException.log(log, "", e);
}
}
}
@ -1084,16 +1088,25 @@ public final class ZkController {
CloudDescriptor cloudDesc = descriptor.getCloudDescriptor();
String collection = cloudDesc.getCollectionName();
String shard = cloudDesc.getShardId();
ZkCoreNodeProps leaderProps;
try {
// go straight to zk, not the cloud state - we must have current info
leaderProps = getLeaderProps(collection, shard);
} catch (InterruptedException e) {
// Restore the interrupted status
Thread.currentThread().interrupt();
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e);
} catch (KeeperException e) {
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e);
ZkCoreNodeProps leaderProps = null;
int retries = 6;
for (int i = 0; i < retries; i++) {
try {
// go straight to zk, not the cloud state - we must have current info
leaderProps = getLeaderProps(collection, shard);
break;
} catch (Exception e) {
SolrException.log(log, "There was a problem finding the leader in zk", e);
try {
Thread.sleep(2000);
} catch (InterruptedException e1) {
Thread.currentThread().interrupt();
}
if (i == retries - 1) {
throw new SolrException(ErrorCode.SERVER_ERROR, "There was a problem finding the leader in zk");
}
}
}
String leaderBaseUrl = leaderProps.getBaseUrl();
@ -1122,7 +1135,8 @@ public final class ZkController {
// let's retry several times - perhaps the leader just went down,
// or perhaps it is just not quite ready for us yet
for (int i = 0; i < 3; i++) {
retries = 6;
for (int i = 0; i < retries; i++) {
try {
server.request(prepCmd);
break;
@ -1133,6 +1147,9 @@ public final class ZkController {
} catch (InterruptedException e1) {
Thread.currentThread().interrupt();
}
if (i == retries - 1) {
throw new SolrException(ErrorCode.SERVER_ERROR, "There was a problem making a request to the leader");
}
}
}