we need to retry when asking for a leader

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1294347 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Mark Robert Miller 2012-02-27 21:43:50 +00:00
parent 493bd8b42f
commit a766b67aed
1 changed file with 31 additions and 14 deletions

View File

@ -200,9 +200,13 @@ public final class ZkController {
for (CoreDescriptor descriptor : descriptors) { for (CoreDescriptor descriptor : descriptors) {
final String coreZkNodeName = getNodeName() + "_" final String coreZkNodeName = getNodeName() + "_"
+ descriptor.getName(); + descriptor.getName();
publishAsDown(getBaseUrl(), descriptor, coreZkNodeName, try {
descriptor.getName()); publishAsDown(getBaseUrl(), descriptor, coreZkNodeName,
waitForLeaderToSeeDownState(descriptor, coreZkNodeName); descriptor.getName());
waitForLeaderToSeeDownState(descriptor, coreZkNodeName);
} catch (Exception e) {
SolrException.log(log, "", e);
}
} }
} }
@ -1084,16 +1088,25 @@ public final class ZkController {
CloudDescriptor cloudDesc = descriptor.getCloudDescriptor(); CloudDescriptor cloudDesc = descriptor.getCloudDescriptor();
String collection = cloudDesc.getCollectionName(); String collection = cloudDesc.getCollectionName();
String shard = cloudDesc.getShardId(); String shard = cloudDesc.getShardId();
ZkCoreNodeProps leaderProps; ZkCoreNodeProps leaderProps = null;
try {
// go straight to zk, not the cloud state - we must have current info int retries = 6;
leaderProps = getLeaderProps(collection, shard); for (int i = 0; i < retries; i++) {
} catch (InterruptedException e) { try {
// Restore the interrupted status // go straight to zk, not the cloud state - we must have current info
Thread.currentThread().interrupt(); leaderProps = getLeaderProps(collection, shard);
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e); break;
} catch (KeeperException e) { } catch (Exception e) {
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e); SolrException.log(log, "There was a problem finding the leader in zk", e);
try {
Thread.sleep(2000);
} catch (InterruptedException e1) {
Thread.currentThread().interrupt();
}
if (i == retries - 1) {
throw new SolrException(ErrorCode.SERVER_ERROR, "There was a problem finding the leader in zk");
}
}
} }
String leaderBaseUrl = leaderProps.getBaseUrl(); String leaderBaseUrl = leaderProps.getBaseUrl();
@ -1122,7 +1135,8 @@ public final class ZkController {
// let's retry a couple times - perhaps the leader just went down, // let's retry a couple times - perhaps the leader just went down,
// or perhaps he is just not quite ready for us yet // or perhaps he is just not quite ready for us yet
for (int i = 0; i < 3; i++) { retries = 6;
for (int i = 0; i < retries; i++) {
try { try {
server.request(prepCmd); server.request(prepCmd);
break; break;
@ -1133,6 +1147,9 @@ public final class ZkController {
} catch (InterruptedException e1) { } catch (InterruptedException e1) {
Thread.currentThread().interrupt(); Thread.currentThread().interrupt();
} }
if (i == retries - 1) {
throw new SolrException(ErrorCode.SERVER_ERROR, "There was a problem making a request to the leader");
}
} }
} }