mirror of https://github.com/apache/lucene.git
SOLR-5820: OverseerCollectionProcessor#lookupReplicas has a timeout that is too short and a bad error message on timeout.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1574883 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
d23a3374e1
commit
cdf254415b
|
@ -1627,7 +1627,7 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread {
|
|||
if (!created)
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR, "Could not fully createcollection: " + message.getStr("name"));
|
||||
|
||||
log.info("going to create cores replicas shardNames {} , repFactor : {}", shardNames, repFactor);
|
||||
log.info("Creating SolrCores for new collection, shardNames {} , replicationFactor : {}", shardNames, repFactor);
|
||||
Map<String ,ShardRequest> coresToCreate = new LinkedHashMap<String, ShardRequest>();
|
||||
for (int i = 1; i <= shardNames.size(); i++) {
|
||||
String sliceName = shardNames.get(i-1);
|
||||
|
@ -1671,14 +1671,17 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread {
|
|||
sreq.actualShards = sreq.shards;
|
||||
sreq.params = params;
|
||||
|
||||
if(isLegacyCloud) shardHandler.submit(sreq, sreq.shards[0], sreq.params);
|
||||
else coresToCreate.put(coreName, sreq);
|
||||
if(isLegacyCloud) {
|
||||
shardHandler.submit(sreq, sreq.shards[0], sreq.params);
|
||||
} else {
|
||||
coresToCreate.put(coreName, sreq);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(!isLegacyCloud) {
|
||||
//wait for all replica entries to be created
|
||||
Map<String, Replica> replicas = lookupReplicas(collectionName, coresToCreate.keySet());
|
||||
// wait for all replica entries to be created
|
||||
Map<String, Replica> replicas = waitToSeeReplicasInState(collectionName, coresToCreate.keySet());
|
||||
for (Map.Entry<String, ShardRequest> e : coresToCreate.entrySet()) {
|
||||
ShardRequest sreq = e.getValue();
|
||||
sreq.params.set(CoreAdminParams.CORE_NODE_NAME, replicas.get(e.getKey()).getName());
|
||||
|
@ -1704,37 +1707,35 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread {
|
|||
}
|
||||
}
|
||||
|
||||
private Map<String, Replica> lookupReplicas(String collectionName, Collection<String> coreNames) throws InterruptedException {
|
||||
private Map<String, Replica> waitToSeeReplicasInState(String collectionName, Collection<String> coreNames) throws InterruptedException {
|
||||
Map<String, Replica> result = new HashMap<String, Replica>();
|
||||
long endTime = System.nanoTime() + TimeUnit.NANOSECONDS.convert(3, TimeUnit.SECONDS);
|
||||
for(;;) {
|
||||
DocCollection coll = zkStateReader.getClusterState().getCollection(collectionName);
|
||||
for (String coreName : coreNames) {
|
||||
if(result.containsKey(coreName)) continue;
|
||||
long endTime = System.nanoTime() + TimeUnit.NANOSECONDS.convert(30, TimeUnit.SECONDS);
|
||||
while (true) {
|
||||
DocCollection coll = zkStateReader.getClusterState().getCollection(
|
||||
collectionName);
|
||||
for (String coreName : coreNames) {
|
||||
if (result.containsKey(coreName)) continue;
|
||||
for (Slice slice : coll.getSlices()) {
|
||||
for (Replica replica : slice.getReplicas()) {
|
||||
if(coreName.equals(replica.getStr(ZkStateReader.CORE_NAME_PROP))) {
|
||||
result.put(coreName,replica);
|
||||
if (coreName.equals(replica.getStr(ZkStateReader.CORE_NAME_PROP))) {
|
||||
result.put(coreName, replica);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(result.size() == coreNames.size()) {
|
||||
|
||||
if (result.size() == coreNames.size()) {
|
||||
return result;
|
||||
}
|
||||
if( System.nanoTime() > endTime) {
|
||||
//time up . throw exception and go out
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR, "Unable to create replica entries in ZK");
|
||||
if (System.nanoTime() > endTime) {
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR, "Timed out waiting to see all replicas in cluster state.");
|
||||
}
|
||||
|
||||
|
||||
Thread.sleep(100);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
private void addReplica(ClusterState clusterState, ZkNodeProps message, NamedList results) throws KeeperException, InterruptedException {
|
||||
String collection = message.getStr(COLLECTION_PROP);
|
||||
String node = message.getStr("node");
|
||||
|
@ -1789,7 +1790,7 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread {
|
|||
ZkStateReader.STATE_PROP, ZkStateReader.DOWN,
|
||||
ZkStateReader.BASE_URL_PROP,zkStateReader.getBaseUrlForNodeName(node));
|
||||
Overseer.getInQueue(zkStateReader.getZkClient()).offer(ZkStateReader.toJSON(props));
|
||||
params.set(CoreAdminParams.CORE_NODE_NAME, lookupReplicas(collection, Collections.singletonList(coreName)).get(coreName).getName());
|
||||
params.set(CoreAdminParams.CORE_NODE_NAME, waitToSeeReplicasInState(collection, Collections.singletonList(coreName)).get(coreName).getName());
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -665,7 +665,7 @@ public class CollectionsAPIDistributedZkTest extends AbstractFullDistribZkTestBa
|
|||
boolean disableLegacy = random().nextBoolean();
|
||||
CloudSolrServer client1 = null;
|
||||
|
||||
if(disableLegacy) {
|
||||
if (disableLegacy) {
|
||||
log.info("legacyCloud=false");
|
||||
client1 = createCloudClient(null);
|
||||
setClusterProp(client1, ZkStateReader.LEGACY_CLOUD, "false");
|
||||
|
|
Loading…
Reference in New Issue