SOLR-11990: Create Collection API now waits for the updated state of the withCollection to be visible after setting the COLOCATED_WITH value

Sometimes deletion of the withCollection succeeds even though the linked collection exists, if the delete request is sent immediately after the creation of the linked collection. This is because of a race between the delete request and the ZooKeeper update made by the create collection command, such that the COLOCATED_WITH property is not yet visible to the overseer collection processor when it handles the delete request. Therefore, we now wait (for up to 5 seconds) until the property is visible before returning a success to the user; if the wait times out, for example because the overseer queue is backed up, a warning is logged and the create request is not failed because of it. The test also adds more asserts so that a similar race will fail the test immediately with a proper assert message.
This commit is contained in:
Shalin Shekhar Mangar 2018-08-17 14:26:30 +05:30
parent 055105ba0b
commit 6e21cb3aa9
2 changed files with 15 additions and 3 deletions

View File

@ -30,6 +30,7 @@ import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Properties;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.solr.client.solrj.cloud.DistribStateManager;
@ -80,6 +81,7 @@ import static org.apache.solr.common.cloud.ZkStateReader.PULL_REPLICAS;
import static org.apache.solr.common.cloud.ZkStateReader.REPLICATION_FACTOR;
import static org.apache.solr.common.cloud.ZkStateReader.TLOG_REPLICAS;
import static org.apache.solr.common.params.CollectionAdminParams.COLL_CONF;
import static org.apache.solr.common.params.CollectionAdminParams.COLOCATED_WITH;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.ADDREPLICA;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.MODIFYCOLLECTION;
import static org.apache.solr.common.params.CommonAdminParams.ASYNC;
@ -304,6 +306,13 @@ public class CreateCollectionCmd implements OverseerCollectionMessageHandler.Cmd
ZkStateReader.COLLECTION_PROP, withCollection,
CollectionAdminParams.COLOCATED_WITH, collectionName);
Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(props));
try {
zkStateReader.waitForState(withCollection, 5, TimeUnit.SECONDS, (liveNodes, collectionState) -> collectionName.equals(collectionState.getStr(COLOCATED_WITH)));
} catch (TimeoutException e) {
log.warn("Timed out waiting to see the " + COLOCATED_WITH + " property set on collection: " + withCollection);
// maybe the overseer queue is backed up, we don't want to fail the create request
// because of this time out, continue
}
}
} catch (SolrException ex) {

View File

@ -33,6 +33,7 @@ import org.apache.solr.client.solrj.embedded.JettySolrRunner;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.response.CollectionAdminResponse;
import org.apache.solr.cloud.autoscaling.ActionContext;
import org.apache.solr.cloud.autoscaling.ComputePlanAction;
import org.apache.solr.cloud.autoscaling.ExecutePlanAction;
@ -59,7 +60,7 @@ import static org.apache.solr.common.params.CollectionAdminParams.WITH_COLLECTIO
*
* See SOLR-11990 for more details.
*/
@LogLevel("org.apache.solr.cloud.autoscaling=TRACE;org.apache.solr.client.solrj.cloud.autoscaling=DEBUG")
@LogLevel("org.apache.solr.cloud.autoscaling=TRACE;org.apache.solr.client.solrj.cloud.autoscaling=DEBUG;org.apache.solr.cloud.overseer=DEBUG")
public class TestWithCollection extends SolrCloudTestCase {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
@ -186,7 +187,8 @@ public class TestWithCollection extends SolrCloudTestCase {
.setWithCollection(abc)
.process(solrClient);
try {
CollectionAdminRequest.deleteCollection(abc).process(solrClient);
CollectionAdminResponse response = CollectionAdminRequest.deleteCollection(abc).process(solrClient);
fail("Deleting collection: " + abc + " should have failed with an exception. Instead response was: " + response.getResponse());
} catch (HttpSolrClient.RemoteSolrException e) {
assertTrue(e.getMessage().contains("is co-located with collection"));
}
@ -205,7 +207,8 @@ public class TestWithCollection extends SolrCloudTestCase {
.process(solrClient);
// sanity check
try {
CollectionAdminRequest.deleteCollection(abc).process(solrClient);
CollectionAdminResponse response = CollectionAdminRequest.deleteCollection(abc).process(solrClient);
fail("Deleting collection: " + abc + " should have failed with an exception. Instead response was: " + response.getResponse());
} catch (HttpSolrClient.RemoteSolrException e) {
assertTrue(e.getMessage().contains("is co-located with collection"));
}