From 3933047cd7db8a7fdd47a4ba3d6d8f916f3a6823 Mon Sep 17 00:00:00 2001 From: Chris Hostetter Date: Tue, 30 Jul 2019 10:14:38 -0700 Subject: [PATCH] SOLR-13660: Fixed AbstractFullDistribZkTestBase.waitForActiveReplicaCount() to ensure replicas are active. (cherry picked from commit 6dea203d11feb8c047da883131aacccb3a949042) --- solr/CHANGES.txt | 4 ++ .../cloud/AbstractFullDistribZkTestBase.java | 51 +++++++++++++------ 2 files changed, 40 insertions(+), 15 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index dbaa013eabe..574da4444b5 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -60,6 +60,10 @@ Bug Fixes * SOLR-11556: Backup and restore command really supports picking one of a few repositories by repository parameter (Timothy Potter via Mikhail Khludnev) +* SOLR-13660: Fixed AbstractFullDistribZkTestBase.waitForActiveReplicaCount() to ensure + replicas are active. (hossman) + + Other Changes ---------------------- diff --git a/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java b/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java index cb636bfa94c..522c938a5b0 100644 --- a/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java +++ b/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java @@ -399,13 +399,21 @@ public abstract class AbstractFullDistribZkTestBase extends AbstractDistribZkTes List createPullReplicaRequests = Collections.synchronizedList(new ArrayList<>()); StringBuilder sb = new StringBuilder(); + // HACK: Don't be fooled by the replication factor of '1'... + // + // This CREATE command asks for a repFactor of 1, but uses an empty nodeSet. + // This allows this method to create a collection with numShards == sliceCount, + // but no actual cores ... yet. The actual replicas are added later (once the actual + // jetty instances are started) assertEquals(0, CollectionAdminRequest - .createCollection(DEFAULT_COLLECTION, "conf1", sliceCount, 1) - .setStateFormat(Integer.parseInt(getStateFormat())) - .setCreateNodeSet("") - .process(cloudClient).getStatus()); + .createCollection(DEFAULT_COLLECTION, "conf1", sliceCount, 1) // not real rep factor! + .setStateFormat(Integer.parseInt(getStateFormat())) + .setCreateNodeSet("") // empty node set prevents creation of cores + .process(cloudClient).getStatus()); - cloudClient.waitForState(DEFAULT_COLLECTION, 30, TimeUnit.SECONDS, (c) -> c != null && c.getSlices().size() == sliceCount); + cloudClient.waitForState(DEFAULT_COLLECTION, 30, TimeUnit.SECONDS, + // expect sliceCount active shards, but no active replicas + SolrCloudTestCase.activeClusterShape(sliceCount, 0)); ExecutorService customThreadPool = ExecutorUtil.newMDCAwareCachedThreadPool(new SolrjNamedThreadFactory("closeThreadPool")); @@ -583,17 +591,24 @@ public abstract class AbstractFullDistribZkTestBase extends AbstractDistribZkTes } protected void waitForActiveReplicaCount(CloudSolrClient client, String collection, int expectedNumReplicas) throws TimeoutException, NotInClusterStateException { + log.debug("Waiting to see {} active replicas in collection: {}", expectedNumReplicas, collection); AtomicInteger nReplicas = new AtomicInteger(); try { - client.getZkStateReader().waitForState(collection, 30, TimeUnit.SECONDS, (c) -> { - if (c == null) - return false; - int numReplicas = getTotalReplicas(c, c.getName()); - nReplicas.set(numReplicas); - if (numReplicas == expectedNumReplicas) return true; - - return false; - }); + client.getZkStateReader().waitForState(collection, 30, TimeUnit.SECONDS, (liveNodes, collectionState) -> { + if (collectionState == null) { + return false; + } + int activeReplicas = 0; + for (Slice slice : collectionState) { + for (Replica replica : slice) { + if (replica.isActive(liveNodes)) { + activeReplicas++; + } + } + } + nReplicas.set(activeReplicas); + return (activeReplicas == expectedNumReplicas); + }); } catch (TimeoutException | InterruptedException e) { try { printLayout(); @@ -610,7 +625,13 @@ public abstract class AbstractFullDistribZkTestBase extends AbstractDistribZkTes return 0; } - /* Total number of replicas (number of cores serving an index to the collection) shown by the cluster state */ + /** + * Total number of replicas for all shards as indicated by the cluster state, regardless of status. + * + * @deprecated This method is virtually useless as it does not consider the status of either + * the shard or replica, nor wether the node hosting each replica is alive. + */ + @Deprecated protected int getTotalReplicas(DocCollection c, String collection) { if (c == null) return 0; // support for when collection hasn't been created yet int cnt = 0;