From 6f109e7c653a12cfc880b272f508e4b799ad6c0e Mon Sep 17 00:00:00 2001 From: Erick Erickson Date: Sat, 11 Jul 2015 04:17:49 +0000 Subject: [PATCH] SOLR-7172: addreplica API fails with incorrect error msg 'cannot create collection' git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1690341 13f79535-47bb-0310-9956-ffa450edef68 --- solr/CHANGES.txt | 3 + .../java/org/apache/solr/cloud/Assign.java | 182 ++++++----- .../cloud/OverseerCollectionProcessor.java | 21 +- .../cloud/CollectionTooManyReplicasTest.java | 291 ++++++++++++++++++ .../solr/common/params/CoreAdminParams.java | 3 + 5 files changed, 404 insertions(+), 96 deletions(-) create mode 100644 solr/core/src/test/org/apache/solr/cloud/CollectionTooManyReplicasTest.java diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 91485d86814..83b1737d3e0 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -221,6 +221,9 @@ Bug Fixes * SOLR-7132: The Collections API ADDREPLICA command property.name is not reflected in the clusterstate until after Solr restarts (Erick Erickson) +* SOLR-7172: addreplica API fails with incorrect error msg "cannot create collection" + (Erick Erickson) + Optimizations ---------------------- diff --git a/solr/core/src/java/org/apache/solr/cloud/Assign.java b/solr/core/src/java/org/apache/solr/cloud/Assign.java index 5238c9fabb0..58b1a90844e 100644 --- a/solr/core/src/java/org/apache/solr/cloud/Assign.java +++ b/solr/core/src/java/org/apache/solr/cloud/Assign.java @@ -24,6 +24,7 @@ import java.util.Comparator; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Set; import java.util.regex.Matcher; @@ -72,10 +73,10 @@ public class Assign { return "core_node" + (max + 1); } - + /** * Assign a new unique id up to slices count - then add replicas evenly. - * + * * @return the assigned shard id */ public static String assignShard(String collection, ClusterState state, Integer numShards) { @@ -124,7 +125,7 @@ public class Assign { static String buildCoreName(DocCollection collection, String shard) { Slice slice = collection.getSlice(shard); int replicaNum = slice.getReplicas().size(); - for (;;) { + for (; ; ) { String replicaName = collection.getName() + "_" + shard + "_replica" + replicaNum; boolean exists = false; for (Replica replica : slice.getReplicas()) { @@ -139,12 +140,12 @@ public class Assign { return collection.getName() + "_" + shard + "_replica" + replicaNum; } - static class Node { + static class ReplicaCount { public final String nodeName; public int thisCollectionNodes = 0; public int totalNodes = 0; - Node(String nodeName) { + ReplicaCount(String nodeName) { this.nodeName = nodeName; } @@ -153,32 +154,112 @@ public class Assign { } } - public static List getNodesForNewShard(ClusterState clusterState, String collectionName,String shard,int numberOfNodes, - String createNodeSetStr, CoreContainer cc) { + // Only called from createShard and addReplica (so far). + // + // Gets a list of candidate nodes to put the required replica(s) on. Throws errors if not enough replicas + // could be created on live nodes given maxShardsPerNode, Replication factor (if from createShard) etc. 
+ public static List getNodesForNewReplicas(ClusterState clusterState, String collectionName, + String shard, int numberOfNodes, + String createNodeSetStr, CoreContainer cc) { DocCollection coll = clusterState.getCollection(collectionName); Integer maxShardsPerNode = coll.getInt(MAX_SHARDS_PER_NODE, 1); - Integer repFactor = coll.getInt(REPLICATION_FACTOR, 1); - int numSlices = coll.getSlices().size(); List createNodeList = createNodeSetStr == null ? null: StrUtils.splitSmart(createNodeSetStr, ",", true); + HashMap nodeNameVsShardCount = getNodeNameVsShardCount(collectionName, clusterState, createNodeList); + + if (createNodeList == null) { // We only care if we haven't been told to put new replicas on specific nodes. + int availableSlots = 0; + for (Map.Entry ent : nodeNameVsShardCount.entrySet()) { + //ADDREPLICA can put more than maxShardsPerNode on an instnace, so this test is necessary. + if (maxShardsPerNode > ent.getValue().thisCollectionNodes) { + availableSlots += (maxShardsPerNode - ent.getValue().thisCollectionNodes); + } + } + if (availableSlots < numberOfNodes) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, + String.format(Locale.ROOT, "Cannot create %d new replicas for collection %s given the current number of live nodes and a maxShardsPerNode of %d", + numberOfNodes, collectionName, maxShardsPerNode)); + } + } + + List l = (List) coll.get(DocCollection.RULE); + if (l != null) { + return getNodesViaRules(clusterState, shard, numberOfNodes, cc, coll, createNodeList, l); + } + + ArrayList sortedNodeList = new ArrayList<>(nodeNameVsShardCount.values()); + Collections.sort(sortedNodeList, new Comparator() { + @Override + public int compare(ReplicaCount x, ReplicaCount y) { + return (x.weight() < y.weight()) ? -1 : ((x.weight() == y.weight()) ? 0 : 1); + } + }); + return sortedNodeList; + + } + + private static List getNodesViaRules(ClusterState clusterState, String shard, int numberOfNodes, + CoreContainer cc, DocCollection coll, List createNodeList, List l) { + ArrayList rules = new ArrayList<>(); + for (Object o : l) rules.add(new Rule((Map) o)); + Map> shardVsNodes = new LinkedHashMap<>(); + for (Slice slice : coll.getSlices()) { + LinkedHashMap n = new LinkedHashMap<>(); + shardVsNodes.put(slice.getName(), n); + for (Replica replica : slice.getReplicas()) { + Integer count = n.get(replica.getNodeName()); + if (count == null) count = 0; + n.put(replica.getNodeName(), ++count); + } + } + List snitches = (List) coll.get(DocCollection.SNITCH); + List nodesList = createNodeList == null ? 
+ new ArrayList<>(clusterState.getLiveNodes()) : + createNodeList; + Map positions = new ReplicaAssigner( + rules, + Collections.singletonMap(shard, numberOfNodes), + snitches, + shardVsNodes, + nodesList, cc, clusterState).getNodeMappings(); + + List repCounts = new ArrayList<>(); + for (String s : positions.values()) { + repCounts.add(new ReplicaCount(s)); + } + return repCounts; + } + + private static HashMap getNodeNameVsShardCount(String collectionName, + ClusterState clusterState, List createNodeList) { Set nodes = clusterState.getLiveNodes(); List nodeList = new ArrayList<>(nodes.size()); nodeList.addAll(nodes); if (createNodeList != null) nodeList.retainAll(createNodeList); - - HashMap nodeNameVsShardCount = new HashMap<>(); - for (String s : nodeList) nodeNameVsShardCount.put(s,new Node(s)); + HashMap nodeNameVsShardCount = new HashMap<>(); + for (String s : nodeList) { + nodeNameVsShardCount.put(s, new ReplicaCount(s)); + } + if (createNodeList != null) { // Overrides petty considerations about maxShardsPerNode + if (createNodeList.size() != nodeNameVsShardCount.size()) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, + "At least one of the node(s) specified are not currently active, no action taken."); + } + return nodeNameVsShardCount; + } + DocCollection coll = clusterState.getCollection(collectionName); + Integer maxShardsPerNode = coll.getInt(MAX_SHARDS_PER_NODE, 1); for (String s : clusterState.getCollections()) { DocCollection c = clusterState.getCollection(s); //identify suitable nodes by checking the no:of cores in each of them for (Slice slice : c.getSlices()) { Collection replicas = slice.getReplicas(); for (Replica replica : replicas) { - Node count = nodeNameVsShardCount.get(replica.getNodeName()); + ReplicaCount count = nodeNameVsShardCount.get(replica.getNodeName()); if (count != null) { - count.totalNodes++; + count.totalNodes++; // Used ot "weigh" whether this node should be used later. if (s.equals(collectionName)) { count.thisCollectionNodes++; if (count.thisCollectionNodes >= maxShardsPerNode) nodeNameVsShardCount.remove(replica.getNodeName()); @@ -188,77 +269,8 @@ public class Assign { } } - if (nodeNameVsShardCount.size() <= 0) { - throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Cannot create collection " + collectionName - + ". No live Solr-instances" + ((createNodeList != null)?" among Solr-instances specified in " + CREATE_NODE_SET + ":" + createNodeSetStr:"")); - } - - if (repFactor > nodeNameVsShardCount.size()) { - log.warn("Specified " - + ZkStateReader.REPLICATION_FACTOR - + " of " - + repFactor - + " on collection " - + collectionName - + " is higher than or equal to the number of Solr instances currently live or part of your " + CREATE_NODE_SET + "(" - + nodeList.size() - + "). It's unusual to run two replica of the same slice on the same Solr-instance."); - } - - int maxCoresAllowedToCreate = maxShardsPerNode * nodeList.size(); - int requestedCoresToCreate = numSlices * repFactor; - int minCoresToCreate = requestedCoresToCreate; - if (maxCoresAllowedToCreate < minCoresToCreate) { - throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Cannot create shards " + collectionName + ". Value of " - + MAX_SHARDS_PER_NODE + " is " + maxShardsPerNode - + ", and the number of live nodes is " + nodeList.size() - + ". This allows a maximum of " + maxCoresAllowedToCreate - + " to be created. Value of " + NUM_SLICES + " is " + numSlices - + " and value of " + ZkStateReader.REPLICATION_FACTOR + " is " + repFactor - + ". 
This requires " + requestedCoresToCreate - + " shards to be created (higher than the allowed number)"); - } - - List l = (List) coll.get(DocCollection.RULE); - if(l != null) { - ArrayList rules = new ArrayList<>(); - for (Object o : l) rules.add(new Rule((Map) o)); - Map> shardVsNodes = new LinkedHashMap<>(); - for (Slice slice : coll.getSlices()) { - LinkedHashMap n = new LinkedHashMap<>(); - shardVsNodes.put(slice.getName(), n); - for (Replica replica : slice.getReplicas()) { - Integer count = n.get(replica.getNodeName()); - if(count == null) count = 0; - n.put(replica.getNodeName(),++count); - } - } - List snitches = (List) coll.get(DocCollection.SNITCH); - List nodesList = createNodeList == null ? - new ArrayList<>(clusterState.getLiveNodes()) : - createNodeList ; - Map positions = new ReplicaAssigner( - rules, - Collections.singletonMap(shard, numberOfNodes), - snitches, - shardVsNodes, - nodesList, cc, clusterState).getNodeMappings(); - - List n = new ArrayList<>(); - for (String s : positions.values()) n.add(new Node(s)); - return n; - - }else { - - ArrayList sortedNodeList = new ArrayList<>(nodeNameVsShardCount.values()); - Collections.sort(sortedNodeList, new Comparator() { - @Override - public int compare(Node x, Node y) { - return (x.weight() < y.weight()) ? -1 : ((x.weight() == y.weight()) ? 0 : 1); - } - }); - return sortedNodeList; - } + return nodeNameVsShardCount; } + } diff --git a/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java b/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java index 3647e7352cd..8df05cba430 100644 --- a/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java +++ b/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java @@ -44,7 +44,7 @@ import org.apache.solr.client.solrj.request.AbstractUpdateRequest; import org.apache.solr.client.solrj.request.CoreAdminRequest; import org.apache.solr.client.solrj.request.UpdateRequest; import org.apache.solr.client.solrj.response.UpdateResponse; -import org.apache.solr.cloud.Assign.Node; +import org.apache.solr.cloud.Assign.ReplicaCount; import org.apache.solr.cloud.DistributedQueue.QueueEvent; import org.apache.solr.cloud.Overseer.LeaderStatus; import org.apache.solr.cloud.overseer.ClusterStateMutator; @@ -95,7 +95,7 @@ import org.apache.zookeeper.data.Stat; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.apache.solr.cloud.Assign.getNodesForNewShard; +import static org.apache.solr.cloud.Assign.getNodesForNewReplicas; import static org.apache.solr.common.cloud.DocCollection.SNITCH; import static org.apache.solr.common.cloud.ZkNodeProps.makeMap; import static org.apache.solr.common.cloud.ZkStateReader.BASE_URL_PROP; @@ -1285,7 +1285,7 @@ public class OverseerCollectionProcessor implements Runnable, Closeable { DocCollection collection = clusterState.getCollection(collectionName); int repFactor = message.getInt(REPLICATION_FACTOR, collection.getInt(REPLICATION_FACTOR, 1)); String createNodeSetStr = message.getStr(CREATE_NODE_SET); - List sortedNodeList = getNodesForNewShard(clusterState, collectionName, sliceName, repFactor, + List sortedNodeList = getNodesForNewReplicas(clusterState, collectionName, sliceName, repFactor, createNodeSetStr, overseer.getZkController().getCoreContainer()); Overseer.getInQueue(zkStateReader.getZkClient()).offer(ZkStateReader.toJSON(message)); @@ -2490,7 +2490,7 @@ public class OverseerCollectionProcessor implements Runnable, Closeable { private void addReplica(ClusterState 
clusterState, ZkNodeProps message, NamedList results) throws KeeperException, InterruptedException { String collection = message.getStr(COLLECTION_PROP); - String node = message.getStr("node"); + String node = message.getStr(CoreAdminParams.NODE); String shard = message.getStr(SHARD_ID_PROP); String coreName = message.getStr(CoreAdminParams.NAME); if (StringUtils.isBlank(coreName)) { @@ -2508,13 +2508,12 @@ public class OverseerCollectionProcessor implements Runnable, Closeable { "Collection: " + collection + " shard: " + shard + " does not exist"); } ShardHandler shardHandler = shardHandlerFactory.getShardHandler(); - - if (node == null) { - node = getNodesForNewShard(clusterState, collection, shard, 1, null, - overseer.getZkController().getCoreContainer()).get(0).nodeName; - log.info("Node not provided, Identified {} for creating new replica", node); - } - + + // Kind of unnecessary, but it does put the logic of whether to override maxShardsPerNode in one place. + node = getNodesForNewReplicas(clusterState, collection, shard, 1, node, + overseer.getZkController().getCoreContainer()).get(0).nodeName; + log.info("Node not provided, Identified {} for creating new replica", node); + if (!clusterState.liveNodesContain(node)) { throw new SolrException(ErrorCode.BAD_REQUEST, "Node: " + node + " is not live"); } diff --git a/solr/core/src/test/org/apache/solr/cloud/CollectionTooManyReplicasTest.java b/solr/core/src/test/org/apache/solr/cloud/CollectionTooManyReplicasTest.java new file mode 100644 index 00000000000..d711d5d51b1 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/cloud/CollectionTooManyReplicasTest.java @@ -0,0 +1,291 @@ +package org.apache.solr.cloud; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Properties; + +import org.apache.commons.lang.StringUtils; +import org.apache.lucene.util.LuceneTestCase.Slow; +import org.apache.solr.client.solrj.embedded.JettySolrRunner; +import org.apache.solr.client.solrj.impl.HttpSolrClient; +import org.apache.solr.client.solrj.request.CollectionAdminRequest; +import org.apache.solr.client.solrj.response.CollectionAdminResponse; +import org.apache.solr.common.cloud.Replica; +import org.apache.solr.common.cloud.Slice; +import org.apache.solr.common.cloud.SolrZkClient; +import org.apache.solr.common.cloud.ZkStateReader; +import org.apache.solr.common.util.NamedList; +import org.apache.zookeeper.KeeperException; +import org.junit.Test; + +@Slow +public class CollectionTooManyReplicasTest extends AbstractFullDistribZkTestBase { + + public CollectionTooManyReplicasTest() { + sliceCount = 1; + } + + @Test + @ShardsFixed(num = 1) + public void testAddTooManyReplicas() throws Exception { + String collectionName = "TooManyReplicasInSeveralFlavors"; + CollectionAdminRequest.Create create = new CollectionAdminRequest.Create() + .setCollectionName(collectionName) + .setNumShards(2) + .setReplicationFactor(1) + .setMaxShardsPerNode(2) + .setStateFormat(2); + + CollectionAdminResponse response = create.process(cloudClient); + assertEquals(0, response.getStatus()); + assertTrue(response.isSuccess()); + // Now I have the fixed Jetty plus the control instnace, I have two replicas, one for each shard + + // Curiously, I should be able to add a bunch of replicas if I specify the node, even more than maxShardsPerNode + // Just get the first node any way we can. + // Get a node to use for the "node" parameter. + + String nodeName = getAllNodeNames(collectionName).get(0); + // Add a replica using the "node" parameter (no "too many replicas check") + // this node should have 2 replicas on it + CollectionAdminRequest.AddReplica addReplicaNode = new CollectionAdminRequest.AddReplica() + .setCollectionName(collectionName) + .setShardName("shard1") + .setNode(nodeName); + response = addReplicaNode.process(cloudClient); + assertEquals(0, response.getStatus()); + + // Three replicas so far, should be able to create another one "normally" + CollectionAdminRequest.AddReplica addReplica = new CollectionAdminRequest.AddReplica() + .setCollectionName(collectionName) + .setShardName("shard1"); + + response = addReplica.process(cloudClient); + assertEquals(0, response.getStatus()); + + // This one should fail though, no "node" parameter specified + try { + addReplica.process(cloudClient); + fail("Should have thrown an error because the nodes are full"); + } catch (HttpSolrClient.RemoteSolrException se) { + assertTrue("Should have gotten the right error message back", + se.getMessage().contains("given the current number of live nodes and a maxShardsPerNode of")); + } + + // Oddly, we should succeed next just because setting property.name will not check for nodes being "full up" + Properties props = new Properties(); + props.setProperty("name", "bogus2"); + addReplicaNode.setProperties(props); + response = addReplicaNode.process(cloudClient); + assertEquals(0, response.getStatus()); + + ZkStateReader zkStateReader = getCommonCloudSolrClient().getZkStateReader(); + zkStateReader.updateClusterState(true); + Slice slice = zkStateReader.getClusterState().getSlicesMap(collectionName).get("shard1"); + + Replica rep = null; + for (Replica rep1 : slice.getReplicas()) { // Silly compiler + if 
(rep1.get("core").equals("bogus2")) {
+        rep = rep1;
+        break;
+      }
+    }
+    assertNotNull("Should have found a replica named 'bogus2'", rep);
+    assertEquals("Replica should have been put on the correct node", nodeName, rep.getNodeName());
+
+    // Shard1 should have 4 replicas
+    assertEquals("There should be 4 replicas for shard 1", 4, slice.getReplicas().size());
+
+    // And let's fail one more time to ensure that the math doesn't do weird stuff if we have more replicas
+    // than simple calcs would indicate.
+    try {
+      addReplica.process(cloudClient);
+      fail("Should have thrown an error because the nodes are full");
+    } catch (HttpSolrClient.RemoteSolrException se) {
+      assertTrue("Should have gotten the right error message back",
+          se.getMessage().contains("given the current number of live nodes and a maxShardsPerNode of"));
+    }
+  }
+
+  @Test
+  @ShardsFixed(num = 2)
+  public void testAddShard() throws Exception {
+    String collectionName = "TooManyReplicasWhenAddingShards";
+    CollectionAdminRequest.Create create = new CollectionAdminRequest.Create()
+        .setCollectionName(collectionName)
+        .setReplicationFactor(2)
+        .setMaxShardsPerNode(2)
+        .setStateFormat(2)
+        .setRouterName("implicit")
+        .setShards("shardstart");
+
+    NamedList request = create.process(cloudClient).getResponse();
+
+    assertTrue("Could not create the collection", request.get("success") != null);
+    // We have two nodes, maxShardsPerNode is set to 2. Therefore, we should be able to add 2 shards each with
+    // two replicas, but fail on the third.
+
+    CollectionAdminRequest.CreateShard createShard = new CollectionAdminRequest.CreateShard()
+        .setCollectionName(collectionName)
+        .setShardName("shard1");
+    CollectionAdminResponse resp = createShard.process(cloudClient);
+    assertEquals(0, resp.getStatus());
+
+    // Now we should have one replica on each Jetty, add another to reach maxShardsPerNode
+    createShard = new CollectionAdminRequest.CreateShard()
+        .setCollectionName(collectionName)
+        .setShardName("shard2");
+    resp = createShard.process(cloudClient);
+    assertEquals(0, resp.getStatus());
+
+    // Now fail to add the third as it should exceed maxShardsPerNode
+    createShard = new CollectionAdminRequest.CreateShard()
+        .setCollectionName(collectionName)
+        .setShardName("shard3");
+    try {
+      createShard.process(cloudClient);
+      fail("Should have exceeded the max number of replicas allowed");
+    } catch (HttpSolrClient.RemoteSolrException se) {
+      assertTrue("Should have gotten the right error message back",
+          se.getMessage().contains("given the current number of live nodes and a maxShardsPerNode of"));
+    }
+
+    // Hmmm, providing a nodeset also overrides the checks for max replicas, so prove it.
+    List nodes = getAllNodeNames(collectionName);
+
+    createShard = new CollectionAdminRequest.CreateShard()
+        .setCollectionName(collectionName)
+        .setShardName("shard4")
+        .setNodeSet(StringUtils.join(nodes, ","));
+    resp = createShard.process(cloudClient);
+    assertEquals(0, resp.getStatus());
+
+    // And just for yucks, ensure we fail the "regular" one again.
+    createShard = new CollectionAdminRequest.CreateShard()
+        .setCollectionName(collectionName)
+        .setShardName("shard5");
+    try {
+      createShard.process(cloudClient);
+      fail("Should have exceeded the max number of replicas allowed");
+    } catch (HttpSolrClient.RemoteSolrException se) {
+      assertTrue("Should have gotten the right error message back",
+          se.getMessage().contains("given the current number of live nodes and a maxShardsPerNode of"));
+    }
+
+    // And finally, ensure that all the replicas we expect are there. We should have shards 1, 2 and 4 and each
+    // should have exactly two replicas
+    ZkStateReader zkStateReader = getCommonCloudSolrClient().getZkStateReader();
+    zkStateReader.updateClusterState(true);
+    Map slices = zkStateReader.getClusterState().getSlicesMap(collectionName);
+    assertEquals("There should be exactly four slices", 4, slices.size());
+    assertNotNull("shardstart should exist", slices.get("shardstart"));
+    assertNotNull("shard1 should exist", slices.get("shard1"));
+    assertNotNull("shard2 should exist", slices.get("shard2"));
+    assertNotNull("shard4 should exist", slices.get("shard4"));
+    assertEquals("Shardstart should have exactly 2 replicas", 2, slices.get("shardstart").getReplicas().size());
+    assertEquals("Shard1 should have exactly 2 replicas", 2, slices.get("shard1").getReplicas().size());
+    assertEquals("Shard2 should have exactly 2 replicas", 2, slices.get("shard2").getReplicas().size());
+    assertEquals("Shard4 should have exactly 2 replicas", 2, slices.get("shard4").getReplicas().size());
+  }
+
+  @Test
+  @ShardsFixed(num = 2)
+  public void testDownedShards() throws Exception {
+    String collectionName = "TooManyReplicasWhenAddingDownedNode";
+    CollectionAdminRequest.Create create = new CollectionAdminRequest.Create()
+        .setCollectionName(collectionName)
+        .setReplicationFactor(1)
+        .setMaxShardsPerNode(2)
+        .setStateFormat(2)
+        .setRouterName("implicit")
+        .setShards("shardstart");
+
+    NamedList request = create.process(cloudClient).getResponse();
+
+    assertTrue("Could not create the collection", request.get("success") != null);
+    try (SolrZkClient zkClient = new SolrZkClient(zkServer.getZkAddress(),
+        AbstractZkTestCase.TIMEOUT)) {
+
+      List liveNodes = zkClient.getChildren("/live_nodes", null, true);
+
+      // Shut down a Jetty, I really don't care which
+      JettySolrRunner downJetty = jettys.get(r.nextInt(2));
+
+      downJetty.stop();
+      List liveNodesNow = null;
+      for (int idx = 0; idx < 150; ++idx) {
+        liveNodesNow = zkClient.getChildren("/live_nodes", null, true);
+        if (liveNodesNow.size() != liveNodes.size()) break;
+        Thread.sleep(100);
+      }
+      List deadNodes = new ArrayList<>(liveNodes);
+      assertTrue("Should be a downed node", deadNodes.removeAll(liveNodesNow));
+      liveNodes.removeAll(deadNodes);
+
+      // OK, we've killed a node. Ensure we get errors when we ask to create a replica or shard that involves it.
+      // First try adding a replica to the downed node.
+ CollectionAdminRequest.AddReplica addReplicaNode = new CollectionAdminRequest.AddReplica() + .setCollectionName(collectionName) + .setShardName("shardstart") + .setNode(deadNodes.get(0)); + + try { + addReplicaNode.process(cloudClient); + fail("Should have gotten an exception"); + } catch (HttpSolrClient.RemoteSolrException se) { + assertTrue("Should have gotten a message about shard not ", + se.getMessage().contains("At least one of the node(s) specified are not currently active, no action taken.")); + } + + // Should also die if we just add a shard + CollectionAdminRequest.CreateShard createShard = new CollectionAdminRequest.CreateShard() + .setCollectionName(collectionName) + .setShardName("shard1") + .setNodeSet(deadNodes.get(0)); + try { + createShard.process(cloudClient); + fail("Should have gotten an exception"); + } catch (HttpSolrClient.RemoteSolrException se) { + assertTrue("Should have gotten a message about shard not ", + se.getMessage().contains("At least one of the node(s) specified are not currently active, no action taken.")); + } + //downJetty.start(); + } + } + + private List getAllNodeNames(String collectionName) throws KeeperException, InterruptedException { + ZkStateReader zkStateReader = getCommonCloudSolrClient().getZkStateReader(); + zkStateReader.updateClusterState(true); + Slice slice = zkStateReader.getClusterState().getSlicesMap(collectionName).get("shard1"); + + List nodes = new ArrayList<>(); + for (Replica rep : slice.getReplicas()) { + nodes.add(rep.getNodeName()); + } + + assertTrue("Should have some nodes!", nodes.size() > 0); + return nodes; + } + +} diff --git a/solr/solrj/src/java/org/apache/solr/common/params/CoreAdminParams.java b/solr/solrj/src/java/org/apache/solr/common/params/CoreAdminParams.java index bf6cabb2eb0..011ecbc690f 100644 --- a/solr/solrj/src/java/org/apache/solr/common/params/CoreAdminParams.java +++ b/solr/solrj/src/java/org/apache/solr/common/params/CoreAdminParams.java @@ -116,6 +116,9 @@ public abstract class CoreAdminParams public static final String TRANSIENT = "transient"; + // Node to create a replica on for ADDREPLICA at least. + public static final String NODE = "node"; + public enum CoreAdminAction { STATUS, LOAD,
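
To make the new capacity check in Assign.getNodesForNewReplicas concrete: the number of free replica slots is the sum, over live nodes, of (maxShardsPerNode minus the replicas of the target collection already on that node), and the request is rejected with the new error message when that sum is smaller than the number of replicas requested. Below is a minimal, self-contained sketch of that calculation; the class, the weight() formula, and the node names are illustrative stand-ins, not the actual Solr classes from this patch.

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Locale;

// Illustrative sketch of the "enough slots?" check that replaces the old,
// misleading "cannot create collection" error for ADDREPLICA/CREATESHARD.
public class ReplicaSlotCheck {

  // Simplified stand-in for Assign.ReplicaCount in the patch.
  static class ReplicaCount {
    final String nodeName;
    int thisCollectionReplicas;  // replicas of the target collection on this node
    int totalReplicas;           // replicas of any collection on this node

    ReplicaCount(String nodeName, int thisCollectionReplicas, int totalReplicas) {
      this.nodeName = nodeName;
      this.thisCollectionReplicas = thisCollectionReplicas;
      this.totalReplicas = totalReplicas;
    }

    Integer weight() {
      // Assumed weighting for illustration: prefer nodes with fewer replicas,
      // breaking ties on total load across all collections.
      return (thisCollectionReplicas * 100) + totalReplicas;
    }
  }

  static List<ReplicaCount> pickNodes(List<ReplicaCount> liveNodes, String collection,
                                      int replicasNeeded, int maxShardsPerNode) {
    int availableSlots = 0;
    for (ReplicaCount rc : liveNodes) {
      if (maxShardsPerNode > rc.thisCollectionReplicas) {
        availableSlots += maxShardsPerNode - rc.thisCollectionReplicas;
      }
    }
    if (availableSlots < replicasNeeded) {
      // Same wording as the error message introduced by this patch.
      throw new IllegalStateException(String.format(Locale.ROOT,
          "Cannot create %d new replicas for collection %s given the current number of live nodes and a maxShardsPerNode of %d",
          replicasNeeded, collection, maxShardsPerNode));
    }
    // Least-loaded nodes first, mirroring the Comparator over ReplicaCount.weight().
    List<ReplicaCount> sorted = new ArrayList<>(liveNodes);
    sorted.sort(Comparator.comparing(ReplicaCount::weight));
    return sorted;
  }

  public static void main(String[] args) {
    List<ReplicaCount> nodes = new ArrayList<>();
    nodes.add(new ReplicaCount("node1:8983_solr", 2, 3));
    nodes.add(new ReplicaCount("node2:8983_solr", 1, 1));
    // With maxShardsPerNode=2 only one slot is free (on node2), so one replica fits...
    System.out.println(pickNodes(nodes, "test", 1, 2).get(0).nodeName); // node2:8983_solr
    // ...but asking for two fails with the new, accurate message.
    try {
      pickNodes(nodes, "test", 2, 2);
    } catch (IllegalStateException expected) {
      System.out.println(expected.getMessage());
    }
  }
}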
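From the client side, CollectionTooManyReplicasTest drives this behavior through SolrJ's CollectionAdminRequest. The sketch below mirrors those calls, assuming a CloudSolrClient pointed at a hypothetical ZooKeeper ensemble and an existing collection; the ZooKeeper address, collection, shard, and node names are placeholders.

import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.response.CollectionAdminResponse;

// Client-side view of the behavior exercised by the new test (placeholder names throughout).
public class AddReplicaExample {
  public static void main(String[] args) throws Exception {
    try (CloudSolrClient client = new CloudSolrClient("zkhost:2181")) {
      // Without a "node" parameter the Overseer asks Assign.getNodesForNewReplicas for a
      // target node; if every live node is already at maxShardsPerNode the request now fails
      // with "Cannot create N new replicas ... given the current number of live nodes and a
      // maxShardsPerNode of M" instead of the old "cannot create collection" message.
      CollectionAdminRequest.AddReplica addReplica = new CollectionAdminRequest.AddReplica()
          .setCollectionName("myCollection")
          .setShardName("shard1");
      try {
        CollectionAdminResponse rsp = addReplica.process(client);
        System.out.println("Replica added, status: " + rsp.getStatus());
      } catch (HttpSolrClient.RemoteSolrException e) {
        System.out.println("Cluster is full: " + e.getMessage());
      }

      // Naming an explicit node skips the capacity check entirely, as the test demonstrates.
      CollectionAdminRequest.AddReplica addOnNode = new CollectionAdminRequest.AddReplica()
          .setCollectionName("myCollection")
          .setShardName("shard1")
          .setNode("127.0.0.1:8983_solr");
      System.out.println("Status: " + addOnNode.process(client).getStatus());
    }
  }
}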
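The other error message exercised by testDownedShards comes from the createNodeSet validation added in Assign.getNodeNameVsShardCount: if any node named in createNodeSet (or the ADDREPLICA node parameter) is not currently live, the request is rejected up front. A small sketch of that check, assuming illustrative node names and a plain exception in place of SolrException:

import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

// Sketch of the live-node validation for an explicit createNodeSet / node parameter.
public class CreateNodeSetCheck {
  static void validate(List<String> createNodeList, Set<String> liveNodes) {
    Set<String> requestedAndLive = new HashSet<>(createNodeList);
    requestedAndLive.retainAll(liveNodes);
    if (requestedAndLive.size() != createNodeList.size()) {
      // Same wording as the message the new test asserts on.
      throw new IllegalArgumentException(
          "At least one of the node(s) specified are not currently active, no action taken.");
    }
  }

  public static void main(String[] args) {
    Set<String> liveNodes = new HashSet<>(Arrays.asList("n1:8983_solr", "n2:8983_solr"));
    validate(Arrays.asList("n1:8983_solr"), liveNodes);                      // passes
    validate(Arrays.asList("n1:8983_solr", "down:8983_solr"), liveNodes);    // throws
  }
}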