diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index b3a4601fa48..3374b8b35ab 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -128,7 +128,10 @@ New Features * SOLR-9026: Extend facet telemetry support to legacy (non-json) facets under "debug/facet-debug" in the response. (Michael Sun, yonik) - + +* SOLR-7117: Provide an option to limit the maximum number of cores that can be created on a node by the + Auto Add Replica feature. For this you can set a "maxCoresPerNode" property via the Cluster Property API + (Varun Thacker, Mark Miller) Bug Fixes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/cloud/OverseerAutoReplicaFailoverThread.java b/solr/core/src/java/org/apache/solr/cloud/OverseerAutoReplicaFailoverThread.java index bbb45aacafd..c3571e366ed 100644 --- a/solr/core/src/java/org/apache/solr/cloud/OverseerAutoReplicaFailoverThread.java +++ b/solr/core/src/java/org/apache/solr/cloud/OverseerAutoReplicaFailoverThread.java @@ -229,7 +229,8 @@ public class OverseerAutoReplicaFailoverThread implements Runnable, Closeable { private boolean addReplica(final String collection, DownReplica badReplica) { // first find best home - first strategy, sort by number of cores // hosted where maxCoresPerNode is not violated - final String createUrl = getBestCreateUrl(zkStateReader, badReplica); + final Integer maxCoreCount = (Integer) zkStateReader.getClusterProps().get(ZkStateReader.MAX_CORES_PER_NODE); + final String createUrl = getBestCreateUrl(zkStateReader, badReplica, maxCoreCount); if (createUrl == null) { log.warn("Could not find a node to create new replica on."); return false; @@ -301,15 +302,16 @@ public class OverseerAutoReplicaFailoverThread implements Runnable, Closeable { * @return the best node to replace the badReplica on or null if there is no * such node */ - static String getBestCreateUrl(ZkStateReader zkStateReader, DownReplica badReplica) { + static String getBestCreateUrl(ZkStateReader zkStateReader, DownReplica badReplica, Integer maxCoreCount) { assert badReplica != null; assert badReplica.collection != null; assert badReplica.slice != null; log.debug("getBestCreateUrl for " + badReplica.replica); - Map counts = new HashMap(); - Set unsuitableHosts = new HashSet(); + Map counts = new HashMap<>(); + Set unsuitableHosts = new HashSet<>(); Set liveNodes = new HashSet<>(zkStateReader.getClusterState().getLiveNodes()); + Map coresPerNode = new HashMap<>(); ClusterState clusterState = zkStateReader.getClusterState(); if (clusterState != null) { @@ -329,8 +331,13 @@ public class OverseerAutoReplicaFailoverThread implements Runnable, Closeable { for (Replica replica : replicas) { liveNodes.remove(replica.getNodeName()); String baseUrl = replica.getStr(ZkStateReader.BASE_URL_PROP); - if (baseUrl.equals( - badReplica.replica.getStr(ZkStateReader.BASE_URL_PROP))) { + if (coresPerNode.containsKey(baseUrl)) { + Integer nodeCount = coresPerNode.get(baseUrl); + coresPerNode.put(baseUrl, nodeCount++); + } else { + coresPerNode.put(baseUrl, 1); + } + if (baseUrl.equals(badReplica.replica.getStr(ZkStateReader.BASE_URL_PROP))) { continue; } // on a live node? @@ -351,16 +358,15 @@ public class OverseerAutoReplicaFailoverThread implements Runnable, Closeable { if (badReplica.collection.getName().equals(collection) && badReplica.slice.getName().equals(slice.getName())) { cnt.ourReplicas++; } - - // TODO: this is collection wide and we want to take into - // account cluster wide - use new cluster sys prop + Integer maxShardsPerNode = badReplica.collection.getMaxShardsPerNode(); if (maxShardsPerNode == null) { log.warn("maxShardsPerNode is not defined for collection, name=" + badReplica.collection.getName()); maxShardsPerNode = Integer.MAX_VALUE; } - log.debug("collection={} node={} max shards per node={} potential hosts={}", collection, baseUrl, maxShardsPerNode, cnt); - + log.debug("collection={} node={} maxShardsPerNode={} maxCoresPerNode={} potential hosts={}", + collection, baseUrl, maxShardsPerNode, maxCoreCount, cnt); + Collection badSliceReplicas = null; DocCollection c = clusterState.getCollection(badReplica.collection.getName()); if (c != null) { @@ -370,7 +376,8 @@ public class OverseerAutoReplicaFailoverThread implements Runnable, Closeable { } } boolean alreadyExistsOnNode = replicaAlreadyExistsOnNode(zkStateReader.getClusterState(), badSliceReplicas, badReplica, baseUrl); - if (unsuitableHosts.contains(baseUrl) || alreadyExistsOnNode || cnt.collectionShardsOnNode >= maxShardsPerNode) { + if (unsuitableHosts.contains(baseUrl) || alreadyExistsOnNode || cnt.collectionShardsOnNode >= maxShardsPerNode + || (maxCoreCount != null && coresPerNode.get(baseUrl) >= maxCoreCount) ) { counts.remove(baseUrl); unsuitableHosts.add(baseUrl); log.debug("not a candidate node, collection={} node={} max shards per node={} good replicas={}", collection, baseUrl, maxShardsPerNode, cnt); diff --git a/solr/core/src/test/org/apache/solr/cloud/SharedFSAutoReplicaFailoverUtilsTest.java b/solr/core/src/test/org/apache/solr/cloud/SharedFSAutoReplicaFailoverUtilsTest.java index e108ee50ec3..f5fee2174e3 100644 --- a/solr/core/src/test/org/apache/solr/cloud/SharedFSAutoReplicaFailoverUtilsTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/SharedFSAutoReplicaFailoverUtilsTest.java @@ -68,7 +68,7 @@ public class SharedFSAutoReplicaFailoverUtilsTest extends SolrTestCaseJ4 { @Before public void setUp() throws Exception { super.setUp(); - results = new ArrayList(); + results = new ArrayList<>(); } @After @@ -82,27 +82,27 @@ public class SharedFSAutoReplicaFailoverUtilsTest extends SolrTestCaseJ4 { @Test public void testGetBestCreateUrlBasics() { Result result = buildClusterState("csr1R*r2", NODE1); - String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica); + String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null); assertNull("Should be no live node to failover to", createUrl); result = buildClusterState("csr1R*r2", NODE1, NODE2); - createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica); + createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null); assertNull("Only failover candidate node already has a replica", createUrl); result = buildClusterState("csr1R*r2sr3", NODE1, NODE2, NODE3); - createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica); + createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null); assertEquals("Node3 does not have a replica from the bad slice and should be the best choice", NODE3_URL, createUrl); - - result = buildClusterState("csr1R*r2-4sr3r4r5", NODE1, NODE2, NODE3); - createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica); - assertTrue(createUrl.equals(NODE2_URL) || createUrl.equals(NODE3_URL)); - + + result = buildClusterState("csr1R*r2Fsr3r4r5", NODE1, NODE2, NODE3); + createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null); + assertTrue(createUrl.equals(NODE3_URL)); + result = buildClusterState("csr1*r2r3sr3r3sr4", NODE1, NODE2, NODE3, NODE4); - createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica); + createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null); assertEquals(NODE4_URL, createUrl); result = buildClusterState("csr1*r2sr3r3sr4sr4", NODE1, NODE2, NODE3, NODE4); - createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica); + createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null); assertTrue(createUrl.equals(NODE3_URL) || createUrl.equals(NODE4_URL)); } @@ -121,27 +121,27 @@ public class SharedFSAutoReplicaFailoverUtilsTest extends SolrTestCaseJ4 { public void testGetBestCreateUrlMultipleCollections() throws Exception { Result result = buildClusterState("csr*r2csr2", NODE1); - String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica); - assertEquals(null, createUrl); - + String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null); + assertNull(createUrl); + result = buildClusterState("csr*r2csr2", NODE1); - createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica); - assertEquals(null, createUrl); - + createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null); + assertNull(createUrl); + result = buildClusterState("csr*r2csr2", NODE1, NODE2); - createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica); - assertEquals(null, createUrl); + createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null); + assertNull(createUrl); } - + @Test public void testGetBestCreateUrlMultipleCollections2() { Result result = buildClusterState("csr*r2sr3cr2", NODE1); - String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica); - assertEquals(null, createUrl); - + String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null); + assertNull(createUrl); + result = buildClusterState("csr*r2sr3cr2", NODE1, NODE2, NODE3); - createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica); + createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null); assertEquals(NODE3_URL, createUrl); } @@ -149,49 +149,74 @@ public class SharedFSAutoReplicaFailoverUtilsTest extends SolrTestCaseJ4 { @Test public void testGetBestCreateUrlMultipleCollections3() { Result result = buildClusterState("csr5r1sr4r2sr3r6csr2*r6sr5r3sr4r3", NODE1, NODE4, NODE5, NODE6); - String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica); + String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null); assertEquals(NODE1_URL, createUrl); } @Test public void testGetBestCreateUrlMultipleCollections4() { Result result = buildClusterState("csr1r4sr3r5sr2r6csr5r6sr4r6sr5*r4", NODE6); - String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica); + String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null); assertEquals(NODE6_URL, createUrl); } @Test public void testFailOverToEmptySolrInstance() { Result result = buildClusterState("csr1*r1sr1csr1", NODE2); - String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica); + String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null); assertEquals(NODE2_URL, createUrl); } @Test public void testFavorForeignSlices() { Result result = buildClusterState("csr*sr2csr3r3", NODE2, NODE3); - String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica); + String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null); assertEquals(NODE3_URL, createUrl); result = buildClusterState("csr*sr2csr3r3r3r3r3r3r3", NODE2, NODE3); - createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica); + createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null); assertEquals(NODE2_URL, createUrl); } - + @Test public void testCollectionMaxNodesPerShard() { Result result = buildClusterState("csr*sr2", 1, 1, NODE2); - String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica); - assertEquals(null, createUrl); - + String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null); + assertNull(createUrl); + result = buildClusterState("csr*sr2", 1, 2, NODE2); - createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica); + createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null); assertEquals(NODE2_URL, createUrl); - + result = buildClusterState("csr*csr2r2", 1, 1, NODE2); - createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica); + createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null); assertEquals(NODE2_URL, createUrl); } + + @Test + public void testMaxCoresPerNode() { + Result result = buildClusterState("csr*sr2", 1, 1, NODE2); + String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, 1); + assertNull(createUrl); + + createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, 2); + assertNull(createUrl); + + result = buildClusterState("csr*sr2", 1, 2, NODE2); + createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, 2); + assertEquals(NODE2_URL, createUrl); + + result = buildClusterState("csr*sr2sr3sr4", 1, 1, NODE2, NODE3, NODE4); + createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, 1); + assertNull(createUrl); + + createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, 2); + assertNull(createUrl); + + result = buildClusterState("csr*sr2sr3sr4", 1, 2, NODE2, NODE3, NODE4); + createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, 2); + assertTrue(createUrl.equals(NODE3_URL) || createUrl.equals(NODE4_URL)); + } private Result buildClusterState(String string, String ... liveNodes) { return buildClusterState(string, 1, liveNodes); @@ -351,7 +376,7 @@ public class SharedFSAutoReplicaFailoverUtilsTest extends SolrTestCaseJ4 { // trunk briefly had clusterstate taking a zkreader :( this was required to work around that - leaving // until that issue is resolved. MockZkStateReader reader = new MockZkStateReader(null, collectionStates.keySet()); - ClusterState clusterState = new ClusterState(1, new HashSet(Arrays.asList(liveNodes)), collectionStates); + ClusterState clusterState = new ClusterState(1, new HashSet<>(Arrays.asList(liveNodes)), collectionStates); reader = new MockZkStateReader(clusterState, collectionStates.keySet()); String json; diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java index eafad8438ad..ab031b0ab21 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java @@ -88,6 +88,7 @@ public class ZkStateReader implements Closeable { public static final String REPLICATION_FACTOR = "replicationFactor"; public static final String MAX_SHARDS_PER_NODE = "maxShardsPerNode"; public static final String AUTO_ADD_REPLICAS = "autoAddReplicas"; + public static final String MAX_CORES_PER_NODE = "maxCoresPerNode"; public static final String ROLES = "/roles.json"; @@ -137,7 +138,8 @@ public class ZkStateReader implements Closeable { LEGACY_CLOUD, URL_SCHEME, AUTO_ADD_REPLICAS, - BACKUP_LOCATION))); + BACKUP_LOCATION, + MAX_CORES_PER_NODE))); /** * Returns config set name for collection.