SOLR-7117: AutoAddReplicas should have a cluster wide property for controlling number of cores hosted on each node

This commit is contained in:
Varun Thacker 2016-05-08 23:50:18 +05:30
parent 2bc376c7f5
commit 8e999d1a96
4 changed files with 88 additions and 51 deletions

View File

@ -99,7 +99,10 @@ New Features
* SOLR-9026: Extend facet telemetry support to legacy (non-json) facets under "debug/facet-debug" in * SOLR-9026: Extend facet telemetry support to legacy (non-json) facets under "debug/facet-debug" in
the response. (Michael Sun, yonik) the response. (Michael Sun, yonik)
* SOLR-7117: Provide an option to limit the maximum number of cores that can be created on a node by the
Auto Add Replica feature. For this you can set a "maxCoresPerNode" property via the Cluster Property API
(Varun Thacker, Mark Miller)
Bug Fixes Bug Fixes
---------------------- ----------------------

View File

@ -229,7 +229,8 @@ public class OverseerAutoReplicaFailoverThread implements Runnable, Closeable {
private boolean addReplica(final String collection, DownReplica badReplica) { private boolean addReplica(final String collection, DownReplica badReplica) {
// first find best home - first strategy, sort by number of cores // first find best home - first strategy, sort by number of cores
// hosted where maxCoresPerNode is not violated // hosted where maxCoresPerNode is not violated
final String createUrl = getBestCreateUrl(zkStateReader, badReplica); final Integer maxCoreCount = (Integer) zkStateReader.getClusterProps().get(ZkStateReader.MAX_CORES_PER_NODE);
final String createUrl = getBestCreateUrl(zkStateReader, badReplica, maxCoreCount);
if (createUrl == null) { if (createUrl == null) {
log.warn("Could not find a node to create new replica on."); log.warn("Could not find a node to create new replica on.");
return false; return false;
@ -301,15 +302,16 @@ public class OverseerAutoReplicaFailoverThread implements Runnable, Closeable {
* @return the best node to replace the badReplica on or null if there is no * @return the best node to replace the badReplica on or null if there is no
* such node * such node
*/ */
static String getBestCreateUrl(ZkStateReader zkStateReader, DownReplica badReplica) { static String getBestCreateUrl(ZkStateReader zkStateReader, DownReplica badReplica, Integer maxCoreCount) {
assert badReplica != null; assert badReplica != null;
assert badReplica.collection != null; assert badReplica.collection != null;
assert badReplica.slice != null; assert badReplica.slice != null;
log.debug("getBestCreateUrl for " + badReplica.replica); log.debug("getBestCreateUrl for " + badReplica.replica);
Map<String,Counts> counts = new HashMap<String, Counts>(); Map<String,Counts> counts = new HashMap<>();
Set<String> unsuitableHosts = new HashSet<String>(); Set<String> unsuitableHosts = new HashSet<>();
Set<String> liveNodes = new HashSet<>(zkStateReader.getClusterState().getLiveNodes()); Set<String> liveNodes = new HashSet<>(zkStateReader.getClusterState().getLiveNodes());
Map<String, Integer> coresPerNode = new HashMap<>();
ClusterState clusterState = zkStateReader.getClusterState(); ClusterState clusterState = zkStateReader.getClusterState();
if (clusterState != null) { if (clusterState != null) {
@ -329,8 +331,13 @@ public class OverseerAutoReplicaFailoverThread implements Runnable, Closeable {
for (Replica replica : replicas) { for (Replica replica : replicas) {
liveNodes.remove(replica.getNodeName()); liveNodes.remove(replica.getNodeName());
String baseUrl = replica.getStr(ZkStateReader.BASE_URL_PROP); String baseUrl = replica.getStr(ZkStateReader.BASE_URL_PROP);
if (baseUrl.equals( if (coresPerNode.containsKey(baseUrl)) {
badReplica.replica.getStr(ZkStateReader.BASE_URL_PROP))) { Integer nodeCount = coresPerNode.get(baseUrl);
coresPerNode.put(baseUrl, nodeCount++);
} else {
coresPerNode.put(baseUrl, 1);
}
if (baseUrl.equals(badReplica.replica.getStr(ZkStateReader.BASE_URL_PROP))) {
continue; continue;
} }
// on a live node? // on a live node?
@ -351,16 +358,15 @@ public class OverseerAutoReplicaFailoverThread implements Runnable, Closeable {
if (badReplica.collection.getName().equals(collection) && badReplica.slice.getName().equals(slice.getName())) { if (badReplica.collection.getName().equals(collection) && badReplica.slice.getName().equals(slice.getName())) {
cnt.ourReplicas++; cnt.ourReplicas++;
} }
// TODO: this is collection wide and we want to take into
// account cluster wide - use new cluster sys prop
Integer maxShardsPerNode = badReplica.collection.getMaxShardsPerNode(); Integer maxShardsPerNode = badReplica.collection.getMaxShardsPerNode();
if (maxShardsPerNode == null) { if (maxShardsPerNode == null) {
log.warn("maxShardsPerNode is not defined for collection, name=" + badReplica.collection.getName()); log.warn("maxShardsPerNode is not defined for collection, name=" + badReplica.collection.getName());
maxShardsPerNode = Integer.MAX_VALUE; maxShardsPerNode = Integer.MAX_VALUE;
} }
log.debug("collection={} node={} max shards per node={} potential hosts={}", collection, baseUrl, maxShardsPerNode, cnt); log.debug("collection={} node={} maxShardsPerNode={} maxCoresPerNode={} potential hosts={}",
collection, baseUrl, maxShardsPerNode, maxCoreCount, cnt);
Collection<Replica> badSliceReplicas = null; Collection<Replica> badSliceReplicas = null;
DocCollection c = clusterState.getCollection(badReplica.collection.getName()); DocCollection c = clusterState.getCollection(badReplica.collection.getName());
if (c != null) { if (c != null) {
@ -370,7 +376,8 @@ public class OverseerAutoReplicaFailoverThread implements Runnable, Closeable {
} }
} }
boolean alreadyExistsOnNode = replicaAlreadyExistsOnNode(zkStateReader.getClusterState(), badSliceReplicas, badReplica, baseUrl); boolean alreadyExistsOnNode = replicaAlreadyExistsOnNode(zkStateReader.getClusterState(), badSliceReplicas, badReplica, baseUrl);
if (unsuitableHosts.contains(baseUrl) || alreadyExistsOnNode || cnt.collectionShardsOnNode >= maxShardsPerNode) { if (unsuitableHosts.contains(baseUrl) || alreadyExistsOnNode || cnt.collectionShardsOnNode >= maxShardsPerNode
|| (maxCoreCount != null && coresPerNode.get(baseUrl) >= maxCoreCount) ) {
counts.remove(baseUrl); counts.remove(baseUrl);
unsuitableHosts.add(baseUrl); unsuitableHosts.add(baseUrl);
log.debug("not a candidate node, collection={} node={} max shards per node={} good replicas={}", collection, baseUrl, maxShardsPerNode, cnt); log.debug("not a candidate node, collection={} node={} max shards per node={} good replicas={}", collection, baseUrl, maxShardsPerNode, cnt);

View File

@ -68,7 +68,7 @@ public class SharedFSAutoReplicaFailoverUtilsTest extends SolrTestCaseJ4 {
@Before @Before
public void setUp() throws Exception { public void setUp() throws Exception {
super.setUp(); super.setUp();
results = new ArrayList<Result>(); results = new ArrayList<>();
} }
@After @After
@ -82,27 +82,27 @@ public class SharedFSAutoReplicaFailoverUtilsTest extends SolrTestCaseJ4 {
@Test @Test
public void testGetBestCreateUrlBasics() { public void testGetBestCreateUrlBasics() {
Result result = buildClusterState("csr1R*r2", NODE1); Result result = buildClusterState("csr1R*r2", NODE1);
String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica); String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null);
assertNull("Should be no live node to failover to", createUrl); assertNull("Should be no live node to failover to", createUrl);
result = buildClusterState("csr1R*r2", NODE1, NODE2); result = buildClusterState("csr1R*r2", NODE1, NODE2);
createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica); createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null);
assertNull("Only failover candidate node already has a replica", createUrl); assertNull("Only failover candidate node already has a replica", createUrl);
result = buildClusterState("csr1R*r2sr3", NODE1, NODE2, NODE3); result = buildClusterState("csr1R*r2sr3", NODE1, NODE2, NODE3);
createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica); createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null);
assertEquals("Node3 does not have a replica from the bad slice and should be the best choice", NODE3_URL, createUrl); assertEquals("Node3 does not have a replica from the bad slice and should be the best choice", NODE3_URL, createUrl);
result = buildClusterState("csr1R*r2-4sr3r4r5", NODE1, NODE2, NODE3); result = buildClusterState("csr1R*r2Fsr3r4r5", NODE1, NODE2, NODE3);
createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica); createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null);
assertTrue(createUrl.equals(NODE2_URL) || createUrl.equals(NODE3_URL)); assertTrue(createUrl.equals(NODE3_URL));
result = buildClusterState("csr1*r2r3sr3r3sr4", NODE1, NODE2, NODE3, NODE4); result = buildClusterState("csr1*r2r3sr3r3sr4", NODE1, NODE2, NODE3, NODE4);
createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica); createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null);
assertEquals(NODE4_URL, createUrl); assertEquals(NODE4_URL, createUrl);
result = buildClusterState("csr1*r2sr3r3sr4sr4", NODE1, NODE2, NODE3, NODE4); result = buildClusterState("csr1*r2sr3r3sr4sr4", NODE1, NODE2, NODE3, NODE4);
createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica); createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null);
assertTrue(createUrl.equals(NODE3_URL) || createUrl.equals(NODE4_URL)); assertTrue(createUrl.equals(NODE3_URL) || createUrl.equals(NODE4_URL));
} }
@ -121,27 +121,27 @@ public class SharedFSAutoReplicaFailoverUtilsTest extends SolrTestCaseJ4 {
public void testGetBestCreateUrlMultipleCollections() throws Exception { public void testGetBestCreateUrlMultipleCollections() throws Exception {
Result result = buildClusterState("csr*r2csr2", NODE1); Result result = buildClusterState("csr*r2csr2", NODE1);
String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica); String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null);
assertEquals(null, createUrl); assertNull(createUrl);
result = buildClusterState("csr*r2csr2", NODE1); result = buildClusterState("csr*r2csr2", NODE1);
createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica); createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null);
assertEquals(null, createUrl); assertNull(createUrl);
result = buildClusterState("csr*r2csr2", NODE1, NODE2); result = buildClusterState("csr*r2csr2", NODE1, NODE2);
createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica); createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null);
assertEquals(null, createUrl); assertNull(createUrl);
} }
@Test @Test
public void testGetBestCreateUrlMultipleCollections2() { public void testGetBestCreateUrlMultipleCollections2() {
Result result = buildClusterState("csr*r2sr3cr2", NODE1); Result result = buildClusterState("csr*r2sr3cr2", NODE1);
String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica); String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null);
assertEquals(null, createUrl); assertNull(createUrl);
result = buildClusterState("csr*r2sr3cr2", NODE1, NODE2, NODE3); result = buildClusterState("csr*r2sr3cr2", NODE1, NODE2, NODE3);
createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica); createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null);
assertEquals(NODE3_URL, createUrl); assertEquals(NODE3_URL, createUrl);
} }
@ -149,49 +149,74 @@ public class SharedFSAutoReplicaFailoverUtilsTest extends SolrTestCaseJ4 {
@Test @Test
public void testGetBestCreateUrlMultipleCollections3() { public void testGetBestCreateUrlMultipleCollections3() {
Result result = buildClusterState("csr5r1sr4r2sr3r6csr2*r6sr5r3sr4r3", NODE1, NODE4, NODE5, NODE6); Result result = buildClusterState("csr5r1sr4r2sr3r6csr2*r6sr5r3sr4r3", NODE1, NODE4, NODE5, NODE6);
String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica); String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null);
assertEquals(NODE1_URL, createUrl); assertEquals(NODE1_URL, createUrl);
} }
@Test @Test
public void testGetBestCreateUrlMultipleCollections4() { public void testGetBestCreateUrlMultipleCollections4() {
Result result = buildClusterState("csr1r4sr3r5sr2r6csr5r6sr4r6sr5*r4", NODE6); Result result = buildClusterState("csr1r4sr3r5sr2r6csr5r6sr4r6sr5*r4", NODE6);
String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica); String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null);
assertEquals(NODE6_URL, createUrl); assertEquals(NODE6_URL, createUrl);
} }
@Test @Test
public void testFailOverToEmptySolrInstance() { public void testFailOverToEmptySolrInstance() {
Result result = buildClusterState("csr1*r1sr1csr1", NODE2); Result result = buildClusterState("csr1*r1sr1csr1", NODE2);
String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica); String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null);
assertEquals(NODE2_URL, createUrl); assertEquals(NODE2_URL, createUrl);
} }
@Test @Test
public void testFavorForeignSlices() { public void testFavorForeignSlices() {
Result result = buildClusterState("csr*sr2csr3r3", NODE2, NODE3); Result result = buildClusterState("csr*sr2csr3r3", NODE2, NODE3);
String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica); String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null);
assertEquals(NODE3_URL, createUrl); assertEquals(NODE3_URL, createUrl);
result = buildClusterState("csr*sr2csr3r3r3r3r3r3r3", NODE2, NODE3); result = buildClusterState("csr*sr2csr3r3r3r3r3r3r3", NODE2, NODE3);
createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica); createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null);
assertEquals(NODE2_URL, createUrl); assertEquals(NODE2_URL, createUrl);
} }
@Test @Test
public void testCollectionMaxNodesPerShard() { public void testCollectionMaxNodesPerShard() {
Result result = buildClusterState("csr*sr2", 1, 1, NODE2); Result result = buildClusterState("csr*sr2", 1, 1, NODE2);
String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica); String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null);
assertEquals(null, createUrl); assertNull(createUrl);
result = buildClusterState("csr*sr2", 1, 2, NODE2); result = buildClusterState("csr*sr2", 1, 2, NODE2);
createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica); createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null);
assertEquals(NODE2_URL, createUrl); assertEquals(NODE2_URL, createUrl);
result = buildClusterState("csr*csr2r2", 1, 1, NODE2); result = buildClusterState("csr*csr2r2", 1, 1, NODE2);
createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica); createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null);
assertEquals(NODE2_URL, createUrl); assertEquals(NODE2_URL, createUrl);
} }
@Test
public void testMaxCoresPerNode() {
Result result = buildClusterState("csr*sr2", 1, 1, NODE2);
String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, 1);
assertNull(createUrl);
createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, 2);
assertNull(createUrl);
result = buildClusterState("csr*sr2", 1, 2, NODE2);
createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, 2);
assertEquals(NODE2_URL, createUrl);
result = buildClusterState("csr*sr2sr3sr4", 1, 1, NODE2, NODE3, NODE4);
createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, 1);
assertNull(createUrl);
createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, 2);
assertNull(createUrl);
result = buildClusterState("csr*sr2sr3sr4", 1, 2, NODE2, NODE3, NODE4);
createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, 2);
assertTrue(createUrl.equals(NODE3_URL) || createUrl.equals(NODE4_URL));
}
private Result buildClusterState(String string, String ... liveNodes) { private Result buildClusterState(String string, String ... liveNodes) {
return buildClusterState(string, 1, liveNodes); return buildClusterState(string, 1, liveNodes);
@ -351,7 +376,7 @@ public class SharedFSAutoReplicaFailoverUtilsTest extends SolrTestCaseJ4 {
// trunk briefly had clusterstate taking a zkreader :( this was required to work around that - leaving // trunk briefly had clusterstate taking a zkreader :( this was required to work around that - leaving
// until that issue is resolved. // until that issue is resolved.
MockZkStateReader reader = new MockZkStateReader(null, collectionStates.keySet()); MockZkStateReader reader = new MockZkStateReader(null, collectionStates.keySet());
ClusterState clusterState = new ClusterState(1, new HashSet<String>(Arrays.asList(liveNodes)), collectionStates); ClusterState clusterState = new ClusterState(1, new HashSet<>(Arrays.asList(liveNodes)), collectionStates);
reader = new MockZkStateReader(clusterState, collectionStates.keySet()); reader = new MockZkStateReader(clusterState, collectionStates.keySet());
String json; String json;

View File

@ -88,6 +88,7 @@ public class ZkStateReader implements Closeable {
public static final String REPLICATION_FACTOR = "replicationFactor"; public static final String REPLICATION_FACTOR = "replicationFactor";
public static final String MAX_SHARDS_PER_NODE = "maxShardsPerNode"; public static final String MAX_SHARDS_PER_NODE = "maxShardsPerNode";
public static final String AUTO_ADD_REPLICAS = "autoAddReplicas"; public static final String AUTO_ADD_REPLICAS = "autoAddReplicas";
public static final String MAX_CORES_PER_NODE = "maxCoresPerNode";
public static final String ROLES = "/roles.json"; public static final String ROLES = "/roles.json";
@ -137,7 +138,8 @@ public class ZkStateReader implements Closeable {
LEGACY_CLOUD, LEGACY_CLOUD,
URL_SCHEME, URL_SCHEME,
AUTO_ADD_REPLICAS, AUTO_ADD_REPLICAS,
BACKUP_LOCATION))); BACKUP_LOCATION,
MAX_CORES_PER_NODE)));
/** /**
* Returns config set name for collection. * Returns config set name for collection.