SOLR-7117: AutoAddReplicas should have a cluster wide property for controlling number of cores hosted on each node

This commit is contained in:
Varun Thacker 2016-05-08 23:50:18 +05:30
parent 2bc376c7f5
commit 8e999d1a96
4 changed files with 88 additions and 51 deletions

View File

@ -99,7 +99,10 @@ New Features
* SOLR-9026: Extend facet telemetry support to legacy (non-json) facets under "debug/facet-debug" in
the response. (Michael Sun, yonik)
* SOLR-7117: Provide an option to limit the maximum number of cores that can be created on a node by the
Auto Add Replica feature. For this you can set a "maxCoresPerNode" property via the Cluster Property API
(Varun Thacker, Mark Miller)
Bug Fixes
----------------------

View File

@ -229,7 +229,8 @@ public class OverseerAutoReplicaFailoverThread implements Runnable, Closeable {
private boolean addReplica(final String collection, DownReplica badReplica) {
// first find best home - first strategy, sort by number of cores
// hosted where maxCoresPerNode is not violated
final String createUrl = getBestCreateUrl(zkStateReader, badReplica);
final Integer maxCoreCount = (Integer) zkStateReader.getClusterProps().get(ZkStateReader.MAX_CORES_PER_NODE);
final String createUrl = getBestCreateUrl(zkStateReader, badReplica, maxCoreCount);
if (createUrl == null) {
log.warn("Could not find a node to create new replica on.");
return false;
@ -301,15 +302,16 @@ public class OverseerAutoReplicaFailoverThread implements Runnable, Closeable {
* @return the best node to replace the badReplica on or null if there is no
* such node
*/
static String getBestCreateUrl(ZkStateReader zkStateReader, DownReplica badReplica) {
static String getBestCreateUrl(ZkStateReader zkStateReader, DownReplica badReplica, Integer maxCoreCount) {
assert badReplica != null;
assert badReplica.collection != null;
assert badReplica.slice != null;
log.debug("getBestCreateUrl for " + badReplica.replica);
Map<String,Counts> counts = new HashMap<String, Counts>();
Set<String> unsuitableHosts = new HashSet<String>();
Map<String,Counts> counts = new HashMap<>();
Set<String> unsuitableHosts = new HashSet<>();
Set<String> liveNodes = new HashSet<>(zkStateReader.getClusterState().getLiveNodes());
Map<String, Integer> coresPerNode = new HashMap<>();
ClusterState clusterState = zkStateReader.getClusterState();
if (clusterState != null) {
@ -329,8 +331,13 @@ public class OverseerAutoReplicaFailoverThread implements Runnable, Closeable {
for (Replica replica : replicas) {
liveNodes.remove(replica.getNodeName());
String baseUrl = replica.getStr(ZkStateReader.BASE_URL_PROP);
if (baseUrl.equals(
badReplica.replica.getStr(ZkStateReader.BASE_URL_PROP))) {
if (coresPerNode.containsKey(baseUrl)) {
Integer nodeCount = coresPerNode.get(baseUrl);
coresPerNode.put(baseUrl, nodeCount++);
} else {
coresPerNode.put(baseUrl, 1);
}
if (baseUrl.equals(badReplica.replica.getStr(ZkStateReader.BASE_URL_PROP))) {
continue;
}
// on a live node?
@ -351,16 +358,15 @@ public class OverseerAutoReplicaFailoverThread implements Runnable, Closeable {
if (badReplica.collection.getName().equals(collection) && badReplica.slice.getName().equals(slice.getName())) {
cnt.ourReplicas++;
}
// TODO: this is collection wide and we want to take into
// account cluster wide - use new cluster sys prop
Integer maxShardsPerNode = badReplica.collection.getMaxShardsPerNode();
if (maxShardsPerNode == null) {
log.warn("maxShardsPerNode is not defined for collection, name=" + badReplica.collection.getName());
maxShardsPerNode = Integer.MAX_VALUE;
}
log.debug("collection={} node={} max shards per node={} potential hosts={}", collection, baseUrl, maxShardsPerNode, cnt);
log.debug("collection={} node={} maxShardsPerNode={} maxCoresPerNode={} potential hosts={}",
collection, baseUrl, maxShardsPerNode, maxCoreCount, cnt);
Collection<Replica> badSliceReplicas = null;
DocCollection c = clusterState.getCollection(badReplica.collection.getName());
if (c != null) {
@ -370,7 +376,8 @@ public class OverseerAutoReplicaFailoverThread implements Runnable, Closeable {
}
}
boolean alreadyExistsOnNode = replicaAlreadyExistsOnNode(zkStateReader.getClusterState(), badSliceReplicas, badReplica, baseUrl);
if (unsuitableHosts.contains(baseUrl) || alreadyExistsOnNode || cnt.collectionShardsOnNode >= maxShardsPerNode) {
if (unsuitableHosts.contains(baseUrl) || alreadyExistsOnNode || cnt.collectionShardsOnNode >= maxShardsPerNode
|| (maxCoreCount != null && coresPerNode.get(baseUrl) >= maxCoreCount) ) {
counts.remove(baseUrl);
unsuitableHosts.add(baseUrl);
log.debug("not a candidate node, collection={} node={} max shards per node={} good replicas={}", collection, baseUrl, maxShardsPerNode, cnt);

View File

@ -68,7 +68,7 @@ public class SharedFSAutoReplicaFailoverUtilsTest extends SolrTestCaseJ4 {
@Before
public void setUp() throws Exception {
super.setUp();
results = new ArrayList<Result>();
results = new ArrayList<>();
}
@After
@ -82,27 +82,27 @@ public class SharedFSAutoReplicaFailoverUtilsTest extends SolrTestCaseJ4 {
@Test
public void testGetBestCreateUrlBasics() {
Result result = buildClusterState("csr1R*r2", NODE1);
String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica);
String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null);
assertNull("Should be no live node to failover to", createUrl);
result = buildClusterState("csr1R*r2", NODE1, NODE2);
createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica);
createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null);
assertNull("Only failover candidate node already has a replica", createUrl);
result = buildClusterState("csr1R*r2sr3", NODE1, NODE2, NODE3);
createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica);
createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null);
assertEquals("Node3 does not have a replica from the bad slice and should be the best choice", NODE3_URL, createUrl);
result = buildClusterState("csr1R*r2-4sr3r4r5", NODE1, NODE2, NODE3);
createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica);
assertTrue(createUrl.equals(NODE2_URL) || createUrl.equals(NODE3_URL));
result = buildClusterState("csr1R*r2Fsr3r4r5", NODE1, NODE2, NODE3);
createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null);
assertTrue(createUrl.equals(NODE3_URL));
result = buildClusterState("csr1*r2r3sr3r3sr4", NODE1, NODE2, NODE3, NODE4);
createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica);
createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null);
assertEquals(NODE4_URL, createUrl);
result = buildClusterState("csr1*r2sr3r3sr4sr4", NODE1, NODE2, NODE3, NODE4);
createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica);
createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null);
assertTrue(createUrl.equals(NODE3_URL) || createUrl.equals(NODE4_URL));
}
@ -121,27 +121,27 @@ public class SharedFSAutoReplicaFailoverUtilsTest extends SolrTestCaseJ4 {
public void testGetBestCreateUrlMultipleCollections() throws Exception {
Result result = buildClusterState("csr*r2csr2", NODE1);
String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica);
assertEquals(null, createUrl);
String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null);
assertNull(createUrl);
result = buildClusterState("csr*r2csr2", NODE1);
createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica);
assertEquals(null, createUrl);
createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null);
assertNull(createUrl);
result = buildClusterState("csr*r2csr2", NODE1, NODE2);
createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica);
assertEquals(null, createUrl);
createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null);
assertNull(createUrl);
}
@Test
public void testGetBestCreateUrlMultipleCollections2() {
Result result = buildClusterState("csr*r2sr3cr2", NODE1);
String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica);
assertEquals(null, createUrl);
String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null);
assertNull(createUrl);
result = buildClusterState("csr*r2sr3cr2", NODE1, NODE2, NODE3);
createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica);
createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null);
assertEquals(NODE3_URL, createUrl);
}
@ -149,49 +149,74 @@ public class SharedFSAutoReplicaFailoverUtilsTest extends SolrTestCaseJ4 {
@Test
public void testGetBestCreateUrlMultipleCollections3() {
Result result = buildClusterState("csr5r1sr4r2sr3r6csr2*r6sr5r3sr4r3", NODE1, NODE4, NODE5, NODE6);
String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica);
String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null);
assertEquals(NODE1_URL, createUrl);
}
@Test
public void testGetBestCreateUrlMultipleCollections4() {
Result result = buildClusterState("csr1r4sr3r5sr2r6csr5r6sr4r6sr5*r4", NODE6);
String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica);
String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null);
assertEquals(NODE6_URL, createUrl);
}
@Test
public void testFailOverToEmptySolrInstance() {
Result result = buildClusterState("csr1*r1sr1csr1", NODE2);
String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica);
String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null);
assertEquals(NODE2_URL, createUrl);
}
@Test
public void testFavorForeignSlices() {
Result result = buildClusterState("csr*sr2csr3r3", NODE2, NODE3);
String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica);
String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null);
assertEquals(NODE3_URL, createUrl);
result = buildClusterState("csr*sr2csr3r3r3r3r3r3r3", NODE2, NODE3);
createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica);
createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null);
assertEquals(NODE2_URL, createUrl);
}
@Test
public void testCollectionMaxNodesPerShard() {
Result result = buildClusterState("csr*sr2", 1, 1, NODE2);
String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica);
assertEquals(null, createUrl);
String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null);
assertNull(createUrl);
result = buildClusterState("csr*sr2", 1, 2, NODE2);
createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica);
createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null);
assertEquals(NODE2_URL, createUrl);
result = buildClusterState("csr*csr2r2", 1, 1, NODE2);
createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica);
createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, null);
assertEquals(NODE2_URL, createUrl);
}
@Test
public void testMaxCoresPerNode() {
Result result = buildClusterState("csr*sr2", 1, 1, NODE2);
String createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, 1);
assertNull(createUrl);
createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, 2);
assertNull(createUrl);
result = buildClusterState("csr*sr2", 1, 2, NODE2);
createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, 2);
assertEquals(NODE2_URL, createUrl);
result = buildClusterState("csr*sr2sr3sr4", 1, 1, NODE2, NODE3, NODE4);
createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, 1);
assertNull(createUrl);
createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, 2);
assertNull(createUrl);
result = buildClusterState("csr*sr2sr3sr4", 1, 2, NODE2, NODE3, NODE4);
createUrl = OverseerAutoReplicaFailoverThread.getBestCreateUrl(result.reader, result.badReplica, 2);
assertTrue(createUrl.equals(NODE3_URL) || createUrl.equals(NODE4_URL));
}
private Result buildClusterState(String string, String ... liveNodes) {
return buildClusterState(string, 1, liveNodes);
@ -351,7 +376,7 @@ public class SharedFSAutoReplicaFailoverUtilsTest extends SolrTestCaseJ4 {
// trunk briefly had clusterstate taking a zkreader :( this was required to work around that - leaving
// until that issue is resolved.
MockZkStateReader reader = new MockZkStateReader(null, collectionStates.keySet());
ClusterState clusterState = new ClusterState(1, new HashSet<String>(Arrays.asList(liveNodes)), collectionStates);
ClusterState clusterState = new ClusterState(1, new HashSet<>(Arrays.asList(liveNodes)), collectionStates);
reader = new MockZkStateReader(clusterState, collectionStates.keySet());
String json;

View File

@ -88,6 +88,7 @@ public class ZkStateReader implements Closeable {
public static final String REPLICATION_FACTOR = "replicationFactor";
public static final String MAX_SHARDS_PER_NODE = "maxShardsPerNode";
public static final String AUTO_ADD_REPLICAS = "autoAddReplicas";
public static final String MAX_CORES_PER_NODE = "maxCoresPerNode";
public static final String ROLES = "/roles.json";
@ -137,7 +138,8 @@ public class ZkStateReader implements Closeable {
LEGACY_CLOUD,
URL_SCHEME,
AUTO_ADD_REPLICAS,
BACKUP_LOCATION)));
BACKUP_LOCATION,
MAX_CORES_PER_NODE)));
/**
* Returns config set name for collection.