SOLR-12648: Autoscaling framework based replica placement is not used unless a policy is specified or non-empty cluster policy exists

Shalin Shekhar Mangar committed on 2018-10-03 15:52:51 +05:30
commit 65105aa81b, parent e687748405
7 changed files with 308 additions and 16 deletions

solr/CHANGES.txt

@@ -128,6 +128,9 @@ Bug Fixes
* SOLR-12776: Setting of TMP in solr.cmd causes invisibility of Solr to JDK tools (Petr Bodnar via Erick Erickson)
* SOLR-12648: Autoscaling framework based replica placement is not used unless a policy is specified or
non-empty cluster policy exists. (shalin)
================== 7.5.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.

solr/core/src/java/org/apache/solr/cloud/CloudUtil.java

@@ -23,10 +23,13 @@ import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import org.apache.commons.io.FileUtils;
import org.apache.solr.client.solrj.cloud.SolrCloudManager;
import org.apache.solr.client.solrj.cloud.autoscaling.AutoScalingConfig;
import org.apache.solr.client.solrj.cloud.autoscaling.Policy;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.cloud.ClusterState;
@@ -45,8 +48,8 @@ import org.slf4j.LoggerFactory;
public class CloudUtil {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
/**
* See if coreNodeName has been taken over by another baseUrl and unload core
* + throw exception if it has been.
@@ -57,26 +60,26 @@ public class CloudUtil {
ZkController zkController = cc.getZkController();
String thisCnn = zkController.getCoreNodeName(desc);
String thisBaseUrl = zkController.getBaseUrl();
log.debug("checkSharedFSFailoverReplaced running for coreNodeName={} baseUrl={}", thisCnn, thisBaseUrl);
// if we see our core node name on a different base url, unload
final DocCollection docCollection = zkController.getClusterState().getCollectionOrNull(desc.getCloudDescriptor().getCollectionName());
if (docCollection != null && docCollection.getSlicesMap() != null) {
Map<String,Slice> slicesMap = docCollection.getSlicesMap();
for (Slice slice : slicesMap.values()) {
for (Replica replica : slice.getReplicas()) {
String cnn = replica.getName();
String baseUrl = replica.getStr(ZkStateReader.BASE_URL_PROP);
log.debug("compare against coreNodeName={} baseUrl={}", cnn, baseUrl);
if (thisCnn != null && thisCnn.equals(cnn)
&& !thisBaseUrl.equals(baseUrl)) {
if (cc.getLoadedCoreNames().contains(desc.getName())) {
cc.unload(desc.getName());
}
try {
FileUtils.deleteDirectory(desc.getInstanceDir().toFile());
} catch (IOException e) {
@@ -144,9 +147,37 @@ public class CloudUtil {
}
/**
* <b>Note:</b> where possible, the {@link #usePolicyFramework(DocCollection, SolrCloudManager)} method should
* be used instead of this method
*
* @return true if autoscaling policy framework should be used for replica placement
*/
public static boolean usePolicyFramework(SolrCloudManager cloudManager) throws IOException, InterruptedException {
Objects.requireNonNull(cloudManager, "The SolrCloudManager instance cannot be null");
return usePolicyFramework(Optional.empty(), cloudManager);
}
/**
* @return true if auto scaling policy framework should be used for replica placement
* for this collection, otherwise false
*/
public static boolean usePolicyFramework(DocCollection collection, SolrCloudManager cloudManager)
throws IOException, InterruptedException {
Objects.requireNonNull(collection, "The DocCollection instance cannot be null");
Objects.requireNonNull(cloudManager, "The SolrCloudManager instance cannot be null");
return usePolicyFramework(Optional.of(collection), cloudManager);
}
private static boolean usePolicyFramework(Optional<DocCollection> collection, SolrCloudManager cloudManager) throws IOException, InterruptedException {
AutoScalingConfig autoScalingConfig = cloudManager.getDistribStateManager().getAutoScalingConfig();
// if no autoscaling configuration exists then obviously we cannot use the policy framework
if (autoScalingConfig.getPolicy().isEmpty()) return false;
// do custom preferences exist
if (!autoScalingConfig.getPolicy().getClusterPreferences().equals(Policy.DEFAULT_PREFERENCES)) return true;
// does a cluster policy exist
if (!autoScalingConfig.getPolicy().getClusterPolicy().isEmpty()) return true;
// finally we check if the current collection has a policy
return !collection.isPresent() || collection.get().getPolicyName() != null;
}
}
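
Reading the private overload top to bottom: an entirely empty autoscaling configuration means legacy assignment, while custom cluster preferences, a non-empty cluster policy, or a collection-level policy each enable the framework. A minimal caller-side sketch follows; the class and method names are illustrative, not part of the commit:

import java.io.IOException;

import org.apache.solr.client.solrj.cloud.SolrCloudManager;
import org.apache.solr.cloud.CloudUtil;
import org.apache.solr.common.cloud.DocCollection;

class PlacementDecisionSketch {
  // True -> route placement through the autoscaling policy framework;
  // false -> fall back to legacy assignment.
  static boolean usePolicyPlacement(DocCollection collection, SolrCloudManager cloudManager)
      throws IOException, InterruptedException {
    return CloudUtil.usePolicyFramework(collection, cloudManager);
  }
}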

solr/core/src/java/org/apache/solr/cloud/api/collections/Assign.java

@@ -41,6 +41,7 @@ import org.apache.solr.client.solrj.cloud.autoscaling.AutoScalingConfig;
import org.apache.solr.client.solrj.cloud.autoscaling.BadVersionException;
import org.apache.solr.client.solrj.cloud.autoscaling.PolicyHelper;
import org.apache.solr.client.solrj.cloud.autoscaling.VersionedData;
import org.apache.solr.cloud.CloudUtil;
import org.apache.solr.cloud.rule.ReplicaAssigner;
import org.apache.solr.cloud.rule.Rule;
import org.apache.solr.common.SolrException;
@@ -583,7 +584,7 @@ public class Assign {
AutoScalingConfig autoScalingConfig = solrCloudManager.getDistribStateManager().getAutoScalingConfig();
StrategyType strategyType = null;
if ((ruleMaps == null || ruleMaps.isEmpty()) && !CloudUtil.usePolicyFramework(collection, solrCloudManager)) {
strategyType = StrategyType.LEGACY;
} else if (ruleMaps != null && !ruleMaps.isEmpty()) {
strategyType = StrategyType.RULES;
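The hunk is cut off above; for context, a hedged paraphrase of the full selection order follows. The LEGACY and RULES branches appear verbatim above; the final policy branch is inferred from the surrounding code and is an assumption here, as are the simplified parameter types:

import java.util.List;
import java.util.Map;

// Sketch of Assign's placement-strategy selection after this commit.
class StrategySelectionSketch {
  enum StrategyType { LEGACY, RULES, POLICY }

  static StrategyType select(List<Map> ruleMaps, boolean usePolicyFramework) {
    if ((ruleMaps == null || ruleMaps.isEmpty()) && !usePolicyFramework) {
      return StrategyType.LEGACY;  // round-robin style node assignment
    } else if (ruleMaps != null && !ruleMaps.isEmpty()) {
      return StrategyType.RULES;   // snitch/rule based assignment
    }
    return StrategyType.POLICY;    // autoscaling policy framework (inferred)
  }
}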

solr/core/src/java/org/apache/solr/cloud/api/collections/CreateCollectionCmd.java

@@ -36,7 +36,6 @@ import java.util.concurrent.atomic.AtomicReference;
import org.apache.solr.client.solrj.cloud.DistribStateManager;
import org.apache.solr.client.solrj.cloud.SolrCloudManager;
import org.apache.solr.client.solrj.cloud.autoscaling.AlreadyExistsException;
import org.apache.solr.client.solrj.cloud.autoscaling.AutoScalingConfig;
import org.apache.solr.client.solrj.cloud.autoscaling.BadVersionException;
import org.apache.solr.client.solrj.cloud.autoscaling.NotEmptyException;
import org.apache.solr.client.solrj.cloud.autoscaling.Policy;
@@ -134,8 +133,7 @@ public class CreateCollectionCmd implements OverseerCollectionMessageHandler.Cmd
String router = message.getStr("router.name", DocRouter.DEFAULT_NAME);
String policy = message.getStr(Policy.POLICY);
boolean usePolicyFramework = CloudUtil.usePolicyFramework(ocmh.cloudManager) || policy != null;
// fail fast if parameters are wrong or incomplete
List<String> shardNames = populateShardNames(message, router);
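One subtlety here: at create time the DocCollection does not exist in cluster state yet, so the single-argument overload is used and the collection-level policy named in the create message has to be OR-ed in explicitly. Restated with the reasoning as comments (a sketch with the same semantics as the line above):

boolean usePolicyFramework =
    CloudUtil.usePolicyFramework(ocmh.cloudManager) // cluster preferences or cluster policy in ZK
    || policy != null;                              // policy named in the CREATE message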

solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/Policy.java

@@ -94,7 +94,15 @@ public class Policy implements MapWriter {
final List<Pair<String, Type>> params;
final List<String> perReplicaAttributes;
final int zkVersion;
/**
* True if cluster policy, preferences and custom policies are all non-existent
*/
final boolean empty;
/**
* True if the cluster preferences were originally empty, false otherwise. It is used to figure out if
* the current preferences were implicitly added or not.
*/
final boolean emptyPreferences;
public Policy() {
this(Collections.emptyMap());
@@ -115,7 +123,8 @@
Preference preference = initialClusterPreferences.get(i);
preference.next = initialClusterPreferences.get(i + 1);
}
emptyPreferences = initialClusterPreferences.isEmpty();
if (emptyPreferences) {
initialClusterPreferences.addAll(DEFAULT_PREFERENCES);
}
this.clusterPreferences = Collections.unmodifiableList(initialClusterPreferences);
@@ -162,7 +171,8 @@
this.zkVersion = version;
this.policies = policies != null ? Collections.unmodifiableMap(policies) : Collections.emptyMap();
this.clusterPolicy = clusterPolicy != null ? Collections.unmodifiableList(clusterPolicy) : Collections.emptyList();
this.emptyPreferences = clusterPreferences == null;
this.clusterPreferences = emptyPreferences ? DEFAULT_PREFERENCES : Collections.unmodifiableList(clusterPreferences);
this.params = Collections.unmodifiableList(
buildParams(this.clusterPreferences, this.clusterPolicy, this.policies).stream()
.map(s -> new Pair<>(s, VariableBase.getTagType(s)))
@@ -211,6 +221,10 @@
@Override
public void writeMap(EntryWriter ew) throws IOException {
// if we were initially empty then we don't want to persist any implicitly added
// policy or preferences
if (empty) return;
if (!policies.isEmpty()) {
ew.put(POLICIES, (MapWriter) ew1 -> {
for (Map.Entry<String, List<Clause>> e : policies.entrySet()) {
@@ -218,7 +232,7 @@
}
});
}
if (!emptyPreferences && !clusterPreferences.isEmpty()) {
ew.put(CLUSTER_PREFERENCES, (IteratorWriter) iw -> {
for (Preference p : clusterPreferences) iw.add(p);
});
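
The net effect is that implicitly added preferences never round-trip to ZooKeeper, which is exactly what lets CloudUtil.usePolicyFramework treat default preferences as "no preferences". A runnable sketch of the new contract, mirroring the test at the end of this commit (class name illustrative; all APIs used here appear elsewhere in this diff):

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import org.apache.solr.client.solrj.cloud.autoscaling.Policy;
import org.apache.solr.client.solrj.cloud.autoscaling.Preference;
import org.apache.solr.common.MapWriter;
import org.apache.solr.common.util.Utils;

class PolicyRoundTripSketch {
  static List<String> writtenKeys(Policy p) throws IOException {
    List<String> keys = new ArrayList<>();
    p.writeMap(new MapWriter.EntryWriter() {
      @Override
      public MapWriter.EntryWriter put(String k, Object v) {
        keys.add(k);  // record the key; discard the value
        return this;
      }
    });
    return keys;
  }

  public static void main(String[] args) throws IOException {
    // Implicitly added default preferences are not serialized at all.
    assert writtenKeys(new Policy()).isEmpty();

    // The same preferences supplied explicitly ARE serialized.
    List<Map> prefs = Policy.DEFAULT_PREFERENCES.stream()
        .map(Preference::getOriginal).collect(Collectors.toList());
    Policy explicit = new Policy(Utils.makeMap(Policy.CLUSTER_PREFERENCES, prefs));
    assert writtenKeys(explicit).contains(Policy.CLUSTER_PREFERENCES);
  }
}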

solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/Preference.java

@@ -19,6 +19,7 @@ package org.apache.solr.client.solrj.cloud.autoscaling;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.Collections;
import java.util.List;
import java.util.Map;
@@ -137,4 +138,11 @@ public class Preference implements MapWriter {
public String toString() {
return Utils.toJSONString(this);
}
/**
* @return an unmodifiable view of the original map from which this object was constructed
*/
public Map getOriginal() {
return Collections.unmodifiableMap(original);
}
}
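
A typical use, lifted almost verbatim from the test below, recovers the raw maps behind the built-in default preferences (class name illustrative):

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import org.apache.solr.client.solrj.cloud.autoscaling.Policy;
import org.apache.solr.client.solrj.cloud.autoscaling.Preference;

class PreferenceOriginalSketch {
  static List<Map> defaultPreferenceMaps() {
    return Policy.DEFAULT_PREFERENCES.stream()
        .map(Preference::getOriginal)   // unmodifiable view of each raw map
        .collect(Collectors.toList());
  }
}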

solr/solrj/src/test/org/apache/solr/client/solrj/cloud/autoscaling/TestPolicy.java

@@ -21,6 +21,7 @@ package org.apache.solr.client.solrj.cloud.autoscaling;
import java.io.IOException;
import java.io.StringWriter;
import java.lang.invoke.MethodHandles;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
@@ -33,6 +34,7 @@ import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
@@ -47,11 +49,15 @@ import org.apache.solr.client.solrj.cloud.autoscaling.Suggester.Hint;
import org.apache.solr.client.solrj.impl.ClusterStateProvider;
import org.apache.solr.client.solrj.impl.SolrClientNodeStateProvider;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.cloud.Overseer;
import org.apache.solr.cloud.api.collections.Assign;
import org.apache.solr.common.MapWriter;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.DocRouter;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.ReplicaPosition;
import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.cloud.rule.ImplicitSnitch;
import org.apache.solr.common.params.CollectionParams;
@@ -71,9 +77,13 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.apache.solr.client.solrj.cloud.autoscaling.Policy.CLUSTER_PREFERENCES;
import static org.apache.solr.client.solrj.cloud.autoscaling.Variable.Type.CORES;
import static org.apache.solr.client.solrj.cloud.autoscaling.Variable.Type.FREEDISK;
import static org.apache.solr.client.solrj.cloud.autoscaling.Variable.Type.REPLICA;
import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.REPLICA_TYPE;
import static org.apache.solr.common.cloud.ZkStateReader.SHARD_ID_PROP;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.ADDREPLICA;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.MOVEREPLICA;
@@ -3036,5 +3046,232 @@ public class TestPolicy extends SolrTestCaseJ4 {
}
public void testAutoscalingPreferencesUsedWithNoPolicy() throws IOException, InterruptedException {
String dataproviderdata = "{" +
" 'liveNodes': [" +
" 'node1:8983'," +
" 'node2:8984'," +
" 'node3:8985'" +
" ]," +
" 'replicaInfo': {" +
" 'node1:8983': {" +
" 'c1': {" +
" 's1': [" +
" {'r1': {'type': 'NRT', 'INDEX.sizeInGB':'1100'}}," +
" {'r2': {'type': 'NRT'}}" +
" ]," +
" 's2': [" +
" {'r1': {'type': 'NRT', 'INDEX.sizeInGB':'1100'}}," +
" {'r2': {'type': 'NRT'}}" +
" ]" +
" }" +
" }" +
" }," +
" 'nodeValues': {" +
" 'node1:8983': {" +
" 'cores': 4," +
" 'freedisk': 300," +
" 'totaldisk': 4700," +
" 'port': 8983" +
" }," +
" 'node2:8984': {" +
" 'cores': 0," +
" 'freedisk': 1000," +
" 'totaldisk': 1200," +
" 'port': 8984" +
" }," +
" 'node3:8985': {" +
" 'cores': 0," +
" 'freedisk': 1651," +
" 'totaldisk': 1700," +
" 'port': 8985" +
" }" +
" }," +
" 'autoscalingJson': {" +
" 'cluster-preferences': [" +
" { 'maximize': 'freedisk'}," +
" { 'minimize': 'cores', 'precision': 3}" +
" ]" +
" }" +
"}";
String clusterState = "{\n" +
" \"c1\" : {\n" +
" \"router\":{\"name\":\"compositeId\"},\n" +
" \"maxShardsPerNode\":-1,\n" +
" \"shards\" : {\n" +
" \"s1\" : {\n" +
" \"replicas\" : {\n" +
" \"r1\" : {\n" +
" \"type\" : \"NRT\",\n" +
" \"node_name\" : \"node1:8983\",\n" +
" \"state\" : \"active\",\n" +
" \"leader\" : \"true\"\n" +
" },\n" +
" \"r2\" : {\n" +
" \"type\" : \"NRT\",\n" +
" \"node_name\" : \"node1:8983\",\n" +
" \"state\" : \"active\"\n" +
" }\n" +
" }\n" +
" },\n" +
" \"s2\" : {\n" +
" \"replicas\" : {\n" +
" \"r1\" : {\n" +
" \"type\" : \"NRT\",\n" +
" \"node_name\" : \"node1:8983\",\n" +
" \"state\" : \"active\",\n" +
" \"leader\" : \"true\"\n" +
" },\n" +
" \"r2\" : {\n" +
" \"type\" : \"NRT\",\n" +
" \"node_name\" : \"node1:8983\",\n" +
" \"state\" : \"active\"\n" +
" }\n" +
" }\n" +
" }\n" +
" }\n" +
" }\n" +
"}";
Map m = (Map) Utils.fromJSONString(dataproviderdata);
Map replicaInfo = (Map) m.get("replicaInfo");
replicaInfo.forEach((node, val) -> {
Map m1 = (Map) val;
m1.forEach((coll, val2) -> {
Map m2 = (Map) val2;
m2.forEach((shard, val3) -> {
List l3 = (List) val3;
for (int i = 0; i < l3.size(); i++) {
Object o = l3.get(i);
Map m3 = (Map) o;
String name = m3.keySet().iterator().next().toString();
m3 = (Map) m3.get(name);
Replica.Type type = Replica.Type.get((String) m3.get("type"));
l3.set(i, new ReplicaInfo(name, name
, coll.toString(), shard.toString(), type, (String) node, m3));
}
});
});
});
AutoScalingConfig asc = m.containsKey("autoscalingJson") ? new AutoScalingConfig((Map<String, Object>) m.get("autoscalingJson")) : null;
DelegatingCloudManager cloudManager = new DelegatingCloudManager(null) {
@Override
public DistribStateManager getDistribStateManager() {
return new DelegatingDistribStateManager(null) {
@Override
public AutoScalingConfig getAutoScalingConfig() {
return asc;
}
};
}
@Override
public ClusterStateProvider getClusterStateProvider() {
return new DelegatingClusterStateProvider(null) {
@Override
public Set<String> getLiveNodes() {
return new HashSet<>((Collection<String>) m.get("liveNodes"));
}
@Override
public ClusterState getClusterState() throws IOException {
return ClusterState.load(0, clusterState.getBytes(Charset.forName("UTF-8")), getLiveNodes(), ZkStateReader.getCollectionPath("c1"));
}
};
}
@Override
public NodeStateProvider getNodeStateProvider() {
return new DelegatingNodeStateProvider(null) {
@Override
public Map<String, Object> getNodeValues(String node, Collection<String> tags) {
Map<String, Object> result = (Map<String, Object>) Utils.getObjectByPath(m, false, Arrays.asList("nodeValues", node));
return result == null ? Collections.emptyMap() : result;
}
@Override
public Map<String, Map<String, List<ReplicaInfo>>> getReplicaInfo(String node, Collection<String> keys) {
Map<String, Map<String, List<ReplicaInfo>>> result = (Map<String, Map<String, List<ReplicaInfo>>>) Utils.getObjectByPath(m, false, Arrays.asList("replicaInfo", node));
return result == null ? Collections.emptyMap() : result;
}
};
}
};
ZkNodeProps message = new ZkNodeProps(
Overseer.QUEUE_OPERATION, ADDREPLICA.toLower(),
COLLECTION_PROP, "c1",
SHARD_ID_PROP, "s1",
REPLICA_TYPE, Replica.Type.NRT.toString()
);
Assign.AssignRequest assignRequest = new Assign.AssignRequestBuilder()
.forCollection("c1")
.forShard(Collections.singletonList("s1"))
.assignNrtReplicas(1)
.build();
Assign.AssignStrategyFactory assignStrategyFactory = new Assign.AssignStrategyFactory(cloudManager);
ClusterState state = cloudManager.getClusterStateProvider().getClusterState();
DocCollection collection = state.getCollection("c1");
Assign.AssignStrategy assignStrategy = assignStrategyFactory.create(state, collection);
List<ReplicaPosition> replicaPositions = assignStrategy.assign(cloudManager, assignRequest);
assertEquals(1, replicaPositions.size());
ReplicaPosition replicaPosition = replicaPositions.get(0);
assertEquals("node3:8985", replicaPosition.node); // only node3:8985 has enough space to handle the new replica
assertEquals("s1", replicaPosition.shard); // sanity check
}
/**
* Tests that an empty policy should not persist implicitly added keys to MapWriter
* <p>
* The reason behind doing this is to ensure that implicitly added cluster preferences do not ever
* go to ZooKeeper so that we can decide whether to enable autoscaling policy framework or not.
*
* @see org.apache.solr.cloud.CloudUtil#usePolicyFramework(DocCollection, SolrCloudManager)
*/
public void testPolicyMapWriterWithEmptyPreferences() throws IOException {
List<Map> defaultPreferences = Policy.DEFAULT_PREFERENCES
.stream().map(preference -> preference.getOriginal()).collect(Collectors.toList());
// first we create a completely empty policy
Policy policy = new Policy();
// sanity check that the default cluster preferences were added implicitly
assertNotNull(policy.getClusterPreferences());
// and they were the same as the default preferences
assertEquals(policy.getClusterPreferences().size(), defaultPreferences.size());
Set<String> writtenKeys = new HashSet<>();
policy.writeMap(new MapWriter.EntryWriter() {
@Override
public MapWriter.EntryWriter put(String k, Object v) throws IOException {
writtenKeys.add(k);
return this;
}
});
// but those implicitly added cluster preferences are never written by MapWriter
assertEquals(0, writtenKeys.size());
// reset
writtenKeys.clear();
// now we create a policy that only has cluster preferences which happen to be the same as the default
// preferences
policy = new Policy(Utils.makeMap(CLUSTER_PREFERENCES, defaultPreferences));
// sanity checks
assertNotNull(policy.getClusterPreferences());
assertEquals(policy.getClusterPreferences().size(), defaultPreferences.size());
policy.writeMap(new MapWriter.EntryWriter() {
@Override
public MapWriter.EntryWriter put(String k, Object v) throws IOException {
writtenKeys.add(k);
return this;
}
});
// since the user explicitly added those preferences, they should be written by MapWriter
assertEquals(1, writtenKeys.size());
assertTrue(writtenKeys.contains(CLUSTER_PREFERENCES));
}
}