mirror of https://github.com/apache/lucene.git
SOLR-9527: Improve distribution of replicas when restoring a collection
This commit is contained in:
parent
4cc20d27cd
commit
b1efd37ba7
|
@ -335,6 +335,10 @@ Bug Fixes
|
|||
* SOLR-10630: HttpSolrCall.getAuthCtx().new AuthorizationContext() {...}.getParams()
|
||||
sometimes throws java.lang.NullPointerException (hu xiaodong via shalin)
|
||||
|
||||
* SOLR-9527: Improve distribution of replicas when restoring a collection
|
||||
(Hrishikesh Gadre, Stephen Lewis, Rohit, Varun Thacker)
|
||||
|
||||
|
||||
Other Changes
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -58,6 +58,7 @@ import org.apache.solr.common.cloud.ZkConfigManager;
|
|||
import org.apache.solr.common.cloud.ZkCoreNodeProps;
|
||||
import org.apache.solr.common.cloud.ZkNodeProps;
|
||||
import org.apache.solr.common.cloud.ZkStateReader;
|
||||
import org.apache.solr.common.params.CollectionAdminParams;
|
||||
import org.apache.solr.common.params.CollectionParams.CollectionAction;
|
||||
import org.apache.solr.common.params.CoreAdminParams;
|
||||
import org.apache.solr.common.params.CoreAdminParams.CoreAdminAction;
|
||||
|
@ -104,9 +105,9 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler
|
|||
public static final String NUM_SLICES = "numShards";
|
||||
|
||||
static final boolean CREATE_NODE_SET_SHUFFLE_DEFAULT = true;
|
||||
public static final String CREATE_NODE_SET_SHUFFLE = "createNodeSet.shuffle";
|
||||
public static final String CREATE_NODE_SET_SHUFFLE = CollectionAdminParams.CREATE_NODE_SET_SHUFFLE_PARAM;
|
||||
public static final String CREATE_NODE_SET_EMPTY = "EMPTY";
|
||||
public static final String CREATE_NODE_SET = "createNodeSet";
|
||||
public static final String CREATE_NODE_SET = CollectionAdminParams.CREATE_NODE_SET_PARAM;
|
||||
|
||||
public static final String ROUTER = "router";
|
||||
|
||||
|
|
|
@ -20,16 +20,22 @@ package org.apache.solr.cloud;
|
|||
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.net.URI;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.Properties;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.solr.cloud.overseer.OverseerAction;
|
||||
import org.apache.solr.cloud.rule.ReplicaAssigner;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.common.cloud.ClusterState;
|
||||
import org.apache.solr.common.cloud.DocCollection;
|
||||
import org.apache.solr.common.cloud.ImplicitDocRouter;
|
||||
|
@ -53,6 +59,7 @@ import static org.apache.solr.cloud.OverseerCollectionMessageHandler.COLL_PROPS;
|
|||
import static org.apache.solr.cloud.OverseerCollectionMessageHandler.CREATE_NODE_SET;
|
||||
import static org.apache.solr.cloud.OverseerCollectionMessageHandler.CREATE_NODE_SET_EMPTY;
|
||||
import static org.apache.solr.cloud.OverseerCollectionMessageHandler.NUM_SLICES;
|
||||
import static org.apache.solr.cloud.OverseerCollectionMessageHandler.RANDOM;
|
||||
import static org.apache.solr.cloud.OverseerCollectionMessageHandler.SHARDS_PROP;
|
||||
import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_PROP;
|
||||
import static org.apache.solr.common.cloud.ZkStateReader.SHARD_ID_PROP;
|
||||
|
@ -60,7 +67,8 @@ import static org.apache.solr.common.params.CollectionParams.CollectionAction.CR
|
|||
import static org.apache.solr.common.params.CollectionParams.CollectionAction.CREATESHARD;
|
||||
import static org.apache.solr.common.params.CommonAdminParams.ASYNC;
|
||||
import static org.apache.solr.common.params.CommonParams.NAME;
|
||||
|
||||
import static org.apache.solr.common.cloud.ZkStateReader.REPLICATION_FACTOR;
|
||||
import static org.apache.solr.common.cloud.ZkStateReader.MAX_SHARDS_PER_NODE;
|
||||
|
||||
public class RestoreCmd implements OverseerCollectionMessageHandler.Cmd {
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
|
@ -74,6 +82,7 @@ public class RestoreCmd implements OverseerCollectionMessageHandler.Cmd {
|
|||
@Override
|
||||
public void call(ClusterState state, ZkNodeProps message, NamedList results) throws Exception {
|
||||
// TODO maybe we can inherit createCollection's options/code
|
||||
|
||||
String restoreCollectionName = message.getStr(COLLECTION_PROP);
|
||||
String backupName = message.getStr(NAME); // of backup
|
||||
ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler();
|
||||
|
@ -93,6 +102,22 @@ public class RestoreCmd implements OverseerCollectionMessageHandler.Cmd {
|
|||
String backupCollection = properties.getProperty(BackupManager.COLLECTION_NAME_PROP);
|
||||
DocCollection backupCollectionState = backupMgr.readCollectionState(location, backupName, backupCollection);
|
||||
|
||||
// Get the Solr nodes to restore a collection.
|
||||
final List<String> nodeList = OverseerCollectionMessageHandler.getLiveOrLiveAndCreateNodeSetList(
|
||||
zkStateReader.getClusterState().getLiveNodes(), message, RANDOM);
|
||||
|
||||
int numShards = backupCollectionState.getActiveSlices().size();
|
||||
int repFactor = message.getInt(REPLICATION_FACTOR, backupCollectionState.getReplicationFactor());
|
||||
int maxShardsPerNode = message.getInt(MAX_SHARDS_PER_NODE, backupCollectionState.getMaxShardsPerNode());
|
||||
int availableNodeCount = nodeList.size();
|
||||
if ((numShards * repFactor) > (availableNodeCount * maxShardsPerNode)) {
|
||||
throw new SolrException(ErrorCode.BAD_REQUEST,
|
||||
String.format(Locale.ROOT, "Solr cloud with available number of nodes:%d is insufficient for"
|
||||
+ " restoring a collection with %d shards, replication factor %d and maxShardsPerNode %d."
|
||||
+ " Consider increasing maxShardsPerNode value OR number of available nodes.",
|
||||
availableNodeCount, numShards, repFactor, maxShardsPerNode));
|
||||
}
|
||||
|
||||
//Upload the configs
|
||||
String configName = (String) properties.get(COLL_CONF);
|
||||
String restoreConfigName = message.getStr(COLL_CONF, configName);
|
||||
|
@ -168,9 +193,16 @@ public class RestoreCmd implements OverseerCollectionMessageHandler.Cmd {
|
|||
inQueue.offer(Utils.toJSON(new ZkNodeProps(propMap)));
|
||||
}
|
||||
|
||||
// TODO how do we leverage the CREATE_NODE_SET / RULE / SNITCH logic in createCollection?
|
||||
// TODO how do we leverage the RULE / SNITCH logic in createCollection?
|
||||
|
||||
ClusterState clusterState = zkStateReader.getClusterState();
|
||||
|
||||
List<String> sliceNames = new ArrayList<>();
|
||||
restoreCollection.getSlices().forEach(x -> sliceNames.add(x.getName()));
|
||||
|
||||
Map<ReplicaAssigner.Position, String> positionVsNodes = ocmh.identifyNodes(clusterState, nodeList,
|
||||
message, sliceNames, repFactor);
|
||||
|
||||
//Create one replica per shard and copy backed up data to it
|
||||
for (Slice slice : restoreCollection.getSlices()) {
|
||||
log.debug("Adding replica for shard={} collection={} ", slice.getName(), restoreCollection);
|
||||
|
@ -178,6 +210,19 @@ public class RestoreCmd implements OverseerCollectionMessageHandler.Cmd {
|
|||
propMap.put(Overseer.QUEUE_OPERATION, CREATESHARD);
|
||||
propMap.put(COLLECTION_PROP, restoreCollectionName);
|
||||
propMap.put(SHARD_ID_PROP, slice.getName());
|
||||
|
||||
// Get the first node matching the shard to restore in
|
||||
String node;
|
||||
for (Map.Entry<ReplicaAssigner.Position, String> pvn : positionVsNodes.entrySet()) {
|
||||
ReplicaAssigner.Position position = pvn.getKey();
|
||||
if (position.shard == slice.getName()) {
|
||||
node = pvn.getValue();
|
||||
propMap.put(CoreAdminParams.NODE, node);
|
||||
positionVsNodes.remove(position);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// add async param
|
||||
if (asyncId != null) {
|
||||
propMap.put(ASYNC, asyncId);
|
||||
|
@ -227,6 +272,19 @@ public class RestoreCmd implements OverseerCollectionMessageHandler.Cmd {
|
|||
HashMap<String, Object> propMap = new HashMap<>();
|
||||
propMap.put(COLLECTION_PROP, restoreCollectionName);
|
||||
propMap.put(SHARD_ID_PROP, slice.getName());
|
||||
|
||||
// Get the first node matching the shard to restore in
|
||||
String node;
|
||||
for (Map.Entry<ReplicaAssigner.Position, String> pvn : positionVsNodes.entrySet()) {
|
||||
ReplicaAssigner.Position position = pvn.getKey();
|
||||
if (position.shard == slice.getName()) {
|
||||
node = pvn.getValue();
|
||||
propMap.put(CoreAdminParams.NODE, node);
|
||||
positionVsNodes.remove(position);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// add async param
|
||||
if (asyncId != null) {
|
||||
propMap.put(ASYNC, asyncId);
|
||||
|
|
|
@ -101,6 +101,7 @@ import static org.apache.solr.cloud.Overseer.QUEUE_OPERATION;
|
|||
import static org.apache.solr.cloud.OverseerCollectionMessageHandler.COLL_CONF;
|
||||
import static org.apache.solr.cloud.OverseerCollectionMessageHandler.COLL_PROP_PREFIX;
|
||||
import static org.apache.solr.cloud.OverseerCollectionMessageHandler.CREATE_NODE_SET;
|
||||
import static org.apache.solr.cloud.OverseerCollectionMessageHandler.CREATE_NODE_SET_EMPTY;
|
||||
import static org.apache.solr.cloud.OverseerCollectionMessageHandler.CREATE_NODE_SET_SHUFFLE;
|
||||
import static org.apache.solr.cloud.OverseerCollectionMessageHandler.NUM_SLICES;
|
||||
import static org.apache.solr.cloud.OverseerCollectionMessageHandler.ONLY_ACTIVE_NODES;
|
||||
|
@ -814,10 +815,19 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission
|
|||
throw new SolrException(ErrorCode.SERVER_ERROR, "Failed to check the existance of " + uri + ". Is it valid?", ex);
|
||||
}
|
||||
|
||||
String createNodeArg = req.getParams().get(CREATE_NODE_SET);
|
||||
if (CREATE_NODE_SET_EMPTY.equals(createNodeArg)) {
|
||||
throw new SolrException(
|
||||
SolrException.ErrorCode.BAD_REQUEST,
|
||||
"Cannot restore with a CREATE_NODE_SET of CREATE_NODE_SET_EMPTY."
|
||||
);
|
||||
}
|
||||
|
||||
Map<String, Object> params = req.getParams().getAll(null, NAME, COLLECTION_PROP);
|
||||
params.put(CoreAdminParams.BACKUP_LOCATION, location);
|
||||
// from CREATE_OP:
|
||||
req.getParams().getAll(params, COLL_CONF, REPLICATION_FACTOR, MAX_SHARDS_PER_NODE, STATE_FORMAT, AUTO_ADD_REPLICAS);
|
||||
req.getParams().getAll(params, COLL_CONF, REPLICATION_FACTOR, MAX_SHARDS_PER_NODE, STATE_FORMAT,
|
||||
AUTO_ADD_REPLICAS, CREATE_NODE_SET, CREATE_NODE_SET_SHUFFLE);
|
||||
copyPropertiesWithPrefix(req.getParams(), params, COLL_PROP_PREFIX);
|
||||
return params;
|
||||
}),
|
||||
|
|
|
@ -19,6 +19,8 @@ package org.apache.solr.cloud;
|
|||
import java.io.IOException;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
|
@ -28,6 +30,7 @@ import java.util.TreeMap;
|
|||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.solr.client.solrj.SolrQuery;
|
||||
import org.apache.solr.client.solrj.SolrServerException;
|
||||
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
|
||||
import org.apache.solr.client.solrj.impl.CloudSolrClient;
|
||||
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
|
||||
import org.apache.solr.client.solrj.request.CollectionAdminRequest.ClusterProp;
|
||||
|
@ -236,9 +239,24 @@ public abstract class AbstractCloudBackupRestoreTestCase extends SolrCloudTestCa
|
|||
// may need to increase maxShardsPerNode (e.g. if it was shard split, then now we need more)
|
||||
restore.setMaxShardsPerNode(origShardToDocCount.size());
|
||||
}
|
||||
|
||||
if (rarely()) { // Try with createNodeSet configuration
|
||||
int nodeSetSize = cluster.getJettySolrRunners().size() / 2;
|
||||
List<String> nodeStrs = new ArrayList<>(nodeSetSize);
|
||||
Iterator<JettySolrRunner> iter = cluster.getJettySolrRunners().iterator();
|
||||
for (int i = 0; i < nodeSetSize ; i++) {
|
||||
nodeStrs.add(iter.next().getNodeName());
|
||||
}
|
||||
restore.setCreateNodeSet(String.join(",", nodeStrs));
|
||||
restore.setCreateNodeSetShuffle(usually());
|
||||
// we need to double maxShardsPerNode value since we reduced number of available nodes by half.
|
||||
restore.setMaxShardsPerNode(origShardToDocCount.size() * 2);
|
||||
}
|
||||
|
||||
Properties props = new Properties();
|
||||
props.setProperty("customKey", "customVal");
|
||||
restore.setProperties(props);
|
||||
|
||||
if (sameConfig==false) {
|
||||
restore.setConfigName("customConfigName");
|
||||
}
|
||||
|
@ -268,6 +286,15 @@ public abstract class AbstractCloudBackupRestoreTestCase extends SolrCloudTestCa
|
|||
assertEquals(sameConfig ? "conf1" : "customConfigName",
|
||||
cluster.getSolrClient().getZkStateReader().readConfigName(restoreCollectionName));
|
||||
|
||||
Map<String, Integer> numReplicasByNodeName = new HashMap<>();
|
||||
restoreCollection.getReplicas().forEach(x -> {
|
||||
numReplicasByNodeName.put(x.getNodeName(), numReplicasByNodeName.getOrDefault(x.getNodeName(), 0) + 1);
|
||||
});
|
||||
numReplicasByNodeName.forEach((k, v) -> {
|
||||
assertTrue("Node " + k + " has " + v + " replicas. Expected num replicas : " + restoreCollection.getMaxShardsPerNode() ,
|
||||
v <= restoreCollection.getMaxShardsPerNode());
|
||||
});
|
||||
|
||||
// assert added core properties:
|
||||
// DWS: did via manual inspection.
|
||||
// TODO Find the applicable core.properties on the file system but how?
|
||||
|
|
|
@ -178,6 +178,11 @@ public class TestSolrCloudSnapshots extends SolrCloudTestCase {
|
|||
{
|
||||
CollectionAdminRequest.Restore restore = CollectionAdminRequest.restoreCollection(restoreCollectionName, backupName)
|
||||
.setLocation(backupLocation);
|
||||
if (replicaFailures) {
|
||||
// In this case one of the Solr servers would be down. Hence we need to increase
|
||||
// max_shards_per_node property for restore command to succeed.
|
||||
restore.setMaxShardsPerNode(2);
|
||||
}
|
||||
if (random().nextBoolean()) {
|
||||
assertEquals(0, restore.process(solrClient).getStatus());
|
||||
} else {
|
||||
|
|
|
@ -46,6 +46,8 @@ import org.apache.solr.common.util.ContentStream;
|
|||
import org.apache.solr.common.util.NamedList;
|
||||
|
||||
import static org.apache.solr.common.params.CollectionAdminParams.COUNT_PROP;
|
||||
import static org.apache.solr.common.params.CollectionAdminParams.CREATE_NODE_SET_PARAM;
|
||||
import static org.apache.solr.common.params.CollectionAdminParams.CREATE_NODE_SET_SHUFFLE_PARAM;
|
||||
|
||||
/**
|
||||
* This class is experimental and subject to change.
|
||||
|
@ -490,7 +492,7 @@ public abstract class CollectionAdminRequest<T extends CollectionAdminResponse>
|
|||
if (configName != null)
|
||||
params.set("collection.configName", configName);
|
||||
if (createNodeSet != null)
|
||||
params.set("createNodeSet", createNodeSet);
|
||||
params.set(CREATE_NODE_SET_PARAM, createNodeSet);
|
||||
if (numShards != null) {
|
||||
params.set( ZkStateReader.NUM_SHARDS_PROP, numShards);
|
||||
}
|
||||
|
@ -852,6 +854,8 @@ public abstract class CollectionAdminRequest<T extends CollectionAdminResponse>
|
|||
protected Integer maxShardsPerNode;
|
||||
protected Integer replicationFactor;
|
||||
protected Boolean autoAddReplicas;
|
||||
protected Optional<String> createNodeSet = Optional.empty();
|
||||
protected Optional<Boolean> createNodeSetShuffle = Optional.empty();
|
||||
protected Properties properties;
|
||||
|
||||
public Restore(String collection, String backupName) {
|
||||
|
@ -889,6 +893,22 @@ public abstract class CollectionAdminRequest<T extends CollectionAdminResponse>
|
|||
return this;
|
||||
}
|
||||
|
||||
public void setCreateNodeSet(String createNodeSet) {
|
||||
this.createNodeSet = Optional.of(createNodeSet);
|
||||
}
|
||||
|
||||
public Optional<String> getCreateNodeSet() {
|
||||
return createNodeSet;
|
||||
}
|
||||
|
||||
public Optional<Boolean> getCreateNodeSetShuffle() {
|
||||
return createNodeSetShuffle;
|
||||
}
|
||||
|
||||
public void setCreateNodeSetShuffle(boolean createNodeSetShuffle) {
|
||||
this.createNodeSetShuffle = Optional.of(createNodeSetShuffle);
|
||||
}
|
||||
|
||||
// Collection creation params in common:
|
||||
public Restore setConfigName(String config) { this.configName = config; return this; }
|
||||
public String getConfigName() { return configName; }
|
||||
|
@ -907,7 +927,7 @@ public abstract class CollectionAdminRequest<T extends CollectionAdminResponse>
|
|||
}
|
||||
public Restore setProperties(Properties properties) { this.properties = properties; return this;}
|
||||
|
||||
// TODO support createNodeSet, rule, snitch
|
||||
// TODO support rule, snitch
|
||||
|
||||
@Override
|
||||
public SolrParams getParams() {
|
||||
|
@ -931,6 +951,12 @@ public abstract class CollectionAdminRequest<T extends CollectionAdminResponse>
|
|||
if (repositoryName.isPresent()) {
|
||||
params.set(CoreAdminParams.BACKUP_REPOSITORY, repositoryName.get());
|
||||
}
|
||||
if (createNodeSet.isPresent()) {
|
||||
params.set(CREATE_NODE_SET_PARAM, createNodeSet.get());
|
||||
}
|
||||
if (createNodeSetShuffle.isPresent()) {
|
||||
params.set(CREATE_NODE_SET_SHUFFLE_PARAM, createNodeSetShuffle.get());
|
||||
}
|
||||
|
||||
return params;
|
||||
}
|
||||
|
@ -1111,7 +1137,7 @@ public abstract class CollectionAdminRequest<T extends CollectionAdminResponse>
|
|||
public SolrParams getParams() {
|
||||
ModifiableSolrParams params = (ModifiableSolrParams) super.getParams();
|
||||
if (nodeSet != null) {
|
||||
params.set("createNodeSet", nodeSet);
|
||||
params.set(CREATE_NODE_SET_PARAM, nodeSet);
|
||||
}
|
||||
if (properties != null) {
|
||||
addProperties(params, properties);
|
||||
|
|
|
@ -28,6 +28,16 @@ public interface CollectionAdminParams {
|
|||
|
||||
String COUNT_PROP = "count";
|
||||
|
||||
/**
|
||||
* A parameter to specify list of Solr nodes to be used (e.g. for collection creation or restore operation).
|
||||
*/
|
||||
public static final String CREATE_NODE_SET_PARAM = "createNodeSet";
|
||||
|
||||
/**
|
||||
* A parameter which specifies if the provided list of Solr nodes (via {@linkplain #CREATE_NODE_SET_PARAM})
|
||||
* should be shuffled before being used.
|
||||
*/
|
||||
public static final String CREATE_NODE_SET_SHUFFLE_PARAM = "createNodeSet.shuffle";
|
||||
|
||||
/**
|
||||
* A parameter to specify the name of the index backup strategy to be used.
|
||||
|
|
Loading…
Reference in New Issue