mirror of https://github.com/apache/lucene.git
SOLR-12489: User specified replicationFactor and maxShardsPerNode is used when specified during a restore operation.
A user can now specify nrtReplicas/tlogReplicas/pullReplicas while restoring the collection. Specifying replicationFactor or nrtReplicas have the same effect and only one can be specified
This commit is contained in:
parent
81906d31a6
commit
3d20e8967b
|
@ -92,6 +92,9 @@ Bug Fixes
|
|||
* SOLR-11676: Keep nrtReplicas and replicationFactor in sync while creating a collection and modifying a collection
|
||||
(Varun Thacker)
|
||||
|
||||
* SOLR-12489: User specified replicationFactor and maxShardsPerNode is used when specified during a restore operation.
|
||||
A user can now specify nrtReplicas/tlogReplicas/pullReplicas while restoring the collection.
|
||||
Specifying replicationFactor or nrtReplicas have the same effect and only one can be specified (Varun Thacker)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
|
|
|
@ -110,14 +110,21 @@ public class RestoreCmd implements OverseerCollectionMessageHandler.Cmd {
|
|||
zkStateReader.getClusterState().getLiveNodes(), message, OverseerCollectionMessageHandler.RANDOM);
|
||||
|
||||
int numShards = backupCollectionState.getActiveSlices().size();
|
||||
|
||||
int numNrtReplicas = getInt(message, NRT_REPLICAS, backupCollectionState.getNumNrtReplicas(), 0);
|
||||
if (numNrtReplicas == 0) {
|
||||
numNrtReplicas = getInt(message, REPLICATION_FACTOR, backupCollectionState.getReplicationFactor(), 0);
|
||||
|
||||
int numNrtReplicas;
|
||||
if (message.get(REPLICATION_FACTOR) != null) {
|
||||
numNrtReplicas = message.getInt(REPLICATION_FACTOR, 0);
|
||||
} else if (message.get(NRT_REPLICAS) != null) {
|
||||
numNrtReplicas = message.getInt(NRT_REPLICAS, 0);
|
||||
} else {
|
||||
//replicationFactor and nrtReplicas is always in sync after SOLR-11676
|
||||
//pick from cluster state of the backed up collection
|
||||
numNrtReplicas = backupCollectionState.getReplicationFactor();
|
||||
}
|
||||
int numTlogReplicas = getInt(message, TLOG_REPLICAS, backupCollectionState.getNumTlogReplicas(), 0);
|
||||
int numPullReplicas = getInt(message, PULL_REPLICAS, backupCollectionState.getNumPullReplicas(), 0);
|
||||
int totalReplicasPerShard = numNrtReplicas + numTlogReplicas + numPullReplicas;
|
||||
assert totalReplicasPerShard > 0;
|
||||
|
||||
int maxShardsPerNode = message.getInt(MAX_SHARDS_PER_NODE, backupCollectionState.getMaxShardsPerNode());
|
||||
int availableNodeCount = nodeList.size();
|
||||
|
@ -151,11 +158,16 @@ public class RestoreCmd implements OverseerCollectionMessageHandler.Cmd {
|
|||
if (properties.get(STATE_FORMAT) == null) {
|
||||
propMap.put(STATE_FORMAT, "2");
|
||||
}
|
||||
propMap.put(REPLICATION_FACTOR, numNrtReplicas);
|
||||
propMap.put(NRT_REPLICAS, numNrtReplicas);
|
||||
propMap.put(TLOG_REPLICAS, numTlogReplicas);
|
||||
propMap.put(PULL_REPLICAS, numPullReplicas);
|
||||
properties.put(MAX_SHARDS_PER_NODE, maxShardsPerNode);
|
||||
|
||||
// inherit settings from input API, defaulting to the backup's setting. Ex: replicationFactor
|
||||
for (String collProp : OverseerCollectionMessageHandler.COLL_PROPS.keySet()) {
|
||||
Object val = message.getProperties().getOrDefault(collProp, backupCollectionState.get(collProp));
|
||||
if (val != null) {
|
||||
if (val != null && propMap.get(collProp) == null) {
|
||||
propMap.put(collProp, val);
|
||||
}
|
||||
}
|
||||
|
@ -308,7 +320,7 @@ public class RestoreCmd implements OverseerCollectionMessageHandler.Cmd {
|
|||
}
|
||||
|
||||
if (totalReplicasPerShard > 1) {
|
||||
log.info("Adding replicas to restored collection={}", restoreCollection);
|
||||
log.info("Adding replicas to restored collection={}", restoreCollection.getName());
|
||||
for (Slice slice : restoreCollection.getSlices()) {
|
||||
|
||||
//Add the remaining replicas for each shard, considering it's type
|
||||
|
|
|
@ -1016,12 +1016,16 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission
|
|||
"Cannot restore with a CREATE_NODE_SET of CREATE_NODE_SET_EMPTY."
|
||||
);
|
||||
}
|
||||
if (req.getParams().get(NRT_REPLICAS) != null && req.getParams().get(REPLICATION_FACTOR) != null) {
|
||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
|
||||
"Cannot set both replicationFactor and nrtReplicas as they mean the same thing");
|
||||
}
|
||||
|
||||
Map<String, Object> params = copy(req.getParams(), null, NAME, COLLECTION_PROP);
|
||||
params.put(CoreAdminParams.BACKUP_LOCATION, location);
|
||||
// from CREATE_OP:
|
||||
copy(req.getParams(), params, COLL_CONF, REPLICATION_FACTOR, MAX_SHARDS_PER_NODE, STATE_FORMAT,
|
||||
AUTO_ADD_REPLICAS, CREATE_NODE_SET, CREATE_NODE_SET_SHUFFLE);
|
||||
copy(req.getParams(), params, COLL_CONF, REPLICATION_FACTOR, NRT_REPLICAS, TLOG_REPLICAS,
|
||||
PULL_REPLICAS, MAX_SHARDS_PER_NODE, STATE_FORMAT, AUTO_ADD_REPLICAS, CREATE_NODE_SET, CREATE_NODE_SET_SHUFFLE);
|
||||
copyPropertiesWithPrefix(req.getParams(), params, COLL_PROP_PREFIX);
|
||||
return params;
|
||||
}),
|
||||
|
|
|
@ -93,14 +93,15 @@ public abstract class AbstractCloudBackupRestoreTestCase extends SolrCloudTestCa
|
|||
replFactor = TestUtil.nextInt(random(), 1, 2);
|
||||
numTlogReplicas = TestUtil.nextInt(random(), 0, 1);
|
||||
numPullReplicas = TestUtil.nextInt(random(), 0, 1);
|
||||
int backupReplFactor = replFactor + numPullReplicas + numTlogReplicas;
|
||||
|
||||
CollectionAdminRequest.Create create = isImplicit ?
|
||||
// NOTE: use shard list with same # of shards as NUM_SHARDS; we assume this later
|
||||
CollectionAdminRequest.createCollectionWithImplicitRouter(getCollectionName(), "conf1", "shard1,shard2", replFactor, numTlogReplicas, numPullReplicas) :
|
||||
CollectionAdminRequest.createCollection(getCollectionName(), "conf1", NUM_SHARDS, replFactor, numTlogReplicas, numPullReplicas);
|
||||
|
||||
if (NUM_SHARDS * (replFactor + numTlogReplicas + numPullReplicas) > cluster.getJettySolrRunners().size() || random().nextBoolean()) {
|
||||
create.setMaxShardsPerNode((int)Math.ceil(NUM_SHARDS * (replFactor + numTlogReplicas + numPullReplicas) / cluster.getJettySolrRunners().size()));//just to assert it survives the restoration
|
||||
if (NUM_SHARDS * (backupReplFactor) > cluster.getJettySolrRunners().size() || random().nextBoolean()) {
|
||||
create.setMaxShardsPerNode((int)Math.ceil(NUM_SHARDS * backupReplFactor / (double) cluster.getJettySolrRunners().size()));//just to assert it survives the restoration
|
||||
if (doSplitShardOperation) {
|
||||
create.setMaxShardsPerNode(create.getMaxShardsPerNode() * 2);
|
||||
}
|
||||
|
@ -139,7 +140,7 @@ public abstract class AbstractCloudBackupRestoreTestCase extends SolrCloudTestCa
|
|||
solrClient.commit(getCollectionName());
|
||||
}
|
||||
|
||||
testBackupAndRestore(getCollectionName());
|
||||
testBackupAndRestore(getCollectionName(), backupReplFactor);
|
||||
testConfigBackupOnly("conf1", getCollectionName());
|
||||
testInvalidPath(getCollectionName());
|
||||
}
|
||||
|
@ -222,7 +223,7 @@ public abstract class AbstractCloudBackupRestoreTestCase extends SolrCloudTestCa
|
|||
return numDocs;
|
||||
}
|
||||
|
||||
private void testBackupAndRestore(String collectionName) throws Exception {
|
||||
private void testBackupAndRestore(String collectionName, int backupReplFactor) throws Exception {
|
||||
String backupLocation = getBackupLocation();
|
||||
String backupName = "mytestbackup";
|
||||
|
||||
|
@ -249,22 +250,43 @@ public abstract class AbstractCloudBackupRestoreTestCase extends SolrCloudTestCa
|
|||
String restoreCollectionName = collectionName + "_restored";
|
||||
boolean sameConfig = random().nextBoolean();
|
||||
|
||||
int restoreReplcationFactor = replFactor;
|
||||
int restoreTlogReplicas = numTlogReplicas;
|
||||
int restorePullReplicas = numPullReplicas;
|
||||
boolean setExternalReplicationFactor = false;
|
||||
if (random().nextBoolean()) { //Override replicationFactor / tLogReplicas / pullReplicas
|
||||
setExternalReplicationFactor = true;
|
||||
restoreTlogReplicas = TestUtil.nextInt(random(), 0, 1);
|
||||
restoreReplcationFactor = TestUtil.nextInt(random(), 1, 2);
|
||||
restorePullReplicas = TestUtil.nextInt(random(), 0, 1);
|
||||
}
|
||||
int numShards = backupCollection.getActiveSlices().size();
|
||||
|
||||
int restoreReplFactor = restoreReplcationFactor + restoreTlogReplicas + restorePullReplicas;
|
||||
|
||||
boolean isMaxShardsPerNodeExternal = false;
|
||||
int restoreMaxShardsPerNode = -1;
|
||||
{
|
||||
CollectionAdminRequest.Restore restore = CollectionAdminRequest.restoreCollection(restoreCollectionName, backupName)
|
||||
.setLocation(backupLocation).setRepositoryName(getBackupRepoName());
|
||||
|
||||
|
||||
//explicitly specify the replicationFactor/pullReplicas/nrtReplicas/tlogReplicas .
|
||||
//Value is still the same as the original. maybe test with different values that the original for better test coverage
|
||||
if (random().nextBoolean()) {
|
||||
restore.setReplicationFactor(replFactor);
|
||||
}
|
||||
if (backupCollection.getReplicas().size() > cluster.getJettySolrRunners().size()) {
|
||||
// may need to increase maxShardsPerNode (e.g. if it was shard split, then now we need more)
|
||||
restore.setMaxShardsPerNode((int)Math.ceil(backupCollection.getReplicas().size()/cluster.getJettySolrRunners().size()));
|
||||
//explicitly specify the replicationFactor/pullReplicas/nrtReplicas/tlogReplicas.
|
||||
if (setExternalReplicationFactor) {
|
||||
restore.setReplicationFactor(restoreReplcationFactor);
|
||||
restore.setTlogReplicas(restoreTlogReplicas);
|
||||
restore.setPullReplicas(restorePullReplicas);
|
||||
}
|
||||
|
||||
|
||||
if (restoreReplFactor > backupReplFactor) { //else the backup maxShardsPerNode should be enough
|
||||
restoreMaxShardsPerNode = (int)Math.ceil((restoreReplFactor * numShards/(double) cluster.getJettySolrRunners().size()));
|
||||
log.info("numShards={} restoreReplFactor={} maxShardsPerNode={} totalNodes={}",
|
||||
numShards, restoreReplFactor, restoreMaxShardsPerNode, cluster.getJettySolrRunners().size());
|
||||
isMaxShardsPerNodeExternal = true;
|
||||
|
||||
restore.setMaxShardsPerNode(restoreMaxShardsPerNode);
|
||||
}
|
||||
|
||||
if (rarely()) { // Try with createNodeSet configuration
|
||||
int nodeSetSize = cluster.getJettySolrRunners().size() / 2;
|
||||
List<String> nodeStrs = new ArrayList<>(nodeSetSize);
|
||||
|
@ -308,10 +330,7 @@ public abstract class AbstractCloudBackupRestoreTestCase extends SolrCloudTestCa
|
|||
assertEquals(origShardToDocCount, getShardToDocCountMap(client, restoreCollection));
|
||||
}
|
||||
|
||||
assertEquals(backupCollection.getReplicationFactor(), restoreCollection.getReplicationFactor());
|
||||
assertEquals(backupCollection.getAutoAddReplicas(), restoreCollection.getAutoAddReplicas());
|
||||
assertEquals(backupCollection.getActiveSlices().iterator().next().getReplicas().size(),
|
||||
restoreCollection.getActiveSlices().iterator().next().getReplicas().size());
|
||||
assertEquals(sameConfig ? "conf1" : "customConfigName",
|
||||
cluster.getSolrClient().getZkStateReader().readConfigName(restoreCollectionName));
|
||||
|
||||
|
@ -324,12 +343,15 @@ public abstract class AbstractCloudBackupRestoreTestCase extends SolrCloudTestCa
|
|||
v <= restoreCollection.getMaxShardsPerNode());
|
||||
});
|
||||
|
||||
assertEquals("Different count of nrtReplicas. Backup collection state=" + backupCollection + "\nRestore " +
|
||||
"collection state=" + restoreCollection, replFactor, restoreCollection.getNumNrtReplicas().intValue());
|
||||
assertEquals("Different count of pullReplicas. Backup collection state=" + backupCollection + "\nRestore" +
|
||||
" collection state=" + restoreCollection, numPullReplicas, restoreCollection.getNumPullReplicas().intValue());
|
||||
assertEquals("Different count of TlogReplica. Backup collection state=" + backupCollection + "\nRestore" +
|
||||
" collection state=" + restoreCollection, numTlogReplicas, restoreCollection.getNumTlogReplicas().intValue());
|
||||
assertEquals(restoreCollection.toString(), restoreReplcationFactor, restoreCollection.getReplicationFactor().intValue());
|
||||
assertEquals(restoreCollection.toString(), restoreReplcationFactor, restoreCollection.getNumNrtReplicas().intValue());
|
||||
assertEquals(restoreCollection.toString(), restorePullReplicas, restoreCollection.getNumPullReplicas().intValue());
|
||||
assertEquals(restoreCollection.toString(), restoreTlogReplicas, restoreCollection.getNumTlogReplicas().intValue());
|
||||
if (isMaxShardsPerNodeExternal) {
|
||||
assertEquals(restoreCollectionName, restoreMaxShardsPerNode, restoreCollection.getMaxShardsPerNode());
|
||||
} else {
|
||||
assertEquals(restoreCollectionName, backupCollection.getMaxShardsPerNode(), restoreCollection.getMaxShardsPerNode());
|
||||
}
|
||||
|
||||
assertEquals("Restore collection should use stateFormat=2", 2, restoreCollection.getStateFormat());
|
||||
|
||||
|
|
|
@ -2179,6 +2179,15 @@ Defines the name of the configurations to use for this collection. These must al
|
|||
`replicationFactor`::
|
||||
The number of replicas to be created for each shard.
|
||||
|
||||
`nrtReplicas`::
|
||||
The number of NRT (Near-Real-Time) replicas to create for this collection. This type of replica maintains a transaction log and updates its index locally. This parameter behaves the same way as setting replicationFactor parameter.
|
||||
|
||||
`tlogReplicas`::
|
||||
The number of TLOG replicas to create for this collection. This type of replica maintains a transaction log but only updates its index via replication from a leader. See the section <<shards-and-indexing-data-in-solrcloud.adoc#types-of-replicas,Types of Replicas>> for more information about replica types.
|
||||
|
||||
`pullReplicas`::
|
||||
The number of PULL replicas to create for this collection. This type of replica does not maintain a transaction log and only updates its index via replication from a leader. This type is not eligible to become a leader and should not be the only type of replicas in the collection. See the section <<shards-and-indexing-data-in-solrcloud.adoc#types-of-replicas,Types of Replicas>> for more information about replica types.
|
||||
|
||||
`maxShardsPerNode`::
|
||||
When creating collections, the shards and/or replicas are spread across all available (i.e., live) nodes, and two replicas of the same shard will never be on the same node.
|
||||
+
|
||||
|
|
|
@ -35,6 +35,7 @@ import org.apache.solr.client.solrj.response.CollectionAdminResponse;
|
|||
import org.apache.solr.client.solrj.response.RequestStatusState;
|
||||
import org.apache.solr.client.solrj.util.SolrIdentifierValidator;
|
||||
import org.apache.solr.common.MapWriter;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.cloud.DocCollection;
|
||||
import org.apache.solr.common.cloud.ImplicitDocRouter;
|
||||
import org.apache.solr.common.cloud.Replica;
|
||||
|
@ -833,6 +834,9 @@ public abstract class CollectionAdminRequest<T extends CollectionAdminResponse>
|
|||
protected String configName;
|
||||
protected Integer maxShardsPerNode;
|
||||
protected Integer replicationFactor;
|
||||
protected Integer nrtReplicas;
|
||||
protected Integer tlogReplicas;
|
||||
protected Integer pullReplicas;
|
||||
protected Boolean autoAddReplicas;
|
||||
protected Optional<String> createNodeSet = Optional.empty();
|
||||
protected Optional<Boolean> createNodeSetShuffle = Optional.empty();
|
||||
|
@ -885,7 +889,16 @@ public abstract class CollectionAdminRequest<T extends CollectionAdminResponse>
|
|||
public Restore setMaxShardsPerNode(int maxShardsPerNode) { this.maxShardsPerNode = maxShardsPerNode; return this; }
|
||||
|
||||
public Integer getReplicationFactor() { return replicationFactor; }
|
||||
public Restore setReplicationFactor(Integer repl) { this.replicationFactor = repl; return this; }
|
||||
public Restore setReplicationFactor(Integer replicationFactor) { this.replicationFactor = replicationFactor; return this; }
|
||||
|
||||
public Integer getNrtReplicas() { return nrtReplicas; }
|
||||
public Restore setNrtReplicas(Integer nrtReplicas) { this.nrtReplicas= nrtReplicas; return this; };
|
||||
|
||||
public Integer getTlogReplicas() { return tlogReplicas; }
|
||||
public Restore setTlogReplicas(Integer tlogReplicas) { this.tlogReplicas = tlogReplicas; return this; }
|
||||
|
||||
public Integer getPullReplicas() { return pullReplicas; }
|
||||
public Restore setPullReplicas(Integer pullReplicas) { this.pullReplicas = pullReplicas; return this; }
|
||||
|
||||
public Boolean getAutoAddReplicas() { return autoAddReplicas; }
|
||||
public Restore setAutoAddReplicas(boolean autoAddReplicas) { this.autoAddReplicas = autoAddReplicas; return this; }
|
||||
|
@ -907,9 +920,22 @@ public abstract class CollectionAdminRequest<T extends CollectionAdminResponse>
|
|||
if (maxShardsPerNode != null) {
|
||||
params.set( ZkStateReader.MAX_SHARDS_PER_NODE, maxShardsPerNode);
|
||||
}
|
||||
if (replicationFactor != null && nrtReplicas != null) {
|
||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
|
||||
"Cannot set both replicationFactor and nrtReplicas as they mean the same thing");
|
||||
}
|
||||
if (replicationFactor != null) {
|
||||
params.set(ZkStateReader.REPLICATION_FACTOR, replicationFactor);
|
||||
}
|
||||
if (nrtReplicas != null) {
|
||||
params.set(ZkStateReader.NRT_REPLICAS, nrtReplicas);
|
||||
}
|
||||
if (pullReplicas != null) {
|
||||
params.set(ZkStateReader.PULL_REPLICAS, pullReplicas);
|
||||
}
|
||||
if (tlogReplicas != null) {
|
||||
params.set(ZkStateReader.TLOG_REPLICAS, tlogReplicas);
|
||||
}
|
||||
if (autoAddReplicas != null) {
|
||||
params.set(ZkStateReader.AUTO_ADD_REPLICAS, autoAddReplicas);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue