SOLR-12489: User specified replicationFactor and maxShardsPerNode is used when specified during a restore operation.

A user can now specify nrtReplicas/tlogReplicas/pullReplicas while restoring the collection.

Specifying replicationFactor or nrtReplicas have the same effect and only one can be specified
This commit is contained in:
Varun Thacker 2018-06-19 19:45:08 +05:30
parent 81906d31a6
commit 3d20e8967b
6 changed files with 107 additions and 31 deletions

View File

@ -92,6 +92,9 @@ Bug Fixes
* SOLR-11676: Keep nrtReplicas and replicationFactor in sync while creating a collection and modifying a collection
(Varun Thacker)
* SOLR-12489: User specified replicationFactor and maxShardsPerNode is used when specified during a restore operation.
A user can now specify nrtReplicas/tlogReplicas/pullReplicas while restoring the collection.
Specifying replicationFactor or nrtReplicas have the same effect and only one can be specified (Varun Thacker)
Optimizations
----------------------

View File

@ -110,14 +110,21 @@ public class RestoreCmd implements OverseerCollectionMessageHandler.Cmd {
zkStateReader.getClusterState().getLiveNodes(), message, OverseerCollectionMessageHandler.RANDOM);
int numShards = backupCollectionState.getActiveSlices().size();
int numNrtReplicas = getInt(message, NRT_REPLICAS, backupCollectionState.getNumNrtReplicas(), 0);
if (numNrtReplicas == 0) {
numNrtReplicas = getInt(message, REPLICATION_FACTOR, backupCollectionState.getReplicationFactor(), 0);
int numNrtReplicas;
if (message.get(REPLICATION_FACTOR) != null) {
numNrtReplicas = message.getInt(REPLICATION_FACTOR, 0);
} else if (message.get(NRT_REPLICAS) != null) {
numNrtReplicas = message.getInt(NRT_REPLICAS, 0);
} else {
//replicationFactor and nrtReplicas is always in sync after SOLR-11676
//pick from cluster state of the backed up collection
numNrtReplicas = backupCollectionState.getReplicationFactor();
}
int numTlogReplicas = getInt(message, TLOG_REPLICAS, backupCollectionState.getNumTlogReplicas(), 0);
int numPullReplicas = getInt(message, PULL_REPLICAS, backupCollectionState.getNumPullReplicas(), 0);
int totalReplicasPerShard = numNrtReplicas + numTlogReplicas + numPullReplicas;
assert totalReplicasPerShard > 0;
int maxShardsPerNode = message.getInt(MAX_SHARDS_PER_NODE, backupCollectionState.getMaxShardsPerNode());
int availableNodeCount = nodeList.size();
@ -151,11 +158,16 @@ public class RestoreCmd implements OverseerCollectionMessageHandler.Cmd {
if (properties.get(STATE_FORMAT) == null) {
propMap.put(STATE_FORMAT, "2");
}
propMap.put(REPLICATION_FACTOR, numNrtReplicas);
propMap.put(NRT_REPLICAS, numNrtReplicas);
propMap.put(TLOG_REPLICAS, numTlogReplicas);
propMap.put(PULL_REPLICAS, numPullReplicas);
properties.put(MAX_SHARDS_PER_NODE, maxShardsPerNode);
// inherit settings from input API, defaulting to the backup's setting. Ex: replicationFactor
for (String collProp : OverseerCollectionMessageHandler.COLL_PROPS.keySet()) {
Object val = message.getProperties().getOrDefault(collProp, backupCollectionState.get(collProp));
if (val != null) {
if (val != null && propMap.get(collProp) == null) {
propMap.put(collProp, val);
}
}
@ -308,7 +320,7 @@ public class RestoreCmd implements OverseerCollectionMessageHandler.Cmd {
}
if (totalReplicasPerShard > 1) {
log.info("Adding replicas to restored collection={}", restoreCollection);
log.info("Adding replicas to restored collection={}", restoreCollection.getName());
for (Slice slice : restoreCollection.getSlices()) {
//Add the remaining replicas for each shard, considering it's type

View File

@ -1016,12 +1016,16 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission
"Cannot restore with a CREATE_NODE_SET of CREATE_NODE_SET_EMPTY."
);
}
if (req.getParams().get(NRT_REPLICAS) != null && req.getParams().get(REPLICATION_FACTOR) != null) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"Cannot set both replicationFactor and nrtReplicas as they mean the same thing");
}
Map<String, Object> params = copy(req.getParams(), null, NAME, COLLECTION_PROP);
params.put(CoreAdminParams.BACKUP_LOCATION, location);
// from CREATE_OP:
copy(req.getParams(), params, COLL_CONF, REPLICATION_FACTOR, MAX_SHARDS_PER_NODE, STATE_FORMAT,
AUTO_ADD_REPLICAS, CREATE_NODE_SET, CREATE_NODE_SET_SHUFFLE);
copy(req.getParams(), params, COLL_CONF, REPLICATION_FACTOR, NRT_REPLICAS, TLOG_REPLICAS,
PULL_REPLICAS, MAX_SHARDS_PER_NODE, STATE_FORMAT, AUTO_ADD_REPLICAS, CREATE_NODE_SET, CREATE_NODE_SET_SHUFFLE);
copyPropertiesWithPrefix(req.getParams(), params, COLL_PROP_PREFIX);
return params;
}),

View File

@ -93,14 +93,15 @@ public abstract class AbstractCloudBackupRestoreTestCase extends SolrCloudTestCa
replFactor = TestUtil.nextInt(random(), 1, 2);
numTlogReplicas = TestUtil.nextInt(random(), 0, 1);
numPullReplicas = TestUtil.nextInt(random(), 0, 1);
int backupReplFactor = replFactor + numPullReplicas + numTlogReplicas;
CollectionAdminRequest.Create create = isImplicit ?
// NOTE: use shard list with same # of shards as NUM_SHARDS; we assume this later
CollectionAdminRequest.createCollectionWithImplicitRouter(getCollectionName(), "conf1", "shard1,shard2", replFactor, numTlogReplicas, numPullReplicas) :
CollectionAdminRequest.createCollection(getCollectionName(), "conf1", NUM_SHARDS, replFactor, numTlogReplicas, numPullReplicas);
if (NUM_SHARDS * (replFactor + numTlogReplicas + numPullReplicas) > cluster.getJettySolrRunners().size() || random().nextBoolean()) {
create.setMaxShardsPerNode((int)Math.ceil(NUM_SHARDS * (replFactor + numTlogReplicas + numPullReplicas) / cluster.getJettySolrRunners().size()));//just to assert it survives the restoration
if (NUM_SHARDS * (backupReplFactor) > cluster.getJettySolrRunners().size() || random().nextBoolean()) {
create.setMaxShardsPerNode((int)Math.ceil(NUM_SHARDS * backupReplFactor / (double) cluster.getJettySolrRunners().size()));//just to assert it survives the restoration
if (doSplitShardOperation) {
create.setMaxShardsPerNode(create.getMaxShardsPerNode() * 2);
}
@ -139,7 +140,7 @@ public abstract class AbstractCloudBackupRestoreTestCase extends SolrCloudTestCa
solrClient.commit(getCollectionName());
}
testBackupAndRestore(getCollectionName());
testBackupAndRestore(getCollectionName(), backupReplFactor);
testConfigBackupOnly("conf1", getCollectionName());
testInvalidPath(getCollectionName());
}
@ -222,7 +223,7 @@ public abstract class AbstractCloudBackupRestoreTestCase extends SolrCloudTestCa
return numDocs;
}
private void testBackupAndRestore(String collectionName) throws Exception {
private void testBackupAndRestore(String collectionName, int backupReplFactor) throws Exception {
String backupLocation = getBackupLocation();
String backupName = "mytestbackup";
@ -249,22 +250,43 @@ public abstract class AbstractCloudBackupRestoreTestCase extends SolrCloudTestCa
String restoreCollectionName = collectionName + "_restored";
boolean sameConfig = random().nextBoolean();
int restoreReplcationFactor = replFactor;
int restoreTlogReplicas = numTlogReplicas;
int restorePullReplicas = numPullReplicas;
boolean setExternalReplicationFactor = false;
if (random().nextBoolean()) { //Override replicationFactor / tLogReplicas / pullReplicas
setExternalReplicationFactor = true;
restoreTlogReplicas = TestUtil.nextInt(random(), 0, 1);
restoreReplcationFactor = TestUtil.nextInt(random(), 1, 2);
restorePullReplicas = TestUtil.nextInt(random(), 0, 1);
}
int numShards = backupCollection.getActiveSlices().size();
int restoreReplFactor = restoreReplcationFactor + restoreTlogReplicas + restorePullReplicas;
boolean isMaxShardsPerNodeExternal = false;
int restoreMaxShardsPerNode = -1;
{
CollectionAdminRequest.Restore restore = CollectionAdminRequest.restoreCollection(restoreCollectionName, backupName)
.setLocation(backupLocation).setRepositoryName(getBackupRepoName());
//explicitly specify the replicationFactor/pullReplicas/nrtReplicas/tlogReplicas .
//Value is still the same as the original. maybe test with different values that the original for better test coverage
if (random().nextBoolean()) {
restore.setReplicationFactor(replFactor);
}
if (backupCollection.getReplicas().size() > cluster.getJettySolrRunners().size()) {
// may need to increase maxShardsPerNode (e.g. if it was shard split, then now we need more)
restore.setMaxShardsPerNode((int)Math.ceil(backupCollection.getReplicas().size()/cluster.getJettySolrRunners().size()));
//explicitly specify the replicationFactor/pullReplicas/nrtReplicas/tlogReplicas.
if (setExternalReplicationFactor) {
restore.setReplicationFactor(restoreReplcationFactor);
restore.setTlogReplicas(restoreTlogReplicas);
restore.setPullReplicas(restorePullReplicas);
}
if (restoreReplFactor > backupReplFactor) { //else the backup maxShardsPerNode should be enough
restoreMaxShardsPerNode = (int)Math.ceil((restoreReplFactor * numShards/(double) cluster.getJettySolrRunners().size()));
log.info("numShards={} restoreReplFactor={} maxShardsPerNode={} totalNodes={}",
numShards, restoreReplFactor, restoreMaxShardsPerNode, cluster.getJettySolrRunners().size());
isMaxShardsPerNodeExternal = true;
restore.setMaxShardsPerNode(restoreMaxShardsPerNode);
}
if (rarely()) { // Try with createNodeSet configuration
int nodeSetSize = cluster.getJettySolrRunners().size() / 2;
List<String> nodeStrs = new ArrayList<>(nodeSetSize);
@ -308,10 +330,7 @@ public abstract class AbstractCloudBackupRestoreTestCase extends SolrCloudTestCa
assertEquals(origShardToDocCount, getShardToDocCountMap(client, restoreCollection));
}
assertEquals(backupCollection.getReplicationFactor(), restoreCollection.getReplicationFactor());
assertEquals(backupCollection.getAutoAddReplicas(), restoreCollection.getAutoAddReplicas());
assertEquals(backupCollection.getActiveSlices().iterator().next().getReplicas().size(),
restoreCollection.getActiveSlices().iterator().next().getReplicas().size());
assertEquals(sameConfig ? "conf1" : "customConfigName",
cluster.getSolrClient().getZkStateReader().readConfigName(restoreCollectionName));
@ -324,12 +343,15 @@ public abstract class AbstractCloudBackupRestoreTestCase extends SolrCloudTestCa
v <= restoreCollection.getMaxShardsPerNode());
});
assertEquals("Different count of nrtReplicas. Backup collection state=" + backupCollection + "\nRestore " +
"collection state=" + restoreCollection, replFactor, restoreCollection.getNumNrtReplicas().intValue());
assertEquals("Different count of pullReplicas. Backup collection state=" + backupCollection + "\nRestore" +
" collection state=" + restoreCollection, numPullReplicas, restoreCollection.getNumPullReplicas().intValue());
assertEquals("Different count of TlogReplica. Backup collection state=" + backupCollection + "\nRestore" +
" collection state=" + restoreCollection, numTlogReplicas, restoreCollection.getNumTlogReplicas().intValue());
assertEquals(restoreCollection.toString(), restoreReplcationFactor, restoreCollection.getReplicationFactor().intValue());
assertEquals(restoreCollection.toString(), restoreReplcationFactor, restoreCollection.getNumNrtReplicas().intValue());
assertEquals(restoreCollection.toString(), restorePullReplicas, restoreCollection.getNumPullReplicas().intValue());
assertEquals(restoreCollection.toString(), restoreTlogReplicas, restoreCollection.getNumTlogReplicas().intValue());
if (isMaxShardsPerNodeExternal) {
assertEquals(restoreCollectionName, restoreMaxShardsPerNode, restoreCollection.getMaxShardsPerNode());
} else {
assertEquals(restoreCollectionName, backupCollection.getMaxShardsPerNode(), restoreCollection.getMaxShardsPerNode());
}
assertEquals("Restore collection should use stateFormat=2", 2, restoreCollection.getStateFormat());

View File

@ -2179,6 +2179,15 @@ Defines the name of the configurations to use for this collection. These must al
`replicationFactor`::
The number of replicas to be created for each shard.
`nrtReplicas`::
The number of NRT (Near-Real-Time) replicas to create for this collection. This type of replica maintains a transaction log and updates its index locally. This parameter behaves the same way as setting replicationFactor parameter.
`tlogReplicas`::
The number of TLOG replicas to create for this collection. This type of replica maintains a transaction log but only updates its index via replication from a leader. See the section <<shards-and-indexing-data-in-solrcloud.adoc#types-of-replicas,Types of Replicas>> for more information about replica types.
`pullReplicas`::
The number of PULL replicas to create for this collection. This type of replica does not maintain a transaction log and only updates its index via replication from a leader. This type is not eligible to become a leader and should not be the only type of replicas in the collection. See the section <<shards-and-indexing-data-in-solrcloud.adoc#types-of-replicas,Types of Replicas>> for more information about replica types.
`maxShardsPerNode`::
When creating collections, the shards and/or replicas are spread across all available (i.e., live) nodes, and two replicas of the same shard will never be on the same node.
+

View File

@ -35,6 +35,7 @@ import org.apache.solr.client.solrj.response.CollectionAdminResponse;
import org.apache.solr.client.solrj.response.RequestStatusState;
import org.apache.solr.client.solrj.util.SolrIdentifierValidator;
import org.apache.solr.common.MapWriter;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.ImplicitDocRouter;
import org.apache.solr.common.cloud.Replica;
@ -833,6 +834,9 @@ public abstract class CollectionAdminRequest<T extends CollectionAdminResponse>
protected String configName;
protected Integer maxShardsPerNode;
protected Integer replicationFactor;
protected Integer nrtReplicas;
protected Integer tlogReplicas;
protected Integer pullReplicas;
protected Boolean autoAddReplicas;
protected Optional<String> createNodeSet = Optional.empty();
protected Optional<Boolean> createNodeSetShuffle = Optional.empty();
@ -885,7 +889,16 @@ public abstract class CollectionAdminRequest<T extends CollectionAdminResponse>
public Restore setMaxShardsPerNode(int maxShardsPerNode) { this.maxShardsPerNode = maxShardsPerNode; return this; }
public Integer getReplicationFactor() { return replicationFactor; }
public Restore setReplicationFactor(Integer repl) { this.replicationFactor = repl; return this; }
public Restore setReplicationFactor(Integer replicationFactor) { this.replicationFactor = replicationFactor; return this; }
public Integer getNrtReplicas() { return nrtReplicas; }
public Restore setNrtReplicas(Integer nrtReplicas) { this.nrtReplicas= nrtReplicas; return this; };
public Integer getTlogReplicas() { return tlogReplicas; }
public Restore setTlogReplicas(Integer tlogReplicas) { this.tlogReplicas = tlogReplicas; return this; }
public Integer getPullReplicas() { return pullReplicas; }
public Restore setPullReplicas(Integer pullReplicas) { this.pullReplicas = pullReplicas; return this; }
public Boolean getAutoAddReplicas() { return autoAddReplicas; }
public Restore setAutoAddReplicas(boolean autoAddReplicas) { this.autoAddReplicas = autoAddReplicas; return this; }
@ -907,9 +920,22 @@ public abstract class CollectionAdminRequest<T extends CollectionAdminResponse>
if (maxShardsPerNode != null) {
params.set( ZkStateReader.MAX_SHARDS_PER_NODE, maxShardsPerNode);
}
if (replicationFactor != null && nrtReplicas != null) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"Cannot set both replicationFactor and nrtReplicas as they mean the same thing");
}
if (replicationFactor != null) {
params.set(ZkStateReader.REPLICATION_FACTOR, replicationFactor);
}
if (nrtReplicas != null) {
params.set(ZkStateReader.NRT_REPLICAS, nrtReplicas);
}
if (pullReplicas != null) {
params.set(ZkStateReader.PULL_REPLICAS, pullReplicas);
}
if (tlogReplicas != null) {
params.set(ZkStateReader.TLOG_REPLICAS, tlogReplicas);
}
if (autoAddReplicas != null) {
params.set(ZkStateReader.AUTO_ADD_REPLICAS, autoAddReplicas);
}