SOLR-5004: Allow a shard to be split into 'n' sub-shards using the collections API

This commit is contained in:
Anshum Gupta 2018-10-22 14:59:10 -07:00
parent fcaea07f3c
commit d799fd53c7
7 changed files with 149 additions and 7 deletions

View File

@ -151,6 +151,9 @@ New Features
* SOLR-12846: Added support for "host" variable in autoscaling policy rules (noble)
* SOLR-5004: Splitshard collections API now supports splitting into more than 2 sub-shards directly i.e. by providing a
numSubShards parameter (Christine Poerschke, Anshum Gupta)
Other Changes
----------------------

View File

@ -74,10 +74,15 @@ import static org.apache.solr.common.params.CollectionParams.CollectionAction.AD
import static org.apache.solr.common.params.CollectionParams.CollectionAction.CREATESHARD;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.DELETESHARD;
import static org.apache.solr.common.params.CommonAdminParams.ASYNC;
import static org.apache.solr.common.params.CommonAdminParams.NUM_SUB_SHARDS;
public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private static final int MIN_NUM_SUB_SHARDS = 2;
// This is an arbitrary number that seems reasonable at this time.
private static final int MAX_NUM_SUB_SHARDS = 8;
private static final int DEFAULT_NUM_SUB_SHARDS = 2;
private final OverseerCollectionMessageHandler ocmh;
@ -122,7 +127,7 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
}
// find the leader for the shard
Replica parentShardLeader = null;
Replica parentShardLeader;
try {
parentShardLeader = zkStateReader.getLeaderRetry(collectionName, slice.get(), 10000);
} catch (InterruptedException e) {
@ -326,7 +331,7 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
requestMap);
t.stop();
log.debug("Index on shard: " + nodeName + " split into two successfully");
log.debug("Index on shard: {} split into {} successfully", nodeName, subShardNames.size());
t = timings.sub("applyBufferedUpdates");
// apply buffered updates on sub-shards
@ -680,13 +685,13 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
List<DocRouter.Range> subRanges, List<String> subSlices, List<String> subShardNames,
boolean firstReplicaNrt) {
String splitKey = message.getStr("split.key");
String rangesStr = message.getStr(CoreAdminParams.RANGES);
DocRouter.Range range = parentSlice.getRange();
if (range == null) {
range = new PlainIdRouter().fullRange();
}
DocRouter router = collection.getRouter() != null ? collection.getRouter() : DocRouter.DEFAULT;
String rangesStr = message.getStr(CoreAdminParams.RANGES);
if (rangesStr != null) {
String[] ranges = rangesStr.split(",");
if (ranges.length == 0 || ranges.length == 1) {
@ -740,8 +745,14 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
}
}
} else {
// todo: fixed to two partitions?
subRanges.addAll(router.partitionRange(2, range));
int numSubShards = message.getInt(NUM_SUB_SHARDS, DEFAULT_NUM_SUB_SHARDS);
log.info("{} set at: {}", NUM_SUB_SHARDS, numSubShards);
if(numSubShards < MIN_NUM_SUB_SHARDS || numSubShards > MAX_NUM_SUB_SHARDS)
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"A shard can only be split into "+MIN_NUM_SUB_SHARDS+" to " + MAX_NUM_SUB_SHARDS
+ " subshards in one split request. Provided "+NUM_SUB_SHARDS+"=" + numSubShards);
subRanges.addAll(router.partitionRange(numSubShards, range));
}
for (int i = 0; i < subRanges.size(); i++) {

View File

@ -143,6 +143,7 @@ import static org.apache.solr.common.params.CollectionAdminParams.WITH_COLLECTIO
import static org.apache.solr.common.params.CollectionParams.CollectionAction.*;
import static org.apache.solr.common.params.CommonAdminParams.ASYNC;
import static org.apache.solr.common.params.CommonAdminParams.IN_PLACE_MOVE;
import static org.apache.solr.common.params.CommonAdminParams.NUM_SUB_SHARDS;
import static org.apache.solr.common.params.CommonAdminParams.SPLIT_METHOD;
import static org.apache.solr.common.params.CommonAdminParams.WAIT_FOR_FINAL_STATE;
import static org.apache.solr.common.params.CommonParams.NAME;
@ -638,6 +639,7 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission
String shard = req.getParams().get(SHARD_ID_PROP);
String rangesStr = req.getParams().get(CoreAdminParams.RANGES);
String splitKey = req.getParams().get("split.key");
String numSubShards = req.getParams().get(NUM_SUB_SHARDS);
if (splitKey == null && shard == null) {
throw new SolrException(ErrorCode.BAD_REQUEST, "At least one of shard, or split.key should be specified.");
@ -650,6 +652,10 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission
throw new SolrException(ErrorCode.BAD_REQUEST,
"Only one of 'ranges' or 'split.key' should be specified");
}
if (numSubShards != null && (splitKey != null || rangesStr != null)) {
throw new SolrException(ErrorCode.BAD_REQUEST,
"numSubShards can not be specified with split.key or ranges parameters");
}
Map<String, Object> map = copy(req.getParams(), null,
COLLECTION_PROP,
@ -658,7 +664,8 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission
CoreAdminParams.RANGES,
WAIT_FOR_FINAL_STATE,
TIMING,
SPLIT_METHOD);
SPLIT_METHOD,
NUM_SUB_SHARDS);
return copyPropertiesWithPrefix(req.getParams(), map, COLL_PROP_PREFIX);
}),
DELETESHARD_OP(DELETESHARD, (req, rsp, h) -> {

View File

@ -0,0 +1,102 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.cloud;
import java.io.IOException;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.common.SolrException;
import org.junit.After;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
public class SplitShardTest extends SolrCloudTestCase {
private final String COLLECTION_NAME = "splitshardtest-collection";
@BeforeClass
public static void setupCluster() throws Exception {
configureCluster(1)
.addConfig("conf", configset("cloud-minimal"))
.configure();
}
@Before
@Override
public void setUp() throws Exception {
super.setUp();
}
@After
@Override
public void tearDown() throws Exception {
super.tearDown();
cluster.deleteAllCollections();
}
@Test
public void doTest() throws IOException, SolrServerException {
CollectionAdminRequest
.createCollection(COLLECTION_NAME, "conf", 2, 1)
.setMaxShardsPerNode(100)
.process(cluster.getSolrClient());
CollectionAdminRequest.SplitShard splitShard = CollectionAdminRequest.splitShard(COLLECTION_NAME)
.setNumSubShards(5)
.setShardName("shard1");
splitShard.process(cluster.getSolrClient());
waitForState("Timed out waiting for sub shards to be active. Number of active shards=" +
cluster.getSolrClient().getZkStateReader().getClusterState().getCollection(COLLECTION_NAME).getActiveSlices().size(),
COLLECTION_NAME, activeClusterShape(6, 1));
try {
splitShard = CollectionAdminRequest.splitShard(COLLECTION_NAME).setShardName("shard2").setNumSubShards(10);
splitShard.process(cluster.getSolrClient());
fail("SplitShard should throw an exception when numSubShards > 8");
} catch (HttpSolrClient.RemoteSolrException ex) {
assertTrue(ex.getMessage().contains("A shard can only be split into 2 to 8 subshards in one split request."));
}
try {
splitShard = CollectionAdminRequest.splitShard(COLLECTION_NAME).setShardName("shard2").setNumSubShards(1);
splitShard.process(cluster.getSolrClient());
fail("SplitShard should throw an exception when numSubShards < 2");
} catch (HttpSolrClient.RemoteSolrException ex) {
assertTrue(ex.getMessage().contains("A shard can only be split into 2 to 8 subshards in one split request. Provided numSubShards=1"));
}
}
@Test
public void multipleOptionsSplitTest() throws IOException, SolrServerException {
CollectionAdminRequest.SplitShard splitShard = CollectionAdminRequest.splitShard(COLLECTION_NAME)
.setNumSubShards(5)
.setRanges("0-c,d-7fffffff")
.setShardName("shard1");
boolean expectedException = false;
try {
splitShard.process(cluster.getSolrClient());
fail("An exception should have been thrown");
} catch (SolrException ex) {
expectedException = true;
}
assertTrue("Expected SolrException but it didn't happen", expectedException);
}
}

View File

@ -287,6 +287,11 @@ This parameter can be used to split a shard using a route key such that all docu
+
For example, suppose `split.key=A!` hashes to the range `12-15` and belongs to shard 'shard1' with range `0-20`. Splitting by this route key would yield three sub-shards with ranges `0-11`, `12-15` and `16-20`. Note that the sub-shard with the hash range of the route key may also contain documents for other route keys whose hash ranges overlap.
`numSubShards`::
The number of sub-shards to split the parent shard into. Allowed values for this are in the range of 2-8 and defaults to 2.
+
This parameter can only be used when ranges or split.key are not specified.
`splitMethod`::
Currently two methods of shard splitting are supported:
* `splitMethod=rewrite` (default) after selecting documents to retain in each partition this method creates sub-indexes from

View File

@ -1153,6 +1153,7 @@ public abstract class CollectionAdminRequest<T extends CollectionAdminResponse>
protected String splitKey;
protected String shard;
protected String splitMethod;
protected Integer numSubShards;
private Properties properties;
@ -1164,6 +1165,15 @@ public abstract class CollectionAdminRequest<T extends CollectionAdminResponse>
public SplitShard setRanges(String ranges) { this.ranges = ranges; return this; }
public String getRanges() { return ranges; }
public Integer getNumSubShards() {
return numSubShards;
}
public SplitShard setNumSubShards(Integer numSubShards) {
this.numSubShards = numSubShards;
return this;
}
public SplitShard setSplitMethod(String splitMethod) {
this.splitMethod = splitMethod;
return this;
@ -1210,6 +1220,8 @@ public abstract class CollectionAdminRequest<T extends CollectionAdminResponse>
params.set("split.key", this.splitKey);
params.set(CoreAdminParams.RANGES, ranges);
params.set(CommonAdminParams.SPLIT_METHOD, splitMethod);
if(numSubShards != null)
params.set("numSubShards", numSubShards);
if(properties != null) {
addProperties(params, properties);

View File

@ -27,6 +27,8 @@ public interface CommonAdminParams
String IN_PLACE_MOVE = "inPlaceMove";
/** Method to use for shard splitting. */
String SPLIT_METHOD = "splitMethod";
/** **/
String NUM_SUB_SHARDS = "numSubShards";
/** Timeout for replicas to become active. */
String TIMEOUT = "timeout";
}