mirror of https://github.com/apache/lucene.git
SOLR-5004: Allow a shard to be split into 'n' sub-shards using the collections API
This commit is contained in:
parent
fcaea07f3c
commit
d799fd53c7
|
@ -151,6 +151,9 @@ New Features
|
||||||
|
|
||||||
* SOLR-12846: Added support for "host" variable in autoscaling policy rules (noble)
|
* SOLR-12846: Added support for "host" variable in autoscaling policy rules (noble)
|
||||||
|
|
||||||
|
* SOLR-5004: Splitshard collections API now supports splitting into more than 2 sub-shards directly i.e. by providing a
|
||||||
|
numSubShards parameter (Christine Poerschke, Anshum Gupta)
|
||||||
|
|
||||||
|
|
||||||
Other Changes
|
Other Changes
|
||||||
----------------------
|
----------------------
|
||||||
|
|
|
@ -74,10 +74,15 @@ import static org.apache.solr.common.params.CollectionParams.CollectionAction.AD
|
||||||
import static org.apache.solr.common.params.CollectionParams.CollectionAction.CREATESHARD;
|
import static org.apache.solr.common.params.CollectionParams.CollectionAction.CREATESHARD;
|
||||||
import static org.apache.solr.common.params.CollectionParams.CollectionAction.DELETESHARD;
|
import static org.apache.solr.common.params.CollectionParams.CollectionAction.DELETESHARD;
|
||||||
import static org.apache.solr.common.params.CommonAdminParams.ASYNC;
|
import static org.apache.solr.common.params.CommonAdminParams.ASYNC;
|
||||||
|
import static org.apache.solr.common.params.CommonAdminParams.NUM_SUB_SHARDS;
|
||||||
|
|
||||||
|
|
||||||
public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
|
public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
|
||||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||||
|
private static final int MIN_NUM_SUB_SHARDS = 2;
|
||||||
|
// This is an arbitrary number that seems reasonable at this time.
|
||||||
|
private static final int MAX_NUM_SUB_SHARDS = 8;
|
||||||
|
private static final int DEFAULT_NUM_SUB_SHARDS = 2;
|
||||||
|
|
||||||
private final OverseerCollectionMessageHandler ocmh;
|
private final OverseerCollectionMessageHandler ocmh;
|
||||||
|
|
||||||
|
@ -122,7 +127,7 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
|
||||||
}
|
}
|
||||||
|
|
||||||
// find the leader for the shard
|
// find the leader for the shard
|
||||||
Replica parentShardLeader = null;
|
Replica parentShardLeader;
|
||||||
try {
|
try {
|
||||||
parentShardLeader = zkStateReader.getLeaderRetry(collectionName, slice.get(), 10000);
|
parentShardLeader = zkStateReader.getLeaderRetry(collectionName, slice.get(), 10000);
|
||||||
} catch (InterruptedException e) {
|
} catch (InterruptedException e) {
|
||||||
|
@ -326,7 +331,7 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
|
||||||
requestMap);
|
requestMap);
|
||||||
t.stop();
|
t.stop();
|
||||||
|
|
||||||
log.debug("Index on shard: " + nodeName + " split into two successfully");
|
log.debug("Index on shard: {} split into {} successfully", nodeName, subShardNames.size());
|
||||||
|
|
||||||
t = timings.sub("applyBufferedUpdates");
|
t = timings.sub("applyBufferedUpdates");
|
||||||
// apply buffered updates on sub-shards
|
// apply buffered updates on sub-shards
|
||||||
|
@ -680,13 +685,13 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
|
||||||
List<DocRouter.Range> subRanges, List<String> subSlices, List<String> subShardNames,
|
List<DocRouter.Range> subRanges, List<String> subSlices, List<String> subShardNames,
|
||||||
boolean firstReplicaNrt) {
|
boolean firstReplicaNrt) {
|
||||||
String splitKey = message.getStr("split.key");
|
String splitKey = message.getStr("split.key");
|
||||||
|
String rangesStr = message.getStr(CoreAdminParams.RANGES);
|
||||||
|
|
||||||
DocRouter.Range range = parentSlice.getRange();
|
DocRouter.Range range = parentSlice.getRange();
|
||||||
if (range == null) {
|
if (range == null) {
|
||||||
range = new PlainIdRouter().fullRange();
|
range = new PlainIdRouter().fullRange();
|
||||||
}
|
}
|
||||||
DocRouter router = collection.getRouter() != null ? collection.getRouter() : DocRouter.DEFAULT;
|
DocRouter router = collection.getRouter() != null ? collection.getRouter() : DocRouter.DEFAULT;
|
||||||
|
|
||||||
String rangesStr = message.getStr(CoreAdminParams.RANGES);
|
|
||||||
if (rangesStr != null) {
|
if (rangesStr != null) {
|
||||||
String[] ranges = rangesStr.split(",");
|
String[] ranges = rangesStr.split(",");
|
||||||
if (ranges.length == 0 || ranges.length == 1) {
|
if (ranges.length == 0 || ranges.length == 1) {
|
||||||
|
@ -740,8 +745,14 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// todo: fixed to two partitions?
|
int numSubShards = message.getInt(NUM_SUB_SHARDS, DEFAULT_NUM_SUB_SHARDS);
|
||||||
subRanges.addAll(router.partitionRange(2, range));
|
log.info("{} set at: {}", NUM_SUB_SHARDS, numSubShards);
|
||||||
|
|
||||||
|
if(numSubShards < MIN_NUM_SUB_SHARDS || numSubShards > MAX_NUM_SUB_SHARDS)
|
||||||
|
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
|
||||||
|
"A shard can only be split into "+MIN_NUM_SUB_SHARDS+" to " + MAX_NUM_SUB_SHARDS
|
||||||
|
+ " subshards in one split request. Provided "+NUM_SUB_SHARDS+"=" + numSubShards);
|
||||||
|
subRanges.addAll(router.partitionRange(numSubShards, range));
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0; i < subRanges.size(); i++) {
|
for (int i = 0; i < subRanges.size(); i++) {
|
||||||
|
|
|
@ -143,6 +143,7 @@ import static org.apache.solr.common.params.CollectionAdminParams.WITH_COLLECTIO
|
||||||
import static org.apache.solr.common.params.CollectionParams.CollectionAction.*;
|
import static org.apache.solr.common.params.CollectionParams.CollectionAction.*;
|
||||||
import static org.apache.solr.common.params.CommonAdminParams.ASYNC;
|
import static org.apache.solr.common.params.CommonAdminParams.ASYNC;
|
||||||
import static org.apache.solr.common.params.CommonAdminParams.IN_PLACE_MOVE;
|
import static org.apache.solr.common.params.CommonAdminParams.IN_PLACE_MOVE;
|
||||||
|
import static org.apache.solr.common.params.CommonAdminParams.NUM_SUB_SHARDS;
|
||||||
import static org.apache.solr.common.params.CommonAdminParams.SPLIT_METHOD;
|
import static org.apache.solr.common.params.CommonAdminParams.SPLIT_METHOD;
|
||||||
import static org.apache.solr.common.params.CommonAdminParams.WAIT_FOR_FINAL_STATE;
|
import static org.apache.solr.common.params.CommonAdminParams.WAIT_FOR_FINAL_STATE;
|
||||||
import static org.apache.solr.common.params.CommonParams.NAME;
|
import static org.apache.solr.common.params.CommonParams.NAME;
|
||||||
|
@ -638,6 +639,7 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission
|
||||||
String shard = req.getParams().get(SHARD_ID_PROP);
|
String shard = req.getParams().get(SHARD_ID_PROP);
|
||||||
String rangesStr = req.getParams().get(CoreAdminParams.RANGES);
|
String rangesStr = req.getParams().get(CoreAdminParams.RANGES);
|
||||||
String splitKey = req.getParams().get("split.key");
|
String splitKey = req.getParams().get("split.key");
|
||||||
|
String numSubShards = req.getParams().get(NUM_SUB_SHARDS);
|
||||||
|
|
||||||
if (splitKey == null && shard == null) {
|
if (splitKey == null && shard == null) {
|
||||||
throw new SolrException(ErrorCode.BAD_REQUEST, "At least one of shard, or split.key should be specified.");
|
throw new SolrException(ErrorCode.BAD_REQUEST, "At least one of shard, or split.key should be specified.");
|
||||||
|
@ -650,6 +652,10 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission
|
||||||
throw new SolrException(ErrorCode.BAD_REQUEST,
|
throw new SolrException(ErrorCode.BAD_REQUEST,
|
||||||
"Only one of 'ranges' or 'split.key' should be specified");
|
"Only one of 'ranges' or 'split.key' should be specified");
|
||||||
}
|
}
|
||||||
|
if (numSubShards != null && (splitKey != null || rangesStr != null)) {
|
||||||
|
throw new SolrException(ErrorCode.BAD_REQUEST,
|
||||||
|
"numSubShards can not be specified with split.key or ranges parameters");
|
||||||
|
}
|
||||||
|
|
||||||
Map<String, Object> map = copy(req.getParams(), null,
|
Map<String, Object> map = copy(req.getParams(), null,
|
||||||
COLLECTION_PROP,
|
COLLECTION_PROP,
|
||||||
|
@ -658,7 +664,8 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission
|
||||||
CoreAdminParams.RANGES,
|
CoreAdminParams.RANGES,
|
||||||
WAIT_FOR_FINAL_STATE,
|
WAIT_FOR_FINAL_STATE,
|
||||||
TIMING,
|
TIMING,
|
||||||
SPLIT_METHOD);
|
SPLIT_METHOD,
|
||||||
|
NUM_SUB_SHARDS);
|
||||||
return copyPropertiesWithPrefix(req.getParams(), map, COLL_PROP_PREFIX);
|
return copyPropertiesWithPrefix(req.getParams(), map, COLL_PROP_PREFIX);
|
||||||
}),
|
}),
|
||||||
DELETESHARD_OP(DELETESHARD, (req, rsp, h) -> {
|
DELETESHARD_OP(DELETESHARD, (req, rsp, h) -> {
|
||||||
|
|
|
@ -0,0 +1,102 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.solr.cloud;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.solr.client.solrj.SolrServerException;
|
||||||
|
import org.apache.solr.client.solrj.impl.HttpSolrClient;
|
||||||
|
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
|
||||||
|
import org.apache.solr.common.SolrException;
|
||||||
|
import org.junit.After;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
public class SplitShardTest extends SolrCloudTestCase {
|
||||||
|
|
||||||
|
private final String COLLECTION_NAME = "splitshardtest-collection";
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void setupCluster() throws Exception {
|
||||||
|
configureCluster(1)
|
||||||
|
.addConfig("conf", configset("cloud-minimal"))
|
||||||
|
.configure();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Before
|
||||||
|
@Override
|
||||||
|
public void setUp() throws Exception {
|
||||||
|
super.setUp();
|
||||||
|
}
|
||||||
|
|
||||||
|
@After
|
||||||
|
@Override
|
||||||
|
public void tearDown() throws Exception {
|
||||||
|
super.tearDown();
|
||||||
|
cluster.deleteAllCollections();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void doTest() throws IOException, SolrServerException {
|
||||||
|
CollectionAdminRequest
|
||||||
|
.createCollection(COLLECTION_NAME, "conf", 2, 1)
|
||||||
|
.setMaxShardsPerNode(100)
|
||||||
|
.process(cluster.getSolrClient());
|
||||||
|
CollectionAdminRequest.SplitShard splitShard = CollectionAdminRequest.splitShard(COLLECTION_NAME)
|
||||||
|
.setNumSubShards(5)
|
||||||
|
.setShardName("shard1");
|
||||||
|
splitShard.process(cluster.getSolrClient());
|
||||||
|
waitForState("Timed out waiting for sub shards to be active. Number of active shards=" +
|
||||||
|
cluster.getSolrClient().getZkStateReader().getClusterState().getCollection(COLLECTION_NAME).getActiveSlices().size(),
|
||||||
|
COLLECTION_NAME, activeClusterShape(6, 1));
|
||||||
|
|
||||||
|
try {
|
||||||
|
splitShard = CollectionAdminRequest.splitShard(COLLECTION_NAME).setShardName("shard2").setNumSubShards(10);
|
||||||
|
splitShard.process(cluster.getSolrClient());
|
||||||
|
fail("SplitShard should throw an exception when numSubShards > 8");
|
||||||
|
} catch (HttpSolrClient.RemoteSolrException ex) {
|
||||||
|
assertTrue(ex.getMessage().contains("A shard can only be split into 2 to 8 subshards in one split request."));
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
splitShard = CollectionAdminRequest.splitShard(COLLECTION_NAME).setShardName("shard2").setNumSubShards(1);
|
||||||
|
splitShard.process(cluster.getSolrClient());
|
||||||
|
fail("SplitShard should throw an exception when numSubShards < 2");
|
||||||
|
} catch (HttpSolrClient.RemoteSolrException ex) {
|
||||||
|
assertTrue(ex.getMessage().contains("A shard can only be split into 2 to 8 subshards in one split request. Provided numSubShards=1"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void multipleOptionsSplitTest() throws IOException, SolrServerException {
|
||||||
|
CollectionAdminRequest.SplitShard splitShard = CollectionAdminRequest.splitShard(COLLECTION_NAME)
|
||||||
|
.setNumSubShards(5)
|
||||||
|
.setRanges("0-c,d-7fffffff")
|
||||||
|
.setShardName("shard1");
|
||||||
|
boolean expectedException = false;
|
||||||
|
try {
|
||||||
|
splitShard.process(cluster.getSolrClient());
|
||||||
|
fail("An exception should have been thrown");
|
||||||
|
} catch (SolrException ex) {
|
||||||
|
expectedException = true;
|
||||||
|
}
|
||||||
|
assertTrue("Expected SolrException but it didn't happen", expectedException);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -287,6 +287,11 @@ This parameter can be used to split a shard using a route key such that all docu
|
||||||
+
|
+
|
||||||
For example, suppose `split.key=A!` hashes to the range `12-15` and belongs to shard 'shard1' with range `0-20`. Splitting by this route key would yield three sub-shards with ranges `0-11`, `12-15` and `16-20`. Note that the sub-shard with the hash range of the route key may also contain documents for other route keys whose hash ranges overlap.
|
For example, suppose `split.key=A!` hashes to the range `12-15` and belongs to shard 'shard1' with range `0-20`. Splitting by this route key would yield three sub-shards with ranges `0-11`, `12-15` and `16-20`. Note that the sub-shard with the hash range of the route key may also contain documents for other route keys whose hash ranges overlap.
|
||||||
|
|
||||||
|
`numSubShards`::
|
||||||
|
The number of sub-shards to split the parent shard into. Allowed values for this are in the range of 2-8 and defaults to 2.
|
||||||
|
+
|
||||||
|
This parameter can only be used when ranges or split.key are not specified.
|
||||||
|
|
||||||
`splitMethod`::
|
`splitMethod`::
|
||||||
Currently two methods of shard splitting are supported:
|
Currently two methods of shard splitting are supported:
|
||||||
* `splitMethod=rewrite` (default) after selecting documents to retain in each partition this method creates sub-indexes from
|
* `splitMethod=rewrite` (default) after selecting documents to retain in each partition this method creates sub-indexes from
|
||||||
|
|
|
@ -1153,6 +1153,7 @@ public abstract class CollectionAdminRequest<T extends CollectionAdminResponse>
|
||||||
protected String splitKey;
|
protected String splitKey;
|
||||||
protected String shard;
|
protected String shard;
|
||||||
protected String splitMethod;
|
protected String splitMethod;
|
||||||
|
protected Integer numSubShards;
|
||||||
|
|
||||||
private Properties properties;
|
private Properties properties;
|
||||||
|
|
||||||
|
@ -1164,6 +1165,15 @@ public abstract class CollectionAdminRequest<T extends CollectionAdminResponse>
|
||||||
public SplitShard setRanges(String ranges) { this.ranges = ranges; return this; }
|
public SplitShard setRanges(String ranges) { this.ranges = ranges; return this; }
|
||||||
public String getRanges() { return ranges; }
|
public String getRanges() { return ranges; }
|
||||||
|
|
||||||
|
public Integer getNumSubShards() {
|
||||||
|
return numSubShards;
|
||||||
|
}
|
||||||
|
|
||||||
|
public SplitShard setNumSubShards(Integer numSubShards) {
|
||||||
|
this.numSubShards = numSubShards;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
public SplitShard setSplitMethod(String splitMethod) {
|
public SplitShard setSplitMethod(String splitMethod) {
|
||||||
this.splitMethod = splitMethod;
|
this.splitMethod = splitMethod;
|
||||||
return this;
|
return this;
|
||||||
|
@ -1210,6 +1220,8 @@ public abstract class CollectionAdminRequest<T extends CollectionAdminResponse>
|
||||||
params.set("split.key", this.splitKey);
|
params.set("split.key", this.splitKey);
|
||||||
params.set(CoreAdminParams.RANGES, ranges);
|
params.set(CoreAdminParams.RANGES, ranges);
|
||||||
params.set(CommonAdminParams.SPLIT_METHOD, splitMethod);
|
params.set(CommonAdminParams.SPLIT_METHOD, splitMethod);
|
||||||
|
if(numSubShards != null)
|
||||||
|
params.set("numSubShards", numSubShards);
|
||||||
|
|
||||||
if(properties != null) {
|
if(properties != null) {
|
||||||
addProperties(params, properties);
|
addProperties(params, properties);
|
||||||
|
|
|
@ -27,6 +27,8 @@ public interface CommonAdminParams
|
||||||
String IN_PLACE_MOVE = "inPlaceMove";
|
String IN_PLACE_MOVE = "inPlaceMove";
|
||||||
/** Method to use for shard splitting. */
|
/** Method to use for shard splitting. */
|
||||||
String SPLIT_METHOD = "splitMethod";
|
String SPLIT_METHOD = "splitMethod";
|
||||||
|
/** **/
|
||||||
|
String NUM_SUB_SHARDS = "numSubShards";
|
||||||
/** Timeout for replicas to become active. */
|
/** Timeout for replicas to become active. */
|
||||||
String TIMEOUT = "timeout";
|
String TIMEOUT = "timeout";
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue