mirror of https://github.com/apache/lucene.git
SOLR-13399: Adding splitByPrefix param to IndexSizeTrigger; some splitByPrefix test and code cleanup
This commit is contained in:
parent
d468d71c03
commit
768ca7c5a7
|
@ -98,7 +98,7 @@ New Features
|
|||
|
||||
* SOLR-13399: SPLITSHARD implements a new splitByPrefix option that takes into account the actual document distribution
|
||||
when using compositeIds. Document distribution is calculated using the "id_prefix" field (if it exists) containing
|
||||
just the compositeId prefixes, or directly from the indexed "id" field otherwise. (yonik)
|
||||
just the compositeId prefixes, or directly from the indexed "id" field otherwise. (yonik, Megan Carey)
|
||||
|
||||
* SOLR-13565: Node level runtime libs loaded from remote urls (noble)
|
||||
|
||||
|
|
|
@ -30,6 +30,7 @@ import java.util.TreeMap;
|
|||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
import java.util.Locale;
|
||||
|
||||
import org.apache.solr.client.solrj.cloud.SolrCloudManager;
|
||||
import org.apache.solr.client.solrj.cloud.autoscaling.ReplicaInfo;
|
||||
|
@ -69,6 +70,7 @@ public class IndexSizeTrigger extends TriggerBase {
|
|||
public static final String MAX_OPS_PROP = "maxOps";
|
||||
public static final String SPLIT_FUZZ_PROP = CommonAdminParams.SPLIT_FUZZ;
|
||||
public static final String SPLIT_METHOD_PROP = CommonAdminParams.SPLIT_METHOD;
|
||||
public static final String SPLIT_BY_PREFIX = CommonAdminParams.SPLIT_BY_PREFIX;
|
||||
|
||||
public static final String BYTES_SIZE_PROP = "__bytes__";
|
||||
public static final String TOTAL_BYTES_SIZE_PROP = "__total_bytes__";
|
||||
|
@ -86,6 +88,7 @@ public class IndexSizeTrigger extends TriggerBase {
|
|||
private long aboveBytes, aboveDocs, belowBytes, belowDocs;
|
||||
private int maxOps;
|
||||
private SolrIndexSplitter.SplitMethod splitMethod;
|
||||
private boolean splitByPrefix;
|
||||
private float splitFuzz;
|
||||
private CollectionParams.CollectionAction aboveOp, belowOp;
|
||||
private final Set<String> collections = new HashSet<>();
|
||||
|
@ -96,7 +99,7 @@ public class IndexSizeTrigger extends TriggerBase {
|
|||
super(TriggerEventType.INDEXSIZE, name);
|
||||
TriggerUtils.validProperties(validProperties,
|
||||
ABOVE_BYTES_PROP, ABOVE_DOCS_PROP, BELOW_BYTES_PROP, BELOW_DOCS_PROP,
|
||||
COLLECTIONS_PROP, MAX_OPS_PROP, SPLIT_METHOD_PROP, SPLIT_FUZZ_PROP);
|
||||
COLLECTIONS_PROP, MAX_OPS_PROP, SPLIT_METHOD_PROP, SPLIT_FUZZ_PROP, SPLIT_BY_PREFIX);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -176,11 +179,10 @@ public class IndexSizeTrigger extends TriggerBase {
|
|||
} catch (Exception e) {
|
||||
throw new TriggerValidationException(getName(), MAX_OPS_PROP, "invalid value: '" + maxOpsStr + "': " + e.getMessage());
|
||||
}
|
||||
String methodStr = (String)properties.getOrDefault(CommonAdminParams.SPLIT_METHOD, SolrIndexSplitter.SplitMethod.LINK.toLower());
|
||||
String methodStr = (String)properties.getOrDefault(SPLIT_METHOD_PROP, SolrIndexSplitter.SplitMethod.LINK.toLower());
|
||||
splitMethod = SolrIndexSplitter.SplitMethod.get(methodStr);
|
||||
if (splitMethod == null) {
|
||||
throw new TriggerValidationException(getName(), SPLIT_METHOD_PROP, "Unknown value '" + CommonAdminParams.SPLIT_METHOD +
|
||||
": " + methodStr);
|
||||
throw new TriggerValidationException(getName(), SPLIT_METHOD_PROP, "unrecognized value of: '" + methodStr + "'");
|
||||
}
|
||||
String fuzzStr = String.valueOf(properties.getOrDefault(SPLIT_FUZZ_PROP, 0.0f));
|
||||
try {
|
||||
|
@ -188,6 +190,19 @@ public class IndexSizeTrigger extends TriggerBase {
|
|||
} catch (Exception e) {
|
||||
throw new TriggerValidationException(getName(), SPLIT_FUZZ_PROP, "invalid value: '" + fuzzStr + "': " + e.getMessage());
|
||||
}
|
||||
String splitByPrefixStr = String.valueOf(properties.getOrDefault(SPLIT_BY_PREFIX, false));
|
||||
try {
|
||||
splitByPrefix = getValidBool(splitByPrefixStr);
|
||||
} catch (Exception e) {
|
||||
throw new TriggerValidationException(getName(), SPLIT_BY_PREFIX, "invalid value: '" + splitByPrefixStr + "': " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
private boolean getValidBool(String str) throws Exception {
|
||||
if (str != null && (str.toLowerCase(Locale.ROOT).equals("true") || str.toLowerCase(Locale.ROOT).equals("false"))) {
|
||||
return Boolean.parseBoolean(str);
|
||||
}
|
||||
throw new IllegalArgumentException("Expected a valid boolean value but got " + str);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -430,10 +445,11 @@ public class IndexSizeTrigger extends TriggerBase {
|
|||
TriggerEvent.Op op = new TriggerEvent.Op(aboveOp);
|
||||
op.addHint(Suggester.Hint.COLL_SHARD, new Pair<>(coll, r.getShard()));
|
||||
Map<String, Object> params = new HashMap<>();
|
||||
params.put(CommonAdminParams.SPLIT_METHOD, splitMethod.toLower());
|
||||
params.put(SPLIT_METHOD_PROP, splitMethod.toLower());
|
||||
if (splitFuzz > 0) {
|
||||
params.put(CommonAdminParams.SPLIT_FUZZ, splitFuzz);
|
||||
params.put(SPLIT_FUZZ_PROP, splitFuzz);
|
||||
}
|
||||
params.put(SPLIT_BY_PREFIX, splitByPrefix);
|
||||
op.addHint(Suggester.Hint.PARAMS, params);
|
||||
ops.add(op);
|
||||
Long time = lastAboveEventMap.get(r.getCore());
|
||||
|
|
|
@ -30,6 +30,7 @@ import org.apache.lucene.index.MultiTerms;
|
|||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
import org.apache.solr.cloud.CloudDescriptor;
|
||||
import org.apache.solr.cloud.ZkShardTerms;
|
||||
import org.apache.solr.common.SolrException;
|
||||
|
@ -385,16 +386,7 @@ class SplitOp implements CoreAdminHandler.CoreAdminOp {
|
|||
|
||||
// compare to current prefix bucket and see if this new term shares the same prefix
|
||||
if (term != null && term.length >= currPrefix.length && currPrefix.length > 0) {
|
||||
int i = 0;
|
||||
for (; i < currPrefix.length; i++) {
|
||||
if (currPrefix.bytes[i] != term.bytes[term.offset + i]) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (i == currPrefix.length) {
|
||||
// prefix was the same (common-case fast path)
|
||||
// int count = termsEnum.docFreq();
|
||||
if (StringHelper.startsWith(term, currPrefix)) {
|
||||
bucketCount++; // use 1 since we are dealing with unique ids
|
||||
continue;
|
||||
}
|
||||
|
@ -442,7 +434,7 @@ class SplitOp implements CoreAdminHandler.CoreAdminOp {
|
|||
}
|
||||
}
|
||||
|
||||
log.info("Split histogram from idField {}: ms={}, numBuckets={} sumBuckets={} numPrefixes={}numCollisions={}", idField, timer.getTime(), counts.size(), sumBuckets, numPrefixes, numCollisions);
|
||||
log.info("Split histogram from idField {}: ms={}, numBuckets={} sumBuckets={} numPrefixes={} numCollisions={}", idField, timer.getTime(), counts.size(), sumBuckets, numPrefixes, numCollisions);
|
||||
|
||||
return counts.values();
|
||||
}
|
||||
|
|
|
@ -40,10 +40,10 @@ import org.junit.Test;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
//
|
||||
// This class tests higher level SPLITSHARD functionality when splitByPrefix is specified.
|
||||
// See SplitHandlerTest for random tests of lower-level split selection logic.
|
||||
//
|
||||
/**
|
||||
* This class tests higher level SPLITSHARD functionality when splitByPrefix is specified.
|
||||
* See SplitHandlerTest for random tests of lower-level split selection logic.
|
||||
*/
|
||||
public class SplitByPrefixTest extends SolrCloudTestCase {
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
|
||||
|
@ -90,7 +90,9 @@ public class SplitByPrefixTest extends SolrCloudTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
// find prefixes (shard keys) matching certain criteria
|
||||
/**
|
||||
* find prefixes (shard keys) matching certain criteria
|
||||
*/
|
||||
public static List<Prefix> findPrefixes(int numToFind, int lowerBound, int upperBound) {
|
||||
CompositeIdRouter router = new CompositeIdRouter();
|
||||
|
||||
|
@ -115,7 +117,9 @@ public class SplitByPrefixTest extends SolrCloudTestCase {
|
|||
return prefixes;
|
||||
}
|
||||
|
||||
// remove duplicate prefixes from the sorted prefix list
|
||||
/**
|
||||
* remove duplicate prefixes from the SORTED prefix list
|
||||
*/
|
||||
public static List<Prefix> removeDups(List<Prefix> prefixes) {
|
||||
ArrayList<Prefix> result = new ArrayList<>();
|
||||
Prefix last = null;
|
||||
|
|
|
@ -64,6 +64,7 @@ import org.apache.solr.common.util.TimeSource;
|
|||
import org.apache.solr.common.util.Utils;
|
||||
import org.apache.solr.core.SolrResourceLoader;
|
||||
import org.apache.solr.metrics.SolrCoreMetricManager;
|
||||
import org.apache.solr.update.SolrIndexSplitter;
|
||||
import org.apache.solr.util.LogLevel;
|
||||
import org.junit.After;
|
||||
import org.junit.AfterClass;
|
||||
|
@ -155,7 +156,6 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
|
|||
}
|
||||
|
||||
@Test
|
||||
//@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
|
||||
public void testTrigger() throws Exception {
|
||||
String collectionName = "testTrigger_collection";
|
||||
CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
|
||||
|
@ -228,7 +228,11 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
|
|||
}
|
||||
Map<String, Object> params = (Map<String, Object>)op.getHints().get(Suggester.Hint.PARAMS);
|
||||
assertNotNull("params are null: " + op, params);
|
||||
assertEquals("splitMethod: " + op, "link", params.get(CommonAdminParams.SPLIT_METHOD));
|
||||
|
||||
// verify default split configs
|
||||
assertEquals("splitMethod: " + op, SolrIndexSplitter.SplitMethod.LINK.toLower(),
|
||||
params.get(CommonAdminParams.SPLIT_METHOD));
|
||||
assertEquals("splitByPrefix: " + op, false, params.get(CommonAdminParams.SPLIT_BY_PREFIX));
|
||||
}
|
||||
assertTrue("shard1 should be split", shard1);
|
||||
assertTrue("shard2 should be split", shard2);
|
||||
|
@ -261,7 +265,6 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
|
|||
}
|
||||
|
||||
@Test
|
||||
//@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
|
||||
public void testSplitIntegration() throws Exception {
|
||||
String collectionName = "testSplitIntegration_collection";
|
||||
CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
|
||||
|
@ -385,7 +388,6 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
|
|||
}
|
||||
|
||||
@Test
|
||||
//@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
|
||||
public void testMergeIntegration() throws Exception {
|
||||
String collectionName = "testMergeIntegration_collection";
|
||||
CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
|
||||
|
@ -500,8 +502,6 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
|
|||
}
|
||||
|
||||
@Test
|
||||
//@BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 05-Jul-2018
|
||||
//@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
|
||||
public void testMixedBounds() throws Exception {
|
||||
if (!realCluster) {
|
||||
log.info("This test doesn't work with a simulated cluster");
|
||||
|
@ -732,7 +732,6 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
|
|||
req = AutoScalingRequest.create(SolrRequest.METHOD.POST, suspendTriggerCommand);
|
||||
response = solrClient.request(req);
|
||||
assertEquals(response.get("result").toString(), "success");
|
||||
// System.exit(-1);
|
||||
|
||||
assertEquals(1, listenerEvents.size());
|
||||
events = listenerEvents.get("capturing4");
|
||||
|
@ -766,7 +765,6 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
|
|||
}
|
||||
|
||||
@Test
|
||||
//@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
|
||||
public void testMaxOps() throws Exception {
|
||||
String collectionName = "testMaxOps_collection";
|
||||
CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
|
||||
|
@ -908,9 +906,10 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
|
|||
assertEquals("number of ops: " + ops, 3, ops.size());
|
||||
}
|
||||
|
||||
//test that split parameters can be overridden
|
||||
@Test
|
||||
public void testSplitMethodConfig() throws Exception {
|
||||
String collectionName = "testSplitMethod_collection";
|
||||
public void testSplitConfig() throws Exception {
|
||||
String collectionName = "testSplitConfig_collection";
|
||||
CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
|
||||
"conf", 2, 2).setMaxShardsPerNode(2);
|
||||
create.process(solrClient);
|
||||
|
@ -919,7 +918,9 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
|
|||
|
||||
long waitForSeconds = 3 + random().nextInt(5);
|
||||
Map<String, Object> props = createTriggerProps(waitForSeconds);
|
||||
props.put(CommonAdminParams.SPLIT_METHOD, "link");
|
||||
props.put(CommonAdminParams.SPLIT_METHOD, SolrIndexSplitter.SplitMethod.REWRITE.toLower());
|
||||
props.put(IndexSizeTrigger.SPLIT_BY_PREFIX, true);
|
||||
|
||||
try (IndexSizeTrigger trigger = new IndexSizeTrigger("index_size_trigger6")) {
|
||||
trigger.configure(loader, cloudManager, props);
|
||||
trigger.init();
|
||||
|
@ -977,7 +978,11 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
|
|||
}
|
||||
Map<String, Object> params = (Map<String, Object>)op.getHints().get(Suggester.Hint.PARAMS);
|
||||
assertNotNull("params are null: " + op, params);
|
||||
assertEquals("splitMethod: " + op, "link", params.get(CommonAdminParams.SPLIT_METHOD));
|
||||
|
||||
// verify overrides for split config
|
||||
assertEquals("splitMethod: " + op, SolrIndexSplitter.SplitMethod.REWRITE.toLower(),
|
||||
params.get(CommonAdminParams.SPLIT_METHOD));
|
||||
assertEquals("splitByPrefix: " + op, true, params.get(CommonAdminParams.SPLIT_BY_PREFIX));
|
||||
}
|
||||
assertTrue("shard1 should be split", shard1);
|
||||
assertTrue("shard2 should be split", shard2);
|
||||
|
@ -985,6 +990,30 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
|
|||
|
||||
}
|
||||
|
||||
//validates that trigger configuration will fail for invalid split configs
|
||||
@Test
|
||||
public void testInvalidSplitConfig() throws Exception {
|
||||
long waitForSeconds = 3 + random().nextInt(5);
|
||||
Map<String, Object> props = createTriggerProps(waitForSeconds);
|
||||
props.put(IndexSizeTrigger.SPLIT_BY_PREFIX, "hello");
|
||||
|
||||
try (IndexSizeTrigger trigger = new IndexSizeTrigger("index_size_trigger7")) {
|
||||
trigger.configure(loader, cloudManager, props);
|
||||
fail("Trigger configuration should have failed with invalid property.");
|
||||
} catch (TriggerValidationException e) {
|
||||
assertTrue(e.getDetails().containsKey(IndexSizeTrigger.SPLIT_BY_PREFIX));
|
||||
}
|
||||
|
||||
props.put(IndexSizeTrigger.SPLIT_BY_PREFIX, true);
|
||||
props.put(CommonAdminParams.SPLIT_METHOD, "hello");
|
||||
try (IndexSizeTrigger trigger = new IndexSizeTrigger("index_size_trigger8")) {
|
||||
trigger.configure(loader, cloudManager, props);
|
||||
fail("Trigger configuration should have failed with invalid property.");
|
||||
} catch (TriggerValidationException e) {
|
||||
assertTrue(e.getDetails().containsKey(IndexSizeTrigger.SPLIT_METHOD_PROP));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testEstimatedIndexSize() throws Exception {
|
||||
|
|
|
@ -227,7 +227,7 @@ public class SplitHandlerTest extends SolrTestCaseJ4 {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testHistoramBuilding() throws Exception {
|
||||
public void testHistogramBuilding() throws Exception {
|
||||
List<Prefix> prefixes = SplitByPrefixTest.findPrefixes(20, 0, 0x00ffffff);
|
||||
List<Prefix> uniquePrefixes = SplitByPrefixTest.removeDups(prefixes);
|
||||
assertTrue(prefixes.size() > uniquePrefixes.size()); // make sure we have some duplicates to test hash collisions
|
||||
|
@ -273,6 +273,10 @@ public class SplitHandlerTest extends SolrTestCaseJ4 {
|
|||
}
|
||||
|
||||
private boolean eqCount(Collection<SplitOp.RangeCount> a, Collection<SplitOp.RangeCount> b) {
|
||||
if (a.size() != b.size()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
Iterator<SplitOp.RangeCount> it1 = a.iterator();
|
||||
Iterator<SplitOp.RangeCount> it2 = b.iterator();
|
||||
while (it1.hasNext()) {
|
||||
|
|
|
@ -330,6 +330,11 @@ Non-zero values are useful for large indexes with aggressively growing size, as
|
|||
avalanches of split shard requests when the total size of the index
|
||||
reaches even multiples of the maximum shard size thresholds.
|
||||
|
||||
`splitByPrefix`::
|
||||
A boolean value (default is false) that specifies whether the aboveOp shard split should try to
|
||||
calculate sub-shard hash ranges according to document prefixes, or do a traditional shard split (i.e.
|
||||
split the hash range into n sub-ranges).
|
||||
|
||||
Events generated by this trigger contain additional details about the shards
|
||||
that exceeded thresholds and the types of violations (upper / lower bounds, bytes / docs metrics).
|
||||
|
||||
|
|
|
@ -56,6 +56,10 @@ class SplitShardSuggester extends Suggester {
|
|||
if (splitMethod != null) {
|
||||
req.setSplitMethod(splitMethod);
|
||||
}
|
||||
Boolean splitByPrefix = (Boolean)params.get(CommonAdminParams.SPLIT_BY_PREFIX);
|
||||
if (splitByPrefix != null) {
|
||||
req.setSplitByPrefix(splitByPrefix);
|
||||
}
|
||||
return req;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue