From 768ca7c5a7fe5fcd89f047275319eafe783eb96c Mon Sep 17 00:00:00 2001 From: Megan Carey Date: Fri, 9 Aug 2019 14:47:14 -0700 Subject: [PATCH] SOLR-13399: Adding splitByPrefix param to IndexSizeTrigger; some splitByPrefix test and code cleanup --- solr/CHANGES.txt | 2 +- .../cloud/autoscaling/IndexSizeTrigger.java | 28 +++++++--- .../apache/solr/handler/admin/SplitOp.java | 14 ++--- .../api/collections/SplitByPrefixTest.java | 16 +++--- .../autoscaling/IndexSizeTriggerTest.java | 53 ++++++++++++++----- .../solr/handler/admin/SplitHandlerTest.java | 6 ++- .../src/solrcloud-autoscaling-triggers.adoc | 5 ++ .../autoscaling/SplitShardSuggester.java | 4 ++ 8 files changed, 91 insertions(+), 37 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index d6320e55163..5e421e3e0c8 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -98,7 +98,7 @@ New Features * SOLR-13399: SPLITSHARD implements a new splitByPrefix option that takes into account the actual document distribution when using compositeIds. Document distribution is calculated using the "id_prefix" field (if it exists) containing - just the compositeId prefixes, or directly from the indexed "id" field otherwise. (yonik) + just the compositeId prefixes, or directly from the indexed "id" field otherwise. (yonik, Megan Carey) * SOLR-13565: Node level runtime libs loaded from remote urls (noble) diff --git a/solr/core/src/java/org/apache/solr/cloud/autoscaling/IndexSizeTrigger.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/IndexSizeTrigger.java index 33eb1d7de6c..f32669c7e8d 100644 --- a/solr/core/src/java/org/apache/solr/cloud/autoscaling/IndexSizeTrigger.java +++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/IndexSizeTrigger.java @@ -30,6 +30,7 @@ import java.util.TreeMap; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; +import java.util.Locale; import org.apache.solr.client.solrj.cloud.SolrCloudManager; import org.apache.solr.client.solrj.cloud.autoscaling.ReplicaInfo; @@ -69,6 +70,7 @@ public class IndexSizeTrigger extends TriggerBase { public static final String MAX_OPS_PROP = "maxOps"; public static final String SPLIT_FUZZ_PROP = CommonAdminParams.SPLIT_FUZZ; public static final String SPLIT_METHOD_PROP = CommonAdminParams.SPLIT_METHOD; + public static final String SPLIT_BY_PREFIX = CommonAdminParams.SPLIT_BY_PREFIX; public static final String BYTES_SIZE_PROP = "__bytes__"; public static final String TOTAL_BYTES_SIZE_PROP = "__total_bytes__"; @@ -86,6 +88,7 @@ public class IndexSizeTrigger extends TriggerBase { private long aboveBytes, aboveDocs, belowBytes, belowDocs; private int maxOps; private SolrIndexSplitter.SplitMethod splitMethod; + private boolean splitByPrefix; private float splitFuzz; private CollectionParams.CollectionAction aboveOp, belowOp; private final Set collections = new HashSet<>(); @@ -96,7 +99,7 @@ public class IndexSizeTrigger extends TriggerBase { super(TriggerEventType.INDEXSIZE, name); TriggerUtils.validProperties(validProperties, ABOVE_BYTES_PROP, ABOVE_DOCS_PROP, BELOW_BYTES_PROP, BELOW_DOCS_PROP, - COLLECTIONS_PROP, MAX_OPS_PROP, SPLIT_METHOD_PROP, SPLIT_FUZZ_PROP); + COLLECTIONS_PROP, MAX_OPS_PROP, SPLIT_METHOD_PROP, SPLIT_FUZZ_PROP, SPLIT_BY_PREFIX); } @Override @@ -176,11 +179,10 @@ public class IndexSizeTrigger extends TriggerBase { } catch (Exception e) { throw new TriggerValidationException(getName(), MAX_OPS_PROP, "invalid value: '" + maxOpsStr + "': " + e.getMessage()); } - String methodStr = (String)properties.getOrDefault(CommonAdminParams.SPLIT_METHOD, SolrIndexSplitter.SplitMethod.LINK.toLower()); + String methodStr = (String)properties.getOrDefault(SPLIT_METHOD_PROP, SolrIndexSplitter.SplitMethod.LINK.toLower()); splitMethod = SolrIndexSplitter.SplitMethod.get(methodStr); if (splitMethod == null) { - throw new TriggerValidationException(getName(), SPLIT_METHOD_PROP, "Unknown value '" + CommonAdminParams.SPLIT_METHOD + - ": " + methodStr); + throw new TriggerValidationException(getName(), SPLIT_METHOD_PROP, "unrecognized value of: '" + methodStr + "'"); } String fuzzStr = String.valueOf(properties.getOrDefault(SPLIT_FUZZ_PROP, 0.0f)); try { @@ -188,6 +190,19 @@ public class IndexSizeTrigger extends TriggerBase { } catch (Exception e) { throw new TriggerValidationException(getName(), SPLIT_FUZZ_PROP, "invalid value: '" + fuzzStr + "': " + e.getMessage()); } + String splitByPrefixStr = String.valueOf(properties.getOrDefault(SPLIT_BY_PREFIX, false)); + try { + splitByPrefix = getValidBool(splitByPrefixStr); + } catch (Exception e) { + throw new TriggerValidationException(getName(), SPLIT_BY_PREFIX, "invalid value: '" + splitByPrefixStr + "': " + e.getMessage()); + } + } + + private boolean getValidBool(String str) throws Exception { + if (str != null && (str.toLowerCase(Locale.ROOT).equals("true") || str.toLowerCase(Locale.ROOT).equals("false"))) { + return Boolean.parseBoolean(str); + } + throw new IllegalArgumentException("Expected a valid boolean value but got " + str); } @Override @@ -430,10 +445,11 @@ public class IndexSizeTrigger extends TriggerBase { TriggerEvent.Op op = new TriggerEvent.Op(aboveOp); op.addHint(Suggester.Hint.COLL_SHARD, new Pair<>(coll, r.getShard())); Map params = new HashMap<>(); - params.put(CommonAdminParams.SPLIT_METHOD, splitMethod.toLower()); + params.put(SPLIT_METHOD_PROP, splitMethod.toLower()); if (splitFuzz > 0) { - params.put(CommonAdminParams.SPLIT_FUZZ, splitFuzz); + params.put(SPLIT_FUZZ_PROP, splitFuzz); } + params.put(SPLIT_BY_PREFIX, splitByPrefix); op.addHint(Suggester.Hint.PARAMS, params); ops.add(op); Long time = lastAboveEventMap.get(r.getCore()); diff --git a/solr/core/src/java/org/apache/solr/handler/admin/SplitOp.java b/solr/core/src/java/org/apache/solr/handler/admin/SplitOp.java index d280b116303..584f18386d0 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/SplitOp.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/SplitOp.java @@ -30,6 +30,7 @@ import org.apache.lucene.index.MultiTerms; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.StringHelper; import org.apache.solr.cloud.CloudDescriptor; import org.apache.solr.cloud.ZkShardTerms; import org.apache.solr.common.SolrException; @@ -385,16 +386,7 @@ class SplitOp implements CoreAdminHandler.CoreAdminOp { // compare to current prefix bucket and see if this new term shares the same prefix if (term != null && term.length >= currPrefix.length && currPrefix.length > 0) { - int i = 0; - for (; i < currPrefix.length; i++) { - if (currPrefix.bytes[i] != term.bytes[term.offset + i]) { - break; - } - } - - if (i == currPrefix.length) { - // prefix was the same (common-case fast path) - // int count = termsEnum.docFreq(); + if (StringHelper.startsWith(term, currPrefix)) { bucketCount++; // use 1 since we are dealing with unique ids continue; } @@ -442,7 +434,7 @@ class SplitOp implements CoreAdminHandler.CoreAdminOp { } } - log.info("Split histogram from idField {}: ms={}, numBuckets={} sumBuckets={} numPrefixes={}numCollisions={}", idField, timer.getTime(), counts.size(), sumBuckets, numPrefixes, numCollisions); + log.info("Split histogram from idField {}: ms={}, numBuckets={} sumBuckets={} numPrefixes={} numCollisions={}", idField, timer.getTime(), counts.size(), sumBuckets, numPrefixes, numCollisions); return counts.values(); } diff --git a/solr/core/src/test/org/apache/solr/cloud/api/collections/SplitByPrefixTest.java b/solr/core/src/test/org/apache/solr/cloud/api/collections/SplitByPrefixTest.java index ca2aefc5b46..5ec53e5cbbf 100644 --- a/solr/core/src/test/org/apache/solr/cloud/api/collections/SplitByPrefixTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/api/collections/SplitByPrefixTest.java @@ -40,10 +40,10 @@ import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -// -// This class tests higher level SPLITSHARD functionality when splitByPrefix is specified. -// See SplitHandlerTest for random tests of lower-level split selection logic. -// +/** + * This class tests higher level SPLITSHARD functionality when splitByPrefix is specified. + * See SplitHandlerTest for random tests of lower-level split selection logic. + */ public class SplitByPrefixTest extends SolrCloudTestCase { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); @@ -90,7 +90,9 @@ public class SplitByPrefixTest extends SolrCloudTestCase { } } - // find prefixes (shard keys) matching certain criteria + /** + * find prefixes (shard keys) matching certain criteria + */ public static List findPrefixes(int numToFind, int lowerBound, int upperBound) { CompositeIdRouter router = new CompositeIdRouter(); @@ -115,7 +117,9 @@ public class SplitByPrefixTest extends SolrCloudTestCase { return prefixes; } - // remove duplicate prefixes from the sorted prefix list + /** + * remove duplicate prefixes from the SORTED prefix list + */ public static List removeDups(List prefixes) { ArrayList result = new ArrayList<>(); Prefix last = null; diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/IndexSizeTriggerTest.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/IndexSizeTriggerTest.java index 59c7d67fafe..d54cf0b9ef2 100644 --- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/IndexSizeTriggerTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/IndexSizeTriggerTest.java @@ -64,6 +64,7 @@ import org.apache.solr.common.util.TimeSource; import org.apache.solr.common.util.Utils; import org.apache.solr.core.SolrResourceLoader; import org.apache.solr.metrics.SolrCoreMetricManager; +import org.apache.solr.update.SolrIndexSplitter; import org.apache.solr.util.LogLevel; import org.junit.After; import org.junit.AfterClass; @@ -155,7 +156,6 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase { } @Test - //@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") public void testTrigger() throws Exception { String collectionName = "testTrigger_collection"; CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName, @@ -228,7 +228,11 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase { } Map params = (Map)op.getHints().get(Suggester.Hint.PARAMS); assertNotNull("params are null: " + op, params); - assertEquals("splitMethod: " + op, "link", params.get(CommonAdminParams.SPLIT_METHOD)); + + // verify default split configs + assertEquals("splitMethod: " + op, SolrIndexSplitter.SplitMethod.LINK.toLower(), + params.get(CommonAdminParams.SPLIT_METHOD)); + assertEquals("splitByPrefix: " + op, false, params.get(CommonAdminParams.SPLIT_BY_PREFIX)); } assertTrue("shard1 should be split", shard1); assertTrue("shard2 should be split", shard2); @@ -261,7 +265,6 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase { } @Test - //@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") public void testSplitIntegration() throws Exception { String collectionName = "testSplitIntegration_collection"; CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName, @@ -385,7 +388,6 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase { } @Test - //@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") public void testMergeIntegration() throws Exception { String collectionName = "testMergeIntegration_collection"; CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName, @@ -500,8 +502,6 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase { } @Test - //@BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 05-Jul-2018 - //@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") public void testMixedBounds() throws Exception { if (!realCluster) { log.info("This test doesn't work with a simulated cluster"); @@ -732,7 +732,6 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase { req = AutoScalingRequest.create(SolrRequest.METHOD.POST, suspendTriggerCommand); response = solrClient.request(req); assertEquals(response.get("result").toString(), "success"); -// System.exit(-1); assertEquals(1, listenerEvents.size()); events = listenerEvents.get("capturing4"); @@ -766,7 +765,6 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase { } @Test - //@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") public void testMaxOps() throws Exception { String collectionName = "testMaxOps_collection"; CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName, @@ -908,9 +906,10 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase { assertEquals("number of ops: " + ops, 3, ops.size()); } + //test that split parameters can be overridden @Test - public void testSplitMethodConfig() throws Exception { - String collectionName = "testSplitMethod_collection"; + public void testSplitConfig() throws Exception { + String collectionName = "testSplitConfig_collection"; CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName, "conf", 2, 2).setMaxShardsPerNode(2); create.process(solrClient); @@ -919,7 +918,9 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase { long waitForSeconds = 3 + random().nextInt(5); Map props = createTriggerProps(waitForSeconds); - props.put(CommonAdminParams.SPLIT_METHOD, "link"); + props.put(CommonAdminParams.SPLIT_METHOD, SolrIndexSplitter.SplitMethod.REWRITE.toLower()); + props.put(IndexSizeTrigger.SPLIT_BY_PREFIX, true); + try (IndexSizeTrigger trigger = new IndexSizeTrigger("index_size_trigger6")) { trigger.configure(loader, cloudManager, props); trigger.init(); @@ -977,13 +978,41 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase { } Map params = (Map)op.getHints().get(Suggester.Hint.PARAMS); assertNotNull("params are null: " + op, params); - assertEquals("splitMethod: " + op, "link", params.get(CommonAdminParams.SPLIT_METHOD)); + + // verify overrides for split config + assertEquals("splitMethod: " + op, SolrIndexSplitter.SplitMethod.REWRITE.toLower(), + params.get(CommonAdminParams.SPLIT_METHOD)); + assertEquals("splitByPrefix: " + op, true, params.get(CommonAdminParams.SPLIT_BY_PREFIX)); } assertTrue("shard1 should be split", shard1); assertTrue("shard2 should be split", shard2); } } + + //validates that trigger configuration will fail for invalid split configs + @Test + public void testInvalidSplitConfig() throws Exception { + long waitForSeconds = 3 + random().nextInt(5); + Map props = createTriggerProps(waitForSeconds); + props.put(IndexSizeTrigger.SPLIT_BY_PREFIX, "hello"); + + try (IndexSizeTrigger trigger = new IndexSizeTrigger("index_size_trigger7")) { + trigger.configure(loader, cloudManager, props); + fail("Trigger configuration should have failed with invalid property."); + } catch (TriggerValidationException e) { + assertTrue(e.getDetails().containsKey(IndexSizeTrigger.SPLIT_BY_PREFIX)); + } + + props.put(IndexSizeTrigger.SPLIT_BY_PREFIX, true); + props.put(CommonAdminParams.SPLIT_METHOD, "hello"); + try (IndexSizeTrigger trigger = new IndexSizeTrigger("index_size_trigger8")) { + trigger.configure(loader, cloudManager, props); + fail("Trigger configuration should have failed with invalid property."); + } catch (TriggerValidationException e) { + assertTrue(e.getDetails().containsKey(IndexSizeTrigger.SPLIT_METHOD_PROP)); + } + } @Test diff --git a/solr/core/src/test/org/apache/solr/handler/admin/SplitHandlerTest.java b/solr/core/src/test/org/apache/solr/handler/admin/SplitHandlerTest.java index 02174b70b21..dcfc74968f6 100644 --- a/solr/core/src/test/org/apache/solr/handler/admin/SplitHandlerTest.java +++ b/solr/core/src/test/org/apache/solr/handler/admin/SplitHandlerTest.java @@ -227,7 +227,7 @@ public class SplitHandlerTest extends SolrTestCaseJ4 { } @Test - public void testHistoramBuilding() throws Exception { + public void testHistogramBuilding() throws Exception { List prefixes = SplitByPrefixTest.findPrefixes(20, 0, 0x00ffffff); List uniquePrefixes = SplitByPrefixTest.removeDups(prefixes); assertTrue(prefixes.size() > uniquePrefixes.size()); // make sure we have some duplicates to test hash collisions @@ -273,6 +273,10 @@ public class SplitHandlerTest extends SolrTestCaseJ4 { } private boolean eqCount(Collection a, Collection b) { + if (a.size() != b.size()) { + return false; + } + Iterator it1 = a.iterator(); Iterator it2 = b.iterator(); while (it1.hasNext()) { diff --git a/solr/solr-ref-guide/src/solrcloud-autoscaling-triggers.adoc b/solr/solr-ref-guide/src/solrcloud-autoscaling-triggers.adoc index 190381d7e37..484f514a19b 100644 --- a/solr/solr-ref-guide/src/solrcloud-autoscaling-triggers.adoc +++ b/solr/solr-ref-guide/src/solrcloud-autoscaling-triggers.adoc @@ -330,6 +330,11 @@ Non-zero values are useful for large indexes with aggressively growing size, as avalanches of split shard requests when the total size of the index reaches even multiples of the maximum shard size thresholds. +`splitByPrefix`:: +A boolean value (default is false) that specifies whether the aboveOp shard split should try to +calculate sub-shard hash ranges according to document prefixes, or do a traditional shard split (i.e. +split the hash range into n sub-ranges). + Events generated by this trigger contain additional details about the shards that exceeded thresholds and the types of violations (upper / lower bounds, bytes / docs metrics). diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/SplitShardSuggester.java b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/SplitShardSuggester.java index 8aafef8da39..a244a10dbbf 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/SplitShardSuggester.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/SplitShardSuggester.java @@ -56,6 +56,10 @@ class SplitShardSuggester extends Suggester { if (splitMethod != null) { req.setSplitMethod(splitMethod); } + Boolean splitByPrefix = (Boolean)params.get(CommonAdminParams.SPLIT_BY_PREFIX); + if (splitByPrefix != null) { + req.setSplitByPrefix(splitByPrefix); + } return req; } }