diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 186ba9bb3c6..ea877eb2a83 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -61,6 +61,7 @@ Bug Fixes
 
 * SOLR-13469: Fix rejected requests during full disk full + IndexFetch to use 503 not 403 (hossman)
 
+* SOLR-12941: Fix IndexSizeTrigger to correctly work with "aboveBytes" and "splitMethod=link" parameters. (ab)
 
 Other Changes
 ----------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/CloudUtil.java b/solr/core/src/java/org/apache/solr/cloud/CloudUtil.java
index 6a62be915e2..407d5482f0e 100644
--- a/solr/core/src/java/org/apache/solr/cloud/CloudUtil.java
+++ b/solr/core/src/java/org/apache/solr/cloud/CloudUtil.java
@@ -236,7 +236,7 @@ public class CloudUtil {
    * Note: for shards marked as inactive the current Solr behavior is that replicas remain active.
    * {@link org.apache.solr.cloud.autoscaling.sim.SimCloudManager} follows this behavior.
    * @param expectedShards expected number of shards
-   * @param expectedReplicas expected number of active replicas
+   * @param expectedReplicas expected number of active replicas per shard
    * @param withInactive if true then count also inactive shards
    * @param requireLeaders if true then require that each shard has a leader
    */
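Editorial note: the javadoc fix above matters to callers — the second argument of CloudUtil.clusterShape counts active replicas per shard, not in total. For example, the test later in this patch waits for a 5-shard collection with 2 replicas each (10 cores overall):

    // Waits until the collection has 5 shards with 2 active replicas per shard
    // (withInactive=false, requireLeaders=true); this call is taken verbatim
    // from IndexSizeTriggerTest below.
    CloudUtil.waitForState(cloudManager, "failed to create " + collectionName, collectionName,
        CloudUtil.clusterShape(5, 2, false, true));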
diff --git a/solr/core/src/java/org/apache/solr/cloud/autoscaling/IndexSizeTrigger.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/IndexSizeTrigger.java
index 76edd44923d..33eb1d7de6c 100644
--- a/solr/core/src/java/org/apache/solr/cloud/autoscaling/IndexSizeTrigger.java
+++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/IndexSizeTrigger.java
@@ -71,7 +71,10 @@ public class IndexSizeTrigger extends TriggerBase {
   public static final String SPLIT_METHOD_PROP = CommonAdminParams.SPLIT_METHOD;
 
   public static final String BYTES_SIZE_PROP = "__bytes__";
+  public static final String TOTAL_BYTES_SIZE_PROP = "__total_bytes__";
   public static final String DOCS_SIZE_PROP = "__docs__";
+  public static final String MAX_DOC_PROP = "__maxDoc__";
+  public static final String COMMIT_SIZE_PROP = "__commitBytes__";
   public static final String ABOVE_SIZE_PROP = "aboveSize";
   public static final String BELOW_SIZE_PROP = "belowSize";
   public static final String VIOLATION_PROP = "violationType";
@@ -290,6 +293,10 @@ public class IndexSizeTrigger extends TriggerBase {
         metricTags.put(tag, info);
         tag = "metrics:" + registry + ":SEARCHER.searcher.numDocs";
         metricTags.put(tag, info);
+        tag = "metrics:" + registry + ":SEARCHER.searcher.maxDoc";
+        metricTags.put(tag, info);
+        tag = "metrics:" + registry + ":SEARCHER.searcher.indexCommitSize";
+        metricTags.put(tag, info);
       });
     });
     if (metricTags.isEmpty()) {
@@ -309,9 +316,13 @@ public class IndexSizeTrigger extends TriggerBase {
         ReplicaInfo currentInfo = currentSizes.computeIfAbsent(info.getCore(), k -> (ReplicaInfo)info.clone());
         if (tag.contains("INDEX")) {
-          currentInfo.getVariables().put(BYTES_SIZE_PROP, ((Number) size).longValue());
-        } else if (tag.contains("SEARCHER")) {
+          currentInfo.getVariables().put(TOTAL_BYTES_SIZE_PROP, ((Number) size).longValue());
+        } else if (tag.endsWith("SEARCHER.searcher.numDocs")) {
           currentInfo.getVariables().put(DOCS_SIZE_PROP, ((Number) size).longValue());
+        } else if (tag.endsWith("SEARCHER.searcher.maxDoc")) {
+          currentInfo.getVariables().put(MAX_DOC_PROP, ((Number) size).longValue());
+        } else if (tag.endsWith("SEARCHER.searcher.indexCommitSize")) {
+          currentInfo.getVariables().put(COMMIT_SIZE_PROP, ((Number) size).longValue());
         }
       }
     });
@@ -331,6 +342,17 @@ public class IndexSizeTrigger extends TriggerBase {
     Set<String> splittable = new HashSet<>();
 
     currentSizes.forEach((coreName, info) -> {
+      // calculate estimated bytes
+      long maxDoc = (Long)info.getVariable(MAX_DOC_PROP);
+      long numDocs = (Long)info.getVariable(DOCS_SIZE_PROP);
+      long commitSize = (Long)info.getVariable(COMMIT_SIZE_PROP, 0L);
+      if (commitSize <= 0) {
+        commitSize = (Long)info.getVariable(TOTAL_BYTES_SIZE_PROP);
+      }
+      // estimate the size of the latest commit, discounting deleted documents
+      commitSize = estimatedSize(maxDoc, numDocs, commitSize);
+      info.getVariables().put(BYTES_SIZE_PROP, commitSize);
+
       if ((Long)info.getVariable(BYTES_SIZE_PROP) > aboveBytes ||
           (Long)info.getVariable(DOCS_SIZE_PROP) > aboveDocs) {
         if (waitForElapsed(coreName, now, lastAboveEventMap)) {
@@ -479,6 +501,16 @@ public class IndexSizeTrigger extends TriggerBase {
     }
   }
 
+  public static long estimatedSize(long maxDoc, long numDocs, long commitSize) {
+    if (maxDoc == 0) {
+      return 0;
+    }
+    if (maxDoc == numDocs) {
+      return commitSize;
+    }
+    return commitSize * numDocs / maxDoc;
+  }
+
   private boolean waitForElapsed(String name, long now, Map<String, Long> lastEventMap) {
     Long lastTime = lastEventMap.computeIfAbsent(name, s -> now);
     long elapsed = TimeUnit.SECONDS.convert(now - lastTime, TimeUnit.NANOSECONDS);
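Editorial note: the trigger now derives an estimated "live" size from three metrics before comparing against the thresholds. A condensed sketch of the logic above (field names simplified; not the verbatim trigger code):

    // Condensed view of the per-core computation in IndexSizeTrigger.run() above.
    static boolean exceedsUpperBounds(long maxDoc, long numDocs, long commitBytes,
                                      long totalBytes, long aboveBytes, long aboveDocs) {
      // fall back to INDEX.sizeInBytes when the commit-size metric is missing or invalid
      long size = commitBytes <= 0 ? totalBytes : commitBytes;
      // discount documents that are deleted but still occupy space in the commit
      long estimated = IndexSizeTrigger.estimatedSize(maxDoc, numDocs, size);
      return estimated > aboveBytes || numDocs > aboveDocs;
    }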
diff --git a/solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java
index 92da16a97ce..a15e4a51604 100644
--- a/solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java
+++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java
@@ -90,6 +90,8 @@ import org.apache.solr.common.params.CollectionParams;
 import org.apache.solr.common.params.CommonAdminParams;
 import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.params.CoreAdminParams;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.params.UpdateParams;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.Utils;
 import org.apache.solr.core.SolrInfoBean;
@@ -1739,6 +1741,28 @@ public class SimClusterStateProvider implements ClusterStateProvider {
         lock.unlock();
       }
     }
+    SolrParams params = req.getParams();
+    if (params != null && (params.getBool(UpdateParams.OPTIMIZE, false) || params.getBool(UpdateParams.EXPUNGE_DELETES, false))) {
+      lock.lockInterruptibly();
+      try {
+        coll.getSlices().forEach(s -> {
+          Replica leader = s.getLeader();
+          ReplicaInfo ri = getReplicaInfo(leader);
+          Number numDocs = (Number)ri.getVariable("SEARCHER.searcher.numDocs");
+          if (numDocs == null || numDocs.intValue() == 0) {
+            numDocs = 0;
+          }
+          try {
+            simSetShardValue(ri.getCollection(), ri.getShard(), "SEARCHER.searcher.maxDoc", numDocs, false, false);
+            simSetShardValue(ri.getCollection(), ri.getShard(), "SEARCHER.searcher.deletedDocs", 0, false, false);
+          } catch (Exception e) {
+            throw new RuntimeException(e);
+          }
+        });
+      } finally {
+        lock.unlock();
+      }
+    }
     return new UpdateResponse();
   }
diff --git a/solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimUtils.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimUtils.java
index 64fda4df1d8..acfaa0f17bb 100644
--- a/solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimUtils.java
+++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimUtils.java
@@ -52,6 +52,7 @@ public class SimUtils {
       Variable.Type.CORE_IDX.tagName,
       "SEARCHER.searcher.numDocs",
       "SEARCHER.searcher.maxDoc",
+      "SEARCHER.searcher.indexCommitSize",
       "QUERY./select.requests",
       "UPDATE./update.requests"
   ));
diff --git a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
index 210e0ad66a9..8b121e04c6d 100644
--- a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
+++ b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
@@ -21,6 +21,7 @@ import java.io.IOException;
 import java.lang.invoke.MethodHandles;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.Date;
@@ -2281,6 +2282,19 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrInfoBean {
     manager.registerGauge(this, registry, () -> reader.toString(), tag, true, "reader", Category.SEARCHER.toString(), scope);
     manager.registerGauge(this, registry, () -> reader.directory().toString(), tag, true, "readerDir", Category.SEARCHER.toString(), scope);
     manager.registerGauge(this, registry, () -> reader.getVersion(), tag, true, "indexVersion", Category.SEARCHER.toString(), scope);
+    // size of the currently opened commit
+    manager.registerGauge(this, registry, () -> {
+      try {
+        Collection<String> files = reader.getIndexCommit().getFileNames();
+        long total = 0;
+        for (String file : files) {
+          total += DirectoryFactory.sizeOf(reader.directory(), file);
+        }
+        return total;
+      } catch (Exception e) {
+        return -1;
+      }
+    }, tag, true, "indexCommitSize", Category.SEARCHER.toString(), scope);
   }
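Editorial note: the new SEARCHER.searcher.indexCommitSize gauge is exposed through the regular metrics registries, so it can be read per node by tag, as both the trigger and the test below do. A minimal sketch, assuming cloudManager, collection, shard, replicaName and nodeName are in scope (the tag pattern "metrics:<registry>:<metricName>" matches what IndexSizeTrigger builds):

    // Reading the new gauge via the node state provider.
    String registry = SolrCoreMetricManager.createRegistryName(true, collection, shard, replicaName, null);
    String tag = "metrics:" + registry + ":SEARCHER.searcher.indexCommitSize";
    Map<String, Object> values =
        cloudManager.getNodeStateProvider().getNodeValues(nodeName, Collections.singleton(tag));
    long commitSize = ((Number) values.get(tag)).longValue(); // the gauge returns -1 on error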
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/IndexSizeTriggerTest.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/IndexSizeTriggerTest.java
index 46d990ae6b8..faf1407801d 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/IndexSizeTriggerTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/IndexSizeTriggerTest.java
@@ -19,7 +19,10 @@ package org.apache.solr.cloud.autoscaling;
 
 import java.lang.invoke.MethodHandles;
 import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
@@ -29,7 +32,9 @@ import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicReference;
+import java.util.stream.Collectors;
 
+import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.TestUtil;
 import org.apache.solr.client.solrj.SolrClient;
 import org.apache.solr.client.solrj.SolrRequest;
@@ -44,17 +49,22 @@ import org.apache.solr.cloud.CloudTestUtils.AutoScalingRequest;
 import org.apache.solr.cloud.CloudUtil;
 import org.apache.solr.cloud.SolrCloudTestCase;
 import org.apache.solr.cloud.autoscaling.sim.SimCloudManager;
+import org.apache.solr.cloud.autoscaling.sim.SimUtils;
 import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.cloud.DocCollection;
+import org.apache.solr.common.cloud.Replica;
 import org.apache.solr.common.cloud.ZkNodeProps;
 import org.apache.solr.common.params.CollectionParams;
 import org.apache.solr.common.params.CommonAdminParams;
 import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.params.UpdateParams;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.Pair;
 import org.apache.solr.common.util.TimeSource;
 import org.apache.solr.common.util.Utils;
 import org.apache.solr.core.SolrResourceLoader;
+import org.apache.solr.metrics.SolrCoreMetricManager;
 import org.apache.solr.util.LogLevel;
 import org.junit.After;
 import org.junit.AfterClass;
@@ -69,6 +79,7 @@ import static org.apache.solr.common.cloud.ZkStateReader.SOLR_AUTOSCALING_CONF_PATH;
  *
  */
 @LogLevel("org.apache.solr.cloud.autoscaling=DEBUG")
+@LuceneTestCase.Slow
 public class IndexSizeTriggerTest extends SolrCloudTestCase {
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
@@ -88,13 +99,15 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
   static Map<String, List<CapturedEvent>> listenerEvents = new ConcurrentHashMap<>();
   static CountDownLatch listenerCreated = new CountDownLatch(1);
   static CountDownLatch finished = new CountDownLatch(1);
+  static boolean realCluster;
 
   @BeforeClass
   public static void setupCluster() throws Exception {
     configureCluster(2)
         .addConfig("conf", configset("cloud-minimal"))
         .configure();
-    if (random().nextBoolean()) {
+    realCluster = random().nextBoolean();
+    if (realCluster) {
       cloudManager = cluster.getJettySolrRunner(0).getCoreContainer().getZkController().getSolrCloudManager();
       solrClient = cluster.getSolrClient();
       loader = cluster.getJettySolrRunner(0).getCoreContainer().getResourceLoader();
@@ -114,7 +128,7 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
 
   @After
   public void restoreDefaults() throws Exception {
-    if (cloudManager instanceof SimCloudManager) {
+    if (!realCluster) {
       log.info(((SimCloudManager) cloudManager).dumpClusterState(true));
       ((SimCloudManager) cloudManager).getSimClusterStateProvider().simDeleteAllCollections();
       ((SimCloudManager) cloudManager).simClearSystemCollection();
@@ -134,7 +148,7 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
 
   @AfterClass
   public static void teardown() throws Exception {
-    if (cloudManager instanceof SimCloudManager) {
+    if (!realCluster) {
       cloudManager.close();
     }
     solrClient = null;
@@ -142,7 +156,7 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
   }
 
   @Test
-  @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
+  //@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
   public void testTrigger() throws Exception {
     String collectionName = "testTrigger_collection";
     CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
@@ -248,7 +262,7 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
   }
 
   @Test
-  @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
+  //@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
   public void testSplitIntegration() throws Exception {
     String collectionName = "testSplitIntegration_collection";
     CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
@@ -372,7 +386,7 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
   }
 
   @Test
-  @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
+  //@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
   public void testMergeIntegration() throws Exception {
     String collectionName = "testMergeIntegration_collection";
     CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
@@ -488,8 +502,12 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
 
   @Test
   //@BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 05-Jul-2018
-  @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
+  //@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
   public void testMixedBounds() throws Exception {
+    if (!realCluster) {
+      log.info("This test doesn't work with a simulated cluster");
+      return;
+    }
 
     String collectionName = "testMixedBounds_collection";
     CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
@@ -749,20 +767,16 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
   }
 
   @Test
-  @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
+  //@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
   public void testMaxOps() throws Exception {
     String collectionName = "testMaxOps_collection";
     CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
         "conf", 5, 2).setMaxShardsPerNode(10);
     create.process(solrClient);
-    if (SPEED == 1) {
-      cluster.waitForActiveCollection(collectionName, 5, 10);
-    } else {
-      CloudUtil.waitForState(cloudManager, "failed to create " + collectionName, collectionName,
-          CloudUtil.clusterShape(5, 2, false, true));
-    }
-
+    CloudUtil.waitForState(cloudManager, "failed to create " + collectionName, collectionName,
+        CloudUtil.clusterShape(5, 2, false, true));
+
     long waitForSeconds = 3 + random().nextInt(5);
     // add disabled trigger
     String setTriggerCommand = "{" +
@@ -972,6 +986,197 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
 
   }
 
+
+  @Test
+  public void testEstimatedIndexSize() throws Exception {
+    if (!realCluster) {
+      log.info("This test doesn't work with a simulated cluster");
+      return;
+    }
+    String collectionName = "testEstimatedIndexSize_collection";
+    CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
+        "conf", 2, 2).setMaxShardsPerNode(2);
+    create.process(solrClient);
+
+    CloudUtil.waitForState(cloudManager, "failed to create " + collectionName, collectionName,
+        CloudUtil.clusterShape(2, 2, false, true));
+
+    int NUM_DOCS = 20;
+    for (int i = 0; i < NUM_DOCS; i++) {
+      SolrInputDocument doc = new SolrInputDocument("id", "id-" + (i * 100));
+      solrClient.add(collectionName, doc);
+    }
+    solrClient.commit(collectionName);
+
+    // get the size of the leader's index
+    DocCollection coll = cloudManager.getClusterStateProvider().getCollection(collectionName);
+    Replica leader = coll.getSlice("shard1").getLeader();
+    String replicaName = Utils.parseMetricsReplicaName(collectionName, leader.getCoreName());
+    assertNotNull("replicaName could not be constructed from " + leader, replicaName);
+    final String registry = SolrCoreMetricManager.createRegistryName(true, collectionName, "shard1", replicaName, null);
+    Set<String> tags = SimUtils.COMMON_REPLICA_TAGS.stream()
+        .map(s -> "metrics:" + registry + ":" + s).collect(Collectors.toSet());
+    Map<String, Object> sizes = cloudManager.getNodeStateProvider().getNodeValues(leader.getNodeName(), tags);
+    String commitSizeTag = "metrics:" + registry + ":SEARCHER.searcher.indexCommitSize";
+    String numDocsTag = "metrics:" + registry + ":SEARCHER.searcher.numDocs";
+    String maxDocTag = "metrics:" + registry + ":SEARCHER.searcher.maxDoc";
+    assertNotNull(sizes.toString(), sizes.get(commitSizeTag));
+    assertNotNull(sizes.toString(), sizes.get(numDocsTag));
+    assertNotNull(sizes.toString(), sizes.get(maxDocTag));
+    long commitSize = ((Number)sizes.get(commitSizeTag)).longValue();
+    long maxDoc = ((Number)sizes.get(maxDocTag)).longValue();
+    long numDocs = ((Number)sizes.get(numDocsTag)).longValue();
+
+    assertEquals("maxDoc != numDocs", maxDoc, numDocs);
+    assertTrue("unexpected numDocs=" + numDocs, numDocs > NUM_DOCS / 3);
+
+    long aboveBytes = commitSize * 9 / 10;
+    long waitForSeconds = 3 + random().nextInt(5);
+    String setTriggerCommand = "{" +
+        "'set-trigger' : {" +
+        "'name' : 'index_size_trigger7'," +
+        "'event' : 'indexSize'," +
+        "'waitFor' : '" + waitForSeconds + "s'," +
+        "'splitMethod' : 'link'," +
+        "'aboveBytes' : " + aboveBytes + "," +
+        "'enabled' : false," +
+        "'actions' : [{'name' : 'compute_plan', 'class' : 'solr.ComputePlanAction'}," +
+        "{'name' : 'execute_plan', 'class' : '" + ExecutePlanAction.class.getName() + "'}]" +
+        "}}";
+    SolrRequest req = AutoScalingRequest.create(SolrRequest.METHOD.POST, setTriggerCommand);
+    NamedList<Object> response = solrClient.request(req);
+    assertEquals(response.get("result").toString(), "success");
+
+    String setListenerCommand = "{" +
+        "'set-listener' : " +
+        "{" +
"'name' : 'capturing7'," + + "'trigger' : 'index_size_trigger7'," + + "'stage' : ['STARTED','ABORTED','SUCCEEDED','FAILED']," + + "'beforeAction' : ['compute_plan','execute_plan']," + + "'afterAction' : ['compute_plan','execute_plan']," + + "'class' : '" + CapturingTriggerListener.class.getName() + "'" + + "}" + + "}"; + req = AutoScalingRequest.create(SolrRequest.METHOD.POST, setListenerCommand); + response = solrClient.request(req); + assertEquals(response.get("result").toString(), "success"); + + setListenerCommand = "{" + + "'set-listener' : " + + "{" + + "'name' : 'finished'," + + "'trigger' : 'index_size_trigger7'," + + "'stage' : ['SUCCEEDED']," + + "'class' : '" + FinishedProcessingListener.class.getName() + "'" + + "}" + + "}"; + req = AutoScalingRequest.create(SolrRequest.METHOD.POST, setListenerCommand); + response = solrClient.request(req); + assertEquals(response.get("result").toString(), "success"); + + // enable the trigger + String resumeTriggerCommand = "{" + + "'resume-trigger' : {" + + "'name' : 'index_size_trigger7'" + + "}" + + "}"; + req = AutoScalingRequest.create(SolrRequest.METHOD.POST, resumeTriggerCommand); + response = solrClient.request(req); + assertEquals("success", response.get("result").toString()); + + // aboveBytes was set to be slightly lower than the actual size of at least one shard, so + // we're expecting a SPLITSHARD - but with 'link' method the actual size of the resulting shards + // will likely not go down. However, the estimated size of the latest commit point will go down + // (see SOLR-12941). + + timeSource.sleep(TimeUnit.MILLISECONDS.convert(waitForSeconds + 1, TimeUnit.SECONDS)); + + boolean await = finished.await(90000 / SPEED, TimeUnit.MILLISECONDS); + assertTrue("did not finish processing in time", await); + // suspend the trigger + String suspendTriggerCommand = "{" + + "'suspend-trigger' : {" + + "'name' : 'index_size_trigger7'" + + "}" + + "}"; + req = AutoScalingRequest.create(SolrRequest.METHOD.POST, resumeTriggerCommand); + response = solrClient.request(req); + assertEquals("success", response.get("result").toString()); + + assertEquals(1, listenerEvents.size()); + List events = listenerEvents.get("capturing7"); + assertNotNull(listenerEvents.toString(), events); + assertFalse("empty events?", events.isEmpty()); + CapturedEvent ev = events.get(0); + List ops = (List< TriggerEvent.Op>)ev.event.properties.get(TriggerEvent.REQUESTED_OPS); + assertNotNull("no requested ops in " + ev, ops); + assertFalse("empty list of ops in " + ev, ops.isEmpty()); + Set parentShards = new HashSet<>(); + ops.forEach(op -> { + assertTrue(op.toString(), op.getAction() == CollectionParams.CollectionAction.SPLITSHARD); + Collection> hints = (Collection>)op.getHints().get(Suggester.Hint.COLL_SHARD); + assertNotNull("no hints in op " + op, hints); + hints.forEach(h -> parentShards.add(h.second())); + }); + + // allow for recovery of at least some sub-shards + timeSource.sleep(TimeUnit.MILLISECONDS.convert(waitForSeconds + 1, TimeUnit.SECONDS)); + + coll = cloudManager.getClusterStateProvider().getCollection(collectionName); + + int checkedSubShards = 0; + + for (String parentShard : parentShards) { + for (String subShard : Arrays.asList(parentShard + "_0", parentShard + "_1")) { + leader = coll.getSlice(subShard).getLeader(); + if (leader == null) { + // no leader yet - skip it + } + checkedSubShards++; + replicaName = Utils.parseMetricsReplicaName(collectionName, leader.getCoreName()); + assertNotNull("replicaName could not be constructed from " + leader, 
+        final String subregistry = SolrCoreMetricManager.createRegistryName(true, collectionName, subShard, replicaName, null);
+        Set<String> subtags = SimUtils.COMMON_REPLICA_TAGS.stream()
+            .map(s -> "metrics:" + subregistry + ":" + s).collect(Collectors.toSet());
+        sizes = cloudManager.getNodeStateProvider().getNodeValues(leader.getNodeName(), subtags);
+        commitSizeTag = "metrics:" + subregistry + ":SEARCHER.searcher.indexCommitSize";
+        numDocsTag = "metrics:" + subregistry + ":SEARCHER.searcher.numDocs";
+        maxDocTag = "metrics:" + subregistry + ":SEARCHER.searcher.maxDoc";
+        assertNotNull(sizes.toString(), sizes.get(commitSizeTag));
+        assertNotNull(sizes.toString(), sizes.get(numDocsTag));
+        assertNotNull(sizes.toString(), sizes.get(maxDocTag));
+        long subCommitSize = ((Number)sizes.get(commitSizeTag)).longValue();
+        long subMaxDoc = ((Number)sizes.get(maxDocTag)).longValue();
+        long subNumDocs = ((Number)sizes.get(numDocsTag)).longValue();
+        assertTrue("subNumDocs=" + subNumDocs + " should be less than subMaxDoc=" + subMaxDoc +
+            " due to link split", subNumDocs < subMaxDoc);
+        assertTrue("subCommitSize=" + subCommitSize + " should still be greater than aboveBytes=" + aboveBytes +
+            " due to link split", subCommitSize > aboveBytes);
+        // calculate estimated size using the same formula
+        long estimatedSize = IndexSizeTrigger.estimatedSize(subMaxDoc, subNumDocs, subCommitSize);
+        assertTrue("estimatedSize=" + estimatedSize + " should be lower than aboveBytes=" + aboveBytes,
+            estimatedSize < aboveBytes);
+      }
+    }
+
+    assertTrue("didn't find any leaders in new sub-shards", checkedSubShards > 0);
+
+    // reset & resume
+    listenerEvents.clear();
+    finished = new CountDownLatch(1);
+    req = AutoScalingRequest.create(SolrRequest.METHOD.POST, resumeTriggerCommand);
+    response = solrClient.request(req);
+    assertEquals("success", response.get("result").toString());
+    timeSource.sleep(TimeUnit.MILLISECONDS.convert(waitForSeconds + 1, TimeUnit.SECONDS));
+
+    // the estimated shard size should now fall well below aboveBytes, even though the real commitSize
+    // still remains larger due to the splitMethod=link side-effects
+    await = finished.await(10000 / SPEED, TimeUnit.MILLISECONDS);
+    assertFalse("should not fire the trigger again! " + listenerEvents, await);
+
+  }
+
   private Map<String, Object> createTriggerProps(long waitForSeconds) {
     Map<String, Object> props = new HashMap<>();
     props.put("event", "indexSize");
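Editorial note: to make the arithmetic the test verifies concrete, here is a worked example with hypothetical sizes (the real test derives aboveBytes from the measured commit size):

    // Worked example (hypothetical numbers) of the formula checked by the test above.
    long aboveBytes = 900_000L; // 9/10 of the parent commit size, as in the test
    // parent shard: no deletions, so the estimate equals the commit size
    long parent = IndexSizeTrigger.estimatedSize(20, 20, 1_000_000L); // = 1_000_000 > aboveBytes -> SPLITSHARD
    // sub-shard after splitMethod=link: files are hard-linked, so the commit stays
    // at ~1_000_000 bytes, but only half of the documents remain live
    long sub = IndexSizeTrigger.estimatedSize(20, 10, 1_000_000L);    // = 500_000 < aboveBytes -> no re-split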
diff --git a/solr/solr-ref-guide/src/solrcloud-autoscaling-triggers.adoc b/solr/solr-ref-guide/src/solrcloud-autoscaling-triggers.adoc
index c5263ede8d4..85f273e9742 100644
--- a/solr/solr-ref-guide/src/solrcloud-autoscaling-triggers.adoc
+++ b/solr/solr-ref-guide/src/solrcloud-autoscaling-triggers.adoc
@@ -255,8 +255,8 @@ In addition to the parameters described at <<trigger-configuration,Trigger Configuration>>, this trigger
 This trigger can be used for monitoring the size of collection shards, measured either by the number of
 documents in a shard or the physical size of the shard's index in bytes.
 
-When either of the upper thresholds is exceeded the trigger will generate an event with
-a (configurable) requested operation to perform on the offending shards - by default
+When either of the upper thresholds is exceeded for a particular shard the trigger will generate
+an event with a (configurable) requested operation to perform on the offending shards - by default
 this is a SPLITSHARD operation.
 
 Similarly, when either of the lower thresholds is exceeded the trigger will generate an
@@ -264,13 +264,22 @@ event with a (configurable) requested operation to perform on two of the smallest
 shards. By default this is a MERGESHARDS operation, and is currently ignored because
 that operation is not yet implemented (see https://issues.apache.org/jira/browse/SOLR-9407[SOLR-9407]).
 
+When `splitMethod=link` is used the resulting sub-shards will initially have nearly the same size
+as the parent shard, due to the hard-linking of parent index files, and will differ only in their
+lists of deleted documents. In order to correctly recognize the effectively reduced index size,
+an estimate is calculated using a simple formula: `indexCommitSize * numDocs / maxDoc`. This value
+is then compared with the `aboveBytes` and `belowBytes` limits.
+
 Additionally, monitoring can be restricted to a list of collections; by default all
 collections are monitored.
 
 In addition to the parameters described at <<trigger-configuration,Trigger Configuration>>, this trigger
 supports the following configuration parameters (all thresholds are exclusive):
 
 `aboveBytes`::
-A upper threshold in bytes. This value is compared to the `INDEX.sizeInBytes` metric.
+An upper threshold in bytes. This value is compared to the `SEARCHER.searcher.indexCommitSize` metric, which
+reports the size of the latest commit point (ignoring any data related to earlier commit points, which may
+still be present for replication or snapshot purposes). See also the note above about how this value is used
+with `splitMethod=link`.
 
 `belowBytes`::
 A lower threshold in bytes. Note that this value should be at least 2x smaller than
@@ -280,7 +289,7 @@ A lower threshold in bytes. Note that this value should be at least 2x smaller than
 `aboveDocs`::
 An upper threshold expressed as the number of documents. This value is compared with
 `SEARCHER.searcher.numDocs` metric.
 +
 NOTE: Due to the way Lucene indexes work, a shard may exceed the `aboveBytes` threshold
-even if the number of documents is relatively small, because replaced and deleted documents keep
+on disk even if the number of documents is relatively small, because replaced and deleted documents keep
 occupying disk space until they are actually removed during Lucene index merging.
 
 `belowDocs`::
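Editorial note: for reference, a minimal sketch of configuring such a trigger from a SolrJ client, mirroring the pattern used in IndexSizeTriggerTest above. The trigger name, waitFor value and the 10000000-byte threshold are placeholders, and the `solr.ExecutePlanAction` shorthand is assumed to resolve like the `solr.ComputePlanAction` shorthand used in the test:

    // Sketch only: registers an indexSize trigger with splitMethod=link.
    String setTriggerCommand = "{" +
        "'set-trigger' : {" +
        "'name' : 'index_size_trigger'," +
        "'event' : 'indexSize'," +
        "'waitFor' : '5s'," +
        "'splitMethod' : 'link'," +
        "'aboveBytes' : 10000000," +
        "'enabled' : true," +
        "'actions' : [{'name' : 'compute_plan', 'class' : 'solr.ComputePlanAction'}," +
        "{'name' : 'execute_plan', 'class' : 'solr.ExecutePlanAction'}]" +
        "}}";
    SolrRequest req = AutoScalingRequest.create(SolrRequest.METHOD.POST, setTriggerCommand);
    NamedList<Object> response = solrClient.request(req); // expect {"result":"success"}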