SOLR-12941: Fix IndexSizeTrigger to correctly work with "aboveBytes" and "splitMethod=link" parameters.

Andrzej Bialecki 2019-05-27 18:28:59 +02:00
parent 171d7f131f
commit 32ad372433
8 changed files with 309 additions and 22 deletions

solr/CHANGES.txt

@@ -104,6 +104,8 @@ Bug Fixes
* SOLR-13469: Fix rejected requests during full disk + IndexFetch to use 503 not 403 (hossman)
* SOLR-12941: Fix IndexSizeTrigger to correctly work with "aboveBytes" and "splitMethod=link" parameters. (ab)
Other Changes
----------------------

CloudUtil.java

@@ -236,7 +236,7 @@ public class CloudUtil {
* <p>Note: for shards marked as inactive the current Solr behavior is that replicas remain active.
* {@link org.apache.solr.cloud.autoscaling.sim.SimCloudManager} follows this behavior.</p>
* @param expectedShards expected number of shards
* @param expectedReplicas expected number of active replicas
* @param expectedReplicas expected number of active replicas per shard
* @param withInactive if true then count also inactive shards
* @param requireLeaders if true then require that each shard has a leader
*/

IndexSizeTrigger.java

@@ -71,7 +71,10 @@ public class IndexSizeTrigger extends TriggerBase {
public static final String SPLIT_METHOD_PROP = CommonAdminParams.SPLIT_METHOD;
public static final String BYTES_SIZE_PROP = "__bytes__";
public static final String TOTAL_BYTES_SIZE_PROP = "__total_bytes__";
public static final String DOCS_SIZE_PROP = "__docs__";
public static final String MAX_DOC_PROP = "__maxDoc__";
public static final String COMMIT_SIZE_PROP = "__commitBytes__";
public static final String ABOVE_SIZE_PROP = "aboveSize";
public static final String BELOW_SIZE_PROP = "belowSize";
public static final String VIOLATION_PROP = "violationType";
@@ -290,6 +293,10 @@ public class IndexSizeTrigger extends TriggerBase {
metricTags.put(tag, info);
tag = "metrics:" + registry + ":SEARCHER.searcher.numDocs";
metricTags.put(tag, info);
tag = "metrics:" + registry + ":SEARCHER.searcher.maxDoc";
metricTags.put(tag, info);
tag = "metrics:" + registry + ":SEARCHER.searcher.indexCommitSize";
metricTags.put(tag, info);
});
});
if (metricTags.isEmpty()) {
@@ -309,9 +316,13 @@ public class IndexSizeTrigger extends TriggerBase {
ReplicaInfo currentInfo = currentSizes.computeIfAbsent(info.getCore(), k -> (ReplicaInfo)info.clone());
if (tag.contains("INDEX")) {
currentInfo.getVariables().put(BYTES_SIZE_PROP, ((Number) size).longValue());
} else if (tag.contains("SEARCHER")) {
currentInfo.getVariables().put(TOTAL_BYTES_SIZE_PROP, ((Number) size).longValue());
} else if (tag.endsWith("SEARCHER.searcher.numDocs")) {
currentInfo.getVariables().put(DOCS_SIZE_PROP, ((Number) size).longValue());
} else if (tag.endsWith("SEARCHER.searcher.maxDoc")) {
currentInfo.getVariables().put(MAX_DOC_PROP, ((Number) size).longValue());
} else if (tag.endsWith("SEARCHER.searcher.indexCommitSize")) {
currentInfo.getVariables().put(COMMIT_SIZE_PROP, ((Number) size).longValue());
}
}
});
@@ -331,6 +342,17 @@ public class IndexSizeTrigger extends TriggerBase {
Set<String> splittable = new HashSet<>();
currentSizes.forEach((coreName, info) -> {
// calculate estimated bytes
long maxDoc = (Long)info.getVariable(MAX_DOC_PROP);
long numDocs = (Long)info.getVariable(DOCS_SIZE_PROP);
long commitSize = (Long)info.getVariable(COMMIT_SIZE_PROP, 0L);
if (commitSize <= 0) {
commitSize = (Long)info.getVariable(TOTAL_BYTES_SIZE_PROP);
}
// calculate estimated size as a side-effect
commitSize = estimatedSize(maxDoc, numDocs, commitSize);
info.getVariables().put(BYTES_SIZE_PROP, commitSize);
if ((Long)info.getVariable(BYTES_SIZE_PROP) > aboveBytes ||
(Long)info.getVariable(DOCS_SIZE_PROP) > aboveDocs) {
if (waitForElapsed(coreName, now, lastAboveEventMap)) {
@@ -479,6 +501,16 @@ public class IndexSizeTrigger extends TriggerBase {
}
}
public static long estimatedSize(long maxDoc, long numDocs, long commitSize) {
if (maxDoc == 0) {
return 0;
}
if (maxDoc == numDocs) {
return commitSize;
}
return commitSize * numDocs / maxDoc;
}
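// Illustrative only (not part of this commit): how the estimate behaves right after a
// splitMethod=link split, where a sub-shard keeps roughly the parent's commit size on disk
// but about half of its documents are already marked as deleted. Hypothetical values:
//   estimatedSize(20, 10, 100_000) == 50_000   // maxDoc=20, numDocs=10 -> half the commit size
//   estimatedSize(20, 20, 100_000) == 100_000  // no deletions: estimate equals the commit size
//   estimatedSize(0, 0, 100_000)   == 0        // empty index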
private boolean waitForElapsed(String name, long now, Map<String, Long> lastEventMap) {
Long lastTime = lastEventMap.computeIfAbsent(name, s -> now);
long elapsed = TimeUnit.SECONDS.convert(now - lastTime, TimeUnit.NANOSECONDS);

SimClusterStateProvider.java

@@ -90,6 +90,8 @@ import org.apache.solr.common.params.CollectionParams;
import org.apache.solr.common.params.CommonAdminParams;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.CoreAdminParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.UpdateParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.Utils;
import org.apache.solr.core.SolrInfoBean;
@@ -1739,6 +1741,28 @@ public class SimClusterStateProvider implements ClusterStateProvider {
lock.unlock();
}
}
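// simulate the effect of optimize / expungeDeletes: purge all deleted docs so that
// maxDoc == numDocs (and deletedDocs == 0) on every shard leader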
SolrParams params = req.getParams();
if (params != null && (params.getBool(UpdateParams.OPTIMIZE, false) || params.getBool(UpdateParams.EXPUNGE_DELETES, false))) {
lock.lockInterruptibly();
try {
coll.getSlices().forEach(s -> {
Replica leader = s.getLeader();
ReplicaInfo ri = getReplicaInfo(leader);
Number numDocs = (Number)ri.getVariable("SEARCHER.searcher.numDocs");
if (numDocs == null || numDocs.intValue() == 0) {
numDocs = 0;
}
try {
simSetShardValue(ri.getCollection(), ri.getShard(), "SEARCHER.searcher.maxDoc", numDocs, false, false);
simSetShardValue(ri.getCollection(), ri.getShard(), "SEARCHER.searcher.deletedDocs", 0, false, false);
} catch (Exception e) {
throw new RuntimeException(e);
}
});
} finally {
lock.unlock();
}
}
return new UpdateResponse();
}

SimUtils.java

@@ -52,6 +52,7 @@ public class SimUtils {
Variable.Type.CORE_IDX.tagName,
"SEARCHER.searcher.numDocs",
"SEARCHER.searcher.maxDoc",
"SEARCHER.searcher.indexCommitSize",
"QUERY./select.requests",
"UPDATE./update.requests"
));

SolrIndexSearcher.java

@@ -21,6 +21,7 @@ import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
@@ -2281,6 +2282,19 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
manager.registerGauge(this, registry, () -> reader.toString(), tag, true, "reader", Category.SEARCHER.toString(), scope);
manager.registerGauge(this, registry, () -> reader.directory().toString(), tag, true, "readerDir", Category.SEARCHER.toString(), scope);
manager.registerGauge(this, registry, () -> reader.getVersion(), tag, true, "indexVersion", Category.SEARCHER.toString(), scope);
// size of the currently opened commit
manager.registerGauge(this, registry, () -> {
try {
Collection<String> files = reader.getIndexCommit().getFileNames();
long total = 0;
for (String file : files) {
total += DirectoryFactory.sizeOf(reader.directory(), file);
}
return total;
} catch (Exception e) {
return -1;
}
}, tag, true, "indexCommitSize", Category.SEARCHER.toString(), scope);
}

IndexSizeTriggerTest.java

@@ -19,7 +19,10 @@ package org.apache.solr.cloud.autoscaling;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
@@ -29,7 +32,9 @@ import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Collectors;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrRequest;
@@ -44,17 +49,22 @@ import org.apache.solr.cloud.CloudTestUtils.AutoScalingRequest;
import org.apache.solr.cloud.CloudUtil;
import org.apache.solr.cloud.SolrCloudTestCase;
import org.apache.solr.cloud.autoscaling.sim.SimCloudManager;
import org.apache.solr.cloud.autoscaling.sim.SimUtils;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.params.CollectionParams;
import org.apache.solr.common.params.CommonAdminParams;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.UpdateParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.Pair;
import org.apache.solr.common.util.TimeSource;
import org.apache.solr.common.util.Utils;
import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.metrics.SolrCoreMetricManager;
import org.apache.solr.util.LogLevel;
import org.junit.After;
import org.junit.AfterClass;
@@ -69,6 +79,7 @@ import static org.apache.solr.common.cloud.ZkStateReader.SOLR_AUTOSCALING_CONF_P
*
*/
@LogLevel("org.apache.solr.cloud.autoscaling=DEBUG")
@LuceneTestCase.Slow
public class IndexSizeTriggerTest extends SolrCloudTestCase {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
@@ -88,13 +99,16 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
static Map<String, List<CapturedEvent>> listenerEvents = new ConcurrentHashMap<>();
static CountDownLatch listenerCreated = new CountDownLatch(1);
static CountDownLatch finished = new CountDownLatch(1);
static boolean realCluster;
@BeforeClass
public static void setupCluster() throws Exception {
configureCluster(2)
.addConfig("conf", configset("cloud-minimal"))
.configure();
if (random().nextBoolean()) {
realCluster = random().nextBoolean();
realCluster = true;
if (realCluster) {
cloudManager = cluster.getJettySolrRunner(0).getCoreContainer().getZkController().getSolrCloudManager();
solrClient = cluster.getSolrClient();
loader = cluster.getJettySolrRunner(0).getCoreContainer().getResourceLoader();
@@ -114,7 +128,7 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
@After
public void restoreDefaults() throws Exception {
if (cloudManager instanceof SimCloudManager) {
if (!realCluster) {
log.info(((SimCloudManager) cloudManager).dumpClusterState(true));
((SimCloudManager) cloudManager).getSimClusterStateProvider().simDeleteAllCollections();
((SimCloudManager) cloudManager).simClearSystemCollection();
@@ -134,7 +148,7 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
@AfterClass
public static void teardown() throws Exception {
if (cloudManager instanceof SimCloudManager) {
if (!realCluster) {
cloudManager.close();
}
solrClient = null;
@@ -142,7 +156,7 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
}
@Test
@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
//@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
public void testTrigger() throws Exception {
String collectionName = "testTrigger_collection";
CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
@@ -248,7 +262,7 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
}
@Test
@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
//@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
public void testSplitIntegration() throws Exception {
String collectionName = "testSplitIntegration_collection";
CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
@@ -372,7 +386,7 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
}
@Test
@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
//@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
public void testMergeIntegration() throws Exception {
String collectionName = "testMergeIntegration_collection";
CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
@@ -488,8 +502,12 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
@Test
//@BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 05-Jul-2018
@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
//@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
public void testMixedBounds() throws Exception {
if (!realCluster) {
log.info("This test doesn't work with a simulated cluster");
return;
}
String collectionName = "testMixedBounds_collection";
CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
@@ -749,20 +767,16 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
}
@Test
@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
//@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
public void testMaxOps() throws Exception {
String collectionName = "testMaxOps_collection";
CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
"conf", 5, 2).setMaxShardsPerNode(10);
create.process(solrClient);
if (SPEED == 1) {
cluster.waitForActiveCollection(collectionName, 5, 10);
} else {
CloudUtil.waitForState(cloudManager, "failed to create " + collectionName, collectionName,
CloudUtil.clusterShape(5, 2, false, true));
}
CloudUtil.waitForState(cloudManager, "failed to create " + collectionName, collectionName,
CloudUtil.clusterShape(5, 2, false, true));
long waitForSeconds = 3 + random().nextInt(5);
// add disabled trigger
String setTriggerCommand = "{" +
@@ -972,6 +986,197 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
}
@Test
public void testEstimatedIndexSize() throws Exception {
if (!realCluster) {
log.info("This test doesn't work with a simulated cluster");
return;
}
String collectionName = "testEstimatedIndexSize_collection";
CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
"conf", 2, 2).setMaxShardsPerNode(2);
create.process(solrClient);
CloudUtil.waitForState(cloudManager, "failed to create " + collectionName, collectionName,
CloudUtil.clusterShape(2, 2, false, true));
int NUM_DOCS = 20;
for (int i = 0; i < NUM_DOCS; i++) {
SolrInputDocument doc = new SolrInputDocument("id", "id-" + (i * 100));
solrClient.add(collectionName, doc);
}
solrClient.commit(collectionName);
// get the size of the leader's index
DocCollection coll = cloudManager.getClusterStateProvider().getCollection(collectionName);
Replica leader = coll.getSlice("shard1").getLeader();
String replicaName = Utils.parseMetricsReplicaName(collectionName, leader.getCoreName());
assertNotNull("replicaName could not be constructed from " + leader, replicaName);
final String registry = SolrCoreMetricManager.createRegistryName(true, collectionName, "shard1", replicaName, null);
Set<String> tags = SimUtils.COMMON_REPLICA_TAGS.stream()
.map(s -> "metrics:" + registry + ":" + s).collect(Collectors.toSet());
Map<String, Object> sizes = cloudManager.getNodeStateProvider().getNodeValues(leader.getNodeName(), tags);
String commitSizeTag = "metrics:" + registry + ":SEARCHER.searcher.indexCommitSize";
String numDocsTag = "metrics:" + registry + ":SEARCHER.searcher.numDocs";
String maxDocTag = "metrics:" + registry + ":SEARCHER.searcher.maxDoc";
assertNotNull(sizes.toString(), sizes.get(commitSizeTag));
assertNotNull(sizes.toString(), sizes.get(numDocsTag));
assertNotNull(sizes.toString(), sizes.get(maxDocTag));
long commitSize = ((Number)sizes.get(commitSizeTag)).longValue();
long maxDoc = ((Number)sizes.get(maxDocTag)).longValue();
long numDocs = ((Number)sizes.get(numDocsTag)).longValue();
assertEquals("maxDoc != numDocs", maxDoc, numDocs);
assertTrue("unexpected numDocs=" + numDocs, numDocs > NUM_DOCS / 3);
long aboveBytes = commitSize * 9 / 10;
long waitForSeconds = 3 + random().nextInt(5);
String setTriggerCommand = "{" +
"'set-trigger' : {" +
"'name' : 'index_size_trigger7'," +
"'event' : 'indexSize'," +
"'waitFor' : '" + waitForSeconds + "s'," +
"'splitMethod' : 'link'," +
"'aboveBytes' : " + aboveBytes + "," +
"'enabled' : false," +
"'actions' : [{'name' : 'compute_plan', 'class' : 'solr.ComputePlanAction'}," +
"{'name' : 'execute_plan', 'class' : '" + ExecutePlanAction.class.getName() + "'}]" +
"}}";
SolrRequest req = AutoScalingRequest.create(SolrRequest.METHOD.POST, setTriggerCommand);
NamedList<Object> response = solrClient.request(req);
assertEquals(response.get("result").toString(), "success");
String setListenerCommand = "{" +
"'set-listener' : " +
"{" +
"'name' : 'capturing7'," +
"'trigger' : 'index_size_trigger7'," +
"'stage' : ['STARTED','ABORTED','SUCCEEDED','FAILED']," +
"'beforeAction' : ['compute_plan','execute_plan']," +
"'afterAction' : ['compute_plan','execute_plan']," +
"'class' : '" + CapturingTriggerListener.class.getName() + "'" +
"}" +
"}";
req = AutoScalingRequest.create(SolrRequest.METHOD.POST, setListenerCommand);
response = solrClient.request(req);
assertEquals(response.get("result").toString(), "success");
setListenerCommand = "{" +
"'set-listener' : " +
"{" +
"'name' : 'finished'," +
"'trigger' : 'index_size_trigger7'," +
"'stage' : ['SUCCEEDED']," +
"'class' : '" + FinishedProcessingListener.class.getName() + "'" +
"}" +
"}";
req = AutoScalingRequest.create(SolrRequest.METHOD.POST, setListenerCommand);
response = solrClient.request(req);
assertEquals(response.get("result").toString(), "success");
// enable the trigger
String resumeTriggerCommand = "{" +
"'resume-trigger' : {" +
"'name' : 'index_size_trigger7'" +
"}" +
"}";
req = AutoScalingRequest.create(SolrRequest.METHOD.POST, resumeTriggerCommand);
response = solrClient.request(req);
assertEquals("success", response.get("result").toString());
// aboveBytes was set to be slightly lower than the actual size of at least one shard, so
// we're expecting a SPLITSHARD - but with 'link' method the actual size of the resulting shards
// will likely not go down. However, the estimated size of the latest commit point will go down
// (see SOLR-12941).
timeSource.sleep(TimeUnit.MILLISECONDS.convert(waitForSeconds + 1, TimeUnit.SECONDS));
boolean await = finished.await(90000 / SPEED, TimeUnit.MILLISECONDS);
assertTrue("did not finish processing in time", await);
// suspend the trigger
String suspendTriggerCommand = "{" +
"'suspend-trigger' : {" +
"'name' : 'index_size_trigger7'" +
"}" +
"}";
req = AutoScalingRequest.create(SolrRequest.METHOD.POST, suspendTriggerCommand);
response = solrClient.request(req);
assertEquals("success", response.get("result").toString());
assertEquals(1, listenerEvents.size());
List<CapturedEvent> events = listenerEvents.get("capturing7");
assertNotNull(listenerEvents.toString(), events);
assertFalse("empty events?", events.isEmpty());
CapturedEvent ev = events.get(0);
List<TriggerEvent.Op> ops = (List<TriggerEvent.Op>)ev.event.properties.get(TriggerEvent.REQUESTED_OPS);
assertNotNull("no requested ops in " + ev, ops);
assertFalse("empty list of ops in " + ev, ops.isEmpty());
Set<String> parentShards = new HashSet<>();
ops.forEach(op -> {
assertTrue(op.toString(), op.getAction() == CollectionParams.CollectionAction.SPLITSHARD);
Collection<Pair<String, String>> hints = (Collection<Pair<String, String>>)op.getHints().get(Suggester.Hint.COLL_SHARD);
assertNotNull("no hints in op " + op, hints);
hints.forEach(h -> parentShards.add(h.second()));
});
// allow for recovery of at least some sub-shards
timeSource.sleep(TimeUnit.MILLISECONDS.convert(waitForSeconds + 1, TimeUnit.SECONDS));
coll = cloudManager.getClusterStateProvider().getCollection(collectionName);
int checkedSubShards = 0;
for (String parentShard : parentShards) {
for (String subShard : Arrays.asList(parentShard + "_0", parentShard + "_1")) {
leader = coll.getSlice(subShard).getLeader();
if (leader == null) {
// no leader yet - skip it
continue;
}
checkedSubShards++;
replicaName = Utils.parseMetricsReplicaName(collectionName, leader.getCoreName());
assertNotNull("replicaName could not be constructed from " + leader, replicaName);
final String subregistry = SolrCoreMetricManager.createRegistryName(true, collectionName, subShard, replicaName, null);
Set<String> subtags = SimUtils.COMMON_REPLICA_TAGS.stream()
.map(s -> "metrics:" + subregistry + ":" + s).collect(Collectors.toSet());
sizes = cloudManager.getNodeStateProvider().getNodeValues(leader.getNodeName(), subtags);
commitSizeTag = "metrics:" + subregistry + ":SEARCHER.searcher.indexCommitSize";
numDocsTag = "metrics:" + subregistry + ":SEARCHER.searcher.numDocs";
maxDocTag = "metrics:" + subregistry + ":SEARCHER.searcher.maxDoc";
assertNotNull(sizes.toString(), sizes.get(commitSizeTag));
assertNotNull(sizes.toString(), sizes.get(numDocsTag));
assertNotNull(sizes.toString(), sizes.get(maxDocTag));
long subCommitSize = ((Number)sizes.get(commitSizeTag)).longValue();
long subMaxDoc = ((Number)sizes.get(maxDocTag)).longValue();
long subNumDocs = ((Number)sizes.get(numDocsTag)).longValue();
assertTrue("subNumDocs=" + subNumDocs + " should be less than subMaxDoc=" + subMaxDoc +
" due to link split", subNumDocs < subMaxDoc);
assertTrue("subCommitSize=" + subCommitSize + " should be still greater than aboveBytes=" + aboveBytes +
" due to link split", subCommitSize > aboveBytes);
// calculate estimated size using the same formula
long estimatedSize = IndexSizeTrigger.estimatedSize(subMaxDoc, subNumDocs, subCommitSize);
assertTrue("estimatedSize=" + estimatedSize + " should be lower than aboveBytes=" + aboveBytes,
estimatedSize < aboveBytes);
}
}
assertTrue("didn't find any leaders in new sub-shards", checkedSubShards > 0);
// reset & resume
listenerEvents.clear();
finished = new CountDownLatch(1);
req = AutoScalingRequest.create(SolrRequest.METHOD.POST, resumeTriggerCommand);
response = solrClient.request(req);
assertEquals("success", response.get("result").toString());
timeSource.sleep(TimeUnit.MILLISECONDS.convert(waitForSeconds + 1, TimeUnit.SECONDS));
// estimated shard size should fall well below the aboveBytes, even though the real commitSize
// still remains larger due to the splitMethod=link side-effects
await = finished.await(10000 / SPEED, TimeUnit.MILLISECONDS);
assertFalse("should not fire the trigger again! " + listenerEvents, await);
}
private Map<String, Object> createTriggerProps(long waitForSeconds) {
Map<String, Object> props = new HashMap<>();
props.put("event", "indexSize");

solr-ref-guide (SolrCloud autoscaling triggers page)

@@ -255,8 +255,8 @@ In addition to the parameters described at <<Trigger Configuration>>, this trigg
This trigger can be used for monitoring the size of collection shards, measured either by the
number of documents in a shard or the physical size of the shard's index in bytes.
When either of the upper thresholds is exceeded the trigger will generate an event with
a (configurable) requested operation to perform on the offending shards - by default
When either of the upper thresholds is exceeded for a particular shard the trigger will generate
an event with a (configurable) requested operation to perform on the offending shards - by default
this is a SPLITSHARD operation.
Similarly, when either of the lower thresholds is exceeded the trigger will generate an
@@ -264,13 +264,22 @@ event with a (configurable) requested operation to perform on two of the smalles
shards. By default this is a MERGESHARDS operation, and is currently ignored because
that operation is not yet implemented (see https://issues.apache.org/jira/browse/SOLR-9407[SOLR-9407]).
When `splitMethod=link` is used the resulting sub-shards initially have nearly the same size
as the parent shard, because the parent's index files are hard-linked rather than copied, and differ only
in their lists of deleted documents. In order to correctly recognize the effectively reduced index size,
an estimate is calculated using a simple formula: `indexCommitSize * numDocs / maxDoc`. This value is then
compared with the `aboveBytes` and `belowBytes` limits.
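For example (the numbers are illustrative only), immediately after a `link` split a sub-shard may still
report an `indexCommitSize` of 10 GB with `maxDoc` = 1,000,000 but only 500,000 live documents (`numDocs`);
its estimated size is then 10 GB * 500,000 / 1,000,000 = 5 GB, and it is this estimate - not the raw
10 GB on disk - that is compared with `aboveBytes` and `belowBytes`.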
Additionally, monitoring can be restricted to a list of collections; by default
all collections are monitored.
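A trigger configuration that combines the `link` split method with an `aboveBytes` limit might look like
the following sketch (the trigger name, `waitFor` value and 5 GB byte limit are illustrative only; the
parameters themselves are described below):

{
  "set-trigger": {
    "name": "index_size_trigger",
    "event": "indexSize",
    "waitFor": "60s",
    "aboveBytes": 5368709120,
    "splitMethod": "link",
    "enabled": true,
    "actions": [
      {"name": "compute_plan", "class": "solr.ComputePlanAction"},
      {"name": "execute_plan", "class": "solr.ExecutePlanAction"}
    ]
  }
}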
In addition to the parameters described at <<Trigger Configuration>>, this trigger supports the following configuration parameters (all thresholds are exclusive):
`aboveBytes`::
A upper threshold in bytes. This value is compared to the `INDEX.sizeInBytes` metric.
An upper threshold in bytes. This value is compared to the `SEARCHER.searcher.indexCommitSize` metric, which
reports the size of the latest commit point (ignoring any data related to earlier commit points, which may
still be present for replication or snapshot purposes). See also the note above on how this value is used with
`splitMethod=link`.
`belowBytes`::
A lower threshold in bytes. Note that this value should be at least 2x smaller than
@@ -280,7 +289,7 @@ A lower threshold in bytes. Note that this value should be at least 2x smaller t
An upper threshold expressed as the number of documents. This value is compared with `SEARCHER.searcher.numDocs` metric.
+
NOTE: Due to the way Lucene indexes work, a shard may exceed the `aboveBytes` threshold
even if the number of documents is relatively small, because replaced and deleted documents keep
on disk even if the number of documents is relatively small, because replaced and deleted documents keep
occupying disk space until they are actually removed during Lucene index merging.
`belowDocs`::