mirror of https://github.com/apache/lucene.git

Leverage doc value skip lists in DocValuesRewriteMethod if indexed (#13672)

parent 4e3945ed54
commit 68882c8b89

@@ -376,6 +376,8 @@ Optimizations

 * GITHUB#13587: Use Max WAND optimizations with ToParentBlockJoinQuery when using ScoreMode.Max (Mike Pellegrini)

+* GITHUB#13672: Leverage doc value skip lists in DocValuesRewriteMethod if indexed. (Greg Miller)
+
 Changes in runtime behavior
 ---------------------
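For context, the optimization only applies when the doc values field was written with a skip list ("skipper") and the multi-term query is rewritten through DocValuesRewriteMethod. Below is a minimal sketch of that combination, using the same SortedSetDocValuesField.indexedField and MultiTermQuery.DOC_VALUES_REWRITE entry points exercised by the tests in this commit; the class name, field name, and terms are illustrative only, not part of the patch.

import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;

public class DocValuesSkipListRewriteExample {
  public static void main(String[] args) throws Exception {
    try (Directory dir = new ByteBuffersDirectory();
        IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig())) {
      for (String color : new String[] {"azure", "blue", "crimson"}) {
        Document doc = new Document();
        // indexedField(...) stores the doc values together with a skip list ("skipper"),
        // which is the structure DocValuesRewriteMethod can now take advantage of.
        doc.add(SortedSetDocValuesField.indexedField("color", new BytesRef(color)));
        writer.addDocument(doc);
      }
      writer.commit();

      try (DirectoryReader reader = DirectoryReader.open(dir)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        // DOC_VALUES_REWRITE routes the multi-term query through DocValuesRewriteMethod,
        // where the skipper can rule out whole blocks of documents.
        Query q =
            new TermInSetQuery(
                MultiTermQuery.DOC_VALUES_REWRITE,
                "color",
                List.of(new BytesRef("azure"), new BytesRef("crimson")));
        System.out.println(searcher.count(q));
      }
    }
  }
}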
@@ -28,6 +28,7 @@ import org.apache.lucene.index.SortedNumericDocValues;
 import org.apache.lucene.search.ConstantScoreScorer;
 import org.apache.lucene.search.ConstantScoreWeight;
 import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.DocValuesRangeIterator;
 import org.apache.lucene.search.FieldExistsQuery;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MatchNoDocsQuery;
@@ -179,7 +180,7 @@ final class SortedNumericDocValuesRangeQuery extends Query {
           };
         }
         if (skipper != null) {
-          iterator = new DocValuesRangeIterator(iterator, skipper, lowerValue, upperValue);
+          iterator = new DocValuesRangeIterator(iterator, skipper, lowerValue, upperValue, false);
         }
         final var scorer = new ConstantScoreScorer(score(), scoreMode, iterator);
         return new DefaultScorerSupplier(scorer);
@@ -28,6 +28,7 @@ import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.search.ConstantScoreScorer;
 import org.apache.lucene.search.ConstantScoreWeight;
 import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.DocValuesRangeIterator;
 import org.apache.lucene.search.FieldExistsQuery;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
@@ -217,7 +218,7 @@ final class SortedSetDocValuesRangeQuery extends Query {
           };
         }
         if (skipper != null) {
-          iterator = new DocValuesRangeIterator(iterator, skipper, minOrd, maxOrd);
+          iterator = new DocValuesRangeIterator(iterator, skipper, minOrd, maxOrd, false);
         }
         return new ConstantScoreScorer(score(), scoreMode, iterator);
       }
@@ -14,18 +14,18 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.lucene.document;
+package org.apache.lucene.search;

 import java.io.IOException;
 import org.apache.lucene.index.DocValuesSkipper;
-import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.TwoPhaseIterator;

 /**
  * Wrapper around a {@link TwoPhaseIterator} for a doc-values range query that speeds things up by
  * taking advantage of a {@link DocValuesSkipper}.
+ *
+ * @lucene.experimental
  */
-final class DocValuesRangeIterator extends TwoPhaseIterator {
+public final class DocValuesRangeIterator extends TwoPhaseIterator {

   enum Match {
     /** None of the documents in the range match */
@@ -41,19 +41,29 @@ final class DocValuesRangeIterator extends TwoPhaseIterator {
   private final Approximation approximation;
   private final TwoPhaseIterator innerTwoPhase;

-  DocValuesRangeIterator(
-      TwoPhaseIterator twoPhase, DocValuesSkipper skipper, long lowerValue, long upperValue) {
-    super(new Approximation(twoPhase.approximation(), skipper, lowerValue, upperValue));
+  public DocValuesRangeIterator(
+      TwoPhaseIterator twoPhase,
+      DocValuesSkipper skipper,
+      long lowerValue,
+      long upperValue,
+      boolean queryRangeHasGaps) {
+    super(
+        queryRangeHasGaps
+            ? new RangeWithGapsApproximation(
+                twoPhase.approximation(), skipper, lowerValue, upperValue)
+            : new RangeNoGapsApproximation(
+                twoPhase.approximation(), skipper, lowerValue, upperValue));
     this.approximation = (Approximation) approximation();
     this.innerTwoPhase = twoPhase;
   }

-  static class Approximation extends DocIdSetIterator {
+  abstract static class Approximation extends DocIdSetIterator {

     private final DocIdSetIterator innerApproximation;
-    private final DocValuesSkipper skipper;
-    private final long lowerValue;
-    private final long upperValue;
+
+    protected final DocValuesSkipper skipper;
+    protected final long lowerValue;
+    protected final long upperValue;

     private int doc = -1;

@@ -137,7 +147,21 @@ final class DocValuesRangeIterator extends TwoPhaseIterator {
       return innerApproximation.cost();
     }

-    private Match match(int level) {
+    protected abstract Match match(int level);
+  }
+
+  private static final class RangeNoGapsApproximation extends Approximation {
+
+    RangeNoGapsApproximation(
+        DocIdSetIterator innerApproximation,
+        DocValuesSkipper skipper,
+        long lowerValue,
+        long upperValue) {
+      super(innerApproximation, skipper, lowerValue, upperValue);
+    }
+
+    @Override
+    protected Match match(int level) {
       long minValue = skipper.minValue(level);
       long maxValue = skipper.maxValue(level);
       if (minValue > upperValue || maxValue < lowerValue) {
@@ -154,6 +178,28 @@ final class DocValuesRangeIterator extends TwoPhaseIterator {
       }
     }
   }
+
+  private static final class RangeWithGapsApproximation extends Approximation {
+
+    RangeWithGapsApproximation(
+        DocIdSetIterator innerApproximation,
+        DocValuesSkipper skipper,
+        long lowerValue,
+        long upperValue) {
+      super(innerApproximation, skipper, lowerValue, upperValue);
+    }
+
+    @Override
+    protected Match match(int level) {
+      long minValue = skipper.minValue(level);
+      long maxValue = skipper.maxValue(level);
+      if (minValue > upperValue || maxValue < lowerValue) {
+        return Match.NO;
+      } else {
+        return Match.MAYBE;
+      }
+    }
+  }

   @Override
   public final boolean matches() throws IOException {
     return switch (approximation.match) {
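The split into RangeNoGapsApproximation and RangeWithGapsApproximation above is the heart of the change: a block whose [minValue, maxValue] lies entirely inside the query range is a guaranteed match for a contiguous range query, but not for a term set that may have gaps between its smallest and largest ordinal. The following is a simplified, self-contained restatement of the two block-level policies; it is a hypothetical helper, not the actual Lucene class, and the real code additionally distinguishes an IF_DOC_HAS_VALUE case for partially populated blocks.

final class BlockMatchSketch {
  enum Match {
    NO,
    MAYBE,
    YES
  }

  // Contiguous range query [lower, upper]: full containment is a definite match.
  static Match matchNoGaps(long blockMin, long blockMax, long lower, long upper) {
    if (blockMin > upper || blockMax < lower) {
      return Match.NO; // block range and query range are disjoint: skip the block
    } else if (blockMin >= lower && blockMax <= upper) {
      return Match.YES; // every value the block can hold lies inside the query range
    } else {
      return Match.MAYBE; // partial overlap: verify each document
    }
  }

  // Term set whose min/max ordinals may span gaps (e.g. {a, c} with "b" in between):
  // only the disjoint case is conclusive.
  static Match matchWithGaps(long blockMin, long blockMax, long lower, long upper) {
    if (blockMin > upper || blockMax < lower) {
      return Match.NO; // still safe to skip the whole block
    }
    return Match.MAYBE; // a fully contained block may hold values that fall in a gap
  }
}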
@@ -18,6 +18,7 @@ package org.apache.lucene.search;

 import java.io.IOException;
 import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.DocValuesSkipper;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.index.SortedSetDocValues;
@@ -166,27 +167,29 @@ public final class DocValuesRewriteMethod extends MultiTermQuery.RewriteMethod {
           return new ConstantScoreScorer(score(), scoreMode, DocIdSetIterator.empty());
         }

+        // Leverage a DV skipper if one was indexed for the field:
+        DocValuesSkipper skipper = context.reader().getDocValuesSkipper(query.field);
+
         // Create a bit set for the "term set" ordinals (these are the terms provided by the
         // query that are actually present in the doc values field). Cannot use FixedBitSet
         // because we require long index (ord):
         final LongBitSet termSet = new LongBitSet(values.getValueCount());
+        long minOrd = termsEnum.ord();
+        assert minOrd >= 0;
         long maxOrd = -1;
         do {
           long ord = termsEnum.ord();
-          if (ord >= 0) {
-            assert ord > maxOrd;
-            maxOrd = ord;
-            termSet.set(ord);
-          }
+          assert ord >= 0 && ord > maxOrd;
+          maxOrd = ord;
+          termSet.set(ord);
         } while (termsEnum.next() != null);

-        // no terms matched in this segment
-        if (maxOrd < 0) {
+        if (skipper != null && (minOrd > skipper.maxValue() || maxOrd < skipper.minValue())) {
           return new ConstantScoreScorer(score(), scoreMode, DocIdSetIterator.empty());
         }

         final SortedDocValues singleton = DocValues.unwrapSingleton(values);
-        final TwoPhaseIterator iterator;
+        TwoPhaseIterator iterator;
         final long max = maxOrd;
         if (singleton != null) {
           iterator =
@@ -224,6 +227,9 @@ public final class DocValuesRewriteMethod extends MultiTermQuery.RewriteMethod {
               };
         }

+        if (skipper != null) {
+          iterator = new DocValuesRangeIterator(iterator, skipper, minOrd, maxOrd, true);
+        }
         return new ConstantScoreScorer(score(), scoreMode, iterator);
       }
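One more note on the rewrite above: because the filtered terms enum walks ordinals in increasing order, minOrd is the first ordinal it returns and maxOrd the last, so the skipper's segment-wide min/max can rule out the whole segment before any documents are visited. A hedged restatement of that early exit as a standalone helper follows; the names are illustrative, with skipperMin/skipperMax standing in for skipper.minValue()/skipper.maxValue().

final class SegmentSkipSketch {
  // True when no indexed ordinal in the segment can fall inside [minOrd, maxOrd],
  // in which case the weight returns an empty scorer without touching any documents.
  static boolean segmentCannotMatch(long skipperMin, long skipperMax, long minOrd, long maxOrd) {
    return minOrd > skipperMax || maxOrd < skipperMin;
  }
}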
@@ -1,273 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.document;
-
-import java.io.IOException;
-import java.util.concurrent.atomic.AtomicBoolean;
-import org.apache.lucene.index.DocValuesSkipper;
-import org.apache.lucene.index.NumericDocValues;
-import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.TwoPhaseIterator;
-import org.apache.lucene.tests.util.LuceneTestCase;
-
-public class TestDocValuesRangeIterator extends LuceneTestCase {
-
-  public void testSingleLevel() throws IOException {
-    doTestBasics(false);
-  }
-
-  public void testMultipleLevels() throws IOException {
-    doTestBasics(true);
-  }
-
-  private void doTestBasics(boolean doLevels) throws IOException {
-    long queryMin = 10;
-    long queryMax = 20;
-
-    // Fake numeric doc values so that:
-    // docs 0-256 all match
-    // docs in 256-512 are all greater than queryMax
-    // docs in 512-768 are all less than queryMin
-    // docs in 768-1024 have some docs that match the range, others not
-    // docs in 1024-2048 follow a similar pattern as docs in 0-1024 except that not all docs have a
-    // value
-    NumericDocValues values =
-        new NumericDocValues() {
-
-          int doc = -1;
-
-          @Override
-          public boolean advanceExact(int target) throws IOException {
-            throw new UnsupportedOperationException();
-          }
-
-          @Override
-          public int docID() {
-            return doc;
-          }
-
-          @Override
-          public int nextDoc() throws IOException {
-            return advance(doc + 1);
-          }
-
-          @Override
-          public int advance(int target) throws IOException {
-            if (target < 1024) {
-              // dense up to 1024
-              return doc = target;
-            } else if (doc < 2047) {
-              // 50% docs have a value up to 2048
-              return doc = target + (target & 1);
-            } else {
-              return doc = DocIdSetIterator.NO_MORE_DOCS;
-            }
-          }
-
-          @Override
-          public long longValue() throws IOException {
-            int d = doc % 1024;
-            if (d < 128) {
-              return (queryMin + queryMax) >> 1;
-            } else if (d < 256) {
-              return queryMax + 1;
-            } else if (d < 512) {
-              return queryMin - 1;
-            } else {
-              return switch ((d / 2) % 3) {
-                case 0 -> queryMin - 1;
-                case 1 -> queryMax + 1;
-                case 2 -> (queryMin + queryMax) >> 1;
-                default -> throw new AssertionError();
-              };
-            }
-          }
-
-          @Override
-          public long cost() {
-            return 42;
-          }
-        };
-
-    AtomicBoolean twoPhaseCalled = new AtomicBoolean();
-    TwoPhaseIterator twoPhase =
-        new TwoPhaseIterator(values) {
-
-          @Override
-          public boolean matches() throws IOException {
-            twoPhaseCalled.set(true);
-            long v = values.longValue();
-            return v >= queryMin && v <= queryMax;
-          }
-
-          @Override
-          public float matchCost() {
-            return 2f; // 2 comparisons
-          }
-        };
-
-    DocValuesSkipper skipper =
-        new DocValuesSkipper() {
-
-          int doc = -1;
-
-          @Override
-          public void advance(int target) throws IOException {
-            doc = target;
-          }
-
-          @Override
-          public int numLevels() {
-            return doLevels ? 3 : 1;
-          }
-
-          @Override
-          public int minDocID(int level) {
-            int rangeLog = 9 - numLevels() + level;
-
-            // the level is the log2 of the interval
-            if (doc < 0) {
-              return -1;
-            } else if (doc >= 2048) {
-              return DocIdSetIterator.NO_MORE_DOCS;
-            } else {
-              int mask = (1 << rangeLog) - 1;
-              // prior multiple of 2^level
-              return doc & ~mask;
-            }
-          }
-
-          @Override
-          public int maxDocID(int level) {
-            int rangeLog = 9 - numLevels() + level;
-
-            int minDocID = minDocID(level);
-            return switch (minDocID) {
-              case -1 -> -1;
-              case DocIdSetIterator.NO_MORE_DOCS -> DocIdSetIterator.NO_MORE_DOCS;
-              default -> minDocID + (1 << rangeLog) - 1;
-            };
-          }
-
-          @Override
-          public long minValue(int level) {
-            int d = doc % 1024;
-            if (d < 128) {
-              return queryMin;
-            } else if (d < 256) {
-              return queryMax + 1;
-            } else if (d < 768) {
-              return queryMin - 1;
-            } else {
-              return queryMin - 1;
-            }
-          }
-
-          @Override
-          public long maxValue(int level) {
-            int d = doc % 1024;
-            if (d < 128) {
-              return queryMax;
-            } else if (d < 256) {
-              return queryMax + 1;
-            } else if (d < 768) {
-              return queryMin - 1;
-            } else {
-              return queryMax + 1;
-            }
-          }
-
-          @Override
-          public int docCount(int level) {
-            int rangeLog = 9 - numLevels() + level;
-
-            if (doc < 1024) {
-              return 1 << rangeLog;
-            } else {
-              // half docs have a value
-              return 1 << rangeLog >> 1;
-            }
-          }
-
-          @Override
-          public long minValue() {
-            return Long.MIN_VALUE;
-          }
-
-          @Override
-          public long maxValue() {
-            return Long.MAX_VALUE;
-          }
-
-          @Override
-          public int docCount() {
-            return 1024 + 1024 / 2;
-          }
-        };
-
-    DocValuesRangeIterator rangeIterator =
-        new DocValuesRangeIterator(twoPhase, skipper, queryMin, queryMax);
-    DocValuesRangeIterator.Approximation rangeApproximation =
-        (DocValuesRangeIterator.Approximation) rangeIterator.approximation();
-
-    assertEquals(100, rangeApproximation.advance(100));
-    assertEquals(DocValuesRangeIterator.Match.YES, rangeApproximation.match);
-    assertEquals(255, rangeApproximation.upTo);
-    assertTrue(rangeIterator.matches());
-    assertTrue(values.docID() < rangeApproximation.docID()); // we did not advance doc values
-    assertFalse(twoPhaseCalled.get());
-
-    assertEquals(768, rangeApproximation.advance(300));
-    assertEquals(DocValuesRangeIterator.Match.MAYBE, rangeApproximation.match);
-    if (doLevels) {
-      assertEquals(831, rangeApproximation.upTo);
-    } else {
-      assertEquals(1023, rangeApproximation.upTo);
-    }
-    for (int i = 0; i < 10; ++i) {
-      assertEquals(values.docID(), rangeApproximation.docID());
-      assertEquals(twoPhase.matches(), rangeIterator.matches());
-      assertTrue(twoPhaseCalled.get());
-      twoPhaseCalled.set(false);
-      rangeApproximation.nextDoc();
-    }
-
-    assertEquals(1100, rangeApproximation.advance(1099));
-    assertEquals(DocValuesRangeIterator.Match.IF_DOC_HAS_VALUE, rangeApproximation.match);
-    assertEquals(1024 + 256 - 1, rangeApproximation.upTo);
-    assertEquals(values.docID(), rangeApproximation.docID());
-    assertTrue(rangeIterator.matches());
-    assertFalse(twoPhaseCalled.get());
-
-    assertEquals(1024 + 768, rangeApproximation.advance(1024 + 300));
-    assertEquals(DocValuesRangeIterator.Match.MAYBE, rangeApproximation.match);
-    if (doLevels) {
-      assertEquals(1024 + 831, rangeApproximation.upTo);
-    } else {
-      assertEquals(2047, rangeApproximation.upTo);
-    }
-    for (int i = 0; i < 10; ++i) {
-      assertEquals(values.docID(), rangeApproximation.docID());
-      assertEquals(twoPhase.matches(), rangeIterator.matches());
-      assertTrue(twoPhaseCalled.get());
-      twoPhaseCalled.set(false);
-      rangeApproximation.nextDoc();
-    }
-
-    assertEquals(DocIdSetIterator.NO_MORE_DOCS, rangeApproximation.advance(2048));
-  }
-}
@@ -0,0 +1,332 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search;
+
+import java.io.IOException;
+import java.util.concurrent.atomic.AtomicBoolean;
+import org.apache.lucene.index.DocValuesSkipper;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.tests.util.LuceneTestCase;
+
+public class TestDocValuesRangeIterator extends LuceneTestCase {
+
+  public void testSingleLevel() throws IOException {
+    doTestBasics(false);
+  }
+
+  public void testMultipleLevels() throws IOException {
+    doTestBasics(true);
+  }
+
+  private void doTestBasics(boolean doLevels) throws IOException {
+    long queryMin = 10;
+    long queryMax = 20;
+
+    // Test with both gaps and no-gaps in the ranges:
+    NumericDocValues values = docValues(queryMin, queryMax);
+    NumericDocValues values2 = docValues(queryMin, queryMax);
+
+    AtomicBoolean twoPhaseCalled = new AtomicBoolean();
+    TwoPhaseIterator twoPhase = twoPhaseIterator(values, queryMin, queryMax, twoPhaseCalled);
+    AtomicBoolean twoPhaseCalled2 = new AtomicBoolean();
+    TwoPhaseIterator twoPhase2 = twoPhaseIterator(values2, queryMin, queryMax, twoPhaseCalled2);
+
+    DocValuesSkipper skipper = docValuesSkipper(queryMin, queryMax, doLevels);
+    DocValuesSkipper skipper2 = docValuesSkipper(queryMin, queryMax, doLevels);
+
+    DocValuesRangeIterator rangeIterator =
+        new DocValuesRangeIterator(twoPhase, skipper, queryMin, queryMax, false);
+    DocValuesRangeIterator rangeIteratorWithGaps =
+        new DocValuesRangeIterator(twoPhase2, skipper2, queryMin, queryMax, true);
+    DocValuesRangeIterator.Approximation rangeApproximation =
+        (DocValuesRangeIterator.Approximation) rangeIterator.approximation();
+    DocValuesRangeIterator.Approximation rangeApproximationWithGaps =
+        (DocValuesRangeIterator.Approximation) rangeIteratorWithGaps.approximation();
+
+    assertEquals(100, rangeApproximation.advance(100));
+    assertEquals(100, rangeApproximationWithGaps.advance(100));
+    assertEquals(DocValuesRangeIterator.Match.YES, rangeApproximation.match);
+    assertEquals(DocValuesRangeIterator.Match.MAYBE, rangeApproximationWithGaps.match);
+    assertEquals(255, rangeApproximation.upTo);
+    if (doLevels) {
+      assertEquals(127, rangeApproximationWithGaps.upTo);
+    } else {
+      assertEquals(255, rangeApproximationWithGaps.upTo);
+    }
+    assertTrue(rangeIterator.matches());
+    assertTrue(rangeIteratorWithGaps.matches());
+    assertTrue(values.docID() < rangeApproximation.docID()); // we did not advance doc values
+    assertEquals(
+        values2.docID(), rangeApproximationWithGaps.docID()); // we _did_ advance doc values
+    assertFalse(twoPhaseCalled.get());
+    assertTrue(twoPhaseCalled2.get());
+    twoPhaseCalled2.set(false);
+
+    assertEquals(768, rangeApproximation.advance(300));
+    assertEquals(768, rangeApproximationWithGaps.advance(300));
+    assertEquals(DocValuesRangeIterator.Match.MAYBE, rangeApproximation.match);
+    assertEquals(DocValuesRangeIterator.Match.MAYBE, rangeApproximationWithGaps.match);
+    if (doLevels) {
+      assertEquals(831, rangeApproximation.upTo);
+      assertEquals(831, rangeApproximationWithGaps.upTo);
+    } else {
+      assertEquals(1023, rangeApproximation.upTo);
+      assertEquals(1023, rangeApproximationWithGaps.upTo);
+    }
+    for (int i = 0; i < 10; ++i) {
+      assertEquals(values.docID(), rangeApproximation.docID());
+      assertEquals(values2.docID(), rangeApproximationWithGaps.docID());
+      assertEquals(twoPhase.matches(), rangeIterator.matches());
+      assertEquals(twoPhase2.matches(), rangeIteratorWithGaps.matches());
+      assertTrue(twoPhaseCalled.get());
+      assertTrue(twoPhaseCalled2.get());
+      twoPhaseCalled.set(false);
+      twoPhaseCalled2.set(false);
+      rangeApproximation.nextDoc();
+      rangeApproximationWithGaps.nextDoc();
+    }
+
+    assertEquals(1100, rangeApproximation.advance(1099));
+    assertEquals(1100, rangeApproximationWithGaps.advance(1099));
+    assertEquals(DocValuesRangeIterator.Match.IF_DOC_HAS_VALUE, rangeApproximation.match);
+    assertEquals(DocValuesRangeIterator.Match.MAYBE, rangeApproximationWithGaps.match);
+    assertEquals(1024 + 256 - 1, rangeApproximation.upTo);
+    if (doLevels) {
+      assertEquals(1024 + 128 - 1, rangeApproximationWithGaps.upTo);
+    } else {
+      assertEquals(1024 + 256 - 1, rangeApproximationWithGaps.upTo);
+    }
+    assertEquals(values.docID(), rangeApproximation.docID());
+    assertEquals(values2.docID(), rangeApproximationWithGaps.docID());
+    assertTrue(rangeIterator.matches());
+    assertTrue(rangeIteratorWithGaps.matches());
+    assertFalse(twoPhaseCalled.get());
+    assertTrue(twoPhaseCalled2.get());
+    twoPhaseCalled2.set(false);
+
+    assertEquals(1024 + 768, rangeApproximation.advance(1024 + 300));
+    assertEquals(1024 + 768, rangeApproximationWithGaps.advance(1024 + 300));
+    assertEquals(DocValuesRangeIterator.Match.MAYBE, rangeApproximation.match);
+    assertEquals(DocValuesRangeIterator.Match.MAYBE, rangeApproximationWithGaps.match);
+    if (doLevels) {
+      assertEquals(1024 + 831, rangeApproximation.upTo);
+      assertEquals(1024 + 831, rangeApproximationWithGaps.upTo);
+    } else {
+      assertEquals(2047, rangeApproximation.upTo);
+      assertEquals(2047, rangeApproximationWithGaps.upTo);
+    }
+    for (int i = 0; i < 10; ++i) {
+      assertEquals(values.docID(), rangeApproximation.docID());
+      assertEquals(values2.docID(), rangeApproximationWithGaps.docID());
+      assertEquals(twoPhase.matches(), rangeIterator.matches());
+      assertEquals(twoPhase2.matches(), rangeIteratorWithGaps.matches());
+      assertTrue(twoPhaseCalled.get());
+      assertTrue(twoPhaseCalled2.get());
+      twoPhaseCalled.set(false);
+      twoPhaseCalled2.set(false);
+      rangeApproximation.nextDoc();
+      rangeApproximationWithGaps.nextDoc();
+    }
+
+    assertEquals(DocIdSetIterator.NO_MORE_DOCS, rangeApproximation.advance(2048));
+    assertEquals(DocIdSetIterator.NO_MORE_DOCS, rangeApproximationWithGaps.advance(2048));
+  }
+
+  // Fake numeric doc values so that:
+  // docs 0-256 all match
+  // docs in 256-512 are all greater than queryMax
+  // docs in 512-768 are all less than queryMin
+  // docs in 768-1024 have some docs that match the range, others not
+  // docs in 1024-2048 follow a similar pattern as docs in 0-1024 except that not all docs have a
+  // value
+  private static NumericDocValues docValues(long queryMin, long queryMax) {
+    return new NumericDocValues() {
+
+      int doc = -1;
+
+      @Override
+      public boolean advanceExact(int target) throws IOException {
+        throw new UnsupportedOperationException();
+      }
+
+      @Override
+      public int docID() {
+        return doc;
+      }
+
+      @Override
+      public int nextDoc() throws IOException {
+        return advance(doc + 1);
+      }
+
+      @Override
+      public int advance(int target) throws IOException {
+        if (target < 1024) {
+          // dense up to 1024
+          return doc = target;
+        } else if (doc < 2047) {
+          // 50% docs have a value up to 2048
+          return doc = target + (target & 1);
+        } else {
+          return doc = DocIdSetIterator.NO_MORE_DOCS;
+        }
+      }
+
+      @Override
+      public long longValue() throws IOException {
+        int d = doc % 1024;
+        if (d < 128) {
+          return (queryMin + queryMax) >> 1;
+        } else if (d < 256) {
+          return queryMax + 1;
+        } else if (d < 512) {
+          return queryMin - 1;
+        } else {
+          return switch ((d / 2) % 3) {
+            case 0 -> queryMin - 1;
+            case 1 -> queryMax + 1;
+            case 2 -> (queryMin + queryMax) >> 1;
+            default -> throw new AssertionError();
+          };
+        }
+      }
+
+      @Override
+      public long cost() {
+        return 42;
+      }
+    };
+  }
+
+  private static TwoPhaseIterator twoPhaseIterator(
+      NumericDocValues values, long queryMin, long queryMax, AtomicBoolean twoPhaseCalled) {
+    return new TwoPhaseIterator(values) {
+
+      @Override
+      public boolean matches() throws IOException {
+        twoPhaseCalled.set(true);
+        long v = values.longValue();
+        return v >= queryMin && v <= queryMax;
+      }
+
+      @Override
+      public float matchCost() {
+        return 2f; // 2 comparisons
+      }
+    };
+  }
+
+  private static DocValuesSkipper docValuesSkipper(long queryMin, long queryMax, boolean doLevels) {
+    return new DocValuesSkipper() {
+
+      int doc = -1;
+
+      @Override
+      public void advance(int target) throws IOException {
+        doc = target;
+      }
+
+      @Override
+      public int numLevels() {
+        return doLevels ? 3 : 1;
+      }
+
+      @Override
+      public int minDocID(int level) {
+        int rangeLog = 9 - numLevels() + level;
+
+        // the level is the log2 of the interval
+        if (doc < 0) {
+          return -1;
+        } else if (doc >= 2048) {
+          return DocIdSetIterator.NO_MORE_DOCS;
+        } else {
+          int mask = (1 << rangeLog) - 1;
+          // prior multiple of 2^level
+          return doc & ~mask;
+        }
+      }
+
+      @Override
+      public int maxDocID(int level) {
+        int rangeLog = 9 - numLevels() + level;
+
+        int minDocID = minDocID(level);
+        return switch (minDocID) {
+          case -1 -> -1;
+          case DocIdSetIterator.NO_MORE_DOCS -> DocIdSetIterator.NO_MORE_DOCS;
+          default -> minDocID + (1 << rangeLog) - 1;
+        };
+      }
+
+      @Override
+      public long minValue(int level) {
+        int d = doc % 1024;
+        if (d < 128) {
+          return queryMin;
+        } else if (d < 256) {
+          return queryMax + 1;
+        } else if (d < 768) {
+          return queryMin - 1;
+        } else {
+          return queryMin - 1;
+        }
+      }
+
+      @Override
+      public long maxValue(int level) {
+        int d = doc % 1024;
+        if (d < 128) {
+          return queryMax;
+        } else if (d < 256) {
+          return queryMax + 1;
+        } else if (d < 768) {
+          return queryMin - 1;
+        } else {
+          return queryMax + 1;
+        }
+      }
+
+      @Override
+      public int docCount(int level) {
+        int rangeLog = 9 - numLevels() + level;
+
+        if (doc < 1024) {
+          return 1 << rangeLog;
+        } else {
+          // half docs have a value
+          return 1 << rangeLog >> 1;
+        }
+      }
+
+      @Override
+      public long minValue() {
+        return Long.MIN_VALUE;
+      }
+
+      @Override
+      public long maxValue() {
+        return Long.MAX_VALUE;
+      }
+
+      @Override
+      public int docCount() {
+        return 1024 + 1024 / 2;
+      }
+    };
+  }
+}
@@ -41,8 +41,7 @@ import org.apache.lucene.util.automaton.RegExp;

 /** Tests the DocValuesRewriteMethod */
 public class TestDocValuesRewriteMethod extends LuceneTestCase {
-  protected IndexSearcher searcher1;
-  protected IndexSearcher searcher2;
+  protected IndexSearcher searcher;
   private IndexReader reader;
   private Directory dir;
   protected String fieldName;
@@ -69,6 +68,7 @@ public class TestDocValuesRewriteMethod extends LuceneTestCase {
         String s = TestUtil.randomUnicodeString(random());
         doc.add(newStringField(fieldName, s, Field.Store.NO));
         doc.add(new SortedSetDocValuesField(fieldName, new BytesRef(s)));
+        doc.add(SortedSetDocValuesField.indexedField(fieldName + "_with-skip", new BytesRef(s)));
         terms.add(s);
       }
       writer.addDocument(doc);
@@ -89,8 +89,7 @@ public class TestDocValuesRewriteMethod extends LuceneTestCase {
     }

     reader = writer.getReader();
-    searcher1 = newSearcher(reader);
-    searcher2 = newSearcher(reader);
+    searcher = newSearcher(reader);
     writer.close();
   }

@@ -123,12 +122,22 @@ public class TestDocValuesRewriteMethod extends LuceneTestCase {
             name -> null,
             Operations.DEFAULT_DETERMINIZE_WORK_LIMIT,
             new DocValuesRewriteMethod());
+    RegexpQuery docValuesWithSkip =
+        new RegexpQuery(
+            new Term(fieldName + "_with-skip", regexp),
+            RegExp.NONE,
+            0,
+            name -> null,
+            Operations.DEFAULT_DETERMINIZE_WORK_LIMIT,
+            new DocValuesRewriteMethod());
     RegexpQuery inverted = new RegexpQuery(new Term(fieldName, regexp), RegExp.NONE);

-    TopDocs invertedDocs = searcher1.search(inverted, 25);
-    TopDocs docValuesDocs = searcher2.search(docValues, 25);
+    TopDocs invertedDocs = searcher.search(inverted, 25);
+    TopDocs docValuesDocs = searcher.search(docValues, 25);
+    TopDocs docValuesWithSkipDocs = searcher.search(docValuesWithSkip, 25);

     CheckHits.checkEqual(inverted, invertedDocs.scoreDocs, docValuesDocs.scoreDocs);
+    CheckHits.checkEqual(inverted, invertedDocs.scoreDocs, docValuesWithSkipDocs.scoreDocs);
   }

   public void testEquals() throws Exception {
@@ -31,6 +31,7 @@ import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Field.Store;
 import org.apache.lucene.document.KeywordField;
+import org.apache.lucene.document.SortedSetDocValuesField;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.FilterDirectoryReader;
@@ -119,11 +120,14 @@ public class TestTermInSetQuery extends LuceneTestCase {
     }
     Directory dir = newDirectory();
     RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
-    final int numDocs = atLeast(100);
+    final int numDocs = atLeast(10_000);
     for (int i = 0; i < numDocs; ++i) {
       Document doc = new Document();
       final BytesRef term = allTerms.get(random().nextInt(allTerms.size()));
       doc.add(new StringField(field, term, Store.NO));
+      // Also include a doc values field with a skip-list so we can test doc-value rewrite as
+      // well:
+      doc.add(SortedSetDocValuesField.indexedField(field, term));
       iw.addDocument(doc);
     }
     if (numTerms > 1 && random().nextBoolean()) {
@@ -154,7 +158,9 @@ public class TestTermInSetQuery extends LuceneTestCase {
       }
       final Query q1 = new ConstantScoreQuery(bq.build());
       final Query q2 = new TermInSetQuery(field, queryTerms);
+      final Query q3 = new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, field, queryTerms);
       assertSameMatches(searcher, new BoostQuery(q1, boost), new BoostQuery(q2, boost), true);
+      assertSameMatches(searcher, new BoostQuery(q1, boost), new BoostQuery(q3, boost), false);
     }

     reader.close();
@@ -225,6 +231,53 @@ public class TestTermInSetQuery extends LuceneTestCase {
     }
   }

+  /**
+   * Make sure the doc values skipper optimization doesn't incorrectly assume that the terms of a
+   * TermInSetQuery form a continuous range between the query's min and max term (they may have
+   * gaps).
+   */
+  public void testSkipperOptimizationGapAssumption() throws IOException {
+    Directory dir = newDirectory();
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
+    // Index the first 10,000 docs all with the term "b" to get some skip list blocks with the
+    // range [b, b]:
+    for (int i = 0; i < 10_000; i++) {
+      Document doc = new Document();
+      BytesRef term = new BytesRef("b");
+      doc.add(new SortedSetDocValuesField("field", term));
+      doc.add(SortedSetDocValuesField.indexedField("idx_field", term));
+      iw.addDocument(doc);
+    }
+
+    // Index a couple more docs with terms "a" and "c":
+    Document doc = new Document();
+    BytesRef term = new BytesRef("a");
+    doc.add(new SortedSetDocValuesField("field", term));
+    doc.add(SortedSetDocValuesField.indexedField("idx_field", term));
+    iw.addDocument(doc);
+    doc = new Document();
+    term = new BytesRef("c");
+    doc.add(new SortedSetDocValuesField("field", term));
+    doc.add(SortedSetDocValuesField.indexedField("idx_field", term));
+    iw.addDocument(doc);
+
+    iw.commit();
+    IndexReader reader = iw.getReader();
+    IndexSearcher searcher = newSearcher(reader);
+    iw.close();
+
+    // Our query is for (or "a" "c"), which should use a skip-list optimization to exclude blocks
+    // of documents that fall outside the range [a, c]. We want to test that it doesn't
+    // incorrectly do the inverse and include all docs in a block that fall within [a, c] (which
+    // is why we have blocks of only "b" docs up-front):
+    List<BytesRef> queryTerms = List.of(new BytesRef("a"), new BytesRef("c"));
+    Query q1 = new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, "field", queryTerms);
+    Query q2 = new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, "idx_field", queryTerms);
+    assertSameMatches(searcher, q1, q2, false);
+
+    reader.close();
+    dir.close();
+  }
+
   private void assertSameMatches(IndexSearcher searcher, Query q1, Query q2, boolean scores)
       throws IOException {
     final int maxDoc = searcher.getIndexReader().maxDoc();