Merge branch 'apache:main' into bpv21_main

commit 7312d91394
Authored by expani1729 on 2024-10-16 15:17:20 +05:30, committed by GitHub
9 changed files with 96 additions and 64 deletions

CHANGES.txt

@@ -47,8 +47,16 @@ Improvements
 Optimizations
 ---------------------
 
 * GITHUB#13828: Reduce long[] array allocation for bitset in readBitSetIterator. (Zhang Chao)
 
+* GITHUB#13800: MaxScoreBulkScorer now recomputes scorer partitions when the
+  minimum competitive allows for a more favorable partitioning. (Adrien Grand)
+
+* GITHUB#13904: BlockMaxConjunctionBulkScorer can now early exit when the
+  leading clause has a single impact block (e.g. ConstantScoreQuery).
+  (Adrien Grand)
+
 Bug Fixes
 ---------------------
 * GITHUB#13832: Fixed an issue where the DefaultPassageFormatter.format method did not format passages as intended

BackwardsCompatibilityTestBase.java

@@ -115,9 +115,6 @@ public abstract class BackwardsCompatibilityTestBase extends LuceneTestCase {
    */
   protected BackwardsCompatibilityTestBase(
       @Name("version") Version version, @Name("pattern") String indexPattern) {
-    // TODO: add 10.0.0 bw indices after 10.0.0 has been released, see
-    // https://github.com/apache/lucene/issues/13847
-    assumeTrue("Can only test with 10.0.0 has been released", version.major < 10);
     this.version = version;
     this.indexPattern = indexPattern;
   }

TestBasicBackwardsCompatibility.java

@@ -832,7 +832,7 @@ public class TestBasicBackwardsCompatibility extends BackwardsCompatibilityTestBase {
     expectThrows(IllegalArgumentException.class, () -> TestUtil.addIndexesSlowly(w, reader));
     assertEquals(
         e.getMessage(),
-        "Cannot merge a segment that has been created with major version 9 into this index which has been created by major version 10");
+        "Cannot merge a segment that has been created with major version 10 into this index which has been created by major version 11");
     w.close();
     targetDir2.close();

BlockMaxConjunctionBulkScorer.java

@@ -85,9 +85,20 @@ final class BlockMaxConjunctionBulkScorer extends BulkScorer {
     int windowMin = Math.max(lead1.docID(), min);
     while (windowMin < max) {
-      // Use impacts of the least costly scorer to compute windows
-      // NOTE: windowMax is inclusive
-      int windowMax = Math.min(scorers[0].advanceShallow(windowMin), max - 1);
+      // Use impacts of the least costly scorer to compute windows to keep the per-block overhead
+      // under control.
+      // NOTE: windowMax is inclusive.
+      int windowMax = scorer1.advanceShallow(windowMin);
+      if (windowMax == DocIdSetIterator.NO_MORE_DOCS) {
+        // If the query doesn't have impacts anymore, or has a single block for the whole doc ID
+        // space (e.g. ConstantScoreQuery), then we try to create a block that has ~128 docs of the
+        // leading clause. This gives us higher chances to exit early based on the maximum scores of
+        // other clauses.
+        long windowSize = 128L * maxDoc / Math.max(1, lead1.cost());
+        windowSize = Math.max(windowSize, 128L);
+        windowMax = (int) Math.min(Integer.MAX_VALUE, windowMin + windowSize);
+      }
+      windowMax = Math.min(windowMax, max - 1);
 
       float maxWindowScore = Float.POSITIVE_INFINITY;
       if (0 < scorable.minCompetitiveScore) {
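
The fallback window scales inversely with the density of the leading clause: roughly 128 lead matches' worth of doc IDs, floored at 128 docs and clamped to the int range. A standalone sketch of that arithmetic, using hypothetical maxDoc and cost values (illustrative only, not Lucene code):

    class WindowSizeDemo {
      static int fallbackWindowMax(int windowMin, int maxDoc, long leadCost) {
        // Aim for ~128 matches of the leading clause per window: scale 128 docs
        // by the lead's inverse density (maxDoc / cost), floored at 128 docs.
        long windowSize = 128L * maxDoc / Math.max(1, leadCost);
        windowSize = Math.max(windowSize, 128L);
        return (int) Math.min(Integer.MAX_VALUE, windowMin + windowSize);
      }

      public static void main(String[] args) {
        // Lead matches 1 doc in 10: windows of ~1280 docs, ~128 lead matches each.
        System.out.println(fallbackWindowMax(0, 1_000_000, 100_000)); // 1280
        // Dense lead (cost == maxDoc): the 128-doc floor kicks in.
        System.out.println(fallbackWindowMax(0, 1_000_000, 1_000_000)); // 128
      }
    }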

MaxScoreBulkScorer.java

@@ -40,6 +40,8 @@ final class MaxScoreBulkScorer extends BulkScorer {
   // Index of the first scorer that is required, this scorer and all following scorers are required
   // for a document to match.
   int firstRequiredScorer;
+  // The minimum value of minCompetitiveScore that would produce a more favorable partitioning.
+  float nextMinCompetitiveScore;
   private final long cost;
   float minCompetitiveScore;
   private final Score scorable = new Score();

@@ -114,9 +116,14 @@ final class MaxScoreBulkScorer extends BulkScorer {
       while (top.doc < outerWindowMax) {
         scoreInnerWindow(collector, acceptDocs, outerWindowMax);
         top = essentialQueue.top();
+        if (minCompetitiveScore >= nextMinCompetitiveScore) {
+          // The minimum competitive score increased substantially, so we can now partition scorers
+          // in a more favorable way.
+          break;
+        }
       }
-      outerWindowMin = outerWindowMax;
+      outerWindowMin = Math.min(top.doc, outerWindowMax);
     }
     return nextCandidate(max);

@@ -337,6 +344,7 @@ final class MaxScoreBulkScorer extends BulkScorer {
         });
     double maxScoreSum = 0;
     firstEssentialScorer = 0;
+    nextMinCompetitiveScore = Float.POSITIVE_INFINITY;
     for (int i = 0; i < allScorers.length; ++i) {
       final DisiWrapper w = scratch[i];
       double newMaxScoreSum = maxScoreSum + w.maxWindowScore;

@@ -349,6 +357,7 @@ final class MaxScoreBulkScorer extends BulkScorer {
         firstEssentialScorer++;
       } else {
         allScorers[allScorers.length - 1 - (i - firstEssentialScorer)] = w;
+        nextMinCompetitiveScore = Math.min(maxScoreSumFloat, nextMinCompetitiveScore);
       }
     }
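
For context: MaxScoreBulkScorer sorts scorers by max score and treats the longest prefix whose summed max scores stay below minCompetitiveScore as non-essential, since those clauses alone cannot produce a competitive hit. The new nextMinCompetitiveScore field records the smallest threshold at which one more scorer could join that prefix, and the early break above triggers a re-partition as soon as the threshold crosses it. A simplified, self-contained sketch of the partitioning arithmetic (illustrative names and scores, not Lucene's internals):

    // Sketch: split scorers sorted by ascending max score into a non-essential
    // prefix and an essential suffix, and compute the next threshold that
    // would allow a longer (more favorable) non-essential prefix.
    class PartitionDemo {
      public static void main(String[] args) {
        double[] maxScores = {0.25, 0.5, 1.0, 2.0}; // hypothetical, sorted
        double minCompetitiveScore = 0.6;

        double maxScoreSum = 0;
        int firstEssentialScorer = 0;
        double nextMinCompetitiveScore = Double.POSITIVE_INFINITY;
        for (double maxScore : maxScores) {
          double newSum = maxScoreSum + maxScore;
          if (newSum < minCompetitiveScore) {
            maxScoreSum = newSum; // still non-essential
            firstEssentialScorer++;
          } else {
            // Once minCompetitiveScore reaches newSum, this scorer could become
            // non-essential too, so remember the smallest such value.
            nextMinCompetitiveScore = Math.min(nextMinCompetitiveScore, newSum);
          }
        }
        System.out.println(firstEssentialScorer); // 1
        System.out.println(nextMinCompetitiveScore); // 0.75
      }
    }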

PointInSetQuery.java

@@ -181,7 +181,7 @@ public abstract class PointInSetQuery extends Query implements Accountable {
         @Override
         public Scorer get(long leadCost) throws IOException {
           DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);
-          values.intersect(new MergePointVisitor(sortedPackedPoints, result));
+          values.intersect(new MergePointVisitor(sortedPackedPoints.iterator(), result));
           DocIdSetIterator iterator = result.build().iterator();
           return new ConstantScoreScorer(score(), scoreMode, iterator);
         }

@@ -192,7 +192,9 @@ public abstract class PointInSetQuery extends Query implements Accountable {
           if (cost == -1) {
             // Computing the cost may be expensive, so only do it if necessary
             DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);
-            cost = values.estimateDocCount(new MergePointVisitor(sortedPackedPoints, result));
+            cost =
+                values.estimateDocCount(
+                    new MergePointVisitor(sortedPackedPoints.iterator(), result));
             assert cost >= 0;
           }
           return cost;

@@ -260,18 +262,15 @@ public abstract class PointInSetQuery extends Query implements Accountable {
   private class MergePointVisitor implements IntersectVisitor {
 
     private final DocIdSetBuilder result;
-    private TermIterator iterator;
+    private final TermIterator iterator;
     private BytesRef nextQueryPoint;
     private final ByteArrayComparator comparator;
-    private final PrefixCodedTerms sortedPackedPoints;
     private DocIdSetBuilder.BulkAdder adder;
 
-    public MergePointVisitor(PrefixCodedTerms sortedPackedPoints, DocIdSetBuilder result)
-        throws IOException {
+    public MergePointVisitor(TermIterator iterator, DocIdSetBuilder result) throws IOException {
       this.result = result;
-      this.sortedPackedPoints = sortedPackedPoints;
       this.comparator = ArrayUtil.getUnsignedComparator(bytesPerDim);
-      this.iterator = this.sortedPackedPoints.iterator();
+      this.iterator = iterator;
       nextQueryPoint = iterator.next();
     }
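
The refactor narrows MergePointVisitor's dependency from the whole PrefixCodedTerms to the TermIterator it actually consumes, with each caller supplying a fresh iterator per traversal. A minimal sketch of the same move using only standard-library types (the IteratorConsumer class is hypothetical, not from the patch):

    import java.util.Iterator;
    import java.util.List;

    // Takes the iterator it drains instead of the collection plus the job of
    // creating iterators; each traversal gets its own fresh iterator.
    class IteratorConsumer {
      private final Iterator<String> it;

      IteratorConsumer(Iterator<String> it) {
        this.it = it;
      }

      void drain() {
        it.forEachRemaining(System.out::println);
      }

      public static void main(String[] args) {
        List<String> terms = List.of("a", "b", "c");
        new IteratorConsumer(terms.iterator()).drain(); // fresh iterator per use
      }
    }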

Util.java

@@ -775,10 +775,11 @@ public final class Util {
   /** Just takes unsigned byte values from the BytesRef and converts into an IntsRef. */
   public static IntsRef toIntsRef(BytesRef input, IntsRefBuilder scratch) {
-    scratch.clear();
+    scratch.growNoCopy(input.length);
     for (int i = 0; i < input.length; i++) {
-      scratch.append(input.bytes[i + input.offset] & 0xFF);
+      scratch.setIntAt(i, input.bytes[i + input.offset] & 0xFF);
     }
+    scratch.setLength(input.length);
     return scratch.get();
   }
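
The rewrite replaces per-element append(), which re-checks and grows capacity on every call, with a single up-front growNoCopy() plus indexed writes and one final setLength(). The same pre-size-then-fill pattern in plain Java (hypothetical helper, standard library only):

    // Pre-size once, then fill by index: one allocation, no amortized growth.
    static int[] toUnsignedInts(byte[] input) {
      int[] out = new int[input.length];
      for (int i = 0; i < input.length; i++) {
        out[i] = input[i] & 0xFF; // unsigned widening, as in toIntsRef
      }
      return out;
    }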

MemorySegmentIndexInput.java

@@ -530,7 +530,29 @@ abstract class MemorySegmentIndexInput extends IndexInput
   @Override
   public final MemorySegmentIndexInput clone() {
-    final MemorySegmentIndexInput clone = buildSlice((String) null, 0L, this.length);
+    ensureOpen();
+    ensureAccessible();
+    final MemorySegmentIndexInput clone;
+    if (segments.length == 1) {
+      clone =
+          new SingleSegmentImpl(
+              toString(),
+              null, // clones don't have an Arena, as they can't close)
+              segments[0],
+              length,
+              chunkSizePower,
+              confined);
+    } else {
+      clone =
+          new MultiSegmentImpl(
+              toString(),
+              null, // clones don't have an Arena, as they can't close)
+              segments,
+              ((MultiSegmentImpl) this).offset,
+              length,
+              chunkSizePower,
+              confined);
+    }
     try {
       clone.seek(getFilePointer());
     } catch (IOException ioe) {
@@ -570,6 +592,13 @@ abstract class MemorySegmentIndexInput extends IndexInput
     if (NATIVE_ACCESS.isPresent() && advice != ReadAdvice.NORMAL) {
       // No need to madvise with a normal advice, since it's the OS' default.
       final NativeAccess nativeAccess = NATIVE_ACCESS.get();
+      if (length >= nativeAccess.getPageSize()) {
+        // Only set the read advice if the inner file is large enough. Otherwise the cons are likely
+        // outweighing the pros as we're:
+        // - potentially overriding the advice of other files that share the same pages,
+        // - paying the cost of a madvise system call for little value.
+        // We could align inner files with the page size to avoid the first issue, but again the
+        // pros don't clearly overweigh the cons.
       slice.advise(
           0,
           slice.length,

@@ -577,6 +606,7 @@ abstract class MemorySegmentIndexInput extends IndexInput
             nativeAccess.madvise(segment, advice);
           });
     }
+    }
     return slice;
   }
@@ -584,26 +614,30 @@ abstract class MemorySegmentIndexInput extends IndexInput
   MemorySegmentIndexInput buildSlice(String sliceDescription, long offset, long length) {
     ensureOpen();
     ensureAccessible();
+    final MemorySegment[] slices;
+    final boolean isClone = offset == 0 && length == this.length;
+    if (isClone) {
+      slices = segments;
+    } else {
       final long sliceEnd = offset + length;
       final int startIndex = (int) (offset >>> chunkSizePower);
       final int endIndex = (int) (sliceEnd >>> chunkSizePower);
       // we always allocate one more slice, the last one may be a 0 byte one after truncating with
       // asSlice():
-      final MemorySegment slices[] = ArrayUtil.copyOfSubArray(segments, startIndex, endIndex + 1);
+      slices = ArrayUtil.copyOfSubArray(segments, startIndex, endIndex + 1);
       // set the last segment's limit for the sliced view.
       slices[slices.length - 1] = slices[slices.length - 1].asSlice(0L, sliceEnd & chunkSizeMask);
       offset = offset & chunkSizeMask;
+    }
     final String newResourceDescription = getFullSliceDescription(sliceDescription);
     if (slices.length == 1) {
       return new SingleSegmentImpl(
           newResourceDescription,
           null, // clones don't have an Arena, as they can't close)
-          slices[0].asSlice(offset, length),
+          isClone ? slices[0] : slices[0].asSlice(offset, length),
           length,
           chunkSizePower,
           confined);
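
Both hunks special-case the whole-file view: clone() now constructs the new instance directly over the existing segments, and buildSlice() skips the copy-and-truncate path when offset == 0 && length == this.length, sharing the segment array instead. A toy sketch of that fast path over a plain array (stand-in types, not MemorySegment):

    import java.util.Arrays;

    class SliceDemo {
      // A "slice" spanning the whole input shares the backing data untouched;
      // a real sub-slice copies and truncates the covered range.
      static int[] slice(int[] data, int offset, int length) {
        if (offset == 0 && length == data.length) {
          return data; // clone case: no copy, no re-slicing
        }
        return Arrays.copyOfRange(data, offset, offset + length);
      }

      public static void main(String[] args) {
        int[] file = {1, 2, 3, 4};
        System.out.println(slice(file, 0, 4) == file); // true: shared
        System.out.println(Arrays.toString(slice(file, 1, 2))); // [2, 3]
      }
    }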

TestMaxScoreBulkScorer.java

@@ -38,23 +38,6 @@ import org.apache.lucene.util.Bits;
 // These basic tests are similar to some of the tests in TestWANDScorer, and may not need to be kept
 public class TestMaxScoreBulkScorer extends LuceneTestCase {
 
-  private static class CapMaxScoreWindowAt2048Scorer extends FilterScorer {
-
-    public CapMaxScoreWindowAt2048Scorer(Scorer in) {
-      super(in);
-    }
-
-    @Override
-    public int advanceShallow(int target) throws IOException {
-      return Math.min(target | 0x7FF, in.advanceShallow(target));
-    }
-
-    @Override
-    public float getMaxScore(int upTo) throws IOException {
-      return in.getMaxScore(upTo);
-    }
-  }
-
   private void writeDocuments(Directory dir) throws IOException {
     try (IndexWriter w =
         new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()))) {
@@ -96,12 +79,10 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
           searcher
               .createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f)
               .scorer(context);
-      scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1);
       Scorer scorer2 =
           searcher
               .createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f)
               .scorer(context);
-      scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2);
 
       BulkScorer scorer =
           new MaxScoreBulkScorer(context.reader().maxDoc(), Arrays.asList(scorer1, scorer2));
@@ -168,12 +149,10 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
           searcher
               .createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f)
               .scorer(context);
-      scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1);
       Scorer scorer2 =
           searcher
               .createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f)
               .scorer(context);
-      scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2);
 
       BulkScorer scorer =
           new MaxScoreBulkScorer(context.reader().maxDoc(), Arrays.asList(scorer1, scorer2));
@@ -237,17 +216,14 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
           searcher
               .createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f)
               .scorer(context);
-      scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1);
       Scorer scorer2 =
           searcher
               .createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f)
               .scorer(context);
-      scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2);
       Scorer scorer3 =
           searcher
               .createWeight(searcher.rewrite(clause3), ScoreMode.TOP_SCORES, 1f)
               .scorer(context);
-      scorer3 = new CapMaxScoreWindowAt2048Scorer(scorer3);
 
       BulkScorer scorer =
           new MaxScoreBulkScorer(
@@ -317,17 +293,14 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
           searcher
               .createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f)
               .scorer(context);
-      scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1);
       Scorer scorer2 =
           searcher
               .createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f)
               .scorer(context);
-      scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2);
       Scorer scorer3 =
           searcher
               .createWeight(searcher.rewrite(clause3), ScoreMode.TOP_SCORES, 1f)
               .scorer(context);
-      scorer3 = new CapMaxScoreWindowAt2048Scorer(scorer3);
 
       BulkScorer scorer =
           new MaxScoreBulkScorer(