Merge branch 'main' into bit_set_iterator_into_bit_set

2024-12-23 15:08:49 +01:00 · 2024-12-23 15:08:49 +01:00 · 1720dc065d
parent c97b352487 0494c824e0
commit 1720dc065d
14 changed files with 57 additions and 12 deletions
--- a/dev-tools/doap/lucene.rdf
+++ b/dev-tools/doap/lucene.rdf
@ -67,6 +67,13 @@
    </maintainer>

    <!-- NOTE: please insert releases in numeric order, NOT chronologically. -->
+    <release>
+       <Version>
+         <name>lucene-10.1.0</name>
+         <created>2024-12-20</created>
+         <revision>10.1.0</revision>
+       </Version>
+    </release>
    <release>
       <Version>
         <name>lucene-10.0.0</name>
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -182,6 +182,9 @@ Bug Fixes
 * GITHUB#13990: Added filter to the toString() method of Knn[Float|Byte]VectorQuery
  and DiversifyingChildren[Float|Byte]KnnVectorQuery. (Viswanath Kuchibhotla)
 * GITHUB#13819: Prevent flattening of ordered and unordered interval sources (Jim Ferenczi)
+* GITHUB#14008: Counts provided by taxonomy facets in addition to another aggregation are now returned together with
+  their corresponding ordinals. (Paul King)
+* GITHUB#14027: Make SegmentInfos#readCommit(Directory, String, int) public (Luca Cavanna)

 Build
 ---------------------
@ -192,16 +195,6 @@ Other
 ---------------------
 * GITHUB#13982: Remove duplicate test code. (Lu Xugang)

-======================== Lucene 10.0.1 =======================
-
-Bug Fixes
---------------------
-
-* GITHUB#14008: Counts provided by taxonomy facets in addition to another aggregation are now returned together with
-  their corresponding ordinals. (Paul King)
-
-* GITHUB#14027: Make SegmentInfos#readCommit(Directory, String, int) public (Luca Cavanna)
-
 ======================= Lucene 10.0.0 =======================

 API Changes
--- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.10.1.0-cfs.zip
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.10.1.0-cfs.zip
--- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.10.1.0-nocfs.zip
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.10.1.0-nocfs.zip
--- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/int7_hnsw.10.1.0.zip
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/int7_hnsw.10.1.0.zip
--- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/sorted.10.1.0.zip
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/sorted.10.1.0.zip
--- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.12.1-cfs.zip
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.12.1-cfs.zip
--- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.12.1-nocfs.zip
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.12.1-nocfs.zip
--- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.int7_hnsw.9.12.1.zip
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.int7_hnsw.9.12.1.zip
--- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.sorted.9.12.1.zip
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.sorted.9.12.1.zip
--- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/versions.txt
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/versions.txt
@ -17,4 +17,5 @@
 9.11.1
 9.12.0
 9.12.1
-10.0.0
+10.0.0
+10.1.0
--- a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
+++ b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
@ -845,7 +845,9 @@ public class IndexSearcher {
        scorer = new TimeLimitingBulkScorer(scorer, queryTimeout);
      }
      try {
-        scorer.score(leafCollector, ctx.reader().getLiveDocs(), minDocId, maxDocId);
+        // Optimize for the case when live docs are stored in a FixedBitSet.
+        Bits acceptDocs = ScorerUtil.likelyFixedBitSet(ctx.reader().getLiveDocs());
+        scorer.score(leafCollector, acceptDocs, minDocId, maxDocId);
      } catch (
          @SuppressWarnings("unused")
          CollectionTerminatedException e) {
--- a/lucene/core/src/java/org/apache/lucene/search/ScorerUtil.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ScorerUtil.java
@ -30,7 +30,9 @@ import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.store.ByteBuffersDirectory;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.PriorityQueue;

 /** Util class for Scorer related methods */
@ -108,4 +110,40 @@ class ScorerUtil {
    }
    return scorable;
  }
+
+  /**
+   * Optimize {@link Bits} representing the set of accepted documents for the case when it is likely
+   * implemented via a {@link FixedBitSet}. This helps make calls to {@link Bits#get(int)}
+   * inlinable, which in turn helps speed up query evaluation. This is especially helpful as
+   * inlining will sometimes enable auto-vectorizing shifts and masks that are done in {@link
+   * FixedBitSet#get(int)}.
+   */
+  static Bits likelyFixedBitSet(Bits acceptDocs) {
+    if (acceptDocs instanceof FixedBitSet) {
+      return acceptDocs;
+    } else if (acceptDocs != null) {
+      return new FilterBits(acceptDocs);
+    } else {
+      return null;
+    }
+  }
+
+  private static class FilterBits implements Bits {
+
+    private final Bits in;
+
+    FilterBits(Bits in) {
+      this.in = in;
+    }
+
+    @Override
+    public boolean get(int index) {
+      return in.get(index);
+    }
+
+    @Override
+    public int length() {
+      return in.length();
+    }
+  }
 }
--- a/lucene/core/src/java21/org/apache/lucene/internal/vectorization/PanamaVectorUtilSupport.java
+++ b/lucene/core/src/java21/org/apache/lucene/internal/vectorization/PanamaVectorUtilSupport.java
@ -771,6 +771,10 @@ final class PanamaVectorUtilSupport implements VectorUtilSupport {
  @Override
  public int findNextGEQ(int[] buffer, int target, int from, int to) {
    if (ENABLE_FIND_NEXT_GEQ_VECTOR_OPTO) {
+      // This effectively implements the V1 intersection algorithm from
+      // D. Lemire, L. Boytsov, N. Kurz, "SIMD Compression and the Intersection of Sorted Integers"
+      // with T = INT_SPECIES.length(), i.e. T=8 with AVX2 and T=16 with AVX-512
+      // https://arxiv.org/pdf/1401.6399
      for (; from + INT_SPECIES.length() < to; from += INT_SPECIES.length() + 1) {
        if (buffer[from + INT_SPECIES.length()] >= target) {
          IntVector vector = IntVector.fromArray(INT_SPECIES, buffer, from);