Merge branch 'main' into bit_set_iterator_into_bit_set

This commit is contained in:
Adrien Grand 2024-12-23 15:08:49 +01:00
commit 1720dc065d
14 changed files with 57 additions and 12 deletions

View File

@ -67,6 +67,13 @@
</maintainer>
<!-- NOTE: please insert releases in numeric order, NOT chronologically. -->
<release>
<Version>
<name>lucene-10.1.0</name>
<created>2024-12-20</created>
<revision>10.1.0</revision>
</Version>
</release>
<release>
<Version>
<name>lucene-10.0.0</name>

View File

@ -182,6 +182,9 @@ Bug Fixes
* GITHUB#13990: Added filter to the toString() method of Knn[Float|Byte]VectorQuery
and DiversifyingChildren[Float|Byte]KnnVectorQuery. (Viswanath Kuchibhotla)
* GITHUB#13819: Prevent flattening of ordered and unordered interval sources (Jim Ferenczi)
* GITHUB#14008: Counts provided by taxonomy facets in addition to another aggregation are now returned together with
their corresponding ordinals. (Paul King)
* GITHUB#14027: Make SegmentInfos#readCommit(Directory, String, int) public (Luca Cavanna)
Build
---------------------
@ -192,16 +195,6 @@ Other
---------------------
* GITHUB#13982: Remove duplicate test code. (Lu Xugang)
======================== Lucene 10.0.1 =======================
Bug Fixes
---------------------
* GITHUB#14008: Counts provided by taxonomy facets in addition to another aggregation are now returned together with
their corresponding ordinals. (Paul King)
* GITHUB#14027: Make SegmentInfos#readCommit(Directory, String, int) public (Luca Cavanna)
======================= Lucene 10.0.0 =======================
API Changes

View File

@ -17,4 +17,5 @@
9.11.1
9.12.0
9.12.1
10.0.0
10.0.0
10.1.0

View File

@ -845,7 +845,9 @@ public class IndexSearcher {
scorer = new TimeLimitingBulkScorer(scorer, queryTimeout);
}
try {
scorer.score(leafCollector, ctx.reader().getLiveDocs(), minDocId, maxDocId);
// Optimize for the case when live docs are stored in a FixedBitSet.
Bits acceptDocs = ScorerUtil.likelyFixedBitSet(ctx.reader().getLiveDocs());
scorer.score(leafCollector, acceptDocs, minDocId, maxDocId);
} catch (
@SuppressWarnings("unused")
CollectionTerminatedException e) {

View File

@ -30,7 +30,9 @@ import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.PriorityQueue;
/** Util class for Scorer related methods */
@ -108,4 +110,40 @@ class ScorerUtil {
}
return scorable;
}
/**
 * Prepares the {@link Bits} describing accepted documents for the common case where they are
 * backed by a {@link FixedBitSet}.
 *
 * <p>A {@link FixedBitSet} is handed back untouched, any other non-null implementation is wrapped
 * in a {@link FilterBits}, and {@code null} stays {@code null}. The intent is to keep calls to
 * {@link Bits#get(int)} inlinable, which in turn helps speed up query evaluation; inlining will
 * sometimes also enable auto-vectorization of the shifts and masks performed by {@link
 * FixedBitSet#get(int)}.
 *
 * @param acceptDocs the accepted documents, or {@code null}
 * @return {@code acceptDocs} itself when it is a {@link FixedBitSet}, a delegating wrapper for any
 *     other non-null instance, or {@code null} when {@code acceptDocs} is {@code null}
 */
static Bits likelyFixedBitSet(Bits acceptDocs) {
  if (acceptDocs == null) {
    // Nothing to wrap: null is passed through unchanged.
    return null;
  }
  if (acceptDocs instanceof FixedBitSet) {
    // Already the preferred implementation, return it as-is.
    return acceptDocs;
  }
  // Any other implementation is hidden behind a single wrapper type.
  return new FilterBits(acceptDocs);
}
/** A {@link Bits} view that forwards every call to the wrapped instance. */
private static class FilterBits implements Bits {

  /** The wrapped instance that answers all queries. */
  private final Bits delegate;

  /**
   * Creates a wrapper around the given bits.
   *
   * @param delegate the instance to forward calls to
   */
  FilterBits(Bits delegate) {
    this.delegate = delegate;
  }

  /** Returns the value of the wrapped bits at {@code index}. */
  @Override
  public boolean get(int index) {
    return delegate.get(index);
  }

  /** Returns the length reported by the wrapped bits. */
  @Override
  public int length() {
    return delegate.length();
  }
}
}

View File

@ -771,6 +771,10 @@ final class PanamaVectorUtilSupport implements VectorUtilSupport {
@Override
public int findNextGEQ(int[] buffer, int target, int from, int to) {
if (ENABLE_FIND_NEXT_GEQ_VECTOR_OPTO) {
// This effectively implements the V1 intersection algorithm from
// D. Lemire, L. Boytsov, N. Kurz SIMD Compression and the Intersection of Sorted Integers
// with T = INT_SPECIES.length(), ie. T=8 with AVX2 and T=16 with AVX-512
// https://arxiv.org/pdf/1401.6399
for (; from + INT_SPECIES.length() < to; from += INT_SPECIES.length() + 1) {
if (buffer[from + INT_SPECIES.length()] >= target) {
IntVector vector = IntVector.fromArray(INT_SPECIES, buffer, from);