A few small tweaks to VectorUtil#findNextGEQ: (#13972)

1. Rearrange/rename the parameters to be more idiomatic (e.g., follow conventions of Arrays#... methods)
2. Add an assert that verifies the expected sortedness of the buffer, which we may rely on in the future (so the API isn't trappy)
3. Migrate Lucene101PostingsReader to call VectorUtil instead of VectorUtilSupport directly (so it benefits from the common assert)
This commit is contained in:
Greg Miller 2024-11-05 01:09:27 +01:00
parent 8ae03d66ad
commit c29dd588e4
8 changed files with 35 additions and 35 deletions

View File

@ -593,7 +593,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
}
}
int next = findNextGEQ(docBuffer, docBufferSize, target, docBufferUpto);
int next = findNextGEQ(docBuffer, target, docBufferUpto, docBufferSize);
this.doc = (int) docBuffer[next];
docBufferUpto = next + 1;
return doc;
@ -937,7 +937,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
refillDocs();
}
int next = findNextGEQ(docBuffer, docBufferSize, target, docBufferUpto);
int next = findNextGEQ(docBuffer, target, docBufferUpto, docBufferSize);
posPendingCount += sumOverRange(freqBuffer, docBufferUpto, next + 1);
this.freq = (int) freqBuffer[next];
this.docBufferUpto = next + 1;
@ -1423,7 +1423,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
needsRefilling = false;
}
int next = findNextGEQ(docBuffer, docBufferSize, target, docBufferUpto);
int next = findNextGEQ(docBuffer, target, docBufferUpto, docBufferSize);
this.doc = (int) docBuffer[next];
docBufferUpto = next + 1;
return doc;
@ -1654,7 +1654,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
needsRefilling = false;
}
int next = findNextGEQ(docBuffer, docBufferSize, target, docBufferUpto);
int next = findNextGEQ(docBuffer, target, docBufferUpto, docBufferSize);
posPendingCount += sumOverRange(freqBuffer, docBufferUpto, next + 1);
freq = (int) freqBuffer[next];
docBufferUpto = next + 1;
@ -1755,13 +1755,13 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
}
}
private static int findNextGEQ(long[] buffer, int length, long target, int from) {
for (int i = from; i < length; ++i) {
private static int findNextGEQ(long[] buffer, long target, int from, int to) {
for (int i = from; i < to; ++i) {
if (buffer[i] >= target) {
return i;
}
}
return length;
return to;
}
private static void prefetchPostings(IndexInput docIn, IntBlockTermState state)

View File

@ -142,13 +142,13 @@ public class AdvanceBenchmark {
@Benchmark
public void vectorUtilSearch() {
for (int i = 0; i < startIndexes.length; ++i) {
VectorUtil.findNextGEQ(values, 128, targets[i], startIndexes[i]);
VectorUtil.findNextGEQ(values, targets[i], startIndexes[i], 128);
}
}
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
private static int vectorUtilSearch(int[] values, int target, int startIndex) {
return VectorUtil.findNextGEQ(values, 128, target, startIndex);
return VectorUtil.findNextGEQ(values, target, startIndex, 128);
}
private static void assertEquals(int expected, int actual) {

View File

@ -46,7 +46,6 @@ import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SlowImpactsEnum;
import org.apache.lucene.internal.vectorization.PostingDecodingUtil;
import org.apache.lucene.internal.vectorization.VectorUtilSupport;
import org.apache.lucene.internal.vectorization.VectorizationProvider;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ChecksumIndexInput;
@ -57,6 +56,7 @@ import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BitUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.VectorUtil;
/**
* Concrete class that reads docId(maybe frq,pos,offset,payloads) list with postings format.
@ -66,8 +66,6 @@ import org.apache.lucene.util.IOUtils;
public final class Lucene101PostingsReader extends PostingsReaderBase {
static final VectorizationProvider VECTORIZATION_PROVIDER = VectorizationProvider.getInstance();
private static final VectorUtilSupport VECTOR_SUPPORT =
VECTORIZATION_PROVIDER.getVectorUtilSupport();
// Dummy impacts, composed of the maximum possible term frequency and the lowest possible
// (unsigned) norm value. This is typically used on tail blocks, which don't actually record
// impacts as the storage overhead would not be worth any query evaluation speedup, since there's
@ -601,7 +599,7 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
}
}
int next = VECTOR_SUPPORT.findNextGEQ(docBuffer, docBufferSize, target, docBufferUpto);
int next = VectorUtil.findNextGEQ(docBuffer, target, docBufferUpto, docBufferSize);
this.doc = docBuffer[next];
docBufferUpto = next + 1;
return doc;
@ -950,7 +948,7 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
refillDocs();
}
int next = VECTOR_SUPPORT.findNextGEQ(docBuffer, docBufferSize, target, docBufferUpto);
int next = VectorUtil.findNextGEQ(docBuffer, target, docBufferUpto, docBufferSize);
posPendingCount += sumOverRange(freqBuffer, docBufferUpto, next + 1);
this.freq = freqBuffer[next];
this.docBufferUpto = next + 1;
@ -1437,7 +1435,7 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
needsRefilling = false;
}
int next = VECTOR_SUPPORT.findNextGEQ(docBuffer, docBufferSize, target, docBufferUpto);
int next = VectorUtil.findNextGEQ(docBuffer, target, docBufferUpto, docBufferSize);
this.doc = docBuffer[next];
docBufferUpto = next + 1;
return doc;
@ -1670,7 +1668,7 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
needsRefilling = false;
}
int next = VECTOR_SUPPORT.findNextGEQ(docBuffer, docBufferSize, target, docBufferUpto);
int next = VectorUtil.findNextGEQ(docBuffer, target, docBufferUpto, docBufferSize);
posPendingCount += sumOverRange(freqBuffer, docBufferUpto, next + 1);
freq = freqBuffer[next];
docBufferUpto = next + 1;

View File

@ -199,12 +199,12 @@ final class DefaultVectorUtilSupport implements VectorUtilSupport {
}
@Override
public int findNextGEQ(int[] buffer, int length, int target, int from) {
for (int i = from; i < length; ++i) {
public int findNextGEQ(int[] buffer, int target, int from, int to) {
for (int i = from; i < to; ++i) {
if (buffer[i] >= target) {
return i;
}
}
return length;
return to;
}
}

View File

@ -46,10 +46,10 @@ public interface VectorUtilSupport {
int squareDistance(byte[] a, byte[] b);
/**
* Given an array {@code buffer} that is sorted between indexes {@code 0} inclusive and {@code
* length} exclusive, find the first array index whose value is greater than or equal to {@code
* target}. This index is guaranteed to be at least {@code from}. If there is no such array index,
* {@code length} is returned.
* Given an array {@code buffer} that is sorted between indexes {@code 0} inclusive and {@code to}
* exclusive, find the first array index whose value is greater than or equal to {@code target}.
* This index is guaranteed to be at least {@code from}. If there is no such array index, {@code
* to} is returned.
*/
int findNextGEQ(int[] buffer, int length, int target, int from);
int findNextGEQ(int[] buffer, int target, int from, int to);
}

View File

@ -17,6 +17,7 @@
package org.apache.lucene.util;
import java.util.stream.IntStream;
import org.apache.lucene.internal.vectorization.VectorUtilSupport;
import org.apache.lucene.internal.vectorization.VectorizationProvider;
@ -309,12 +310,13 @@ public final class VectorUtil {
}
/**
* Given an array {@code buffer} that is sorted between indexes {@code 0} inclusive and {@code
* length} exclusive, find the first array index whose value is greater than or equal to {@code
* target}. This index is guaranteed to be at least {@code from}. If there is no such array index,
* {@code length} is returned.
* Given an array {@code buffer} that is sorted between indexes {@code 0} inclusive and {@code to}
* exclusive, find the first array index whose value is greater than or equal to {@code target}.
* This index is guaranteed to be at least {@code from}. If there is no such array index, {@code
* to} is returned.
*/
public static int findNextGEQ(int[] buffer, int length, int target, int from) {
return IMPL.findNextGEQ(buffer, length, target, from);
public static int findNextGEQ(int[] buffer, int target, int from, int to) {
assert IntStream.range(0, to - 1).noneMatch(i -> buffer[i] > buffer[i + 1]);
return IMPL.findNextGEQ(buffer, target, from, to);
}
}

View File

@ -769,9 +769,9 @@ final class PanamaVectorUtilSupport implements VectorUtilSupport {
private static final boolean ENABLE_FIND_NEXT_GEQ_VECTOR_OPTO = INT_SPECIES.length() >= 8;
@Override
public int findNextGEQ(int[] buffer, int length, int target, int from) {
public int findNextGEQ(int[] buffer, int target, int from, int to) {
if (ENABLE_FIND_NEXT_GEQ_VECTOR_OPTO) {
for (; from + INT_SPECIES.length() < length; from += INT_SPECIES.length() + 1) {
for (; from + INT_SPECIES.length() < to; from += INT_SPECIES.length() + 1) {
if (buffer[from + INT_SPECIES.length()] >= target) {
IntVector vector = IntVector.fromArray(INT_SPECIES, buffer, from);
VectorMask<Integer> mask = vector.compare(VectorOperators.LT, target);
@ -779,11 +779,11 @@ final class PanamaVectorUtilSupport implements VectorUtilSupport {
}
}
}
for (int i = from; i < length; ++i) {
for (int i = from; i < to; ++i) {
if (buffer[i] >= target) {
return i;
}
}
return length;
return to;
}
}

View File

@ -372,7 +372,7 @@ public class TestVectorUtil extends LuceneTestCase {
- 5;
assertEquals(
slowFindNextGEQ(values, 128, target, from),
VectorUtil.findNextGEQ(values, 128, target, from));
VectorUtil.findNextGEQ(values, target, from, 128));
}
}