mirror of https://github.com/apache/lucene.git
A few small tweaks to VectorUtil#findNextGEQ: (#13972)
1. Rearrange/rename the parameters to be more idiomatic (e.g., follow conventions of Arrays#... methods) 2. Add assert to ensure expected sortedness we may rely on in the future (so we're not trappy) 3. Migrate PostingsReader to call VectorUtil instead of VectorUtilSupport (so it benefits from the common assert)
This commit is contained in:
parent
8ae03d66ad
commit
c29dd588e4
|
@ -593,7 +593,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
|
|||
}
|
||||
}
|
||||
|
||||
int next = findNextGEQ(docBuffer, docBufferSize, target, docBufferUpto);
|
||||
int next = findNextGEQ(docBuffer, target, docBufferUpto, docBufferSize);
|
||||
this.doc = (int) docBuffer[next];
|
||||
docBufferUpto = next + 1;
|
||||
return doc;
|
||||
|
@ -937,7 +937,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
|
|||
refillDocs();
|
||||
}
|
||||
|
||||
int next = findNextGEQ(docBuffer, docBufferSize, target, docBufferUpto);
|
||||
int next = findNextGEQ(docBuffer, target, docBufferUpto, docBufferSize);
|
||||
posPendingCount += sumOverRange(freqBuffer, docBufferUpto, next + 1);
|
||||
this.freq = (int) freqBuffer[next];
|
||||
this.docBufferUpto = next + 1;
|
||||
|
@ -1423,7 +1423,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
|
|||
needsRefilling = false;
|
||||
}
|
||||
|
||||
int next = findNextGEQ(docBuffer, docBufferSize, target, docBufferUpto);
|
||||
int next = findNextGEQ(docBuffer, target, docBufferUpto, docBufferSize);
|
||||
this.doc = (int) docBuffer[next];
|
||||
docBufferUpto = next + 1;
|
||||
return doc;
|
||||
|
@ -1654,7 +1654,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
|
|||
needsRefilling = false;
|
||||
}
|
||||
|
||||
int next = findNextGEQ(docBuffer, docBufferSize, target, docBufferUpto);
|
||||
int next = findNextGEQ(docBuffer, target, docBufferUpto, docBufferSize);
|
||||
posPendingCount += sumOverRange(freqBuffer, docBufferUpto, next + 1);
|
||||
freq = (int) freqBuffer[next];
|
||||
docBufferUpto = next + 1;
|
||||
|
@ -1755,13 +1755,13 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
|
|||
}
|
||||
}
|
||||
|
||||
private static int findNextGEQ(long[] buffer, int length, long target, int from) {
|
||||
for (int i = from; i < length; ++i) {
|
||||
private static int findNextGEQ(long[] buffer, long target, int from, int to) {
|
||||
for (int i = from; i < to; ++i) {
|
||||
if (buffer[i] >= target) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return length;
|
||||
return to;
|
||||
}
|
||||
|
||||
private static void prefetchPostings(IndexInput docIn, IntBlockTermState state)
|
||||
|
|
|
@ -142,13 +142,13 @@ public class AdvanceBenchmark {
|
|||
@Benchmark
|
||||
public void vectorUtilSearch() {
|
||||
for (int i = 0; i < startIndexes.length; ++i) {
|
||||
VectorUtil.findNextGEQ(values, 128, targets[i], startIndexes[i]);
|
||||
VectorUtil.findNextGEQ(values, targets[i], startIndexes[i], 128);
|
||||
}
|
||||
}
|
||||
|
||||
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
|
||||
private static int vectorUtilSearch(int[] values, int target, int startIndex) {
|
||||
return VectorUtil.findNextGEQ(values, 128, target, startIndex);
|
||||
return VectorUtil.findNextGEQ(values, target, startIndex, 128);
|
||||
}
|
||||
|
||||
private static void assertEquals(int expected, int actual) {
|
||||
|
|
|
@ -46,7 +46,6 @@ import org.apache.lucene.index.PostingsEnum;
|
|||
import org.apache.lucene.index.SegmentReadState;
|
||||
import org.apache.lucene.index.SlowImpactsEnum;
|
||||
import org.apache.lucene.internal.vectorization.PostingDecodingUtil;
|
||||
import org.apache.lucene.internal.vectorization.VectorUtilSupport;
|
||||
import org.apache.lucene.internal.vectorization.VectorizationProvider;
|
||||
import org.apache.lucene.store.ByteArrayDataInput;
|
||||
import org.apache.lucene.store.ChecksumIndexInput;
|
||||
|
@ -57,6 +56,7 @@ import org.apache.lucene.util.ArrayUtil;
|
|||
import org.apache.lucene.util.BitUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.VectorUtil;
|
||||
|
||||
/**
|
||||
* Concrete class that reads docId(maybe frq,pos,offset,payloads) list with postings format.
|
||||
|
@ -66,8 +66,6 @@ import org.apache.lucene.util.IOUtils;
|
|||
public final class Lucene101PostingsReader extends PostingsReaderBase {
|
||||
|
||||
static final VectorizationProvider VECTORIZATION_PROVIDER = VectorizationProvider.getInstance();
|
||||
private static final VectorUtilSupport VECTOR_SUPPORT =
|
||||
VECTORIZATION_PROVIDER.getVectorUtilSupport();
|
||||
// Dummy impacts, composed of the maximum possible term frequency and the lowest possible
|
||||
// (unsigned) norm value. This is typically used on tail blocks, which don't actually record
|
||||
// impacts as the storage overhead would not be worth any query evaluation speedup, since there's
|
||||
|
@ -601,7 +599,7 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
|
|||
}
|
||||
}
|
||||
|
||||
int next = VECTOR_SUPPORT.findNextGEQ(docBuffer, docBufferSize, target, docBufferUpto);
|
||||
int next = VectorUtil.findNextGEQ(docBuffer, target, docBufferUpto, docBufferSize);
|
||||
this.doc = docBuffer[next];
|
||||
docBufferUpto = next + 1;
|
||||
return doc;
|
||||
|
@ -950,7 +948,7 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
|
|||
refillDocs();
|
||||
}
|
||||
|
||||
int next = VECTOR_SUPPORT.findNextGEQ(docBuffer, docBufferSize, target, docBufferUpto);
|
||||
int next = VectorUtil.findNextGEQ(docBuffer, target, docBufferUpto, docBufferSize);
|
||||
posPendingCount += sumOverRange(freqBuffer, docBufferUpto, next + 1);
|
||||
this.freq = freqBuffer[next];
|
||||
this.docBufferUpto = next + 1;
|
||||
|
@ -1437,7 +1435,7 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
|
|||
needsRefilling = false;
|
||||
}
|
||||
|
||||
int next = VECTOR_SUPPORT.findNextGEQ(docBuffer, docBufferSize, target, docBufferUpto);
|
||||
int next = VectorUtil.findNextGEQ(docBuffer, target, docBufferUpto, docBufferSize);
|
||||
this.doc = docBuffer[next];
|
||||
docBufferUpto = next + 1;
|
||||
return doc;
|
||||
|
@ -1670,7 +1668,7 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
|
|||
needsRefilling = false;
|
||||
}
|
||||
|
||||
int next = VECTOR_SUPPORT.findNextGEQ(docBuffer, docBufferSize, target, docBufferUpto);
|
||||
int next = VectorUtil.findNextGEQ(docBuffer, target, docBufferUpto, docBufferSize);
|
||||
posPendingCount += sumOverRange(freqBuffer, docBufferUpto, next + 1);
|
||||
freq = freqBuffer[next];
|
||||
docBufferUpto = next + 1;
|
||||
|
|
|
@ -199,12 +199,12 @@ final class DefaultVectorUtilSupport implements VectorUtilSupport {
|
|||
}
|
||||
|
||||
@Override
|
||||
public int findNextGEQ(int[] buffer, int length, int target, int from) {
|
||||
for (int i = from; i < length; ++i) {
|
||||
public int findNextGEQ(int[] buffer, int target, int from, int to) {
|
||||
for (int i = from; i < to; ++i) {
|
||||
if (buffer[i] >= target) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return length;
|
||||
return to;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -46,10 +46,10 @@ public interface VectorUtilSupport {
|
|||
int squareDistance(byte[] a, byte[] b);
|
||||
|
||||
/**
|
||||
* Given an array {@code buffer} that is sorted between indexes {@code 0} inclusive and {@code
|
||||
* length} exclusive, find the first array index whose value is greater than or equal to {@code
|
||||
* target}. This index is guaranteed to be at least {@code from}. If there is no such array index,
|
||||
* {@code length} is returned.
|
||||
* Given an array {@code buffer} that is sorted between indexes {@code 0} inclusive and {@code to}
|
||||
* exclusive, find the first array index whose value is greater than or equal to {@code target}.
|
||||
* This index is guaranteed to be at least {@code from}. If there is no such array index, {@code
|
||||
* to} is returned.
|
||||
*/
|
||||
int findNextGEQ(int[] buffer, int length, int target, int from);
|
||||
int findNextGEQ(int[] buffer, int target, int from, int to);
|
||||
}
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
package org.apache.lucene.util;
|
||||
|
||||
import java.util.stream.IntStream;
|
||||
import org.apache.lucene.internal.vectorization.VectorUtilSupport;
|
||||
import org.apache.lucene.internal.vectorization.VectorizationProvider;
|
||||
|
||||
|
@ -309,12 +310,13 @@ public final class VectorUtil {
|
|||
}
|
||||
|
||||
/**
|
||||
* Given an array {@code buffer} that is sorted between indexes {@code 0} inclusive and {@code
|
||||
* length} exclusive, find the first array index whose value is greater than or equal to {@code
|
||||
* target}. This index is guaranteed to be at least {@code from}. If there is no such array index,
|
||||
* {@code length} is returned.
|
||||
* Given an array {@code buffer} that is sorted between indexes {@code 0} inclusive and {@code to}
|
||||
* exclusive, find the first array index whose value is greater than or equal to {@code target}.
|
||||
* This index is guaranteed to be at least {@code from}. If there is no such array index, {@code
|
||||
* to} is returned.
|
||||
*/
|
||||
public static int findNextGEQ(int[] buffer, int length, int target, int from) {
|
||||
return IMPL.findNextGEQ(buffer, length, target, from);
|
||||
public static int findNextGEQ(int[] buffer, int target, int from, int to) {
|
||||
assert IntStream.range(0, to - 1).noneMatch(i -> buffer[i] > buffer[i + 1]);
|
||||
return IMPL.findNextGEQ(buffer, target, from, to);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -769,9 +769,9 @@ final class PanamaVectorUtilSupport implements VectorUtilSupport {
|
|||
private static final boolean ENABLE_FIND_NEXT_GEQ_VECTOR_OPTO = INT_SPECIES.length() >= 8;
|
||||
|
||||
@Override
|
||||
public int findNextGEQ(int[] buffer, int length, int target, int from) {
|
||||
public int findNextGEQ(int[] buffer, int target, int from, int to) {
|
||||
if (ENABLE_FIND_NEXT_GEQ_VECTOR_OPTO) {
|
||||
for (; from + INT_SPECIES.length() < length; from += INT_SPECIES.length() + 1) {
|
||||
for (; from + INT_SPECIES.length() < to; from += INT_SPECIES.length() + 1) {
|
||||
if (buffer[from + INT_SPECIES.length()] >= target) {
|
||||
IntVector vector = IntVector.fromArray(INT_SPECIES, buffer, from);
|
||||
VectorMask<Integer> mask = vector.compare(VectorOperators.LT, target);
|
||||
|
@ -779,11 +779,11 @@ final class PanamaVectorUtilSupport implements VectorUtilSupport {
|
|||
}
|
||||
}
|
||||
}
|
||||
for (int i = from; i < length; ++i) {
|
||||
for (int i = from; i < to; ++i) {
|
||||
if (buffer[i] >= target) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return length;
|
||||
return to;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -372,7 +372,7 @@ public class TestVectorUtil extends LuceneTestCase {
|
|||
- 5;
|
||||
assertEquals(
|
||||
slowFindNextGEQ(values, 128, target, from),
|
||||
VectorUtil.findNextGEQ(values, 128, target, from));
|
||||
VectorUtil.findNextGEQ(values, target, from, 128));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue