mirror of https://github.com/apache/lucene.git
Revert "Speed up advancing within a block. (#13692)"
This reverts commit 79fd9fee97.

Parent: ebeb5c3281
Commit: 73173a40c1
AdvanceBenchmark.java (org.apache.lucene.benchmark.jmh, removed by this commit):
@@ -1,376 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.benchmark.jmh;

import java.util.Arrays;
import java.util.Random;
import java.util.concurrent.TimeUnit;
import org.apache.lucene.search.DocIdSetIterator;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.CompilerControl;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;

@BenchmarkMode(Mode.Throughput)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@State(Scope.Benchmark)
@Warmup(iterations = 5, time = 1)
@Measurement(iterations = 5, time = 1)
@Fork(
    value = 1,
    jvmArgsAppend = {"-Xmx1g", "-Xms1g", "-XX:+AlwaysPreTouch"})
public class AdvanceBenchmark {

  private final long[] values = new long[129];
  private final int[] startIndexes = new int[1_000];
  private final long[] targets = new long[startIndexes.length];

  @Setup(Level.Trial)
  public void setup() throws Exception {
    for (int i = 0; i < 128; ++i) {
      values[i] = i;
    }
    values[128] = DocIdSetIterator.NO_MORE_DOCS;
    Random r = new Random(0);
    for (int i = 0; i < startIndexes.length; ++i) {
      startIndexes[i] = r.nextInt(64);
      targets[i] = startIndexes[i] + 1 + r.nextInt(1 << r.nextInt(7));
    }
  }

  @Benchmark
  public void binarySearch() {
    for (int i = 0; i < startIndexes.length; ++i) {
      binarySearch(values, targets[i], startIndexes[i]);
    }
  }

  @CompilerControl(CompilerControl.Mode.DONT_INLINE)
  private static int binarySearch(long[] values, long target, int startIndex) {
    // Standard binary search
    int i = Arrays.binarySearch(values, startIndex, values.length, target);
    if (i < 0) {
      i = -1 - i;
    }
    return i;
  }

  @Benchmark
  public void binarySearch2() {
    for (int i = 0; i < startIndexes.length; ++i) {
      binarySearch2(values, targets[i], startIndexes[i]);
    }
  }

  @CompilerControl(CompilerControl.Mode.DONT_INLINE)
  private static int binarySearch2(long[] values, long target, int startIndex) {
    // Try to help the compiler by providing predictable start/end offsets.
    int i = Arrays.binarySearch(values, 0, 128, target);
    if (i < 0) {
      i = -1 - i;
    }
    return i;
  }

  @Benchmark
  public void binarySearch3() {
    for (int i = 0; i < startIndexes.length; ++i) {
      binarySearch3(values, targets[i], startIndexes[i]);
    }
  }

  @CompilerControl(CompilerControl.Mode.DONT_INLINE)
  private static int binarySearch3(long[] values, long target, int startIndex) {
    // Organize code the same way as suggested in https://quickwit.io/blog/search-a-sorted-block,
    // which proved to help with LLVM.
    int start = 0;
    int length = 128;

    while (length > 1) {
      length /= 2;
      if (values[start + length - 1] < target) {
        start += length;
      }
    }
    return start;
  }

  @Benchmark
  public void binarySearch4() {
    for (int i = 0; i < startIndexes.length; ++i) {
      binarySearch4(values, targets[i], startIndexes[i]);
    }
  }

  @CompilerControl(CompilerControl.Mode.DONT_INLINE)
  private static int binarySearch4(long[] values, long target, int startIndex) {
    // Explicitly inline the binary-search logic to see if it helps the compiler.
    int start = 0;

    if (values[63] < target) {
      start += 64;
    }
    if (values[start + 31] < target) {
      start += 32;
    }
    if (values[start + 15] < target) {
      start += 16;
    }
    if (values[start + 7] < target) {
      start += 8;
    }
    if (values[start + 3] < target) {
      start += 4;
    }
    if (values[start + 1] < target) {
      start += 2;
    }
    if (values[start] < target) {
      start += 1;
    }

    return start;
  }

  @Benchmark
  public void binarySearch5() {
    for (int i = 0; i < startIndexes.length; ++i) {
      binarySearch5(values, targets[i], startIndexes[i]);
    }
  }

  @CompilerControl(CompilerControl.Mode.DONT_INLINE)
  private static int binarySearch5(long[] values, long target, int startIndex) {
    // Other way to write a binary search
    int start = 0;

    for (int shift = 6; shift >= 0; --shift) {
      int halfRange = 1 << shift;
      if (values[start + halfRange - 1] < target) {
        start += halfRange;
      }
    }

    return start;
  }

  @Benchmark
  public void binarySearch6() {
    for (int i = 0; i < startIndexes.length; ++i) {
      binarySearch6(values, targets[i], startIndexes[i]);
    }
  }

  @CompilerControl(CompilerControl.Mode.DONT_INLINE)
  private static int binarySearch6(long[] values, long target, int startIndex) {
    // Other way to write a binary search
    int start = 0;

    for (int halfRange = 64; halfRange > 0; halfRange >>= 1) {
      if (values[start + halfRange - 1] < target) {
        start += halfRange;
      }
    }

    return start;
  }

  @Benchmark
  public void linearSearch() {
    for (int i = 0; i < startIndexes.length; ++i) {
      linearSearch(values, targets[i], startIndexes[i]);
    }
  }

  @CompilerControl(CompilerControl.Mode.DONT_INLINE)
  private static int linearSearch(long[] values, long target, int startIndex) {
    // Naive linear search.
    for (int i = startIndex; i < values.length; ++i) {
      if (values[i] >= target) {
        return i;
      }
    }
    return values.length;
  }

  @Benchmark
  public void bruteForceSearch() {
    for (int i = 0; i < startIndexes.length; ++i) {
      bruteForceSearch(values, targets[i], startIndexes[i]);
    }
  }

  @CompilerControl(CompilerControl.Mode.DONT_INLINE)
  private static int bruteForceSearch(long[] values, long target, int startIndex) {
    // Linear search with predictable start/end offsets to see if it helps the compiler.
    for (int i = 0; i < 128; ++i) {
      if (values[i] >= target) {
        return i;
      }
    }
    return values.length;
  }

  @Benchmark
  public void linearSearch2() {
    for (int i = 0; i < startIndexes.length; ++i) {
      linearSearch2(values, targets[i], startIndexes[i]);
    }
  }

  @CompilerControl(CompilerControl.Mode.DONT_INLINE)
  private static int linearSearch2(long[] values, long target, int startIndex) {
    // Two-level linear search, first checking every 8-th value, then values within an 8-value range
    int rangeStart = values.length - 8;

    for (int i = startIndex; i + 8 <= values.length; i += 8) {
      if (values[i + 7] >= target) {
        rangeStart = i;
        break;
      }
    }

    for (int i = 0; i < 8; ++i) {
      if (values[rangeStart + i] >= target) {
        return rangeStart + i;
      }
    }

    return values.length;
  }

  @Benchmark
  public void linearSearch3() {
    for (int i = 0; i < startIndexes.length; ++i) {
      linearSearch3(values, targets[i], startIndexes[i]);
    }
  }

  @CompilerControl(CompilerControl.Mode.DONT_INLINE)
  private static int linearSearch3(long[] values, long target, int startIndex) {
    // Iteration over linearSearch that tries to reduce branches
    while (startIndex + 4 <= values.length) {
      int count = values[startIndex] < target ? 1 : 0;
      if (values[startIndex + 1] < target) {
        count++;
      }
      if (values[startIndex + 2] < target) {
        count++;
      }
      if (values[startIndex + 3] < target) {
        count++;
      }
      if (count != 4) {
        return startIndex + count;
      }
      startIndex += 4;
    }

    for (int i = startIndex; i < values.length; ++i) {
      if (values[i] >= target) {
        return i;
      }
    }

    return values.length;
  }

  @Benchmark
  public void hybridSearch() {
    for (int i = 0; i < startIndexes.length; ++i) {
      hybridSearch(values, targets[i], startIndexes[i]);
    }
  }

  @CompilerControl(CompilerControl.Mode.DONT_INLINE)
  private static int hybridSearch(long[] values, long target, int startIndex) {
    // Two-level linear search, first checking every 8-th value, then values within an 8-value range
    int rangeStart = values.length - 8;

    for (int i = startIndex; i + 8 <= values.length; i += 8) {
      if (values[i + 7] >= target) {
        rangeStart = i;
        break;
      }
    }

    return binarySearchHelper8(values, target, rangeStart);
  }

  // branchless binary search over 8 values
  private static int binarySearchHelper8(long[] values, long target, int start) {
    if (values[start + 3] < target) {
      start += 4;
    }
    if (values[start + 1] < target) {
      start += 2;
    }
    if (values[start] < target) {
      start += 1;
    }
    return start;
  }

  private static void assertEquals(int expected, int actual) {
    if (expected != actual) {
      throw new AssertionError("Expected: " + expected + ", got " + actual);
    }
  }

  public static void main(String[] args) {
    // For testing purposes
    long[] values = new long[129];
    for (int i = 0; i < 128; ++i) {
      values[i] = i;
    }
    values[128] = DocIdSetIterator.NO_MORE_DOCS;
    for (int start = 0; start < 128; ++start) {
      for (int targetIndex = start; targetIndex < 128; ++targetIndex) {
        int actualIndex = binarySearch(values, values[targetIndex], start);
        assertEquals(targetIndex, actualIndex);
        actualIndex = binarySearch2(values, values[targetIndex], start);
        assertEquals(targetIndex, actualIndex);
        actualIndex = binarySearch3(values, values[targetIndex], start);
        assertEquals(targetIndex, actualIndex);
        actualIndex = binarySearch4(values, values[targetIndex], start);
        assertEquals(targetIndex, actualIndex);
        actualIndex = binarySearch5(values, values[targetIndex], start);
        assertEquals(targetIndex, actualIndex);
        actualIndex = binarySearch6(values, values[targetIndex], start);
        assertEquals(targetIndex, actualIndex);
        actualIndex = bruteForceSearch(values, values[targetIndex], start);
        assertEquals(targetIndex, actualIndex);
        actualIndex = hybridSearch(values, values[targetIndex], start);
        assertEquals(targetIndex, actualIndex);
        actualIndex = linearSearch(values, values[targetIndex], start);
        assertEquals(targetIndex, actualIndex);
        actualIndex = linearSearch2(values, values[targetIndex], start);
        assertEquals(targetIndex, actualIndex);
        actualIndex = linearSearch3(values, values[targetIndex], start);
        assertEquals(targetIndex, actualIndex);
      }
    }
  }
}
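For reference, the deleted class was a standard JMH benchmark. The sketch below shows one way such a benchmark can be launched programmatically; it is illustrative only (the RunAdvanceBenchmark launcher is not part of the reverted change, and it assumes jmh-core is on the classpath; Lucene normally runs its benchmark-jmh module through gradle).

import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;

// Hypothetical launcher: selects benchmarks by class-name regex; the
// @Fork/@Warmup/@Measurement annotations on the class apply automatically.
public class RunAdvanceBenchmark {
  public static void main(String[] args) throws RunnerException {
    Options opts = new OptionsBuilder().include("AdvanceBenchmark").build();
    new Runner(opts).run();
  }
}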
Lucene912PostingsReader.java:
@@ -67,7 +67,6 @@ import org.apache.lucene.util.IOUtils;
 public final class Lucene912PostingsReader extends PostingsReaderBase {

   static final VectorizationProvider VECTORIZATION_PROVIDER = VectorizationProvider.getInstance();
-  static int BINARY_SEARCH_WINDOW_SIZE = 4;

   private final IndexInput docIn;
   private final IndexInput posIn;
@@ -213,74 +212,13 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
     }
   }

-  private static boolean assertDocBuffer(long[] values, int start) {
-    assert values.length == BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE;
-    assert values[BLOCK_SIZE] == DocIdSetIterator.NO_MORE_DOCS;
-    assert start < BLOCK_SIZE;
-
-    int endOffset;
-    if (values[0] == DocIdSetIterator.NO_MORE_DOCS) {
-      endOffset = 0;
-    } else {
-      endOffset = -1;
-      for (int i = 1; i < values.length; ++i) {
-        assert values[i] > values[i - 1] : Arrays.toString(values);
-        if (values[i] == DocIdSetIterator.NO_MORE_DOCS) {
-          endOffset = i;
-          break;
-        }
-      }
-    }
-    for (int i = 0; i < BINARY_SEARCH_WINDOW_SIZE; ++i) {
-      assert values[endOffset + i] == DocIdSetIterator.NO_MORE_DOCS;
-    }
-    return true;
-  }
-
-  /**
-   * Return the first index in sorted array {@code values} whose value is greater than or equal to
-   * {@code target}. For correctness, it requires the last 4 values to be set to {@code
-   * NO_MORE_DOCS}.
-   */
-  static int findNextGEQ(long[] values, long target, int start) {
-    assert assertDocBuffer(values, start);
-
-    if (values[start] >= target) {
-      // Surprisingly this is a likely condition in practice, so optimizing for it helps.
-      return start;
-    }
-
-    // We just ruled out that our target index is at `start`.
-    start += 1;
-
-    // Now find the first interval of 4 values that contains our target.
-    for (int i = start;
-        i + BINARY_SEARCH_WINDOW_SIZE <= values.length;
-        i += BINARY_SEARCH_WINDOW_SIZE) {
-      if (values[i + BINARY_SEARCH_WINDOW_SIZE - 1] >= target) {
-        start = i;
-        break;
-      }
-    }
-
-    // Binary search in this interval of 4 values.
-    return binarySearch4(values, target, start);
-  }
-
-  /**
-   * Return the first index whose value is greater than or equal to {@code target} among the 4
-   * values starting at {@code start}. If none of the values is greater than or equal to {@code
-   * target}, this returns {@code start+3}.
-   */
-  private static int binarySearch4(long[] values, long target, int start) {
-    // This code is organized in a way that compiles to a branchless binary search.
-    if (values[start + 1] < target) {
-      start += 2;
-    }
-    if (values[start] < target) {
-      start += 1;
-    }
-    return start;
-  }
+  static int findFirstGreater(long[] buffer, int target, int from) {
+    for (int i = from; i < BLOCK_SIZE; ++i) {
+      if (buffer[i] >= target) {
+        return i;
+      }
+    }
+    return BLOCK_SIZE;
+  }

   @Override
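The removed findNextGEQ pairs a forward scan over 4-value windows with a branchless resolution step inside the matching window. Below is a self-contained sketch of the same idea, with hypothetical names (WindowedSearchDemo, nextGEQ) and Long.MAX_VALUE standing in for the NO_MORE_DOCS sentinel padding; it is not the reverted Lucene code itself.

// Standalone demonstration of the windowed search: a sorted block padded
// with sentinels, scanned 4 values at a time, then resolved branchlessly.
class WindowedSearchDemo {
  static final int WINDOW = 4;

  static int nextGEQ(long[] values, long target, int start) {
    if (values[start] >= target) {
      return start; // common fast path: already positioned at/after target
    }
    start += 1;
    for (int i = start; i + WINDOW <= values.length; i += WINDOW) {
      if (values[i + WINDOW - 1] >= target) {
        start = i; // this 4-value window contains the first match
        break;
      }
    }
    // Branchless binary search over 4 values: each comparison either adds
    // its half-width to start or leaves it unchanged.
    if (values[start + 1] < target) {
      start += 2;
    }
    if (values[start] < target) {
      start += 1;
    }
    return start;
  }

  public static void main(String[] args) {
    long[] block = {1, 3, 7, 12, 15, 20, 21, 30, // sorted doc IDs
        Long.MAX_VALUE, Long.MAX_VALUE, Long.MAX_VALUE, Long.MAX_VALUE}; // sentinel window
    System.out.println(nextGEQ(block, 13, 0)); // prints 4: first value >= 13 is 15
    System.out.println(nextGEQ(block, 99, 0)); // prints 8: lands on the sentinel
  }
}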
@@ -410,7 +348,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
     final ForDeltaUtil forDeltaUtil = new ForDeltaUtil();
     final PForUtil pforUtil = new PForUtil(forUtil);

-    private final long[] docBuffer = new long[BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE];
+    private final long[] docBuffer = new long[BLOCK_SIZE + 1];
     private final long[] freqBuffer = new long[BLOCK_SIZE];

     private int docBufferUpto;
@@ -452,9 +390,9 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
                   .compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
               >= 0
               || fieldInfo.hasPayloads();
-      // We set the last elements of docBuffer to NO_MORE_DOCS, it helps save conditionals in
+      // We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in
       // advance()
-      Arrays.fill(docBuffer, BLOCK_SIZE, BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE, NO_MORE_DOCS);
+      docBuffer[BLOCK_SIZE] = NO_MORE_DOCS;
     }

     public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) {
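Both the reverted code (4 sentinel slots) and the restored code (1 sentinel slot) rely on the same trick: because docBuffer holds at least one NO_MORE_DOCS entry past the last real doc ID, a forward scan cannot run off the end. A minimal sketch of the idea, with hypothetical names, not the actual advance() implementation:

// The sentinel at the end of the buffer compares >= any target, so the
// loop needs no explicit bounds check per iteration.
static int firstGEQ(long[] bufferWithSentinel, long target, int from) {
  int i = from;
  while (bufferWithSentinel[i] < target) {
    ++i; // guaranteed to stop at or before the sentinel
  }
  return i;
}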
@@ -560,7 +498,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
       docCountUpto += BLOCK_SIZE;
       prevDocID = docBuffer[BLOCK_SIZE - 1];
       docBufferUpto = 0;
-      assert assertDocBuffer(docBuffer, 0);
+      assert docBuffer[BLOCK_SIZE] == NO_MORE_DOCS;
     }

     private void refillRemainder() throws IOException {
@@ -571,14 +509,15 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
       if (docFreq == 1) {
         docBuffer[0] = singletonDocID;
         freqBuffer[0] = totalTermFreq;
+        docBuffer[1] = NO_MORE_DOCS;
         docCountUpto++;
       } else {
         // Read vInts:
         PostingsUtil.readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq, needsFreq);
         prefixSum(docBuffer, left, prevDocID);
+        docBuffer[left] = NO_MORE_DOCS;
         docCountUpto += left;
       }
-      Arrays.fill(docBuffer, left, left + BINARY_SEARCH_WINDOW_SIZE, NO_MORE_DOCS);
       docBufferUpto = 0;
       freqFP = -1;
     }
@@ -676,7 +615,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
         }
       }

-      int next = findNextGEQ(docBuffer, target, docBufferUpto);
+      int next = findFirstGreater(docBuffer, target, docBufferUpto);
       this.doc = (int) docBuffer[next];
       docBufferUpto = next + 1;
       return doc;
@@ -694,8 +633,8 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
     final ForDeltaUtil forDeltaUtil = new ForDeltaUtil();
     final PForUtil pforUtil = new PForUtil(forUtil);

-    private final long[] docBuffer = new long[BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE];
-    private final long[] freqBuffer = new long[BLOCK_SIZE];
+    private final long[] docBuffer = new long[BLOCK_SIZE + 1];
+    private final long[] freqBuffer = new long[BLOCK_SIZE + 1];
     private final long[] posDeltaBuffer = new long[BLOCK_SIZE];

     private final long[] payloadLengthBuffer;
@@ -816,9 +755,9 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
         payload = null;
       }

-      // We set the last elements of docBuffer to NO_MORE_DOCS, it helps save conditionals in
+      // We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in
       // advance()
-      Arrays.fill(docBuffer, BLOCK_SIZE, BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE, NO_MORE_DOCS);
+      docBuffer[BLOCK_SIZE] = NO_MORE_DOCS;
     }

     public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) {
@@ -910,18 +849,18 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
       } else if (docFreq == 1) {
         docBuffer[0] = singletonDocID;
         freqBuffer[0] = totalTermFreq;
-        Arrays.fill(docBuffer, 1, 1 + BINARY_SEARCH_WINDOW_SIZE, NO_MORE_DOCS);
+        docBuffer[1] = NO_MORE_DOCS;
         docCountUpto++;
       } else {
         // Read vInts:
         PostingsUtil.readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq, true);
         prefixSum(docBuffer, left, prevDocID);
-        Arrays.fill(docBuffer, left, left + BINARY_SEARCH_WINDOW_SIZE, NO_MORE_DOCS);
+        docBuffer[left] = NO_MORE_DOCS;
         docCountUpto += left;
       }
       prevDocID = docBuffer[BLOCK_SIZE - 1];
       docBufferUpto = 0;
-      assert assertDocBuffer(docBuffer, 0);
+      assert docBuffer[BLOCK_SIZE] == NO_MORE_DOCS;
     }

     private void skipLevel1To(int target) throws IOException {
@@ -1083,7 +1022,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
         refillDocs();
       }

-      int next = findNextGEQ(docBuffer, target, docBufferUpto);
+      int next = findFirstGreater(docBuffer, target, docBufferUpto);
       for (int i = docBufferUpto; i <= next; ++i) {
         posPendingCount += freqBuffer[i];
       }
@@ -1284,7 +1223,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
     final ForDeltaUtil forDeltaUtil = new ForDeltaUtil();
     final PForUtil pforUtil = new PForUtil(forUtil);

-    private final long[] docBuffer = new long[BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE];
+    private final long[] docBuffer = new long[BLOCK_SIZE + 1];
     private final long[] freqBuffer = new long[BLOCK_SIZE];

     private int docBufferUpto;
@@ -1332,9 +1271,9 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
                   .compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
               >= 0
               || fieldInfo.hasPayloads();
-      // We set the last elements of docBuffer to NO_MORE_DOCS, it helps save conditionals in
+      // We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in
       // advance()
-      Arrays.fill(docBuffer, BLOCK_SIZE, BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE, NO_MORE_DOCS);
+      docBuffer[BLOCK_SIZE] = NO_MORE_DOCS;

       docFreq = termState.docFreq;
       if (docFreq > 1) {
@@ -1424,13 +1363,13 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
         // Read vInts:
         PostingsUtil.readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq, true);
         prefixSum(docBuffer, left, prevDocID);
-        Arrays.fill(docBuffer, left, left + BINARY_SEARCH_WINDOW_SIZE, NO_MORE_DOCS);
+        docBuffer[left] = NO_MORE_DOCS;
         freqFP = -1;
         docCountUpto += left;
       }
       prevDocID = docBuffer[BLOCK_SIZE - 1];
       docBufferUpto = 0;
-      assert assertDocBuffer(docBuffer, 0);
+      assert docBuffer[BLOCK_SIZE] == NO_MORE_DOCS;
     }

     private void skipLevel1To(int target) throws IOException {
@@ -1556,7 +1495,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
         needsRefilling = false;
       }

-      int next = findNextGEQ(docBuffer, target, docBufferUpto);
+      int next = findFirstGreater(docBuffer, target, docBufferUpto);
       this.doc = (int) docBuffer[next];
       docBufferUpto = next + 1;
       return doc;
@@ -1639,7 +1578,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
     final ForDeltaUtil forDeltaUtil = new ForDeltaUtil();
     final PForUtil pforUtil = new PForUtil(forUtil);

-    private final long[] docBuffer = new long[BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE];
+    private final long[] docBuffer = new long[BLOCK_SIZE + 1];
     private final long[] freqBuffer = new long[BLOCK_SIZE];
     private final long[] posDeltaBuffer = new long[BLOCK_SIZE];
@@ -1719,9 +1658,9 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
       this.posIn = Lucene912PostingsReader.this.posIn.clone();
       posInUtil = VECTORIZATION_PROVIDER.newPostingDecodingUtil(posIn);

-      // We set the last elements of docBuffer to NO_MORE_DOCS, it helps save conditionals in
+      // We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in
       // advance()
-      Arrays.fill(docBuffer, BLOCK_SIZE, BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE, NO_MORE_DOCS);
+      docBuffer[BLOCK_SIZE] = NO_MORE_DOCS;

       docFreq = termState.docFreq;
       posTermStartFP = termState.posStartFP;
@@ -1791,18 +1730,18 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
       } else if (docFreq == 1) {
         docBuffer[0] = singletonDocID;
         freqBuffer[0] = totalTermFreq;
-        Arrays.fill(docBuffer, left, left + BINARY_SEARCH_WINDOW_SIZE, NO_MORE_DOCS);
+        docBuffer[1] = NO_MORE_DOCS;
         docCountUpto++;
       } else {
         // Read vInts:
         PostingsUtil.readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq, true);
         prefixSum(docBuffer, left, prevDocID);
-        Arrays.fill(docBuffer, left, left + BINARY_SEARCH_WINDOW_SIZE, NO_MORE_DOCS);
+        docBuffer[left] = NO_MORE_DOCS;
         docCountUpto += left;
       }
       prevDocID = docBuffer[BLOCK_SIZE - 1];
       docBufferUpto = 0;
-      assert assertDocBuffer(docBuffer, 0);
+      assert docBuffer[BLOCK_SIZE] == NO_MORE_DOCS;
     }

     private void skipLevel1To(int target) throws IOException {
@@ -1999,7 +1938,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
         needsRefilling = false;
       }

-      int next = findNextGEQ(docBuffer, target, docBufferUpto);
+      int next = findFirstGreater(docBuffer, target, docBufferUpto);
       for (int i = docBufferUpto; i <= next; ++i) {
        posPendingCount += freqBuffer[i];
       }
TestLucene912PostingsFormat.java:
@@ -31,7 +31,6 @@ import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.Impact;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.ByteArrayDataOutput;
 import org.apache.lucene.store.Directory;
@@ -155,19 +154,4 @@ public class TestLucene912PostingsFormat extends BasePostingsFormatTestCase {
       }
     }
   }
-
-  public void testFindNextGEQ() {
-    long[] values =
-        new long[ForUtil.BLOCK_SIZE + Lucene912PostingsReader.BINARY_SEARCH_WINDOW_SIZE];
-    for (int i = 0; i < ForUtil.BLOCK_SIZE; ++i) {
-      values[i] = i * 2;
-    }
-    Arrays.fill(values, ForUtil.BLOCK_SIZE, values.length, DocIdSetIterator.NO_MORE_DOCS);
-    for (int i = 0; i < ForUtil.BLOCK_SIZE; ++i) {
-      for (int start = 0; start <= i; ++start) {
-        assertEquals(i, Lucene912PostingsReader.findNextGEQ(values, i * 2, start));
-        assertEquals(i + 1, Lucene912PostingsReader.findNextGEQ(values, i * 2 + 1, start));
-      }
-    }
-  }
 }