mirror of https://github.com/apache/lucene.git
LUCENE-9056: Fewer conditionals in #advance. (#1021)
This commit is contained in:
parent
2220f99e3d
commit
4b3739f083
|
@ -38,7 +38,7 @@ public class ForDeltaUtil {
|
|||
private static void prefixSumOfOnes(long[] arr, long base) {
|
||||
System.arraycopy(IDENTITY_PLUS_ONE, 0, arr, 0, ForUtil.BLOCK_SIZE);
|
||||
// This loop gets auto-vectorized
|
||||
for (int i = 0; i < arr.length; ++i) {
|
||||
for (int i = 0; i < ForUtil.BLOCK_SIZE; ++i) {
|
||||
arr[i] += base;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -267,7 +267,7 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
|
|||
final ForDeltaUtil forDeltaUtil = new ForDeltaUtil(forUtil);
|
||||
final PForUtil pforUtil = new PForUtil(forUtil);
|
||||
|
||||
private final long[] docBuffer = new long[BLOCK_SIZE];
|
||||
private final long[] docBuffer = new long[BLOCK_SIZE+1];
|
||||
private final long[] freqBuffer = new long[BLOCK_SIZE];
|
||||
|
||||
private int docBufferUpto;
|
||||
|
@ -285,7 +285,7 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
|
|||
|
||||
private int docFreq; // number of docs in this posting list
|
||||
private long totalTermFreq; // sum of freqBuffer in this posting list (or docFreq when omitted)
|
||||
private int docUpto; // how many docs we've read
|
||||
private int blockUpto; // number of docs in or before the current block
|
||||
private int doc; // doc we last read
|
||||
private long accum; // accumulator for doc deltas
|
||||
|
||||
|
@ -313,7 +313,9 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
|
|||
indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
|
||||
indexHasPos = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
|
||||
indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
|
||||
indexHasPayloads = fieldInfo.hasPayloads();
|
||||
indexHasPayloads = fieldInfo.hasPayloads();
|
||||
// We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in advance()
|
||||
docBuffer[BLOCK_SIZE] = NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) {
|
||||
|
@ -346,7 +348,7 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
|
|||
}
|
||||
}
|
||||
accum = 0;
|
||||
docUpto = 0;
|
||||
blockUpto = 0;
|
||||
nextSkipDoc = BLOCK_SIZE - 1; // we won't skip if target is found in first block
|
||||
docBufferUpto = BLOCK_SIZE;
|
||||
skipped = false;
|
||||
|
@ -394,7 +396,7 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
|
|||
isFreqsRead = true;
|
||||
}
|
||||
|
||||
final int left = docFreq - docUpto;
|
||||
final int left = docFreq - blockUpto;
|
||||
assert left >= 0;
|
||||
|
||||
if (left >= BLOCK_SIZE) {
|
||||
|
@ -407,18 +409,22 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
|
|||
pforUtil.skip(docIn); // skip over freqBuffer if we don't need them at all
|
||||
}
|
||||
}
|
||||
blockUpto += BLOCK_SIZE;
|
||||
} else if (docFreq == 1) {
|
||||
docBuffer[0] = singletonDocID;
|
||||
freqBuffer[0] = totalTermFreq;
|
||||
Arrays.fill(docBuffer, 1, BLOCK_SIZE, DocIdSetIterator.NO_MORE_DOCS);
|
||||
blockUpto++;
|
||||
} else {
|
||||
// Read vInts:
|
||||
readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq);
|
||||
prefixSum(docBuffer, left, accum);
|
||||
Arrays.fill(docBuffer, left, BLOCK_SIZE, DocIdSetIterator.NO_MORE_DOCS);
|
||||
blockUpto += left;
|
||||
}
|
||||
accum = docBuffer[BLOCK_SIZE - 1];
|
||||
docBufferUpto = 0;
|
||||
assert docBuffer[BLOCK_SIZE] == NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -428,7 +434,6 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
|
|||
}
|
||||
|
||||
doc = (int) docBuffer[docBufferUpto];
|
||||
docUpto++;
|
||||
docBufferUpto++;
|
||||
return doc;
|
||||
}
|
||||
|
@ -460,10 +465,10 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
|
|||
// is a little different from MultiLevelSkipListReader
|
||||
final int newDocUpto = skipper.skipTo(target) + 1;
|
||||
|
||||
if (newDocUpto > docUpto) {
|
||||
if (newDocUpto >= blockUpto) {
|
||||
// Skipper moved
|
||||
assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
|
||||
docUpto = newDocUpto;
|
||||
blockUpto = newDocUpto;
|
||||
|
||||
// Force to read next block
|
||||
docBufferUpto = BLOCK_SIZE;
|
||||
|
@ -487,14 +492,11 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
|
|||
long doc;
|
||||
while (true) {
|
||||
doc = docBuffer[docBufferUpto];
|
||||
docUpto++;
|
||||
|
||||
if (doc >= target) {
|
||||
break;
|
||||
}
|
||||
if (++docBufferUpto == BLOCK_SIZE) {
|
||||
return this.doc = NO_MORE_DOCS;
|
||||
}
|
||||
++docBufferUpto;
|
||||
}
|
||||
|
||||
docBufferUpto++;
|
||||
|
@ -989,7 +991,7 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
|
|||
final ForDeltaUtil forDeltaUtil = new ForDeltaUtil(forUtil);
|
||||
final PForUtil pforUtil = new PForUtil(forUtil);
|
||||
|
||||
private final long[] docBuffer = new long[BLOCK_SIZE];
|
||||
private final long[] docBuffer = new long[BLOCK_SIZE+1];
|
||||
private final long[] freqBuffer = new long[BLOCK_SIZE];
|
||||
|
||||
private int docBufferUpto;
|
||||
|
@ -1001,16 +1003,18 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
|
|||
final boolean indexHasFreqs;
|
||||
|
||||
private int docFreq; // number of docs in this posting list
|
||||
private int docUpto; // how many docs we've read
|
||||
private int blockUpto; // number of documents in or before the current block
|
||||
private int doc; // doc we last read
|
||||
private long accum; // accumulator for doc deltas
|
||||
private int freq; // freq we last read
|
||||
|
||||
|
||||
private int nextSkipDoc = -1;
|
||||
|
||||
private long seekTo = -1;
|
||||
|
||||
// as we read freqBuffer lazily, isFreqsRead shows if freqBuffer are read for the current block
|
||||
// always true when we don't have freqBuffer (indexHasFreq=false) or don't need freqBuffer (needsFreq=false)
|
||||
private boolean isFreqsRead;
|
||||
|
||||
public BlockImpactsDocsEnum(FieldInfo fieldInfo, IntBlockTermState termState) throws IOException {
|
||||
indexHasFreqs = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
|
||||
final boolean indexHasPositions = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
|
||||
|
@ -1024,7 +1028,7 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
|
|||
|
||||
doc = -1;
|
||||
accum = 0;
|
||||
docUpto = 0;
|
||||
blockUpto = 0;
|
||||
docBufferUpto = BLOCK_SIZE;
|
||||
|
||||
skipper = new Lucene84ScoreSkipReader(docIn.clone(),
|
||||
|
@ -1034,6 +1038,9 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
|
|||
indexHasPayloads);
|
||||
skipper.init(termState.docStartFP+termState.skipOffset, termState.docStartFP, termState.posStartFP, termState.payStartFP, docFreq);
|
||||
|
||||
// We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in advance()
|
||||
docBuffer[BLOCK_SIZE] = NO_MORE_DOCS;
|
||||
this.isFreqsRead = true;
|
||||
if (indexHasFreqs == false) {
|
||||
Arrays.fill(freqBuffer, 1L);
|
||||
}
|
||||
|
@ -1041,7 +1048,11 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
|
|||
|
||||
@Override
|
||||
public int freq() throws IOException {
|
||||
return freq;
|
||||
if (isFreqsRead == false) {
|
||||
pforUtil.decode(docIn, freqBuffer); // read freqBuffer for this block
|
||||
isFreqsRead = true;
|
||||
}
|
||||
return (int) freqBuffer[docBufferUpto-1];
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -1050,7 +1061,13 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
|
|||
}
|
||||
|
||||
private void refillDocs() throws IOException {
|
||||
final int left = docFreq - docUpto;
|
||||
// Check if we skipped reading the previous block of freqBuffer, and if yes, position docIn after it
|
||||
if (isFreqsRead == false) {
|
||||
pforUtil.skip(docIn);
|
||||
isFreqsRead = true;
|
||||
}
|
||||
|
||||
final int left = docFreq - blockUpto;
|
||||
assert left >= 0;
|
||||
|
||||
if (left >= BLOCK_SIZE) {
|
||||
|
@ -1058,13 +1075,16 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
|
|||
if (indexHasFreqs) {
|
||||
pforUtil.decode(docIn, freqBuffer);
|
||||
}
|
||||
blockUpto += BLOCK_SIZE;
|
||||
} else {
|
||||
readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreqs);
|
||||
prefixSum(docBuffer, left, accum);
|
||||
Arrays.fill(docBuffer, left, BLOCK_SIZE, DocIdSetIterator.NO_MORE_DOCS);
|
||||
blockUpto += left;
|
||||
}
|
||||
accum = docBuffer[BLOCK_SIZE - 1];
|
||||
docBufferUpto = 0;
|
||||
assert docBuffer[BLOCK_SIZE] == NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -1074,10 +1094,10 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
|
|||
// is a little different from MultiLevelSkipListReader
|
||||
final int newDocUpto = skipper.skipTo(target) + 1;
|
||||
|
||||
if (newDocUpto > docUpto) {
|
||||
if (newDocUpto >= blockUpto) {
|
||||
// Skipper moved
|
||||
assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
|
||||
docUpto = newDocUpto;
|
||||
blockUpto = newDocUpto;
|
||||
|
||||
// Force to read next block
|
||||
docBufferUpto = BLOCK_SIZE;
|
||||
|
@ -1110,18 +1130,14 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
|
|||
if (docBufferUpto == BLOCK_SIZE) {
|
||||
if (seekTo >= 0) {
|
||||
docIn.seek(seekTo);
|
||||
isFreqsRead = true; // reset isFreqsRead
|
||||
seekTo = -1;
|
||||
}
|
||||
refillDocs();
|
||||
}
|
||||
|
||||
int next = findFirstGreater(docBuffer, target, docBufferUpto);
|
||||
if (next == BLOCK_SIZE) {
|
||||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
this.doc = (int) docBuffer[next];
|
||||
this.freq = (int) freqBuffer[next];
|
||||
docUpto += next - docBufferUpto + 1;
|
||||
docBufferUpto = next + 1;
|
||||
return doc;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue