LUCENE-9027: Try to get back some indexing speed.

This commit is contained in:
Adrien Grand 2019-11-22 11:42:25 +01:00
parent acd56b350d
commit c51006c3c4
3 changed files with 35 additions and 29 deletions

View File

@ -19,7 +19,6 @@
package org.apache.lucene.codecs.lucene84;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
@ -237,15 +236,18 @@ final class ForUtil {
}
final int numLongsPerShift = bitsPerValue * 2;
Arrays.fill(tmp, 0L);
int idx = 0;
for (int shift = nextPrimitive - bitsPerValue; shift >= 0; shift -= bitsPerValue) {
int shift = nextPrimitive - bitsPerValue;
for (int i = 0; i < numLongsPerShift; ++i) {
tmp[i] = longs[idx++] << shift;
}
for (shift = shift - bitsPerValue; shift >= 0; shift -= bitsPerValue) {
for (int i = 0; i < numLongsPerShift; ++i) {
tmp[i] |= longs[idx++] << shift;
}
}
final int remainingBitsPerLong = nextPrimitive % bitsPerValue;
final int remainingBitsPerLong = shift + bitsPerValue;
final long maskRemainingBitsPerLong;
if (nextPrimitive == 8) {
maskRemainingBitsPerLong = mask8(remainingBitsPerLong);
@ -254,6 +256,7 @@ final class ForUtil {
} else {
maskRemainingBitsPerLong = mask32(remainingBitsPerLong);
}
int tmpIdx = 0;
int remainingBitsPerValue = bitsPerValue;
while (idx < numLongs) {

View File

@ -41,7 +41,6 @@ import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SlowImpactsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
@ -413,13 +412,13 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
} else if (docFreq == 1) {
docBuffer[0] = singletonDocID;
freqBuffer[0] = totalTermFreq;
Arrays.fill(docBuffer, 1, BLOCK_SIZE, DocIdSetIterator.NO_MORE_DOCS);
docBuffer[1] = NO_MORE_DOCS;
blockUpto++;
} else {
// Read vInts:
readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq);
prefixSum(docBuffer, left, accum);
Arrays.fill(docBuffer, left, BLOCK_SIZE, DocIdSetIterator.NO_MORE_DOCS);
docBuffer[left] = NO_MORE_DOCS;
blockUpto += left;
}
accum = docBuffer[BLOCK_SIZE - 1];
@ -516,8 +515,8 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
final ForDeltaUtil forDeltaUtil = new ForDeltaUtil(forUtil);
final PForUtil pforUtil = new PForUtil(forUtil);
private final long[] docBuffer = new long[BLOCK_SIZE];
private final long[] freqBuffer = new long[BLOCK_SIZE];
private final long[] docBuffer = new long[BLOCK_SIZE+1];
private final long[] freqBuffer = new long[BLOCK_SIZE+1];
private final long[] posDeltaBuffer = new long[BLOCK_SIZE];
private final long[] payloadLengthBuffer;
@ -550,7 +549,7 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
private int docFreq; // number of docs in this posting list
private long totalTermFreq; // number of positions in this posting list
private int docUpto; // how many docs we've read
private int blockUpto; // number of docs in or before the current block
private int doc; // doc we last read
private long accum; // accumulator for doc deltas
private int freq; // freq we last read
@ -625,6 +624,9 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
payloadBytes = null;
payload = null;
}
// We set the last element of docBuffer to NO_MORE_DOCS; this helps save conditionals in advance()
docBuffer[BLOCK_SIZE] = NO_MORE_DOCS;
}
public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) {
@ -664,7 +666,7 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
doc = -1;
accum = 0;
docUpto = 0;
blockUpto = 0;
if (docFreq > BLOCK_SIZE) {
nextSkipDoc = BLOCK_SIZE - 1; // we won't skip if target is found in first block
} else {
@ -686,23 +688,27 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
}
private void refillDocs() throws IOException {
final int left = docFreq - docUpto;
final int left = docFreq - blockUpto;
assert left >= 0;
if (left >= BLOCK_SIZE) {
forDeltaUtil.decodeAndPrefixSum(docIn, accum, docBuffer);
pforUtil.decode(docIn, freqBuffer);
blockUpto += BLOCK_SIZE;
} else if (docFreq == 1) {
docBuffer[0] = singletonDocID;
freqBuffer[0] = totalTermFreq;
Arrays.fill(docBuffer, 1, BLOCK_SIZE, DocIdSetIterator.NO_MORE_DOCS);
docBuffer[1] = NO_MORE_DOCS;
blockUpto++;
} else {
readVIntBlock(docIn, docBuffer, freqBuffer, left, true);
prefixSum(docBuffer, left, accum);
Arrays.fill(docBuffer, left, BLOCK_SIZE, DocIdSetIterator.NO_MORE_DOCS);
docBuffer[left] = NO_MORE_DOCS;
blockUpto += left;
}
accum = docBuffer[BLOCK_SIZE - 1];
docBufferUpto = 0;
assert docBuffer[BLOCK_SIZE] == NO_MORE_DOCS;
}
private void refillPositions() throws IOException {
@ -784,7 +790,6 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
freq = (int) freqBuffer[docBufferUpto];
posPendingCount += freq;
docBufferUpto++;
docUpto++;
position = 0;
lastStartOffset = 0;
@ -813,10 +818,10 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
final int newDocUpto = skipper.skipTo(target) + 1;
if (newDocUpto > docUpto) {
if (newDocUpto > blockUpto - BLOCK_SIZE + docBufferUpto) {
// Skipper moved
assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
docUpto = newDocUpto;
blockUpto = newDocUpto;
// Force to read next block
docBufferUpto = BLOCK_SIZE;
@ -841,15 +846,10 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
freq = (int) freqBuffer[docBufferUpto];
posPendingCount += freq;
docBufferUpto++;
docUpto++;
if (doc >= target) {
break;
}
if (docBufferUpto == BLOCK_SIZE) {
return this.doc = NO_MORE_DOCS;
}
}
position = 0;
@ -1079,7 +1079,7 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
} else {
readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreqs);
prefixSum(docBuffer, left, accum);
Arrays.fill(docBuffer, left, BLOCK_SIZE, DocIdSetIterator.NO_MORE_DOCS);
docBuffer[left] = NO_MORE_DOCS;
blockUpto += left;
}
accum = docBuffer[BLOCK_SIZE - 1];
@ -1282,7 +1282,7 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
} else {
readVIntBlock(docIn, docBuffer, freqBuffer, left, true);
prefixSum(docBuffer, left, accum);
Arrays.fill(docBuffer, left, BLOCK_SIZE, DocIdSetIterator.NO_MORE_DOCS);
docBuffer[left] = NO_MORE_DOCS;
}
accum = docBuffer[BLOCK_SIZE - 1];
docBufferUpto = 0;
@ -1664,7 +1664,7 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
} else {
readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq);
prefixSum(docBuffer, left, accum);
Arrays.fill(docBuffer, left, BLOCK_SIZE, DocIdSetIterator.NO_MORE_DOCS);
docBuffer[left] = NO_MORE_DOCS;
}
accum = docBuffer[BLOCK_SIZE - 1];
docBufferUpto = 0;

View File

@ -42,7 +42,6 @@ HEADER = """// This file has been automatically generated, DO NOT EDIT
package org.apache.lucene.codecs.lucene84;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
@ -260,15 +259,18 @@ final class ForUtil {
}
final int numLongsPerShift = bitsPerValue * 2;
Arrays.fill(tmp, 0L);
int idx = 0;
for (int shift = nextPrimitive - bitsPerValue; shift >= 0; shift -= bitsPerValue) {
int shift = nextPrimitive - bitsPerValue;
for (int i = 0; i < numLongsPerShift; ++i) {
tmp[i] = longs[idx++] << shift;
}
for (shift = shift - bitsPerValue; shift >= 0; shift -= bitsPerValue) {
for (int i = 0; i < numLongsPerShift; ++i) {
tmp[i] |= longs[idx++] << shift;
}
}
final int remainingBitsPerLong = nextPrimitive % bitsPerValue;
final int remainingBitsPerLong = shift + bitsPerValue;
final long maskRemainingBitsPerLong;
if (nextPrimitive == 8) {
maskRemainingBitsPerLong = mask8(remainingBitsPerLong);
@ -277,6 +279,7 @@ final class ForUtil {
} else {
maskRemainingBitsPerLong = mask32(remainingBitsPerLong);
}
int tmpIdx = 0;
int remainingBitsPerValue = bitsPerValue;
while (idx < numLongs) {