LUCENE-9027: Try to get back some indexing speed.

This commit is contained in:
Adrien Grand 2019-11-22 11:42:25 +01:00
parent acd56b350d
commit c51006c3c4
3 changed files with 35 additions and 29 deletions

View File

@ -19,7 +19,6 @@
package org.apache.lucene.codecs.lucene84; package org.apache.lucene.codecs.lucene84;
import java.io.IOException; import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.store.DataInput; import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput; import org.apache.lucene.store.DataOutput;
@ -237,15 +236,18 @@ final class ForUtil {
} }
final int numLongsPerShift = bitsPerValue * 2; final int numLongsPerShift = bitsPerValue * 2;
Arrays.fill(tmp, 0L);
int idx = 0; int idx = 0;
for (int shift = nextPrimitive - bitsPerValue; shift >= 0; shift -= bitsPerValue) { int shift = nextPrimitive - bitsPerValue;
for (int i = 0; i < numLongsPerShift; ++i) {
tmp[i] = longs[idx++] << shift;
}
for (shift = shift - bitsPerValue; shift >= 0; shift -= bitsPerValue) {
for (int i = 0; i < numLongsPerShift; ++i) { for (int i = 0; i < numLongsPerShift; ++i) {
tmp[i] |= longs[idx++] << shift; tmp[i] |= longs[idx++] << shift;
} }
} }
final int remainingBitsPerLong = nextPrimitive % bitsPerValue; final int remainingBitsPerLong = shift + bitsPerValue;
final long maskRemainingBitsPerLong; final long maskRemainingBitsPerLong;
if (nextPrimitive == 8) { if (nextPrimitive == 8) {
maskRemainingBitsPerLong = mask8(remainingBitsPerLong); maskRemainingBitsPerLong = mask8(remainingBitsPerLong);
@ -254,6 +256,7 @@ final class ForUtil {
} else { } else {
maskRemainingBitsPerLong = mask32(remainingBitsPerLong); maskRemainingBitsPerLong = mask32(remainingBitsPerLong);
} }
int tmpIdx = 0; int tmpIdx = 0;
int remainingBitsPerValue = bitsPerValue; int remainingBitsPerValue = bitsPerValue;
while (idx < numLongs) { while (idx < numLongs) {

View File

@ -41,7 +41,6 @@ import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SlowImpactsEnum; import org.apache.lucene.index.SlowImpactsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.DataInput; import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.ArrayUtil;
@ -413,13 +412,13 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
} else if (docFreq == 1) { } else if (docFreq == 1) {
docBuffer[0] = singletonDocID; docBuffer[0] = singletonDocID;
freqBuffer[0] = totalTermFreq; freqBuffer[0] = totalTermFreq;
Arrays.fill(docBuffer, 1, BLOCK_SIZE, DocIdSetIterator.NO_MORE_DOCS); docBuffer[1] = NO_MORE_DOCS;
blockUpto++; blockUpto++;
} else { } else {
// Read vInts: // Read vInts:
readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq); readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq);
prefixSum(docBuffer, left, accum); prefixSum(docBuffer, left, accum);
Arrays.fill(docBuffer, left, BLOCK_SIZE, DocIdSetIterator.NO_MORE_DOCS); docBuffer[left] = NO_MORE_DOCS;
blockUpto += left; blockUpto += left;
} }
accum = docBuffer[BLOCK_SIZE - 1]; accum = docBuffer[BLOCK_SIZE - 1];
@ -516,8 +515,8 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
final ForDeltaUtil forDeltaUtil = new ForDeltaUtil(forUtil); final ForDeltaUtil forDeltaUtil = new ForDeltaUtil(forUtil);
final PForUtil pforUtil = new PForUtil(forUtil); final PForUtil pforUtil = new PForUtil(forUtil);
private final long[] docBuffer = new long[BLOCK_SIZE]; private final long[] docBuffer = new long[BLOCK_SIZE+1];
private final long[] freqBuffer = new long[BLOCK_SIZE]; private final long[] freqBuffer = new long[BLOCK_SIZE+1];
private final long[] posDeltaBuffer = new long[BLOCK_SIZE]; private final long[] posDeltaBuffer = new long[BLOCK_SIZE];
private final long[] payloadLengthBuffer; private final long[] payloadLengthBuffer;
@ -550,7 +549,7 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
private int docFreq; // number of docs in this posting list private int docFreq; // number of docs in this posting list
private long totalTermFreq; // number of positions in this posting list private long totalTermFreq; // number of positions in this posting list
private int docUpto; // how many docs we've read private int blockUpto; // number of docs in or before the current block
private int doc; // doc we last read private int doc; // doc we last read
private long accum; // accumulator for doc deltas private long accum; // accumulator for doc deltas
private int freq; // freq we last read private int freq; // freq we last read
@ -625,6 +624,9 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
payloadBytes = null; payloadBytes = null;
payload = null; payload = null;
} }
// We set the last element of docBuffer to NO_MORE_DOCS; this helps save conditionals in advance()
docBuffer[BLOCK_SIZE] = NO_MORE_DOCS;
} }
public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) { public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) {
@ -664,7 +666,7 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
doc = -1; doc = -1;
accum = 0; accum = 0;
docUpto = 0; blockUpto = 0;
if (docFreq > BLOCK_SIZE) { if (docFreq > BLOCK_SIZE) {
nextSkipDoc = BLOCK_SIZE - 1; // we won't skip if target is found in first block nextSkipDoc = BLOCK_SIZE - 1; // we won't skip if target is found in first block
} else { } else {
@ -686,23 +688,27 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
} }
private void refillDocs() throws IOException { private void refillDocs() throws IOException {
final int left = docFreq - docUpto; final int left = docFreq - blockUpto;
assert left >= 0; assert left >= 0;
if (left >= BLOCK_SIZE) { if (left >= BLOCK_SIZE) {
forDeltaUtil.decodeAndPrefixSum(docIn, accum, docBuffer); forDeltaUtil.decodeAndPrefixSum(docIn, accum, docBuffer);
pforUtil.decode(docIn, freqBuffer); pforUtil.decode(docIn, freqBuffer);
blockUpto += BLOCK_SIZE;
} else if (docFreq == 1) { } else if (docFreq == 1) {
docBuffer[0] = singletonDocID; docBuffer[0] = singletonDocID;
freqBuffer[0] = totalTermFreq; freqBuffer[0] = totalTermFreq;
Arrays.fill(docBuffer, 1, BLOCK_SIZE, DocIdSetIterator.NO_MORE_DOCS); docBuffer[1] = NO_MORE_DOCS;
blockUpto++;
} else { } else {
readVIntBlock(docIn, docBuffer, freqBuffer, left, true); readVIntBlock(docIn, docBuffer, freqBuffer, left, true);
prefixSum(docBuffer, left, accum); prefixSum(docBuffer, left, accum);
Arrays.fill(docBuffer, left, BLOCK_SIZE, DocIdSetIterator.NO_MORE_DOCS); docBuffer[left] = NO_MORE_DOCS;
blockUpto += left;
} }
accum = docBuffer[BLOCK_SIZE - 1]; accum = docBuffer[BLOCK_SIZE - 1];
docBufferUpto = 0; docBufferUpto = 0;
assert docBuffer[BLOCK_SIZE] == NO_MORE_DOCS;
} }
private void refillPositions() throws IOException { private void refillPositions() throws IOException {
@ -784,7 +790,6 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
freq = (int) freqBuffer[docBufferUpto]; freq = (int) freqBuffer[docBufferUpto];
posPendingCount += freq; posPendingCount += freq;
docBufferUpto++; docBufferUpto++;
docUpto++;
position = 0; position = 0;
lastStartOffset = 0; lastStartOffset = 0;
@ -813,10 +818,10 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
final int newDocUpto = skipper.skipTo(target) + 1; final int newDocUpto = skipper.skipTo(target) + 1;
if (newDocUpto > docUpto) { if (newDocUpto > blockUpto - BLOCK_SIZE + docBufferUpto) {
// Skipper moved // Skipper moved
assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto; assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
docUpto = newDocUpto; blockUpto = newDocUpto;
// Force to read next block // Force to read next block
docBufferUpto = BLOCK_SIZE; docBufferUpto = BLOCK_SIZE;
@ -841,15 +846,10 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
freq = (int) freqBuffer[docBufferUpto]; freq = (int) freqBuffer[docBufferUpto];
posPendingCount += freq; posPendingCount += freq;
docBufferUpto++; docBufferUpto++;
docUpto++;
if (doc >= target) { if (doc >= target) {
break; break;
} }
if (docBufferUpto == BLOCK_SIZE) {
return this.doc = NO_MORE_DOCS;
}
} }
position = 0; position = 0;
@ -1079,7 +1079,7 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
} else { } else {
readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreqs); readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreqs);
prefixSum(docBuffer, left, accum); prefixSum(docBuffer, left, accum);
Arrays.fill(docBuffer, left, BLOCK_SIZE, DocIdSetIterator.NO_MORE_DOCS); docBuffer[left] = NO_MORE_DOCS;
blockUpto += left; blockUpto += left;
} }
accum = docBuffer[BLOCK_SIZE - 1]; accum = docBuffer[BLOCK_SIZE - 1];
@ -1282,7 +1282,7 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
} else { } else {
readVIntBlock(docIn, docBuffer, freqBuffer, left, true); readVIntBlock(docIn, docBuffer, freqBuffer, left, true);
prefixSum(docBuffer, left, accum); prefixSum(docBuffer, left, accum);
Arrays.fill(docBuffer, left, BLOCK_SIZE, DocIdSetIterator.NO_MORE_DOCS); docBuffer[left] = NO_MORE_DOCS;
} }
accum = docBuffer[BLOCK_SIZE - 1]; accum = docBuffer[BLOCK_SIZE - 1];
docBufferUpto = 0; docBufferUpto = 0;
@ -1664,7 +1664,7 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
} else { } else {
readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq); readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq);
prefixSum(docBuffer, left, accum); prefixSum(docBuffer, left, accum);
Arrays.fill(docBuffer, left, BLOCK_SIZE, DocIdSetIterator.NO_MORE_DOCS); docBuffer[left] = NO_MORE_DOCS;
} }
accum = docBuffer[BLOCK_SIZE - 1]; accum = docBuffer[BLOCK_SIZE - 1];
docBufferUpto = 0; docBufferUpto = 0;

View File

@ -42,7 +42,6 @@ HEADER = """// This file has been automatically generated, DO NOT EDIT
package org.apache.lucene.codecs.lucene84; package org.apache.lucene.codecs.lucene84;
import java.io.IOException; import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.store.DataInput; import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput; import org.apache.lucene.store.DataOutput;
@ -260,15 +259,18 @@ final class ForUtil {
} }
final int numLongsPerShift = bitsPerValue * 2; final int numLongsPerShift = bitsPerValue * 2;
Arrays.fill(tmp, 0L);
int idx = 0; int idx = 0;
for (int shift = nextPrimitive - bitsPerValue; shift >= 0; shift -= bitsPerValue) { int shift = nextPrimitive - bitsPerValue;
for (int i = 0; i < numLongsPerShift; ++i) {
tmp[i] = longs[idx++] << shift;
}
for (shift = shift - bitsPerValue; shift >= 0; shift -= bitsPerValue) {
for (int i = 0; i < numLongsPerShift; ++i) { for (int i = 0; i < numLongsPerShift; ++i) {
tmp[i] |= longs[idx++] << shift; tmp[i] |= longs[idx++] << shift;
} }
} }
final int remainingBitsPerLong = nextPrimitive % bitsPerValue; final int remainingBitsPerLong = shift + bitsPerValue;
final long maskRemainingBitsPerLong; final long maskRemainingBitsPerLong;
if (nextPrimitive == 8) { if (nextPrimitive == 8) {
maskRemainingBitsPerLong = mask8(remainingBitsPerLong); maskRemainingBitsPerLong = mask8(remainingBitsPerLong);
@ -277,6 +279,7 @@ final class ForUtil {
} else { } else {
maskRemainingBitsPerLong = mask32(remainingBitsPerLong); maskRemainingBitsPerLong = mask32(remainingBitsPerLong);
} }
int tmpIdx = 0; int tmpIdx = 0;
int remainingBitsPerValue = bitsPerValue; int remainingBitsPerValue = bitsPerValue;
while (idx < numLongs) { while (idx < numLongs) {