mirror of https://github.com/apache/lucene.git
LUCENE-9027: Try to get back some indexing speed.
This commit is contained in:
parent
acd56b350d
commit
c51006c3c4
|
@ -19,7 +19,6 @@
|
||||||
package org.apache.lucene.codecs.lucene84;
|
package org.apache.lucene.codecs.lucene84;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Arrays;
|
|
||||||
|
|
||||||
import org.apache.lucene.store.DataInput;
|
import org.apache.lucene.store.DataInput;
|
||||||
import org.apache.lucene.store.DataOutput;
|
import org.apache.lucene.store.DataOutput;
|
||||||
|
@ -237,15 +236,18 @@ final class ForUtil {
|
||||||
}
|
}
|
||||||
|
|
||||||
final int numLongsPerShift = bitsPerValue * 2;
|
final int numLongsPerShift = bitsPerValue * 2;
|
||||||
Arrays.fill(tmp, 0L);
|
|
||||||
int idx = 0;
|
int idx = 0;
|
||||||
for (int shift = nextPrimitive - bitsPerValue; shift >= 0; shift -= bitsPerValue) {
|
int shift = nextPrimitive - bitsPerValue;
|
||||||
|
for (int i = 0; i < numLongsPerShift; ++i) {
|
||||||
|
tmp[i] = longs[idx++] << shift;
|
||||||
|
}
|
||||||
|
for (shift = shift - bitsPerValue; shift >= 0; shift -= bitsPerValue) {
|
||||||
for (int i = 0; i < numLongsPerShift; ++i) {
|
for (int i = 0; i < numLongsPerShift; ++i) {
|
||||||
tmp[i] |= longs[idx++] << shift;
|
tmp[i] |= longs[idx++] << shift;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
final int remainingBitsPerLong = nextPrimitive % bitsPerValue;
|
final int remainingBitsPerLong = shift + bitsPerValue;
|
||||||
final long maskRemainingBitsPerLong;
|
final long maskRemainingBitsPerLong;
|
||||||
if (nextPrimitive == 8) {
|
if (nextPrimitive == 8) {
|
||||||
maskRemainingBitsPerLong = mask8(remainingBitsPerLong);
|
maskRemainingBitsPerLong = mask8(remainingBitsPerLong);
|
||||||
|
@ -254,6 +256,7 @@ final class ForUtil {
|
||||||
} else {
|
} else {
|
||||||
maskRemainingBitsPerLong = mask32(remainingBitsPerLong);
|
maskRemainingBitsPerLong = mask32(remainingBitsPerLong);
|
||||||
}
|
}
|
||||||
|
|
||||||
int tmpIdx = 0;
|
int tmpIdx = 0;
|
||||||
int remainingBitsPerValue = bitsPerValue;
|
int remainingBitsPerValue = bitsPerValue;
|
||||||
while (idx < numLongs) {
|
while (idx < numLongs) {
|
||||||
|
|
|
@ -41,7 +41,6 @@ import org.apache.lucene.index.IndexOptions;
|
||||||
import org.apache.lucene.index.PostingsEnum;
|
import org.apache.lucene.index.PostingsEnum;
|
||||||
import org.apache.lucene.index.SegmentReadState;
|
import org.apache.lucene.index.SegmentReadState;
|
||||||
import org.apache.lucene.index.SlowImpactsEnum;
|
import org.apache.lucene.index.SlowImpactsEnum;
|
||||||
import org.apache.lucene.search.DocIdSetIterator;
|
|
||||||
import org.apache.lucene.store.DataInput;
|
import org.apache.lucene.store.DataInput;
|
||||||
import org.apache.lucene.store.IndexInput;
|
import org.apache.lucene.store.IndexInput;
|
||||||
import org.apache.lucene.util.ArrayUtil;
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
|
@ -413,13 +412,13 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
|
||||||
} else if (docFreq == 1) {
|
} else if (docFreq == 1) {
|
||||||
docBuffer[0] = singletonDocID;
|
docBuffer[0] = singletonDocID;
|
||||||
freqBuffer[0] = totalTermFreq;
|
freqBuffer[0] = totalTermFreq;
|
||||||
Arrays.fill(docBuffer, 1, BLOCK_SIZE, DocIdSetIterator.NO_MORE_DOCS);
|
docBuffer[1] = NO_MORE_DOCS;
|
||||||
blockUpto++;
|
blockUpto++;
|
||||||
} else {
|
} else {
|
||||||
// Read vInts:
|
// Read vInts:
|
||||||
readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq);
|
readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq);
|
||||||
prefixSum(docBuffer, left, accum);
|
prefixSum(docBuffer, left, accum);
|
||||||
Arrays.fill(docBuffer, left, BLOCK_SIZE, DocIdSetIterator.NO_MORE_DOCS);
|
docBuffer[left] = NO_MORE_DOCS;
|
||||||
blockUpto += left;
|
blockUpto += left;
|
||||||
}
|
}
|
||||||
accum = docBuffer[BLOCK_SIZE - 1];
|
accum = docBuffer[BLOCK_SIZE - 1];
|
||||||
|
@ -516,8 +515,8 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
|
||||||
final ForDeltaUtil forDeltaUtil = new ForDeltaUtil(forUtil);
|
final ForDeltaUtil forDeltaUtil = new ForDeltaUtil(forUtil);
|
||||||
final PForUtil pforUtil = new PForUtil(forUtil);
|
final PForUtil pforUtil = new PForUtil(forUtil);
|
||||||
|
|
||||||
private final long[] docBuffer = new long[BLOCK_SIZE];
|
private final long[] docBuffer = new long[BLOCK_SIZE+1];
|
||||||
private final long[] freqBuffer = new long[BLOCK_SIZE];
|
private final long[] freqBuffer = new long[BLOCK_SIZE+1];
|
||||||
private final long[] posDeltaBuffer = new long[BLOCK_SIZE];
|
private final long[] posDeltaBuffer = new long[BLOCK_SIZE];
|
||||||
|
|
||||||
private final long[] payloadLengthBuffer;
|
private final long[] payloadLengthBuffer;
|
||||||
|
@ -550,7 +549,7 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
|
||||||
|
|
||||||
private int docFreq; // number of docs in this posting list
|
private int docFreq; // number of docs in this posting list
|
||||||
private long totalTermFreq; // number of positions in this posting list
|
private long totalTermFreq; // number of positions in this posting list
|
||||||
private int docUpto; // how many docs we've read
|
private int blockUpto; // number of docs in or before the current block
|
||||||
private int doc; // doc we last read
|
private int doc; // doc we last read
|
||||||
private long accum; // accumulator for doc deltas
|
private long accum; // accumulator for doc deltas
|
||||||
private int freq; // freq we last read
|
private int freq; // freq we last read
|
||||||
|
@ -625,6 +624,9 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
|
||||||
payloadBytes = null;
|
payloadBytes = null;
|
||||||
payload = null;
|
payload = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in advance()
|
||||||
|
docBuffer[BLOCK_SIZE] = NO_MORE_DOCS;
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) {
|
public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) {
|
||||||
|
@ -664,7 +666,7 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
|
||||||
|
|
||||||
doc = -1;
|
doc = -1;
|
||||||
accum = 0;
|
accum = 0;
|
||||||
docUpto = 0;
|
blockUpto = 0;
|
||||||
if (docFreq > BLOCK_SIZE) {
|
if (docFreq > BLOCK_SIZE) {
|
||||||
nextSkipDoc = BLOCK_SIZE - 1; // we won't skip if target is found in first block
|
nextSkipDoc = BLOCK_SIZE - 1; // we won't skip if target is found in first block
|
||||||
} else {
|
} else {
|
||||||
|
@ -686,23 +688,27 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void refillDocs() throws IOException {
|
private void refillDocs() throws IOException {
|
||||||
final int left = docFreq - docUpto;
|
final int left = docFreq - blockUpto;
|
||||||
assert left >= 0;
|
assert left >= 0;
|
||||||
|
|
||||||
if (left >= BLOCK_SIZE) {
|
if (left >= BLOCK_SIZE) {
|
||||||
forDeltaUtil.decodeAndPrefixSum(docIn, accum, docBuffer);
|
forDeltaUtil.decodeAndPrefixSum(docIn, accum, docBuffer);
|
||||||
pforUtil.decode(docIn, freqBuffer);
|
pforUtil.decode(docIn, freqBuffer);
|
||||||
|
blockUpto += BLOCK_SIZE;
|
||||||
} else if (docFreq == 1) {
|
} else if (docFreq == 1) {
|
||||||
docBuffer[0] = singletonDocID;
|
docBuffer[0] = singletonDocID;
|
||||||
freqBuffer[0] = totalTermFreq;
|
freqBuffer[0] = totalTermFreq;
|
||||||
Arrays.fill(docBuffer, 1, BLOCK_SIZE, DocIdSetIterator.NO_MORE_DOCS);
|
docBuffer[1] = NO_MORE_DOCS;
|
||||||
|
blockUpto++;
|
||||||
} else {
|
} else {
|
||||||
readVIntBlock(docIn, docBuffer, freqBuffer, left, true);
|
readVIntBlock(docIn, docBuffer, freqBuffer, left, true);
|
||||||
prefixSum(docBuffer, left, accum);
|
prefixSum(docBuffer, left, accum);
|
||||||
Arrays.fill(docBuffer, left, BLOCK_SIZE, DocIdSetIterator.NO_MORE_DOCS);
|
docBuffer[left] = NO_MORE_DOCS;
|
||||||
|
blockUpto += left;
|
||||||
}
|
}
|
||||||
accum = docBuffer[BLOCK_SIZE - 1];
|
accum = docBuffer[BLOCK_SIZE - 1];
|
||||||
docBufferUpto = 0;
|
docBufferUpto = 0;
|
||||||
|
assert docBuffer[BLOCK_SIZE] == NO_MORE_DOCS;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void refillPositions() throws IOException {
|
private void refillPositions() throws IOException {
|
||||||
|
@ -784,7 +790,6 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
|
||||||
freq = (int) freqBuffer[docBufferUpto];
|
freq = (int) freqBuffer[docBufferUpto];
|
||||||
posPendingCount += freq;
|
posPendingCount += freq;
|
||||||
docBufferUpto++;
|
docBufferUpto++;
|
||||||
docUpto++;
|
|
||||||
|
|
||||||
position = 0;
|
position = 0;
|
||||||
lastStartOffset = 0;
|
lastStartOffset = 0;
|
||||||
|
@ -813,10 +818,10 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
|
||||||
|
|
||||||
final int newDocUpto = skipper.skipTo(target) + 1;
|
final int newDocUpto = skipper.skipTo(target) + 1;
|
||||||
|
|
||||||
if (newDocUpto > docUpto) {
|
if (newDocUpto > blockUpto - BLOCK_SIZE + docBufferUpto) {
|
||||||
// Skipper moved
|
// Skipper moved
|
||||||
assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
|
assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
|
||||||
docUpto = newDocUpto;
|
blockUpto = newDocUpto;
|
||||||
|
|
||||||
// Force to read next block
|
// Force to read next block
|
||||||
docBufferUpto = BLOCK_SIZE;
|
docBufferUpto = BLOCK_SIZE;
|
||||||
|
@ -841,15 +846,10 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
|
||||||
freq = (int) freqBuffer[docBufferUpto];
|
freq = (int) freqBuffer[docBufferUpto];
|
||||||
posPendingCount += freq;
|
posPendingCount += freq;
|
||||||
docBufferUpto++;
|
docBufferUpto++;
|
||||||
docUpto++;
|
|
||||||
|
|
||||||
if (doc >= target) {
|
if (doc >= target) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (docBufferUpto == BLOCK_SIZE) {
|
|
||||||
return this.doc = NO_MORE_DOCS;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
position = 0;
|
position = 0;
|
||||||
|
@ -1079,7 +1079,7 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
|
||||||
} else {
|
} else {
|
||||||
readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreqs);
|
readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreqs);
|
||||||
prefixSum(docBuffer, left, accum);
|
prefixSum(docBuffer, left, accum);
|
||||||
Arrays.fill(docBuffer, left, BLOCK_SIZE, DocIdSetIterator.NO_MORE_DOCS);
|
docBuffer[left] = NO_MORE_DOCS;
|
||||||
blockUpto += left;
|
blockUpto += left;
|
||||||
}
|
}
|
||||||
accum = docBuffer[BLOCK_SIZE - 1];
|
accum = docBuffer[BLOCK_SIZE - 1];
|
||||||
|
@ -1282,7 +1282,7 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
|
||||||
} else {
|
} else {
|
||||||
readVIntBlock(docIn, docBuffer, freqBuffer, left, true);
|
readVIntBlock(docIn, docBuffer, freqBuffer, left, true);
|
||||||
prefixSum(docBuffer, left, accum);
|
prefixSum(docBuffer, left, accum);
|
||||||
Arrays.fill(docBuffer, left, BLOCK_SIZE, DocIdSetIterator.NO_MORE_DOCS);
|
docBuffer[left] = NO_MORE_DOCS;
|
||||||
}
|
}
|
||||||
accum = docBuffer[BLOCK_SIZE - 1];
|
accum = docBuffer[BLOCK_SIZE - 1];
|
||||||
docBufferUpto = 0;
|
docBufferUpto = 0;
|
||||||
|
@ -1664,7 +1664,7 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
|
||||||
} else {
|
} else {
|
||||||
readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq);
|
readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq);
|
||||||
prefixSum(docBuffer, left, accum);
|
prefixSum(docBuffer, left, accum);
|
||||||
Arrays.fill(docBuffer, left, BLOCK_SIZE, DocIdSetIterator.NO_MORE_DOCS);
|
docBuffer[left] = NO_MORE_DOCS;
|
||||||
}
|
}
|
||||||
accum = docBuffer[BLOCK_SIZE - 1];
|
accum = docBuffer[BLOCK_SIZE - 1];
|
||||||
docBufferUpto = 0;
|
docBufferUpto = 0;
|
||||||
|
|
|
@ -42,7 +42,6 @@ HEADER = """// This file has been automatically generated, DO NOT EDIT
|
||||||
package org.apache.lucene.codecs.lucene84;
|
package org.apache.lucene.codecs.lucene84;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Arrays;
|
|
||||||
|
|
||||||
import org.apache.lucene.store.DataInput;
|
import org.apache.lucene.store.DataInput;
|
||||||
import org.apache.lucene.store.DataOutput;
|
import org.apache.lucene.store.DataOutput;
|
||||||
|
@ -260,15 +259,18 @@ final class ForUtil {
|
||||||
}
|
}
|
||||||
|
|
||||||
final int numLongsPerShift = bitsPerValue * 2;
|
final int numLongsPerShift = bitsPerValue * 2;
|
||||||
Arrays.fill(tmp, 0L);
|
|
||||||
int idx = 0;
|
int idx = 0;
|
||||||
for (int shift = nextPrimitive - bitsPerValue; shift >= 0; shift -= bitsPerValue) {
|
int shift = nextPrimitive - bitsPerValue;
|
||||||
|
for (int i = 0; i < numLongsPerShift; ++i) {
|
||||||
|
tmp[i] = longs[idx++] << shift;
|
||||||
|
}
|
||||||
|
for (shift = shift - bitsPerValue; shift >= 0; shift -= bitsPerValue) {
|
||||||
for (int i = 0; i < numLongsPerShift; ++i) {
|
for (int i = 0; i < numLongsPerShift; ++i) {
|
||||||
tmp[i] |= longs[idx++] << shift;
|
tmp[i] |= longs[idx++] << shift;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
final int remainingBitsPerLong = nextPrimitive % bitsPerValue;
|
final int remainingBitsPerLong = shift + bitsPerValue;
|
||||||
final long maskRemainingBitsPerLong;
|
final long maskRemainingBitsPerLong;
|
||||||
if (nextPrimitive == 8) {
|
if (nextPrimitive == 8) {
|
||||||
maskRemainingBitsPerLong = mask8(remainingBitsPerLong);
|
maskRemainingBitsPerLong = mask8(remainingBitsPerLong);
|
||||||
|
@ -277,6 +279,7 @@ final class ForUtil {
|
||||||
} else {
|
} else {
|
||||||
maskRemainingBitsPerLong = mask32(remainingBitsPerLong);
|
maskRemainingBitsPerLong = mask32(remainingBitsPerLong);
|
||||||
}
|
}
|
||||||
|
|
||||||
int tmpIdx = 0;
|
int tmpIdx = 0;
|
||||||
int remainingBitsPerValue = bitsPerValue;
|
int remainingBitsPerValue = bitsPerValue;
|
||||||
while (idx < numLongs) {
|
while (idx < numLongs) {
|
||||||
|
|
Loading…
Reference in New Issue