mirror of https://github.com/apache/lucene.git
Dry up both ImpactsEnum implementations in Lucene912PostingsReader (#13892)
These two share a lot of code; in particular, the impacts implementation is 100% identical. We can save a lot of code and potentially some cycles for method invocations by drying things up. The changes are mechanical field movements, with the following exceptions:
1. One of the two implementations used a BytesRefBuilder and the other a BytesRef for holding the serialized impacts. The `BytesRef` variant is faster, so I used it for both when extracting.
2. Some simple arithmetic simplifications around the levels that should be obvious.
3. Removed the logic for an index without positions in `BlockImpactsPostingsEnum`; that was dead code, since we only set this enum up when positions are indexed.
parent a6a6519ee7
commit e99db4e954
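The refactoring follows a common dry-up pattern: when two sibling classes duplicate state and behavior, the shared pieces move into an abstract base class and only the divergent decoding logic stays in the subclasses. Below is a minimal, self-contained sketch of that pattern with simplified names and fields; these are illustrative stand-ins, not the actual Lucene classes shown in the diff that follows.

// Sketch only: simplified stand-ins for the real enums, not Lucene's API.
abstract class BaseImpactsEnum {
  static final int NO_MORE_DOCS = Integer.MAX_VALUE;

  // Skip state that was previously duplicated in both implementations.
  protected int level0LastDocID = -1;
  protected int level1LastDocID;

  // The "arithmetic simplification around the levels": once level 1 is
  // exhausted (level1LastDocID == NO_MORE_DOCS) only a single level remains,
  // so the level count no longer needs to be derived by counting conditionals.
  final int numLevels() {
    return level1LastDocID == NO_MORE_DOCS ? 1 : 2;
  }
}

// Each subclass keeps only the logic that actually differs.
final class DocsOnlyEnumSketch extends BaseImpactsEnum {
  int freq() {
    return 1; // frequencies omitted in a docs-only index
  }
}

final class PostingsEnumSketch extends BaseImpactsEnum {
  int nextPosition() {
    return 0; // position decoding lives here
  }
}

In the actual change below, BlockImpactsEnum plays the role of the base class, holding the skip data, the serialized impacts, and the shared Impacts view, while BlockImpactsDocsEnum and BlockImpactsPostingsEnum keep only frequency and position decoding.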
@@ -47,7 +47,6 @@ import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SlowImpactsEnum;
 import org.apache.lucene.internal.vectorization.PostingDecodingUtil;
 import org.apache.lucene.internal.vectorization.VectorizationProvider;
-import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.DataInput;
@@ -56,7 +55,6 @@ import org.apache.lucene.store.ReadAdvice;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BitUtil;
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.BytesRefBuilder;
 import org.apache.lucene.util.IOUtils;
 
 /**
@@ -315,30 +313,24 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
   @Override
   public ImpactsEnum impacts(FieldInfo fieldInfo, BlockTermState state, int flags)
       throws IOException {
-    final boolean indexHasFreqs =
-        fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
+    final IndexOptions options = fieldInfo.getIndexOptions();
     final boolean indexHasPositions =
-        fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+        options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
 
-    if (state.docFreq >= BLOCK_SIZE
-        && indexHasFreqs
-        && (indexHasPositions == false
-            || PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS) == false)) {
-      return new BlockImpactsDocsEnum(fieldInfo, (IntBlockTermState) state);
-    }
+    if (state.docFreq >= BLOCK_SIZE) {
+      if (options.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0
+          && (indexHasPositions == false
+              || PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS) == false)) {
+        return new BlockImpactsDocsEnum(indexHasPositions, (IntBlockTermState) state);
+      }
 
-    final boolean indexHasOffsets =
-        fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
-            >= 0;
-    final boolean indexHasPayloads = fieldInfo.hasPayloads();
-
-    if (state.docFreq >= BLOCK_SIZE
-        && indexHasPositions
-        && (indexHasOffsets == false
-            || PostingsEnum.featureRequested(flags, PostingsEnum.OFFSETS) == false)
-        && (indexHasPayloads == false
-            || PostingsEnum.featureRequested(flags, PostingsEnum.PAYLOADS) == false)) {
-      return new BlockImpactsPostingsEnum(fieldInfo, (IntBlockTermState) state);
-    }
+      if (indexHasPositions
+          && (options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) < 0
+              || PostingsEnum.featureRequested(flags, PostingsEnum.OFFSETS) == false)
+          && (fieldInfo.hasPayloads() == false
+              || PostingsEnum.featureRequested(flags, PostingsEnum.PAYLOADS) == false)) {
+        return new BlockImpactsPostingsEnum(fieldInfo, (IntBlockTermState) state);
+      }
+    }
 
     return new SlowImpactsEnum(postings(fieldInfo, state, null, flags));
@@ -378,15 +370,11 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
     private long freqFP;
 
     public BlockDocsEnum(FieldInfo fieldInfo) {
-      this.docIn = null;
-      indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
-      indexHasPos =
-          fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+      final IndexOptions options = fieldInfo.getIndexOptions();
+      indexHasFreq = options.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
+      indexHasPos = options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
       indexHasOffsetsOrPayloads =
-          fieldInfo
-                  .getIndexOptions()
-                  .compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
-              >= 0
+          options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0
               || fieldInfo.hasPayloads();
       // We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in
       // advance()
@@ -394,17 +382,12 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
     }
 
     public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) {
+      final IndexOptions options = fieldInfo.getIndexOptions();
       return docIn == Lucene912PostingsReader.this.docIn
-          && indexHasFreq
-              == (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0)
-          && indexHasPos
-              == (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
-                  >= 0)
+          && indexHasFreq == (options.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0)
+          && indexHasPos == (options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0)
           && indexHasOffsetsOrPayloads
-              == (fieldInfo
-                      .getIndexOptions()
-                      .compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
-                  >= 0
+              == (options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0
                   || fieldInfo.hasPayloads());
     }
 
@@ -533,7 +516,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
       level1DocCountUpto += LEVEL1_NUM_DOCS;
 
       if (docFreq - docCountUpto < LEVEL1_NUM_DOCS) {
-        level1LastDocID = DocIdSetIterator.NO_MORE_DOCS;
+        level1LastDocID = NO_MORE_DOCS;
         break;
       }
 
@@ -569,7 +552,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
           docIn.skipBytes(readVLong15(docIn));
           docCountUpto += BLOCK_SIZE;
         } else {
-          level0LastDocID = DocIdSetIterator.NO_MORE_DOCS;
+          level0LastDocID = NO_MORE_DOCS;
           break;
         }
       }
@@ -586,7 +569,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
         refillFullBlock();
         level0LastDocID = (int) docBuffer[BLOCK_SIZE - 1];
       } else {
-        level0LastDocID = DocIdSetIterator.NO_MORE_DOCS;
+        level0LastDocID = NO_MORE_DOCS;
         refillRemainder();
       }
     }
@@ -704,13 +687,10 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
     private int singletonDocID; // docid when there is a single pulsed posting, otherwise -1
 
     public EverythingEnum(FieldInfo fieldInfo) throws IOException {
-      this.docIn = null;
-      indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
+      final IndexOptions options = fieldInfo.getIndexOptions();
+      indexHasFreq = options.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
       indexHasOffsets =
-          fieldInfo
-                  .getIndexOptions()
-                  .compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
-              >= 0;
+          options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
       indexHasPayloads = fieldInfo.hasPayloads();
       indexHasOffsetsOrPayloads = indexHasOffsets || indexHasPayloads;
 
@@ -875,7 +855,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
         level1DocCountUpto += LEVEL1_NUM_DOCS;
 
         if (docFreq - docCountUpto < LEVEL1_NUM_DOCS) {
-          level1LastDocID = DocIdSetIterator.NO_MORE_DOCS;
+          level1LastDocID = NO_MORE_DOCS;
           break;
         }
 
@@ -933,7 +913,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
            level0BlockPayUpto = docIn.readVInt();
          }
        } else {
-         level0LastDocID = DocIdSetIterator.NO_MORE_DOCS;
+         level0LastDocID = NO_MORE_DOCS;
        }
 
        refillDocs();
@@ -1000,7 +980,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
           docIn.seek(blockEndFP);
           docCountUpto += BLOCK_SIZE;
         } else {
-          level0LastDocID = DocIdSetIterator.NO_MORE_DOCS;
+          level0LastDocID = NO_MORE_DOCS;
           break;
         }
       }
@@ -1213,70 +1193,48 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
     }
   }
 
-  final class BlockImpactsDocsEnum extends ImpactsEnum {
+  private abstract class BlockImpactsEnum extends ImpactsEnum {
 
-    final ForDeltaUtil forDeltaUtil = new ForDeltaUtil();
-    final PForUtil pforUtil = new PForUtil(new ForUtil());
+    protected final ForDeltaUtil forDeltaUtil = new ForDeltaUtil();
+    protected final PForUtil pforUtil = new PForUtil(new ForUtil());
 
-    private final long[] docBuffer = new long[BLOCK_SIZE + 1];
-    private final long[] freqBuffer = new long[BLOCK_SIZE];
+    protected final long[] docBuffer = new long[BLOCK_SIZE + 1];
+    protected final long[] freqBuffer = new long[BLOCK_SIZE];
 
-    private int docBufferUpto;
+    protected final int docFreq; // number of docs in this posting list
 
-    final IndexInput docIn;
-    final PostingDecodingUtil docInUtil;
-    final boolean indexHasFreq;
-    final boolean indexHasPos;
+    protected final IndexInput docIn;
+    protected final PostingDecodingUtil docInUtil;
 
-    private final int docFreq; // number of docs in this posting list
-    private int docCountUpto; // number of docs in or before the current block
-    private int doc; // doc we last read
-    private long prevDocID; // last doc ID of the previous block
-    private long freqFP;
+    protected int docCountUpto; // number of docs in or before the current block
+    protected int doc = -1; // doc we last read
+    protected long prevDocID = -1; // last doc ID of the previous block
+    protected int docBufferUpto = BLOCK_SIZE;
 
     // true if we shallow-advanced to a new block that we have not decoded yet
-    private boolean needsRefilling;
+    protected boolean needsRefilling;
 
     // level 0 skip data
-    private int level0LastDocID;
-    private long level0DocEndFP;
-    private final BytesRef level0SerializedImpacts;
-    private final MutableImpactList level0Impacts;
+    protected int level0LastDocID = -1;
+    protected long level0DocEndFP;
+    protected final BytesRef level0SerializedImpacts;
+    protected final MutableImpactList level0Impacts;
     // level 1 skip data
-    private int level1LastDocID;
-    private long level1DocEndFP;
-    private int level1DocCountUpto;
-    private final BytesRef level1SerializedImpacts;
-    private final MutableImpactList level1Impacts;
+    protected int level1LastDocID;
+    protected long level1DocEndFP;
+    protected int level1DocCountUpto = 0;
+    protected final BytesRef level1SerializedImpacts;
+    protected final MutableImpactList level1Impacts;
 
-    public BlockImpactsDocsEnum(FieldInfo fieldInfo, IntBlockTermState termState)
-        throws IOException {
-      indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
-      indexHasPos =
-          fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
-      // We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in
-      // advance()
-      docBuffer[BLOCK_SIZE] = NO_MORE_DOCS;
-
-      docFreq = termState.docFreq;
-      if (docFreq > 1) {
-        docIn = Lucene912PostingsReader.this.docIn.clone();
-        docInUtil = VECTORIZATION_PROVIDER.newPostingDecodingUtil(docIn);
-        prefetchPostings(docIn, termState);
-      } else {
-        docIn = null;
-        docInUtil = null;
-      }
-
-      doc = -1;
-      if (indexHasFreq == false) {
-        // Filling this buffer may not be cheap when doing primary key lookups, so we make sure to
-        // not fill more than `docFreq` entries.
-        Arrays.fill(freqBuffer, 0, Math.min(ForUtil.BLOCK_SIZE, docFreq), 1);
-      }
-      prevDocID = -1;
-      docCountUpto = 0;
-      level0LastDocID = -1;
+    private BlockImpactsEnum(IntBlockTermState termState) throws IOException {
+      this.docFreq = termState.docFreq;
+      this.docIn = Lucene912PostingsReader.this.docIn.clone();
+      this.docInUtil = VECTORIZATION_PROVIDER.newPostingDecodingUtil(docIn);
+      prefetchPostings(docIn, termState);
+      level0SerializedImpacts = new BytesRef(maxImpactNumBytesAtLevel0);
+      level1SerializedImpacts = new BytesRef(maxImpactNumBytesAtLevel1);
+      level0Impacts = new MutableImpactList(maxNumImpactsAtLevel0);
+      level1Impacts = new MutableImpactList(maxNumImpactsAtLevel1);
       if (docFreq < LEVEL1_NUM_DOCS) {
         level1LastDocID = NO_MORE_DOCS;
         if (docFreq > 1) {
@@ -1286,28 +1244,14 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
         level1LastDocID = -1;
         level1DocEndFP = termState.docStartFP;
       }
-      level1DocCountUpto = 0;
-      docBufferUpto = BLOCK_SIZE;
-      freqFP = -1;
-      level0SerializedImpacts = new BytesRef(maxImpactNumBytesAtLevel0);
-      level1SerializedImpacts = new BytesRef(maxImpactNumBytesAtLevel1);
-      level0Impacts = new MutableImpactList(maxNumImpactsAtLevel0);
-      level1Impacts = new MutableImpactList(maxNumImpactsAtLevel1);
+      // We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in
+      // advance()
+      docBuffer[BLOCK_SIZE] = NO_MORE_DOCS;
     }
 
     @Override
-    public int freq() throws IOException {
-      if (freqFP != -1) {
-        docIn.seek(freqFP);
-        pforUtil.decode(docInUtil, freqBuffer);
-        freqFP = -1;
-      }
-      return (int) freqBuffer[docBufferUpto - 1];
-    }
-
-    @Override
-    public int nextPosition() {
-      return -1;
+    public int docID() {
+      return doc;
     }
 
     @Override
@@ -1326,8 +1270,78 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
     }
 
     @Override
-    public int docID() {
-      return doc;
+    public long cost() {
+      return docFreq;
+    }
+
+    private final Impacts impacts =
+        new Impacts() {
+
+          private final ByteArrayDataInput scratch = new ByteArrayDataInput();
+
+          @Override
+          public int numLevels() {
+            return level1LastDocID == NO_MORE_DOCS ? 1 : 2;
+          }
+
+          @Override
+          public int getDocIdUpTo(int level) {
+            if (level == 0) {
+              return level0LastDocID;
+            }
+            return level == 1 ? level1LastDocID : NO_MORE_DOCS;
+          }
+
+          @Override
+          public List<Impact> getImpacts(int level) {
+            if (level == 0 && level0LastDocID != NO_MORE_DOCS) {
+              return readImpacts(level0SerializedImpacts, level0Impacts);
+            }
+            if (level == 1) {
+              return readImpacts(level1SerializedImpacts, level1Impacts);
+            }
+            return DUMMY_IMPACTS;
+          }
+
+          private List<Impact> readImpacts(BytesRef serialized, MutableImpactList impactsList) {
+            var scratch = this.scratch;
+            scratch.reset(serialized.bytes, 0, serialized.length);
+            Lucene912PostingsReader.readImpacts(scratch, impactsList);
+            return impactsList;
+          }
+        };
+
+    @Override
+    public Impacts getImpacts() {
+      return impacts;
+    }
+  }
+
+  final class BlockImpactsDocsEnum extends BlockImpactsEnum {
+    final boolean indexHasPos;
+
+    private long freqFP;
+
+    public BlockImpactsDocsEnum(boolean indexHasPos, IntBlockTermState termState)
+        throws IOException {
+      super(termState);
+      this.indexHasPos = indexHasPos;
+      freqFP = -1;
+    }
+
+    @Override
+    public int freq() throws IOException {
+      if (freqFP != -1) {
+        docIn.seek(freqFP);
+        pforUtil.decode(docInUtil, freqBuffer);
+        freqFP = -1;
+      }
+      return (int) freqBuffer[docBufferUpto - 1];
+    }
+
+    @Override
+    public int nextPosition() {
+      return -1;
     }
 
     private void refillDocs() throws IOException {
@@ -1336,15 +1350,12 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
 
       if (left >= BLOCK_SIZE) {
         forDeltaUtil.decodeAndPrefixSum(docInUtil, prevDocID, docBuffer);
-        if (indexHasFreq) {
-          freqFP = docIn.getFilePointer();
-          PForUtil.skip(docIn);
-        }
+        freqFP = docIn.getFilePointer();
+        PForUtil.skip(docIn);
         docCountUpto += BLOCK_SIZE;
       } else {
         // Read vInts:
-        PostingsUtil.readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq, true);
+        PostingsUtil.readVIntBlock(docIn, docBuffer, freqBuffer, left, true, true);
         prefixSum(docBuffer, left, prevDocID);
         docBuffer[left] = NO_MORE_DOCS;
         freqFP = -1;
@@ -1364,7 +1375,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
       level1DocCountUpto += LEVEL1_NUM_DOCS;
 
       if (docFreq - docCountUpto < LEVEL1_NUM_DOCS) {
-        level1LastDocID = DocIdSetIterator.NO_MORE_DOCS;
+        level1LastDocID = NO_MORE_DOCS;
        break;
       }
 
@@ -1408,7 +1419,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
           docIn.skipBytes(blockLength);
           docCountUpto += BLOCK_SIZE;
         } else {
-          level0LastDocID = DocIdSetIterator.NO_MORE_DOCS;
+          level0LastDocID = NO_MORE_DOCS;
           break;
         }
       }
@@ -1451,7 +1462,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
           level0SerializedImpacts.length = numImpactBytes;
           docIn.seek(skip0End);
         } else {
-          level0LastDocID = DocIdSetIterator.NO_MORE_DOCS;
+          level0LastDocID = NO_MORE_DOCS;
         }
 
         refillDocs();
@@ -1483,88 +1494,12 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
       docBufferUpto = next + 1;
       return doc;
     }
-
-    private final Impacts impacts =
-        new Impacts() {
-
-          private final ByteArrayDataInput scratch = new ByteArrayDataInput();
-
-          @Override
-          public int numLevels() {
-            int numLevels = 0;
-            if (level0LastDocID != NO_MORE_DOCS) {
-              numLevels++;
-            }
-            if (level1LastDocID != NO_MORE_DOCS) {
-              numLevels++;
-            }
-            if (numLevels == 0) {
-              numLevels++;
-            }
-            return numLevels;
-          }
-
-          @Override
-          public int getDocIdUpTo(int level) {
-            if (level0LastDocID != NO_MORE_DOCS) {
-              if (level == 0) {
-                return level0LastDocID;
-              }
-              level--;
-            }
-
-            if (level == 0) {
-              return level1LastDocID;
-            }
-            return NO_MORE_DOCS;
-          }
-
-          @Override
-          public List<Impact> getImpacts(int level) {
-            if (level0LastDocID != NO_MORE_DOCS) {
-              if (level == 0) {
-                scratch.reset(level0SerializedImpacts.bytes, 0, level0SerializedImpacts.length);
-                readImpacts(scratch, level0Impacts);
-                return level0Impacts;
-              }
-              level--;
-            }
-
-            if (level1LastDocID != NO_MORE_DOCS && level == 0) {
-              scratch.reset(level1SerializedImpacts.bytes, 0, level1SerializedImpacts.length);
-              readImpacts(scratch, level1Impacts);
-              return level1Impacts;
-            }
-
-            return DUMMY_IMPACTS;
-          }
-        };
-
-    @Override
-    public Impacts getImpacts() {
-      return impacts;
-    }
-
-    @Override
-    public long cost() {
-      return docFreq;
-    }
   }
 
-  final class BlockImpactsPostingsEnum extends ImpactsEnum {
-
-    final ForDeltaUtil forDeltaUtil = new ForDeltaUtil();
-    final PForUtil pforUtil = new PForUtil(new ForUtil());
-
-    private final long[] docBuffer = new long[BLOCK_SIZE + 1];
-    private final long[] freqBuffer = new long[BLOCK_SIZE];
+  final class BlockImpactsPostingsEnum extends BlockImpactsEnum {
     private final long[] posDeltaBuffer = new long[BLOCK_SIZE];
 
-    private int docBufferUpto;
     private int posBufferUpto;
 
-    final IndexInput docIn;
-    final PostingDecodingUtil docInUtil;
     final IndexInput posIn;
     final PostingDecodingUtil posInUtil;
 
@@ -1573,12 +1508,8 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
     final boolean indexHasPayloads;
     final boolean indexHasOffsetsOrPayloads;
 
-    private final int docFreq; // number of docs in this posting list
     private final long
         totalTermFreq; // sum of freqBuffer in this posting list (or docFreq when omitted)
-    private int docCountUpto; // number of docs in or before the current block
-    private int doc; // doc we last read
-    private long prevDocID; // last doc ID of the previous block
     private int freq; // freq we last read
     private int position; // current position
 
@@ -1591,58 +1522,32 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
     // decode vs vInt decode the block:
     private final long lastPosBlockFP;
 
-    // true if we shallow-advanced to a new block that we have not decoded yet
-    private boolean needsRefilling;
-
     // level 0 skip data
-    private int level0LastDocID;
-    private long level0DocEndFP;
     private long level0PosEndFP;
     private int level0BlockPosUpto;
-    private final BytesRefBuilder level0SerializedImpacts = new BytesRefBuilder();
-    private final MutableImpactList level0Impacts;
     // level 1 skip data
-    private int level1LastDocID;
-    private long level1DocEndFP;
-    private int level1DocCountUpto;
     private long level1PosEndFP;
     private int level1BlockPosUpto;
-    private final BytesRefBuilder level1SerializedImpacts = new BytesRefBuilder();
-    private final MutableImpactList level1Impacts;
 
     private final int singletonDocID; // docid when there is a single pulsed posting, otherwise -1
 
     public BlockImpactsPostingsEnum(FieldInfo fieldInfo, IntBlockTermState termState)
         throws IOException {
-      indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
+      super(termState);
+      final IndexOptions options = fieldInfo.getIndexOptions();
+      indexHasFreq = options.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
       indexHasOffsets =
-          fieldInfo
-                  .getIndexOptions()
-                  .compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
-              >= 0;
+          options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
       indexHasPayloads = fieldInfo.hasPayloads();
       indexHasOffsetsOrPayloads = indexHasOffsets || indexHasPayloads;
 
       this.posIn = Lucene912PostingsReader.this.posIn.clone();
       posInUtil = VECTORIZATION_PROVIDER.newPostingDecodingUtil(posIn);
 
-      // We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in
-      // advance()
-      docBuffer[BLOCK_SIZE] = NO_MORE_DOCS;
-
-      docFreq = termState.docFreq;
       // Where this term's postings start in the .pos file:
       final long posTermStartFP = termState.posStartFP;
       totalTermFreq = termState.totalTermFreq;
       singletonDocID = termState.singletonDocID;
-      if (docFreq > 1) {
-        docIn = Lucene912PostingsReader.this.docIn.clone();
-        docInUtil = VECTORIZATION_PROVIDER.newPostingDecodingUtil(docIn);
-        prefetchPostings(docIn, termState);
-      } else {
-        docIn = null;
-        docInUtil = null;
-      }
       posIn.seek(posTermStartFP);
       level1PosEndFP = posTermStartFP;
       level0PosEndFP = posTermStartFP;
@@ -1654,28 +1559,8 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
       } else {
        lastPosBlockFP = posTermStartFP + termState.lastPosBlockOffset;
       }
-
-      doc = -1;
-      prevDocID = -1;
-      docCountUpto = 0;
-      level0LastDocID = -1;
-      if (docFreq < LEVEL1_NUM_DOCS) {
-        level1LastDocID = NO_MORE_DOCS;
-        if (docFreq > 1) {
-          docIn.seek(termState.docStartFP);
-        }
-      } else {
-        level1LastDocID = -1;
-        level1DocEndFP = termState.docStartFP;
-      }
-      level1DocCountUpto = 0;
       level1BlockPosUpto = 0;
-      docBufferUpto = BLOCK_SIZE;
       posBufferUpto = BLOCK_SIZE;
-      level0SerializedImpacts.growNoCopy(maxImpactNumBytesAtLevel0);
-      level1SerializedImpacts.growNoCopy(maxImpactNumBytesAtLevel1);
-      level0Impacts = new MutableImpactList(maxNumImpactsAtLevel0);
-      level1Impacts = new MutableImpactList(maxNumImpactsAtLevel1);
     }
 
     @Override
@@ -1683,11 +1568,6 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
       return freq;
     }
 
-    @Override
-    public int docID() {
-      return doc;
-    }
-
     private void refillDocs() throws IOException {
       final int left = docFreq - docCountUpto;
       assert left >= 0;
@@ -1724,7 +1604,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
       level1DocCountUpto += LEVEL1_NUM_DOCS;
 
       if (docFreq - docCountUpto < LEVEL1_NUM_DOCS) {
-        level1LastDocID = DocIdSetIterator.NO_MORE_DOCS;
+        level1LastDocID = NO_MORE_DOCS;
        break;
       }
 
@@ -1734,8 +1614,8 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
         long skip1EndFP = docIn.readShort() + docIn.getFilePointer();
         int numImpactBytes = docIn.readShort();
         if (level1LastDocID >= target) {
-          docIn.readBytes(level1SerializedImpacts.bytes(), 0, numImpactBytes);
-          level1SerializedImpacts.setLength(numImpactBytes);
+          docIn.readBytes(level1SerializedImpacts.bytes, 0, numImpactBytes);
+          level1SerializedImpacts.length = numImpactBytes;
         } else {
           docIn.skipBytes(numImpactBytes);
         }
@@ -1778,8 +1658,8 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
 
         if (target <= level0LastDocID) {
           int numImpactBytes = docIn.readVInt();
-          docIn.readBytes(level0SerializedImpacts.bytes(), 0, numImpactBytes);
-          level0SerializedImpacts.setLength(numImpactBytes);
+          docIn.readBytes(level0SerializedImpacts.bytes, 0, numImpactBytes);
+          level0SerializedImpacts.length = numImpactBytes;
           level0PosEndFP += docIn.readVLong();
           level0BlockPosUpto = docIn.readByte();
           if (indexHasOffsetsOrPayloads) {
@@ -1795,7 +1675,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
           docIn.seek(level0DocEndFP);
           docCountUpto += BLOCK_SIZE;
         } else {
-          level0LastDocID = DocIdSetIterator.NO_MORE_DOCS;
+          level0LastDocID = NO_MORE_DOCS;
           break;
         }
       }
@@ -1818,68 +1698,6 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
       }
     }
 
-    private final Impacts impacts =
-        new Impacts() {
-
-          private final ByteArrayDataInput scratch = new ByteArrayDataInput();
-
-          @Override
-          public int numLevels() {
-            int numLevels = 0;
-            if (level0LastDocID != NO_MORE_DOCS) {
-              numLevels++;
-            }
-            if (level1LastDocID != NO_MORE_DOCS) {
-              numLevels++;
-            }
-            if (numLevels == 0) {
-              numLevels++;
-            }
-            return numLevels;
-          }
-
-          @Override
-          public int getDocIdUpTo(int level) {
-            if (level0LastDocID != NO_MORE_DOCS) {
-              if (level == 0) {
-                return level0LastDocID;
-              }
-              level--;
-            }
-
-            if (level1LastDocID != NO_MORE_DOCS && level == 0) {
-              return level1LastDocID;
-            }
-
-            return NO_MORE_DOCS;
-          }
-
-          @Override
-          public List<Impact> getImpacts(int level) {
-            if (level0LastDocID != NO_MORE_DOCS) {
-              if (level == 0) {
-                scratch.reset(level0SerializedImpacts.bytes(), 0, level0SerializedImpacts.length());
-                readImpacts(scratch, level0Impacts);
-                return level0Impacts;
-              }
-              level--;
-            }
-
-            if (level1LastDocID != NO_MORE_DOCS && level == 0) {
-              scratch.reset(level1SerializedImpacts.bytes(), 0, level1SerializedImpacts.length());
-              readImpacts(scratch, level1Impacts);
-              return level1Impacts;
-            }
-
-            return DUMMY_IMPACTS;
-          }
-        };
-
-    @Override
-    public Impacts getImpacts() {
-      return impacts;
-    }
-
     @Override
     public int nextDoc() throws IOException {
       advanceShallow(doc + 1);
@@ -1987,26 +1805,6 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
       posPendingCount--;
       return position;
     }
-
-    @Override
-    public int startOffset() {
-      return -1;
-    }
-
-    @Override
-    public int endOffset() {
-      return -1;
-    }
-
-    @Override
-    public BytesRef getPayload() {
-      return null;
-    }
-
-    @Override
-    public long cost() {
-      return docFreq;
-    }
   }
 
   /**