Dry up both ImpactsEnum implementations in Lucene912PostingsReader (#13892)

These two share a lot of code; in particular, the impacts implementation is 100% identical.
We can save a lot of code and potentially some cycles for method invocations by
drying things up. The changes are just mechanical field movements, with the following exceptions:

1. One of the two implementations was using a `BytesRefBuilder` and the other a `BytesRef` for holding the
serialized impacts. The `BytesRef` variant is faster, so I used it for both when extracting.
2. Some simple arithmetic simplifications around the levels that should be obvious (see the sketch below).
3. Removed the logic for an index without positions in `BlockImpactsPostingsEnum`; that was dead code,
since we only set this enum up when there are positions.
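
For orientation, here is a minimal, self-contained sketch of the pattern (illustrative stand-in names only, not the actual Lucene classes): the shared skip/impact state moves into an abstract base class, and the old level-counting loop collapses to a ternary, because level 1 only disappears once it has been exhausted.

    // Toy sketch of the refactoring pattern; names are stand-ins, not the real
    // Lucene912PostingsReader classes.
    class ImpactsDedupSketch {
      static final int NO_MORE_DOCS = Integer.MAX_VALUE;

      // Shared skip state and impact-level logic, hoisted out of both enums.
      abstract static class BlockImpactsEnumBase {
        protected int level0LastDocID = -1;
        protected int level1LastDocID;
        protected int doc = -1;

        final int docID() {
          return doc;
        }

        // Exception 2 above: level 1 is only absent once it has been exhausted,
        // so the old counting loop reduces to a ternary.
        final int numLevels() {
          return level1LastDocID == NO_MORE_DOCS ? 1 : 2;
        }

        final int getDocIdUpTo(int level) {
          if (level == 0) {
            return level0LastDocID;
          }
          return level == 1 ? level1LastDocID : NO_MORE_DOCS;
        }
      }

      // Each concrete enum keeps only what actually differs between the two.
      static final class DocsVariant extends BlockImpactsEnumBase {
        long freqFP = -1; // docs-only variant: lazily decoded freq block
      }

      static final class PostingsVariant extends BlockImpactsEnumBase {
        int posBufferUpto; // postings variant: position state
      }

      public static void main(String[] args) {
        DocsVariant docs = new DocsVariant();
        docs.level0LastDocID = 127;
        docs.level1LastDocID = NO_MORE_DOCS; // short posting list: no level-1 skip data
        System.out.println(docs.numLevels()); // 1
        System.out.println(docs.getDocIdUpTo(0)); // 127
      }
    }

The real base class additionally owns the doc/freq buffers, the cloned docIn, and the serialized impacts, as the diff below shows.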
Armin Braun 2024-10-12 15:48:58 +02:00
parent a6a6519ee7
commit e99db4e954
1 changed file with 157 additions and 359 deletions


@@ -47,7 +47,6 @@ import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SlowImpactsEnum;
 import org.apache.lucene.internal.vectorization.PostingDecodingUtil;
 import org.apache.lucene.internal.vectorization.VectorizationProvider;
-import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.DataInput;
@@ -56,7 +55,6 @@ import org.apache.lucene.store.ReadAdvice;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BitUtil;
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.BytesRefBuilder;
 import org.apache.lucene.util.IOUtils;
 /**
@@ -315,30 +313,24 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
   @Override
   public ImpactsEnum impacts(FieldInfo fieldInfo, BlockTermState state, int flags)
       throws IOException {
-    final boolean indexHasFreqs =
-        fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
+    final IndexOptions options = fieldInfo.getIndexOptions();
     final boolean indexHasPositions =
-        fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
-    if (state.docFreq >= BLOCK_SIZE
-        && indexHasFreqs
-        && (indexHasPositions == false
-            || PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS) == false)) {
-      return new BlockImpactsDocsEnum(fieldInfo, (IntBlockTermState) state);
-    }
-    final boolean indexHasOffsets =
-        fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
-            >= 0;
-    final boolean indexHasPayloads = fieldInfo.hasPayloads();
-    if (state.docFreq >= BLOCK_SIZE
-        && indexHasPositions
-        && (indexHasOffsets == false
-            || PostingsEnum.featureRequested(flags, PostingsEnum.OFFSETS) == false)
-        && (indexHasPayloads == false
-            || PostingsEnum.featureRequested(flags, PostingsEnum.PAYLOADS) == false)) {
-      return new BlockImpactsPostingsEnum(fieldInfo, (IntBlockTermState) state);
-    }
+        options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+    if (state.docFreq >= BLOCK_SIZE) {
+      if (options.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0
+          && (indexHasPositions == false
+              || PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS) == false)) {
+        return new BlockImpactsDocsEnum(indexHasPositions, (IntBlockTermState) state);
+      }
+      if (indexHasPositions
+          && (options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) < 0
+              || PostingsEnum.featureRequested(flags, PostingsEnum.OFFSETS) == false)
+          && (fieldInfo.hasPayloads() == false
+              || PostingsEnum.featureRequested(flags, PostingsEnum.PAYLOADS) == false)) {
+        return new BlockImpactsPostingsEnum(fieldInfo, (IntBlockTermState) state);
+      }
+    }
     return new SlowImpactsEnum(postings(fieldInfo, state, null, flags));
@@ -378,15 +370,11 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
     private long freqFP;
     public BlockDocsEnum(FieldInfo fieldInfo) {
-      this.docIn = null;
-      indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
-      indexHasPos =
-          fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+      final IndexOptions options = fieldInfo.getIndexOptions();
+      indexHasFreq = options.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
+      indexHasPos = options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
       indexHasOffsetsOrPayloads =
-          fieldInfo
-              .getIndexOptions()
-              .compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
-              >= 0
+          options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0
           || fieldInfo.hasPayloads();
       // We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in
       // advance()
@@ -394,17 +382,12 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
     }
     public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) {
+      final IndexOptions options = fieldInfo.getIndexOptions();
       return docIn == Lucene912PostingsReader.this.docIn
-          && indexHasFreq
-              == (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0)
-          && indexHasPos
-              == (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
-                  >= 0)
+          && indexHasFreq == (options.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0)
+          && indexHasPos == (options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0)
           && indexHasOffsetsOrPayloads
-              == (fieldInfo
-                  .getIndexOptions()
-                  .compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
-                  >= 0
+              == (options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0
                   || fieldInfo.hasPayloads());
     }
@@ -533,7 +516,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
         level1DocCountUpto += LEVEL1_NUM_DOCS;
         if (docFreq - docCountUpto < LEVEL1_NUM_DOCS) {
-          level1LastDocID = DocIdSetIterator.NO_MORE_DOCS;
+          level1LastDocID = NO_MORE_DOCS;
           break;
         }
@@ -569,7 +552,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
           docIn.skipBytes(readVLong15(docIn));
           docCountUpto += BLOCK_SIZE;
         } else {
-          level0LastDocID = DocIdSetIterator.NO_MORE_DOCS;
+          level0LastDocID = NO_MORE_DOCS;
           break;
         }
       }
@@ -586,7 +569,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
         refillFullBlock();
         level0LastDocID = (int) docBuffer[BLOCK_SIZE - 1];
       } else {
-        level0LastDocID = DocIdSetIterator.NO_MORE_DOCS;
+        level0LastDocID = NO_MORE_DOCS;
         refillRemainder();
       }
     }
@@ -704,13 +687,10 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
     private int singletonDocID; // docid when there is a single pulsed posting, otherwise -1
     public EverythingEnum(FieldInfo fieldInfo) throws IOException {
-      this.docIn = null;
-      indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
+      final IndexOptions options = fieldInfo.getIndexOptions();
+      indexHasFreq = options.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
       indexHasOffsets =
-          fieldInfo
-              .getIndexOptions()
-              .compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
-              >= 0;
+          options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
       indexHasPayloads = fieldInfo.hasPayloads();
       indexHasOffsetsOrPayloads = indexHasOffsets || indexHasPayloads;
@@ -875,7 +855,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
         level1DocCountUpto += LEVEL1_NUM_DOCS;
         if (docFreq - docCountUpto < LEVEL1_NUM_DOCS) {
-          level1LastDocID = DocIdSetIterator.NO_MORE_DOCS;
+          level1LastDocID = NO_MORE_DOCS;
           break;
         }
@@ -933,7 +913,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
           level0BlockPayUpto = docIn.readVInt();
         }
       } else {
-        level0LastDocID = DocIdSetIterator.NO_MORE_DOCS;
+        level0LastDocID = NO_MORE_DOCS;
       }
       refillDocs();
@@ -1000,7 +980,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
           docIn.seek(blockEndFP);
           docCountUpto += BLOCK_SIZE;
         } else {
-          level0LastDocID = DocIdSetIterator.NO_MORE_DOCS;
+          level0LastDocID = NO_MORE_DOCS;
           break;
         }
       }
@@ -1213,70 +1193,48 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
     }
   }

-  final class BlockImpactsDocsEnum extends ImpactsEnum {
-    final ForDeltaUtil forDeltaUtil = new ForDeltaUtil();
-    final PForUtil pforUtil = new PForUtil(new ForUtil());
-    private final long[] docBuffer = new long[BLOCK_SIZE + 1];
-    private final long[] freqBuffer = new long[BLOCK_SIZE];
-    private int docBufferUpto;
-    final IndexInput docIn;
-    final PostingDecodingUtil docInUtil;
-    final boolean indexHasFreq;
-    final boolean indexHasPos;
-    private final int docFreq; // number of docs in this posting list
-    private int docCountUpto; // number of docs in or before the current block
-    private int doc; // doc we last read
-    private long prevDocID; // last doc ID of the previous block
-    private long freqFP;
+  private abstract class BlockImpactsEnum extends ImpactsEnum {
+    protected final ForDeltaUtil forDeltaUtil = new ForDeltaUtil();
+    protected final PForUtil pforUtil = new PForUtil(new ForUtil());
+    protected final long[] docBuffer = new long[BLOCK_SIZE + 1];
+    protected final long[] freqBuffer = new long[BLOCK_SIZE];
+    protected final int docFreq; // number of docs in this posting list
+    protected final IndexInput docIn;
+    protected final PostingDecodingUtil docInUtil;
+    protected int docCountUpto; // number of docs in or before the current block
+    protected int doc = -1; // doc we last read
+    protected long prevDocID = -1; // last doc ID of the previous block
+    protected int docBufferUpto = BLOCK_SIZE;
     // true if we shallow-advanced to a new block that we have not decoded yet
-    private boolean needsRefilling;
+    protected boolean needsRefilling;
     // level 0 skip data
-    private int level0LastDocID;
-    private long level0DocEndFP;
-    private final BytesRef level0SerializedImpacts;
-    private final MutableImpactList level0Impacts;
+    protected int level0LastDocID = -1;
+    protected long level0DocEndFP;
+    protected final BytesRef level0SerializedImpacts;
+    protected final MutableImpactList level0Impacts;
     // level 1 skip data
-    private int level1LastDocID;
-    private long level1DocEndFP;
-    private int level1DocCountUpto;
-    private final BytesRef level1SerializedImpacts;
-    private final MutableImpactList level1Impacts;
+    protected int level1LastDocID;
+    protected long level1DocEndFP;
+    protected int level1DocCountUpto = 0;
+    protected final BytesRef level1SerializedImpacts;
+    protected final MutableImpactList level1Impacts;

-    public BlockImpactsDocsEnum(FieldInfo fieldInfo, IntBlockTermState termState)
-        throws IOException {
-      indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
-      indexHasPos =
-          fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
-      // We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in
-      // advance()
-      docBuffer[BLOCK_SIZE] = NO_MORE_DOCS;
-      docFreq = termState.docFreq;
-      if (docFreq > 1) {
-        docIn = Lucene912PostingsReader.this.docIn.clone();
-        docInUtil = VECTORIZATION_PROVIDER.newPostingDecodingUtil(docIn);
-        prefetchPostings(docIn, termState);
-      } else {
-        docIn = null;
-        docInUtil = null;
-      }
-      doc = -1;
-      if (indexHasFreq == false) {
-        // Filling this buffer may not be cheap when doing primary key lookups, so we make sure to
-        // not fill more than `docFreq` entries.
-        Arrays.fill(freqBuffer, 0, Math.min(ForUtil.BLOCK_SIZE, docFreq), 1);
-      }
-      prevDocID = -1;
-      docCountUpto = 0;
-      level0LastDocID = -1;
+    private BlockImpactsEnum(IntBlockTermState termState) throws IOException {
+      this.docFreq = termState.docFreq;
+      this.docIn = Lucene912PostingsReader.this.docIn.clone();
+      this.docInUtil = VECTORIZATION_PROVIDER.newPostingDecodingUtil(docIn);
+      prefetchPostings(docIn, termState);
+      level0SerializedImpacts = new BytesRef(maxImpactNumBytesAtLevel0);
+      level1SerializedImpacts = new BytesRef(maxImpactNumBytesAtLevel1);
+      level0Impacts = new MutableImpactList(maxNumImpactsAtLevel0);
+      level1Impacts = new MutableImpactList(maxNumImpactsAtLevel1);
       if (docFreq < LEVEL1_NUM_DOCS) {
         level1LastDocID = NO_MORE_DOCS;
         if (docFreq > 1) {
@@ -1286,28 +1244,14 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
         level1LastDocID = -1;
         level1DocEndFP = termState.docStartFP;
       }
-      level1DocCountUpto = 0;
-      docBufferUpto = BLOCK_SIZE;
-      freqFP = -1;
-      level0SerializedImpacts = new BytesRef(maxImpactNumBytesAtLevel0);
-      level1SerializedImpacts = new BytesRef(maxImpactNumBytesAtLevel1);
-      level0Impacts = new MutableImpactList(maxNumImpactsAtLevel0);
-      level1Impacts = new MutableImpactList(maxNumImpactsAtLevel1);
+      // We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in
+      // advance()
+      docBuffer[BLOCK_SIZE] = NO_MORE_DOCS;
     }

     @Override
-    public int freq() throws IOException {
-      if (freqFP != -1) {
-        docIn.seek(freqFP);
-        pforUtil.decode(docInUtil, freqBuffer);
-        freqFP = -1;
-      }
-      return (int) freqBuffer[docBufferUpto - 1];
-    }
-
-    @Override
-    public int nextPosition() {
-      return -1;
+    public int docID() {
+      return doc;
     }

     @Override
@@ -1326,8 +1270,78 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
     }

     @Override
-    public int docID() {
-      return doc;
+    public long cost() {
+      return docFreq;
+    }
+
+    private final Impacts impacts =
+        new Impacts() {
+          private final ByteArrayDataInput scratch = new ByteArrayDataInput();
+
+          @Override
+          public int numLevels() {
+            return level1LastDocID == NO_MORE_DOCS ? 1 : 2;
+          }
+
+          @Override
+          public int getDocIdUpTo(int level) {
+            if (level == 0) {
+              return level0LastDocID;
+            }
+            return level == 1 ? level1LastDocID : NO_MORE_DOCS;
+          }
+
+          @Override
+          public List<Impact> getImpacts(int level) {
+            if (level == 0 && level0LastDocID != NO_MORE_DOCS) {
+              return readImpacts(level0SerializedImpacts, level0Impacts);
+            }
+            if (level == 1) {
+              return readImpacts(level1SerializedImpacts, level1Impacts);
+            }
+            return DUMMY_IMPACTS;
+          }
+
+          private List<Impact> readImpacts(BytesRef serialized, MutableImpactList impactsList) {
+            var scratch = this.scratch;
+            scratch.reset(serialized.bytes, 0, serialized.length);
+            Lucene912PostingsReader.readImpacts(scratch, impactsList);
+            return impactsList;
+          }
+        };
+
+    @Override
+    public Impacts getImpacts() {
+      return impacts;
+    }
+  }
+
+  final class BlockImpactsDocsEnum extends BlockImpactsEnum {
+    final boolean indexHasPos;
+
+    private long freqFP;
+
+    public BlockImpactsDocsEnum(boolean indexHasPos, IntBlockTermState termState)
+        throws IOException {
+      super(termState);
+      this.indexHasPos = indexHasPos;
+      freqFP = -1;
+    }
+
+    @Override
+    public int freq() throws IOException {
+      if (freqFP != -1) {
+        docIn.seek(freqFP);
+        pforUtil.decode(docInUtil, freqBuffer);
+        freqFP = -1;
+      }
+      return (int) freqBuffer[docBufferUpto - 1];
+    }
+
+    @Override
+    public int nextPosition() {
+      return -1;
     }

     private void refillDocs() throws IOException {
@@ -1336,15 +1350,12 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
       if (left >= BLOCK_SIZE) {
         forDeltaUtil.decodeAndPrefixSum(docInUtil, prevDocID, docBuffer);
-        if (indexHasFreq) {
-          freqFP = docIn.getFilePointer();
-          PForUtil.skip(docIn);
-        }
+        freqFP = docIn.getFilePointer();
+        PForUtil.skip(docIn);
         docCountUpto += BLOCK_SIZE;
       } else {
         // Read vInts:
-        PostingsUtil.readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq, true);
+        PostingsUtil.readVIntBlock(docIn, docBuffer, freqBuffer, left, true, true);
         prefixSum(docBuffer, left, prevDocID);
         docBuffer[left] = NO_MORE_DOCS;
         freqFP = -1;
@@ -1364,7 +1375,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
         level1DocCountUpto += LEVEL1_NUM_DOCS;
         if (docFreq - docCountUpto < LEVEL1_NUM_DOCS) {
-          level1LastDocID = DocIdSetIterator.NO_MORE_DOCS;
+          level1LastDocID = NO_MORE_DOCS;
           break;
         }
@@ -1408,7 +1419,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
           docIn.skipBytes(blockLength);
           docCountUpto += BLOCK_SIZE;
         } else {
-          level0LastDocID = DocIdSetIterator.NO_MORE_DOCS;
+          level0LastDocID = NO_MORE_DOCS;
           break;
         }
       }
@@ -1451,7 +1462,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
           level0SerializedImpacts.length = numImpactBytes;
           docIn.seek(skip0End);
         } else {
-          level0LastDocID = DocIdSetIterator.NO_MORE_DOCS;
+          level0LastDocID = NO_MORE_DOCS;
         }
         refillDocs();
@@ -1483,88 +1494,12 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
       docBufferUpto = next + 1;
       return doc;
     }
-
-    private final Impacts impacts =
-        new Impacts() {
-          private final ByteArrayDataInput scratch = new ByteArrayDataInput();
-
-          @Override
-          public int numLevels() {
-            int numLevels = 0;
-            if (level0LastDocID != NO_MORE_DOCS) {
-              numLevels++;
-            }
-            if (level1LastDocID != NO_MORE_DOCS) {
-              numLevels++;
-            }
-            if (numLevels == 0) {
-              numLevels++;
-            }
-            return numLevels;
-          }
-
-          @Override
-          public int getDocIdUpTo(int level) {
-            if (level0LastDocID != NO_MORE_DOCS) {
-              if (level == 0) {
-                return level0LastDocID;
-              }
-              level--;
-            }
-            if (level == 0) {
-              return level1LastDocID;
-            }
-            return NO_MORE_DOCS;
-          }
-
-          @Override
-          public List<Impact> getImpacts(int level) {
-            if (level0LastDocID != NO_MORE_DOCS) {
-              if (level == 0) {
-                scratch.reset(level0SerializedImpacts.bytes, 0, level0SerializedImpacts.length);
-                readImpacts(scratch, level0Impacts);
-                return level0Impacts;
-              }
-              level--;
-            }
-            if (level1LastDocID != NO_MORE_DOCS && level == 0) {
-              scratch.reset(level1SerializedImpacts.bytes, 0, level1SerializedImpacts.length);
-              readImpacts(scratch, level1Impacts);
-              return level1Impacts;
-            }
-            return DUMMY_IMPACTS;
-          }
-        };
-
-    @Override
-    public Impacts getImpacts() {
-      return impacts;
-    }
-
-    @Override
-    public long cost() {
-      return docFreq;
-    }
   }

-  final class BlockImpactsPostingsEnum extends ImpactsEnum {
-    final ForDeltaUtil forDeltaUtil = new ForDeltaUtil();
-    final PForUtil pforUtil = new PForUtil(new ForUtil());
-    private final long[] docBuffer = new long[BLOCK_SIZE + 1];
-    private final long[] freqBuffer = new long[BLOCK_SIZE];
+  final class BlockImpactsPostingsEnum extends BlockImpactsEnum {
     private final long[] posDeltaBuffer = new long[BLOCK_SIZE];
-    private int docBufferUpto;
     private int posBufferUpto;
-    final IndexInput docIn;
-    final PostingDecodingUtil docInUtil;
     final IndexInput posIn;
     final PostingDecodingUtil posInUtil;
@@ -1573,12 +1508,8 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
     final boolean indexHasPayloads;
     final boolean indexHasOffsetsOrPayloads;
-    private final int docFreq; // number of docs in this posting list
     private final long
         totalTermFreq; // sum of freqBuffer in this posting list (or docFreq when omitted)
-    private int docCountUpto; // number of docs in or before the current block
-    private int doc; // doc we last read
-    private long prevDocID; // last doc ID of the previous block
     private int freq; // freq we last read
     private int position; // current position
@@ -1591,58 +1522,32 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
     // decode vs vInt decode the block:
     private final long lastPosBlockFP;
-    // true if we shallow-advanced to a new block that we have not decoded yet
-    private boolean needsRefilling;
     // level 0 skip data
-    private int level0LastDocID;
-    private long level0DocEndFP;
     private long level0PosEndFP;
     private int level0BlockPosUpto;
-    private final BytesRefBuilder level0SerializedImpacts = new BytesRefBuilder();
-    private final MutableImpactList level0Impacts;
     // level 1 skip data
-    private int level1LastDocID;
-    private long level1DocEndFP;
-    private int level1DocCountUpto;
     private long level1PosEndFP;
     private int level1BlockPosUpto;
-    private final BytesRefBuilder level1SerializedImpacts = new BytesRefBuilder();
-    private final MutableImpactList level1Impacts;
     private final int singletonDocID; // docid when there is a single pulsed posting, otherwise -1

     public BlockImpactsPostingsEnum(FieldInfo fieldInfo, IntBlockTermState termState)
         throws IOException {
-      indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
+      super(termState);
+      final IndexOptions options = fieldInfo.getIndexOptions();
+      indexHasFreq = options.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
       indexHasOffsets =
-          fieldInfo
-              .getIndexOptions()
-              .compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
-              >= 0;
+          options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
       indexHasPayloads = fieldInfo.hasPayloads();
       indexHasOffsetsOrPayloads = indexHasOffsets || indexHasPayloads;
       this.posIn = Lucene912PostingsReader.this.posIn.clone();
       posInUtil = VECTORIZATION_PROVIDER.newPostingDecodingUtil(posIn);
-      // We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in
-      // advance()
-      docBuffer[BLOCK_SIZE] = NO_MORE_DOCS;
-      docFreq = termState.docFreq;
       // Where this term's postings start in the .pos file:
       final long posTermStartFP = termState.posStartFP;
       totalTermFreq = termState.totalTermFreq;
       singletonDocID = termState.singletonDocID;
-      if (docFreq > 1) {
-        docIn = Lucene912PostingsReader.this.docIn.clone();
-        docInUtil = VECTORIZATION_PROVIDER.newPostingDecodingUtil(docIn);
-        prefetchPostings(docIn, termState);
-      } else {
-        docIn = null;
-        docInUtil = null;
-      }
       posIn.seek(posTermStartFP);
       level1PosEndFP = posTermStartFP;
       level0PosEndFP = posTermStartFP;
@@ -1654,28 +1559,8 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
       } else {
         lastPosBlockFP = posTermStartFP + termState.lastPosBlockOffset;
       }
-      doc = -1;
-      prevDocID = -1;
-      docCountUpto = 0;
-      level0LastDocID = -1;
-      if (docFreq < LEVEL1_NUM_DOCS) {
-        level1LastDocID = NO_MORE_DOCS;
-        if (docFreq > 1) {
-          docIn.seek(termState.docStartFP);
-        }
-      } else {
-        level1LastDocID = -1;
-        level1DocEndFP = termState.docStartFP;
-      }
-      level1DocCountUpto = 0;
       level1BlockPosUpto = 0;
-      docBufferUpto = BLOCK_SIZE;
       posBufferUpto = BLOCK_SIZE;
-      level0SerializedImpacts.growNoCopy(maxImpactNumBytesAtLevel0);
-      level1SerializedImpacts.growNoCopy(maxImpactNumBytesAtLevel1);
-      level0Impacts = new MutableImpactList(maxNumImpactsAtLevel0);
-      level1Impacts = new MutableImpactList(maxNumImpactsAtLevel1);
     }

     @Override
@@ -1683,11 +1568,6 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
       return freq;
     }
-    @Override
-    public int docID() {
-      return doc;
-    }
     private void refillDocs() throws IOException {
       final int left = docFreq - docCountUpto;
       assert left >= 0;
@@ -1724,7 +1604,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
         level1DocCountUpto += LEVEL1_NUM_DOCS;
         if (docFreq - docCountUpto < LEVEL1_NUM_DOCS) {
-          level1LastDocID = DocIdSetIterator.NO_MORE_DOCS;
+          level1LastDocID = NO_MORE_DOCS;
           break;
         }
@@ -1734,8 +1614,8 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
         long skip1EndFP = docIn.readShort() + docIn.getFilePointer();
         int numImpactBytes = docIn.readShort();
         if (level1LastDocID >= target) {
-          docIn.readBytes(level1SerializedImpacts.bytes(), 0, numImpactBytes);
-          level1SerializedImpacts.setLength(numImpactBytes);
+          docIn.readBytes(level1SerializedImpacts.bytes, 0, numImpactBytes);
+          level1SerializedImpacts.length = numImpactBytes;
         } else {
           docIn.skipBytes(numImpactBytes);
         }
@@ -1778,8 +1658,8 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
         if (target <= level0LastDocID) {
           int numImpactBytes = docIn.readVInt();
-          docIn.readBytes(level0SerializedImpacts.bytes(), 0, numImpactBytes);
-          level0SerializedImpacts.setLength(numImpactBytes);
+          docIn.readBytes(level0SerializedImpacts.bytes, 0, numImpactBytes);
+          level0SerializedImpacts.length = numImpactBytes;
           level0PosEndFP += docIn.readVLong();
           level0BlockPosUpto = docIn.readByte();
           if (indexHasOffsetsOrPayloads) {
@@ -1795,7 +1675,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
           docIn.seek(level0DocEndFP);
           docCountUpto += BLOCK_SIZE;
         } else {
-          level0LastDocID = DocIdSetIterator.NO_MORE_DOCS;
+          level0LastDocID = NO_MORE_DOCS;
           break;
         }
       }
@@ -1818,68 +1698,6 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
       }
     }

-    private final Impacts impacts =
-        new Impacts() {
-          private final ByteArrayDataInput scratch = new ByteArrayDataInput();
-
-          @Override
-          public int numLevels() {
-            int numLevels = 0;
-            if (level0LastDocID != NO_MORE_DOCS) {
-              numLevels++;
-            }
-            if (level1LastDocID != NO_MORE_DOCS) {
-              numLevels++;
-            }
-            if (numLevels == 0) {
-              numLevels++;
-            }
-            return numLevels;
-          }
-
-          @Override
-          public int getDocIdUpTo(int level) {
-            if (level0LastDocID != NO_MORE_DOCS) {
-              if (level == 0) {
-                return level0LastDocID;
-              }
-              level--;
-            }
-            if (level1LastDocID != NO_MORE_DOCS && level == 0) {
-              return level1LastDocID;
-            }
-            return NO_MORE_DOCS;
-          }
-
-          @Override
-          public List<Impact> getImpacts(int level) {
-            if (level0LastDocID != NO_MORE_DOCS) {
-              if (level == 0) {
-                scratch.reset(level0SerializedImpacts.bytes(), 0, level0SerializedImpacts.length());
-                readImpacts(scratch, level0Impacts);
-                return level0Impacts;
-              }
-              level--;
-            }
-            if (level1LastDocID != NO_MORE_DOCS && level == 0) {
-              scratch.reset(level1SerializedImpacts.bytes(), 0, level1SerializedImpacts.length());
-              readImpacts(scratch, level1Impacts);
-              return level1Impacts;
-            }
-            return DUMMY_IMPACTS;
-          }
-        };
-
-    @Override
-    public Impacts getImpacts() {
-      return impacts;
-    }

     @Override
     public int nextDoc() throws IOException {
       advanceShallow(doc + 1);
@@ -1987,26 +1805,6 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
       posPendingCount--;
       return position;
     }
-
-    @Override
-    public int startOffset() {
-      return -1;
-    }
-
-    @Override
-    public int endOffset() {
-      return -1;
-    }
-
-    @Override
-    public BytesRef getPayload() {
-      return null;
-    }
-
-    @Override
-    public long cost() {
-      return docFreq;
-    }
   }

 /**