Misc cleanups postings codec (#13862)

Removes some obvious dead code, turns fields that do not need to be fields into local variables, makes methods and nested classes static where possible, and merges the duplicated "scratch" input fields into a single field.
This commit is contained in:
Armin Braun 2024-10-08 19:01:11 +02:00
parent 7c6237a912
commit ee70793fa8
8 changed files with 82 additions and 149 deletions

View File

@ -1,4 +1,4 @@
{ {
"lucene/core/src/java/org/apache/lucene/codecs/lucene912/ForDeltaUtil.java": "5115b12ac31537ce31d73c0a279df92060749a3a", "lucene/core/src/java/org/apache/lucene/codecs/lucene912/ForDeltaUtil.java": "f561578ccb6a95364bb62c5ed86b38ff0b4a009d",
"lucene/core/src/java/org/apache/lucene/codecs/lucene912/gen_ForDeltaUtil.py": "db6154406e68b80d2c90116b5d0bfa9ba220762a" "lucene/core/src/java/org/apache/lucene/codecs/lucene912/gen_ForDeltaUtil.py": "eea1a71be9da8a13fdd979354dc4a8c6edf21be1"
} }

View File

@ -23,7 +23,6 @@ import static org.apache.lucene.codecs.lucene912.ForUtil.*;
import java.io.IOException; import java.io.IOException;
import org.apache.lucene.internal.vectorization.PostingDecodingUtil; import org.apache.lucene.internal.vectorization.PostingDecodingUtil;
import org.apache.lucene.store.DataOutput; import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.packed.PackedInts; import org.apache.lucene.util.packed.PackedInts;
/** /**
@ -282,11 +281,6 @@ public final class ForDeltaUtil {
} }
} }
void skip(IndexInput in) throws IOException {
final int bitsPerValue = Byte.toUnsignedInt(in.readByte());
in.skipBytes(numBytes(bitsPerValue));
}
/** Delta-decode 128 integers into {@code longs}. */ /** Delta-decode 128 integers into {@code longs}. */
void decodeAndPrefixSum(int bitsPerValue, PostingDecodingUtil pdu, long base, long[] longs) void decodeAndPrefixSum(int bitsPerValue, PostingDecodingUtil pdu, long base, long[] longs)
throws IOException { throws IOException {

View File

@ -77,8 +77,6 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
private final int maxNumImpactsAtLevel1; private final int maxNumImpactsAtLevel1;
private final int maxImpactNumBytesAtLevel1; private final int maxImpactNumBytesAtLevel1;
private final int version;
/** Sole constructor. */ /** Sole constructor. */
public Lucene912PostingsReader(SegmentReadState state) throws IOException { public Lucene912PostingsReader(SegmentReadState state) throws IOException {
String metaName = String metaName =
@ -87,6 +85,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
final long expectedDocFileLength, expectedPosFileLength, expectedPayFileLength; final long expectedDocFileLength, expectedPosFileLength, expectedPayFileLength;
ChecksumIndexInput metaIn = null; ChecksumIndexInput metaIn = null;
boolean success = false; boolean success = false;
int version;
try { try {
metaIn = state.directory.openChecksumInput(metaName); metaIn = state.directory.openChecksumInput(metaName);
version = version =
@ -236,13 +235,6 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute) DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute)
throws IOException { throws IOException {
final IntBlockTermState termState = (IntBlockTermState) _termState; final IntBlockTermState termState = (IntBlockTermState) _termState;
final boolean fieldHasPositions =
fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
final boolean fieldHasOffsets =
fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
>= 0;
final boolean fieldHasPayloads = fieldInfo.hasPayloads();
if (absolute) { if (absolute) {
termState.docStartFP = 0; termState.docStartFP = 0;
termState.posStartFP = 0; termState.posStartFP = 0;
@ -263,9 +255,13 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
termState.singletonDocID += BitUtil.zigZagDecode(l >>> 1); termState.singletonDocID += BitUtil.zigZagDecode(l >>> 1);
} }
if (fieldHasPositions) { if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
termState.posStartFP += in.readVLong(); termState.posStartFP += in.readVLong();
if (fieldHasOffsets || fieldHasPayloads) { if (fieldInfo
.getIndexOptions()
.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
>= 0
|| fieldInfo.hasPayloads()) {
termState.payStartFP += in.readVLong(); termState.payStartFP += in.readVLong();
} }
if (termState.totalTermFreq > BLOCK_SIZE) { if (termState.totalTermFreq > BLOCK_SIZE) {
@ -344,17 +340,14 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
final class BlockDocsEnum extends PostingsEnum { final class BlockDocsEnum extends PostingsEnum {
final ForUtil forUtil = new ForUtil();
final ForDeltaUtil forDeltaUtil = new ForDeltaUtil(); final ForDeltaUtil forDeltaUtil = new ForDeltaUtil();
final PForUtil pforUtil = new PForUtil(forUtil); final PForUtil pforUtil = new PForUtil(new ForUtil());
private final long[] docBuffer = new long[BLOCK_SIZE + 1]; private final long[] docBuffer = new long[BLOCK_SIZE + 1];
private final long[] freqBuffer = new long[BLOCK_SIZE]; private final long[] freqBuffer = new long[BLOCK_SIZE];
private int docBufferUpto; private int docBufferUpto;
final IndexInput startDocIn;
IndexInput docIn; IndexInput docIn;
PostingDecodingUtil docInUtil; PostingDecodingUtil docInUtil;
final boolean indexHasFreq; final boolean indexHasFreq;
@ -378,8 +371,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
private int singletonDocID; // docid when there is a single pulsed posting, otherwise -1 private int singletonDocID; // docid when there is a single pulsed posting, otherwise -1
private long freqFP; private long freqFP;
public BlockDocsEnum(FieldInfo fieldInfo) throws IOException { public BlockDocsEnum(FieldInfo fieldInfo) {
this.startDocIn = Lucene912PostingsReader.this.docIn;
this.docIn = null; this.docIn = null;
indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0; indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
indexHasPos = indexHasPos =
@ -396,7 +388,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
} }
public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) { public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) {
return docIn == startDocIn return docIn == Lucene912PostingsReader.this.docIn
&& indexHasFreq && indexHasFreq
== (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0) == (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0)
&& indexHasPos && indexHasPos
@ -417,7 +409,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
if (docFreq > 1) { if (docFreq > 1) {
if (docIn == null) { if (docIn == null) {
// lazy init // lazy init
docIn = startDocIn.clone(); docIn = Lucene912PostingsReader.this.docIn.clone();
docInUtil = VECTORIZATION_PROVIDER.newPostingDecodingUtil(docIn); docInUtil = VECTORIZATION_PROVIDER.newPostingDecodingUtil(docIn);
} }
prefetchPostings(docIn, termState); prefetchPostings(docIn, termState);
@ -460,22 +452,22 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
} }
@Override @Override
public int nextPosition() throws IOException { public int nextPosition() {
return -1; return -1;
} }
@Override @Override
public int startOffset() throws IOException { public int startOffset() {
return -1; return -1;
} }
@Override @Override
public int endOffset() throws IOException { public int endOffset() {
return -1; return -1;
} }
@Override @Override
public BytesRef getPayload() throws IOException { public BytesRef getPayload() {
return null; return null;
} }
@ -493,7 +485,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
if (needsFreq) { if (needsFreq) {
freqFP = docIn.getFilePointer(); freqFP = docIn.getFilePointer();
} }
pforUtil.skip(docIn); PForUtil.skip(docIn);
} }
docCountUpto += BLOCK_SIZE; docCountUpto += BLOCK_SIZE;
prevDocID = docBuffer[BLOCK_SIZE - 1]; prevDocID = docBuffer[BLOCK_SIZE - 1];
@ -629,9 +621,8 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
final class EverythingEnum extends PostingsEnum { final class EverythingEnum extends PostingsEnum {
final ForUtil forUtil = new ForUtil();
final ForDeltaUtil forDeltaUtil = new ForDeltaUtil(); final ForDeltaUtil forDeltaUtil = new ForDeltaUtil();
final PForUtil pforUtil = new PForUtil(forUtil); final PForUtil pforUtil = new PForUtil(new ForUtil());
private final long[] docBuffer = new long[BLOCK_SIZE + 1]; private final long[] docBuffer = new long[BLOCK_SIZE + 1];
private final long[] freqBuffer = new long[BLOCK_SIZE + 1]; private final long[] freqBuffer = new long[BLOCK_SIZE + 1];
@ -652,8 +643,6 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
private int docBufferUpto; private int docBufferUpto;
private int posBufferUpto; private int posBufferUpto;
final IndexInput startDocIn;
IndexInput docIn; IndexInput docIn;
PostingDecodingUtil docInUtil; PostingDecodingUtil docInUtil;
final IndexInput posIn; final IndexInput posIn;
@ -663,7 +652,6 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
final BytesRef payload; final BytesRef payload;
final boolean indexHasFreq; final boolean indexHasFreq;
final boolean indexHasPos;
final boolean indexHasOffsets; final boolean indexHasOffsets;
final boolean indexHasPayloads; final boolean indexHasPayloads;
final boolean indexHasOffsetsOrPayloads; final boolean indexHasOffsetsOrPayloads;
@ -680,13 +668,6 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
// skip these to "catch up": // skip these to "catch up":
private long posPendingCount; private long posPendingCount;
// Where this term's postings start in the .pos file:
private long posTermStartFP;
// Where this term's payloads/offsets start in the .pay
// file:
private long payTermStartFP;
// File pointer where the last (vInt encoded) pos delta // File pointer where the last (vInt encoded) pos delta
// block is. We need this to know whether to bulk // block is. We need this to know whether to bulk
// decode vs vInt decode the block: // decode vs vInt decode the block:
@ -713,11 +694,8 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
private int singletonDocID; // docid when there is a single pulsed posting, otherwise -1 private int singletonDocID; // docid when there is a single pulsed posting, otherwise -1
public EverythingEnum(FieldInfo fieldInfo) throws IOException { public EverythingEnum(FieldInfo fieldInfo) throws IOException {
this.startDocIn = Lucene912PostingsReader.this.docIn;
this.docIn = null; this.docIn = null;
indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0; indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
indexHasPos =
fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
indexHasOffsets = indexHasOffsets =
fieldInfo fieldInfo
.getIndexOptions() .getIndexOptions()
@ -761,7 +739,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
} }
public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) { public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) {
return docIn == startDocIn return docIn == Lucene912PostingsReader.this.docIn
&& indexHasOffsets && indexHasOffsets
== (fieldInfo == (fieldInfo
.getIndexOptions() .getIndexOptions()
@ -772,14 +750,17 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
public PostingsEnum reset(IntBlockTermState termState, int flags) throws IOException { public PostingsEnum reset(IntBlockTermState termState, int flags) throws IOException {
docFreq = termState.docFreq; docFreq = termState.docFreq;
posTermStartFP = termState.posStartFP; // Where this term's postings start in the .pos file:
payTermStartFP = termState.payStartFP; final long posTermStartFP = termState.posStartFP;
// Where this term's payloads/offsets start in the .pay
// file:
final long payTermStartFP = termState.payStartFP;
totalTermFreq = termState.totalTermFreq; totalTermFreq = termState.totalTermFreq;
singletonDocID = termState.singletonDocID; singletonDocID = termState.singletonDocID;
if (docFreq > 1) { if (docFreq > 1) {
if (docIn == null) { if (docIn == null) {
// lazy init // lazy init
docIn = startDocIn.clone(); docIn = Lucene912PostingsReader.this.docIn.clone();
docInUtil = VECTORIZATION_PROVIDER.newPostingDecodingUtil(docIn); docInUtil = VECTORIZATION_PROVIDER.newPostingDecodingUtil(docIn);
} }
prefetchPostings(docIn, termState); prefetchPostings(docIn, termState);
@ -829,7 +810,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
} }
@Override @Override
public int freq() throws IOException { public int freq() {
return freq; return freq;
} }
@ -1054,11 +1035,11 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
toSkip -= leftInBlock; toSkip -= leftInBlock;
while (toSkip >= BLOCK_SIZE) { while (toSkip >= BLOCK_SIZE) {
assert posIn.getFilePointer() != lastPosBlockFP; assert posIn.getFilePointer() != lastPosBlockFP;
pforUtil.skip(posIn); PForUtil.skip(posIn);
if (indexHasPayloads) { if (indexHasPayloads) {
// Skip payloadLength block: // Skip payloadLength block:
pforUtil.skip(payIn); PForUtil.skip(payIn);
// Skip payloadBytes block: // Skip payloadBytes block:
int numBytes = payIn.readVInt(); int numBytes = payIn.readVInt();
@ -1066,14 +1047,13 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
} }
if (indexHasOffsets) { if (indexHasOffsets) {
pforUtil.skip(payIn); PForUtil.skip(payIn);
pforUtil.skip(payIn); PForUtil.skip(payIn);
} }
toSkip -= BLOCK_SIZE; toSkip -= BLOCK_SIZE;
} }
refillPositions(); refillPositions();
payloadByteUpto = 0; payloadByteUpto = 0;
posBufferUpto = 0;
final int toSkipInt = (int) toSkip; final int toSkipInt = (int) toSkip;
if (indexHasPayloads) { if (indexHasPayloads) {
for (int i = 0; i < toSkipInt; ++i) { for (int i = 0; i < toSkipInt; ++i) {
@ -1137,7 +1117,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
} else { } else {
// this works, because when writing a vint block we always force the first length to be // this works, because when writing a vint block we always force the first length to be
// written // written
pforUtil.skip(payIn); // skip over lengths PForUtil.skip(payIn); // skip over lengths
int numBytes = payIn.readVInt(); // read length of payloadBytes int numBytes = payIn.readVInt(); // read length of payloadBytes
payIn.seek(payIn.getFilePointer() + numBytes); // skip over payloadBytes payIn.seek(payIn.getFilePointer() + numBytes); // skip over payloadBytes
} }
@ -1151,8 +1131,8 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
} else { } else {
// this works, because when writing a vint block we always force the first length to be // this works, because when writing a vint block we always force the first length to be
// written // written
pforUtil.skip(payIn); // skip over starts PForUtil.skip(payIn); // skip over starts
pforUtil.skip(payIn); // skip over lengths PForUtil.skip(payIn); // skip over lengths
} }
} }
} }
@ -1219,24 +1199,20 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
final class BlockImpactsDocsEnum extends ImpactsEnum { final class BlockImpactsDocsEnum extends ImpactsEnum {
final ForUtil forUtil = new ForUtil();
final ForDeltaUtil forDeltaUtil = new ForDeltaUtil(); final ForDeltaUtil forDeltaUtil = new ForDeltaUtil();
final PForUtil pforUtil = new PForUtil(forUtil); final PForUtil pforUtil = new PForUtil(new ForUtil());
private final long[] docBuffer = new long[BLOCK_SIZE + 1]; private final long[] docBuffer = new long[BLOCK_SIZE + 1];
private final long[] freqBuffer = new long[BLOCK_SIZE]; private final long[] freqBuffer = new long[BLOCK_SIZE];
private int docBufferUpto; private int docBufferUpto;
final IndexInput startDocIn;
final IndexInput docIn; final IndexInput docIn;
final PostingDecodingUtil docInUtil; final PostingDecodingUtil docInUtil;
final boolean indexHasFreq; final boolean indexHasFreq;
final boolean indexHasPos; final boolean indexHasPos;
final boolean indexHasOffsetsOrPayloads;
private int docFreq; // number of docs in this posting list private final int docFreq; // number of docs in this posting list
private int docCountUpto; // number of docs in or before the current block private int docCountUpto; // number of docs in or before the current block
private int doc; // doc we last read private int doc; // doc we last read
private long prevDocID; // last doc ID of the previous block private long prevDocID; // last doc ID of the previous block
@ -1245,39 +1221,32 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
// true if we shallow-advanced to a new block that we have not decoded yet // true if we shallow-advanced to a new block that we have not decoded yet
private boolean needsRefilling; private boolean needsRefilling;
private final ByteArrayDataInput scratch = new ByteArrayDataInput();
// level 0 skip data // level 0 skip data
private int level0LastDocID; private int level0LastDocID;
private long level0DocEndFP; private long level0DocEndFP;
private final BytesRef level0SerializedImpacts; private final BytesRef level0SerializedImpacts;
private final ByteArrayDataInput level0SerializedImpactsIn = new ByteArrayDataInput();
private final MutableImpactList level0Impacts; private final MutableImpactList level0Impacts;
// level 1 skip data // level 1 skip data
private int level1LastDocID; private int level1LastDocID;
private long level1DocEndFP; private long level1DocEndFP;
private int level1DocCountUpto; private int level1DocCountUpto;
private final BytesRef level1SerializedImpacts; private final BytesRef level1SerializedImpacts;
private final ByteArrayDataInput level1SerializedImpactsIn = new ByteArrayDataInput();
private final MutableImpactList level1Impacts; private final MutableImpactList level1Impacts;
public BlockImpactsDocsEnum(FieldInfo fieldInfo, IntBlockTermState termState) public BlockImpactsDocsEnum(FieldInfo fieldInfo, IntBlockTermState termState)
throws IOException { throws IOException {
this.startDocIn = Lucene912PostingsReader.this.docIn;
indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0; indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
indexHasPos = indexHasPos =
fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
indexHasOffsetsOrPayloads =
fieldInfo
.getIndexOptions()
.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
>= 0
|| fieldInfo.hasPayloads();
// We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in // We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in
// advance() // advance()
docBuffer[BLOCK_SIZE] = NO_MORE_DOCS; docBuffer[BLOCK_SIZE] = NO_MORE_DOCS;
docFreq = termState.docFreq; docFreq = termState.docFreq;
if (docFreq > 1) { if (docFreq > 1) {
docIn = startDocIn.clone(); docIn = Lucene912PostingsReader.this.docIn.clone();
docInUtil = VECTORIZATION_PROVIDER.newPostingDecodingUtil(docIn); docInUtil = VECTORIZATION_PROVIDER.newPostingDecodingUtil(docIn);
prefetchPostings(docIn, termState); prefetchPostings(docIn, termState);
} else { } else {
@ -1323,22 +1292,22 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
} }
@Override @Override
public int nextPosition() throws IOException { public int nextPosition() {
return -1; return -1;
} }
@Override @Override
public int startOffset() throws IOException { public int startOffset() {
return -1; return -1;
} }
@Override @Override
public int endOffset() throws IOException { public int endOffset() {
return -1; return -1;
} }
@Override @Override
public BytesRef getPayload() throws IOException { public BytesRef getPayload() {
return null; return null;
} }
@ -1356,7 +1325,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
if (indexHasFreq) { if (indexHasFreq) {
freqFP = docIn.getFilePointer(); freqFP = docIn.getFilePointer();
pforUtil.skip(docIn); PForUtil.skip(docIn);
} }
docCountUpto += BLOCK_SIZE; docCountUpto += BLOCK_SIZE;
} else { } else {
@ -1502,7 +1471,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
} }
@Override @Override
public Impacts getImpacts() throws IOException { public Impacts getImpacts() {
return new Impacts() { return new Impacts() {
@Override @Override
@ -1529,13 +1498,9 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
level--; level--;
} }
if (level1LastDocID != NO_MORE_DOCS) {
if (level == 0) { if (level == 0) {
return level1LastDocID; return level1LastDocID;
} }
level--;
}
return NO_MORE_DOCS; return NO_MORE_DOCS;
} }
@ -1543,23 +1508,18 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
public List<Impact> getImpacts(int level) { public List<Impact> getImpacts(int level) {
if (level0LastDocID != NO_MORE_DOCS) { if (level0LastDocID != NO_MORE_DOCS) {
if (level == 0) { if (level == 0) {
level0SerializedImpactsIn.reset( scratch.reset(level0SerializedImpacts.bytes, 0, level0SerializedImpacts.length);
level0SerializedImpacts.bytes, 0, level0SerializedImpacts.length); readImpacts(scratch, level0Impacts);
readImpacts(level0SerializedImpactsIn, level0Impacts);
return level0Impacts; return level0Impacts;
} }
level--; level--;
} }
if (level1LastDocID != NO_MORE_DOCS) { if (level1LastDocID != NO_MORE_DOCS && level == 0) {
if (level == 0) { scratch.reset(level1SerializedImpacts.bytes, 0, level1SerializedImpacts.length);
level1SerializedImpactsIn.reset( readImpacts(scratch, level1Impacts);
level1SerializedImpacts.bytes, 0, level1SerializedImpacts.length);
readImpacts(level1SerializedImpactsIn, level1Impacts);
return level1Impacts; return level1Impacts;
} }
level--;
}
return Collections.singletonList(new Impact(Integer.MAX_VALUE, 1L)); return Collections.singletonList(new Impact(Integer.MAX_VALUE, 1L));
} }
@ -1574,9 +1534,8 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
final class BlockImpactsPostingsEnum extends ImpactsEnum { final class BlockImpactsPostingsEnum extends ImpactsEnum {
final ForUtil forUtil = new ForUtil();
final ForDeltaUtil forDeltaUtil = new ForDeltaUtil(); final ForDeltaUtil forDeltaUtil = new ForDeltaUtil();
final PForUtil pforUtil = new PForUtil(forUtil); final PForUtil pforUtil = new PForUtil(new ForUtil());
private final long[] docBuffer = new long[BLOCK_SIZE + 1]; private final long[] docBuffer = new long[BLOCK_SIZE + 1];
private final long[] freqBuffer = new long[BLOCK_SIZE]; private final long[] freqBuffer = new long[BLOCK_SIZE];
@ -1585,21 +1544,19 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
private int docBufferUpto; private int docBufferUpto;
private int posBufferUpto; private int posBufferUpto;
final IndexInput startDocIn;
final IndexInput docIn; final IndexInput docIn;
final PostingDecodingUtil docInUtil; final PostingDecodingUtil docInUtil;
final IndexInput posIn; final IndexInput posIn;
final PostingDecodingUtil posInUtil; final PostingDecodingUtil posInUtil;
final boolean indexHasFreq; final boolean indexHasFreq;
final boolean indexHasPos;
final boolean indexHasOffsets; final boolean indexHasOffsets;
final boolean indexHasPayloads; final boolean indexHasPayloads;
final boolean indexHasOffsetsOrPayloads; final boolean indexHasOffsetsOrPayloads;
private int docFreq; // number of docs in this posting list private final int docFreq; // number of docs in this posting list
private long totalTermFreq; // sum of freqBuffer in this posting list (or docFreq when omitted) private final long
totalTermFreq; // sum of freqBuffer in this posting list (or docFreq when omitted)
private int docCountUpto; // number of docs in or before the current block private int docCountUpto; // number of docs in or before the current block
private int doc; // doc we last read private int doc; // doc we last read
private long prevDocID; // last doc ID of the previous block private long prevDocID; // last doc ID of the previous block
@ -1610,24 +1567,22 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
// skip these to "catch up": // skip these to "catch up":
private long posPendingCount; private long posPendingCount;
// Where this term's postings start in the .pos file:
private long posTermStartFP;
// File pointer where the last (vInt encoded) pos delta // File pointer where the last (vInt encoded) pos delta
// block is. We need this to know whether to bulk // block is. We need this to know whether to bulk
// decode vs vInt decode the block: // decode vs vInt decode the block:
private long lastPosBlockFP; private final long lastPosBlockFP;
// true if we shallow-advanced to a new block that we have not decoded yet // true if we shallow-advanced to a new block that we have not decoded yet
private boolean needsRefilling; private boolean needsRefilling;
private final ByteArrayDataInput scratch = new ByteArrayDataInput();
// level 0 skip data // level 0 skip data
private int level0LastDocID; private int level0LastDocID;
private long level0DocEndFP; private long level0DocEndFP;
private long level0PosEndFP; private long level0PosEndFP;
private int level0BlockPosUpto; private int level0BlockPosUpto;
private final BytesRefBuilder level0SerializedImpacts = new BytesRefBuilder(); private final BytesRefBuilder level0SerializedImpacts = new BytesRefBuilder();
private final ByteArrayDataInput level0SerializedImpactsIn = new ByteArrayDataInput();
private final MutableImpactList level0Impacts; private final MutableImpactList level0Impacts;
// level 1 skip data // level 1 skip data
private int level1LastDocID; private int level1LastDocID;
@ -1636,17 +1591,13 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
private long level1PosEndFP; private long level1PosEndFP;
private int level1BlockPosUpto; private int level1BlockPosUpto;
private final BytesRefBuilder level1SerializedImpacts = new BytesRefBuilder(); private final BytesRefBuilder level1SerializedImpacts = new BytesRefBuilder();
private final ByteArrayDataInput level1SerializedImpactsIn = new ByteArrayDataInput();
private final MutableImpactList level1Impacts; private final MutableImpactList level1Impacts;
private int singletonDocID; // docid when there is a single pulsed posting, otherwise -1 private final int singletonDocID; // docid when there is a single pulsed posting, otherwise -1
public BlockImpactsPostingsEnum(FieldInfo fieldInfo, IntBlockTermState termState) public BlockImpactsPostingsEnum(FieldInfo fieldInfo, IntBlockTermState termState)
throws IOException { throws IOException {
this.startDocIn = Lucene912PostingsReader.this.docIn;
indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0; indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
indexHasPos =
fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
indexHasOffsets = indexHasOffsets =
fieldInfo fieldInfo
.getIndexOptions() .getIndexOptions()
@ -1663,11 +1614,12 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
docBuffer[BLOCK_SIZE] = NO_MORE_DOCS; docBuffer[BLOCK_SIZE] = NO_MORE_DOCS;
docFreq = termState.docFreq; docFreq = termState.docFreq;
posTermStartFP = termState.posStartFP; // Where this term's postings start in the .pos file:
final long posTermStartFP = termState.posStartFP;
totalTermFreq = termState.totalTermFreq; totalTermFreq = termState.totalTermFreq;
singletonDocID = termState.singletonDocID; singletonDocID = termState.singletonDocID;
if (docFreq > 1) { if (docFreq > 1) {
docIn = startDocIn.clone(); docIn = Lucene912PostingsReader.this.docIn.clone();
docInUtil = VECTORIZATION_PROVIDER.newPostingDecodingUtil(docIn); docInUtil = VECTORIZATION_PROVIDER.newPostingDecodingUtil(docIn);
prefetchPostings(docIn, termState); prefetchPostings(docIn, termState);
} else { } else {
@ -1710,7 +1662,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
} }
@Override @Override
public int freq() throws IOException { public int freq() {
return freq; return freq;
} }
@ -1850,7 +1802,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
} }
@Override @Override
public Impacts getImpacts() throws IOException { public Impacts getImpacts() {
return new Impacts() { return new Impacts() {
@Override @Override
@ -1877,12 +1829,9 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
level--; level--;
} }
if (level1LastDocID != NO_MORE_DOCS) { if (level1LastDocID != NO_MORE_DOCS && level == 0) {
if (level == 0) {
return level1LastDocID; return level1LastDocID;
} }
level--;
}
return NO_MORE_DOCS; return NO_MORE_DOCS;
} }
@ -1891,23 +1840,18 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
public List<Impact> getImpacts(int level) { public List<Impact> getImpacts(int level) {
if (level0LastDocID != NO_MORE_DOCS) { if (level0LastDocID != NO_MORE_DOCS) {
if (level == 0) { if (level == 0) {
level0SerializedImpactsIn.reset( scratch.reset(level0SerializedImpacts.bytes(), 0, level0SerializedImpacts.length());
level0SerializedImpacts.bytes(), 0, level0SerializedImpacts.length()); readImpacts(scratch, level0Impacts);
readImpacts(level0SerializedImpactsIn, level0Impacts);
return level0Impacts; return level0Impacts;
} }
level--; level--;
} }
if (level1LastDocID != NO_MORE_DOCS) { if (level1LastDocID != NO_MORE_DOCS && level == 0) {
if (level == 0) { scratch.reset(level1SerializedImpacts.bytes(), 0, level1SerializedImpacts.length());
level1SerializedImpactsIn.reset( readImpacts(scratch, level1Impacts);
level1SerializedImpacts.bytes(), 0, level1SerializedImpacts.length());
readImpacts(level1SerializedImpactsIn, level1Impacts);
return level1Impacts; return level1Impacts;
} }
level--;
}
return Collections.singletonList(new Impact(Integer.MAX_VALUE, 1L)); return Collections.singletonList(new Impact(Integer.MAX_VALUE, 1L));
} }
@ -1962,7 +1906,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
toSkip -= leftInBlock; toSkip -= leftInBlock;
while (toSkip >= BLOCK_SIZE) { while (toSkip >= BLOCK_SIZE) {
assert posIn.getFilePointer() != lastPosBlockFP; assert posIn.getFilePointer() != lastPosBlockFP;
pforUtil.skip(posIn); PForUtil.skip(posIn);
toSkip -= BLOCK_SIZE; toSkip -= BLOCK_SIZE;
} }
refillPositions(); refillPositions();
@ -2067,7 +2011,8 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
} }
} }
private void prefetchPostings(IndexInput docIn, IntBlockTermState state) throws IOException { private static void prefetchPostings(IndexInput docIn, IntBlockTermState state)
throws IOException {
assert state.docFreq > 1; // Singletons are inlined in the terms dict, nothing to prefetch assert state.docFreq > 1; // Singletons are inlined in the terms dict, nothing to prefetch
if (docIn.getFilePointer() != state.docStartFP) { if (docIn.getFilePointer() != state.docStartFP) {
// Don't prefetch if the input is already positioned at the right offset, which suggests that // Don't prefetch if the input is already positioned at the right offset, which suggests that

View File

@ -342,7 +342,7 @@ public class Lucene912PostingsWriter extends PushPostingsWriterBase {
} }
@Override @Override
public void finishDoc() throws IOException { public void finishDoc() {
docBufferUpto++; docBufferUpto++;
docCount++; docCount++;
@ -443,7 +443,6 @@ public class Lucene912PostingsWriter extends PushPostingsWriterBase {
private void writeLevel1SkipData() throws IOException { private void writeLevel1SkipData() throws IOException {
docOut.writeVInt(docID - level1LastDocID); docOut.writeVInt(docID - level1LastDocID);
long numImpactBytes = scratchOutput.size();
final long level1End; final long level1End;
if (writeFreqs) { if (writeFreqs) {
List<Impact> impacts = level1CompetitiveFreqNormAccumulator.getCompetitiveFreqNormPairs(); List<Impact> impacts = level1CompetitiveFreqNormAccumulator.getCompetitiveFreqNormPairs();
@ -451,7 +450,7 @@ public class Lucene912PostingsWriter extends PushPostingsWriterBase {
maxNumImpactsAtLevel1 = impacts.size(); maxNumImpactsAtLevel1 = impacts.size();
} }
writeImpacts(impacts, scratchOutput); writeImpacts(impacts, scratchOutput);
numImpactBytes = scratchOutput.size(); long numImpactBytes = scratchOutput.size();
if (numImpactBytes > maxImpactNumBytesAtLevel1) { if (numImpactBytes > maxImpactNumBytesAtLevel1) {
maxImpactNumBytesAtLevel1 = Math.toIntExact(numImpactBytes); maxImpactNumBytesAtLevel1 = Math.toIntExact(numImpactBytes);
} }

View File

@ -121,7 +121,7 @@ final class PForUtil {
} }
/** Skip 128 integers. */ /** Skip 128 integers. */
void skip(DataInput in) throws IOException { static void skip(DataInput in) throws IOException {
final int token = Byte.toUnsignedInt(in.readByte()); final int token = Byte.toUnsignedInt(in.readByte());
final int bitsPerValue = token & 0x1f; final int bitsPerValue = token & 0x1f;
final int numExceptions = token >>> 5; final int numExceptions = token >>> 5;

View File

@ -308,11 +308,6 @@ public final class ForDeltaUtil {
} }
} }
void skip(IndexInput in) throws IOException {
final int bitsPerValue = Byte.toUnsignedInt(in.readByte());
in.skipBytes(numBytes(bitsPerValue));
}
""" """
def primitive_size_for_bpv(bpv): def primitive_size_for_bpv(bpv):

View File

@ -254,7 +254,7 @@ public abstract class PerFieldDocValuesFormat extends DocValuesFormat {
} }
} }
private class FieldsReader extends DocValuesProducer { private static class FieldsReader extends DocValuesProducer {
private final Map<String, DocValuesProducer> fields = new HashMap<>(); private final Map<String, DocValuesProducer> fields = new HashMap<>();
private final Map<String, DocValuesProducer> formats = new HashMap<>(); private final Map<String, DocValuesProducer> formats = new HashMap<>();

View File

@ -46,7 +46,7 @@ public class TestPForUtil extends LuceneTestCase {
final PForUtil pforUtil = new PForUtil(forUtil); final PForUtil pforUtil = new PForUtil(forUtil);
for (int i = 0; i < iterations; ++i) { for (int i = 0; i < iterations; ++i) {
if (random().nextInt(5) == 0) { if (random().nextInt(5) == 0) {
pforUtil.skip(in); PForUtil.skip(in);
continue; continue;
} }
final long[] restored = new long[ForUtil.BLOCK_SIZE]; final long[] restored = new long[ForUtil.BLOCK_SIZE];