Don't clone an IndexInput if postings are inlined in the terms dict (#13585).

Adrien Grand 2024-08-01 12:46:06 +02:00
parent 47650a4314
commit 0a24769850


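The same pattern shows up in every postings enum touched below: when a term has docFreq == 1, its single posting is inlined in the terms dictionary, so the enum never reads the .doc file and therefore never needs its own clone of the shared IndexInput. A minimal sketch of that idea, under assumed names (LazyDocInExample, reset and docLength are illustrative, not fields or methods of Lucene912PostingsReader):

```java
import java.io.IOException;

import org.apache.lucene.store.IndexInput;

// Sketch of the gating applied in this commit: only clone the shared IndexInput
// (and only prefetch/seek) when docFreq > 1, because a docFreq == 1 posting is
// inlined in the terms dictionary and the .doc file is never read for it.
final class LazyDocInExample {
  private final IndexInput startDocIn; // shared input, never read from directly
  private IndexInput docIn; // per-enum clone, created lazily

  LazyDocInExample(IndexInput startDocIn) {
    this.startDocIn = startDocIn;
  }

  void reset(int docFreq, long docStartFP, long docLength) throws IOException {
    if (docFreq > 1) {
      if (docIn == null) {
        // lazy init: pay the clone cost only for terms that actually hit the .doc file
        docIn = startDocIn.clone();
      }
      docIn.prefetch(docStartFP, docLength); // hint that this range is about to be read
      docIn.seek(docStartFP);
    }
    // docFreq == 1: the single doc ID was already decoded from the terms dict
  }
}
```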
@@ -366,7 +366,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
     private int level0LastDocID;
     // level 1 skip data
     private int level1LastDocID;
-    private long level1DocEndOffset;
+    private long level1DocEndFP;
     private int level1DocCountUpto;
     private boolean needsFreq; // true if the caller actually needs frequencies
@@ -409,11 +409,13 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
       docFreq = termState.docFreq;
       totalTermFreq = indexHasFreq ? termState.totalTermFreq : docFreq;
       singletonDocID = termState.singletonDocID;
-      if (docIn == null) {
-        // lazy init
-        docIn = startDocIn.clone();
+      if (docFreq > 1) {
+        if (docIn == null) {
+          // lazy init
+          docIn = startDocIn.clone();
+        }
+        prefetchPostings(docIn, termState);
       }
-      prefetchPostings(docIn, termState);
 
       doc = -1;
       this.needsFreq = PostingsEnum.featureRequested(flags, PostingsEnum.FREQS);
@@ -425,8 +427,15 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
       prevDocID = -1;
       docCountUpto = 0;
       level0LastDocID = -1;
-      level1LastDocID = -1;
-      level1DocEndOffset = termState.docStartFP;
+      if (docFreq < LEVEL1_NUM_DOCS) {
+        level1LastDocID = NO_MORE_DOCS;
+        if (docFreq > 1) {
+          docIn.seek(termState.docStartFP);
+        }
+      } else {
+        level1LastDocID = -1;
+        level1DocEndFP = termState.docStartFP;
+      }
       level1DocCountUpto = 0;
       docBufferUpto = BLOCK_SIZE;
       freqFP = -1;
@@ -510,7 +519,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
       while (true) {
         prevDocID = level1LastDocID;
         level0LastDocID = level1LastDocID;
-        docIn.seek(level1DocEndOffset);
+        docIn.seek(level1DocEndFP);
         docCountUpto = level1DocCountUpto;
         level1DocCountUpto += LEVEL1_NUM_DOCS;
@@ -520,7 +529,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
         }
         level1LastDocID += docIn.readVInt();
-        level1DocEndOffset = docIn.readVLong() + docIn.getFilePointer();
+        level1DocEndFP = docIn.readVLong() + docIn.getFilePointer();
 
         if (level1LastDocID >= target) {
           if (indexHasFreq) {
@@ -677,7 +686,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
     private int level0LastDocID;
     private long level0PosEndFP;
     private int level0BlockPosUpto;
-    private long levelPayEndFP;
+    private long level0PayEndFP;
     private int level0BlockPayUpto;
     // level 1 skip data
     private int level1LastDocID;
@@ -754,7 +763,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
       payTermStartFP = termState.payStartFP;
       totalTermFreq = termState.totalTermFreq;
       singletonDocID = termState.singletonDocID;
-      if (docFreq > 1 || true) {
+      if (docFreq > 1) {
         if (docIn == null) {
           // lazy init
           docIn = startDocIn.clone();
@@ -768,7 +777,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
       level1PosEndFP = posTermStartFP;
       level1PayEndFP = payTermStartFP;
       level0PosEndFP = posTermStartFP;
-      levelPayEndFP = payTermStartFP;
+      level0PayEndFP = payTermStartFP;
       posPendingCount = 0;
       payloadByteUpto = 0;
       if (termState.totalTermFreq < BLOCK_SIZE) {
@@ -786,8 +795,15 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
       prevDocID = -1;
       docCountUpto = 0;
       level0LastDocID = -1;
-      level1LastDocID = -1;
-      level1DocEndFP = termState.docStartFP;
+      if (docFreq < LEVEL1_NUM_DOCS) {
+        level1LastDocID = NO_MORE_DOCS;
+        if (docFreq > 1) {
+          docIn.seek(termState.docStartFP);
+        }
+      } else {
+        level1LastDocID = -1;
+        level1DocEndFP = termState.docStartFP;
+      }
       level1DocCountUpto = 0;
       level1BlockPosUpto = 0;
       level1BlockPayUpto = 0;
@@ -841,7 +857,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
         level0PosEndFP = level1PosEndFP;
         level0BlockPosUpto = level1BlockPosUpto;
         if (indexHasOffsetsOrPayloads) {
-          levelPayEndFP = level1PayEndFP;
+          level0PayEndFP = level1PayEndFP;
           level0BlockPayUpto = level1BlockPayUpto;
         }
         docCountUpto = level1DocCountUpto;
@@ -885,8 +901,8 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
         posIn.seek(level0PosEndFP);
         posPendingCount = level0BlockPosUpto;
         if (indexHasOffsetsOrPayloads) {
-          assert levelPayEndFP >= payIn.getFilePointer();
-          payIn.seek(levelPayEndFP);
+          assert level0PayEndFP >= payIn.getFilePointer();
+          payIn.seek(level0PayEndFP);
           payloadByteUpto = level0BlockPayUpto;
         }
         posBufferUpto = BLOCK_SIZE;
@@ -902,7 +918,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
           level0PosEndFP += docIn.readVLong();
           level0BlockPosUpto = docIn.readByte();
           if (indexHasOffsetsOrPayloads) {
-            levelPayEndFP += docIn.readVLong();
+            level0PayEndFP += docIn.readVLong();
             level0BlockPayUpto = docIn.readVInt();
           }
         } else {
@@ -939,8 +955,8 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
         posIn.seek(level0PosEndFP);
         posPendingCount = level0BlockPosUpto;
         if (indexHasOffsetsOrPayloads) {
-          assert levelPayEndFP >= payIn.getFilePointer();
-          payIn.seek(levelPayEndFP);
+          assert level0PayEndFP >= payIn.getFilePointer();
+          payIn.seek(level0PayEndFP);
           payloadByteUpto = level0BlockPayUpto;
         }
         posBufferUpto = BLOCK_SIZE;
@@ -962,7 +978,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
           level0PosEndFP += docIn.readVLong();
           level0BlockPosUpto = docIn.readByte();
           if (indexHasOffsetsOrPayloads) {
-            levelPayEndFP += docIn.readVLong();
+            level0PayEndFP += docIn.readVLong();
             level0BlockPayUpto = docIn.readVInt();
           }
@@ -1246,7 +1262,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
       docBuffer[BLOCK_SIZE] = NO_MORE_DOCS;
       docFreq = termState.docFreq;
-      if (docFreq > 1 || true) {
+      if (docFreq > 1) {
         if (docIn == null) {
           // lazy init
           docIn = startDocIn.clone();
@@ -1263,8 +1279,15 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
       prevDocID = -1;
       docCountUpto = 0;
       level0LastDocID = -1;
-      level1LastDocID = -1;
-      level1DocEndFP = termState.docStartFP;
+      if (docFreq < LEVEL1_NUM_DOCS) {
+        level1LastDocID = NO_MORE_DOCS;
+        if (docFreq > 1) {
+          docIn.seek(termState.docStartFP);
+        }
+      } else {
+        level1LastDocID = -1;
+        level1DocEndFP = termState.docStartFP;
+      }
       level1DocCountUpto = 0;
       docBufferUpto = BLOCK_SIZE;
       freqFP = -1;
@@ -1626,7 +1649,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
       posTermStartFP = termState.posStartFP;
       totalTermFreq = termState.totalTermFreq;
       singletonDocID = termState.singletonDocID;
-      if (docFreq > 1 || true) {
+      if (docFreq > 1) {
         if (docIn == null) {
           // lazy init
           docIn = startDocIn.clone();
@@ -1649,8 +1672,15 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
       prevDocID = -1;
       docCountUpto = 0;
       level0LastDocID = -1;
-      level1LastDocID = -1;
-      level1DocEndFP = termState.docStartFP;
+      if (docFreq < LEVEL1_NUM_DOCS) {
+        level1LastDocID = NO_MORE_DOCS;
+        if (docFreq > 1) {
+          docIn.seek(termState.docStartFP);
+        }
+      } else {
+        level1LastDocID = -1;
+        level1DocEndFP = termState.docStartFP;
+      }
       level1DocCountUpto = 0;
       level1BlockPosUpto = 0;
       docBufferUpto = BLOCK_SIZE;
@@ -2020,7 +2050,8 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
   }
 
   private void prefetchPostings(IndexInput docIn, IntBlockTermState state) throws IOException {
-    if (state.docFreq > 1 && docIn.getFilePointer() != state.docStartFP) {
+    assert state.docFreq > 1; // Singletons are inlined in the terms dict, nothing to prefetch
+    if (docIn.getFilePointer() != state.docStartFP) {
       // Don't prefetch if the input is already positioned at the right offset, which suggests that
       // the caller is streaming the entire inverted index (e.g. for merging), let the read-ahead
       // logic do its work instead. Note that this heuristic doesn't work for terms that have skip
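For context, a standalone sketch of the prefetch heuristic described in the comment above; the helper name maybePrefetch and the 1-byte prefetch length are assumptions for illustration, not the exact code in this file:

```java
import java.io.IOException;

import org.apache.lucene.store.IndexInput;

final class PrefetchHeuristicExample {
  // Hypothetical helper mirroring the heuristic: skip the prefetch hint when the
  // input already sits at the term's start, which suggests a sequential scan
  // (e.g. a merge) where the OS read-ahead is already effective.
  static void maybePrefetch(IndexInput docIn, int docFreq, long docStartFP) throws IOException {
    assert docFreq > 1 : "singletons are inlined in the terms dict, nothing to prefetch";
    if (docIn.getFilePointer() != docStartFP) {
      docIn.prefetch(docStartFP, 1); // assumed 1-byte hint; only the start offset matters here
    }
  }
}
```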