mirror of https://github.com/apache/lucene.git

LUCENE-4498: pulse docFreq=1 in 4.1 codec

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1401284 13f79535-47bb-0310-9956-ffa450edef68

parent db2e268bec
commit 2c462fa3a6
@@ -108,6 +108,10 @@ Optimizations
 * LUCENE-4497: Don't write PosVIntCount to the positions file in
   Lucene41PostingsFormat, as it's always totalTermFreq % BLOCK_SIZE. (Robert Muir)

+* LUCENE-4498: In Lucene41PostingsFormat, when a term appears in only one document,
+  instead of writing a file pointer to a VIntBlock containing the doc id, just
+  write the doc id. (Mike McCandless, Robert Muir)
+
 Build

 * LUCENE-4451: Memory leak per unique thread caused by
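
The saving is easy to quantify. Below is a minimal sketch, not Lucene code: the VInt helper and the example numbers are assumptions, but they show what a docFreq=1 term costs in term-dictionary plus .doc bytes before and after pulsing.

// PulsingCostSketch.java -- illustrative only, not part of the patch.
public class PulsingCostSketch {

  // Bytes needed to encode v as a VInt/VLong: 7 payload bits per byte,
  // high bit set on all but the last byte.
  static int vSize(long v) {
    int bytes = 1;
    while ((v & ~0x7FL) != 0) {
      bytes++;
      v >>>= 7;
    }
    return bytes;
  }

  public static void main(String[] args) {
    int docID = 3_000_000;       // hypothetical singleton doc id
    long docFPDelta = 1_500_000; // hypothetical .doc file-pointer delta

    // Before: pointer in the term dictionary plus the doc id in the .doc file.
    int before = vSize(docFPDelta) + vSize(docID);
    // After: the doc id pulsed directly into the term dictionary.
    int after = vSize(docID);

    System.out.println("before=" + before + " bytes, after=" + after + " bytes");
    // The bigger win is at read time: no seek into the .doc file at all
    // for a singleton term.
  }
}
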
@@ -127,10 +127,10 @@ import org.apache.lucene.util.packed.PackedInts;
  *
  * <ul>
  * <li>Postings Metadata --> Header, PackedBlockSize</li>
- * <li>Term Metadata --> DocFPDelta, PosFPDelta?, PosVIntBlockFPDelta?, PayFPDelta?,
+ * <li>Term Metadata --> (DocFPDelta|SingletonDocID), PosFPDelta?, PosVIntBlockFPDelta?, PayFPDelta?,
  * SkipFPDelta?</li>
  * <li>Header, --> {@link CodecUtil#writeHeader CodecHeader}</li>
- * <li>PackedBlockSize --> {@link DataOutput#writeVInt VInt}</li>
+ * <li>PackedBlockSize, SingletonDocID --> {@link DataOutput#writeVInt VInt}</li>
  * <li>DocFPDelta, PosFPDelta, PayFPDelta, PosVIntBlockFPDelta, SkipFPDelta --> {@link DataOutput#writeVLong VLong}</li>
  * </ul>
  * <p>Notes:</p>
@@ -162,6 +162,9 @@ import org.apache.lucene.util.packed.PackedInts;
  * file. In particular, it is the length of the TermFreq data.
  * SkipDelta is only stored if DocFreq is not smaller than SkipMinimum
  * (i.e. 8 in Lucene41PostingsFormat).</li>
+ * <li>SingletonDocID is an optimization when a term only appears in one document. In this case, instead
+ * of writing a file pointer to the .doc file (DocFPDelta), and then a VIntBlock at that location, the
+ * single document ID is written to the term dictionary.</li>
  * </ul>
  * </dd>
  * </dl>
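
Since DocFreq is stored separately with the term's stats, the shared slot needs no tag byte: the reader knows which meaning applies before it reads. A hedged, self-contained round-trip illustrating the (DocFPDelta|SingletonDocID) union follows; the helper names and values are mine, and Lucene writes through its own DataOutput rather than these streams.

// TermMetadataUnionSketch.java -- illustrative only.
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;

public class TermMetadataUnionSketch {

  static void writeVLong(ByteArrayOutputStream out, long v) {
    while ((v & ~0x7FL) != 0) {
      out.write((int) ((v & 0x7F) | 0x80));
      v >>>= 7;
    }
    out.write((int) v);
  }

  static long readVLong(ByteArrayInputStream in) {
    long v = 0;
    int shift = 0;
    while (true) {
      int b = in.read();
      v |= (long) (b & 0x7F) << shift;
      if ((b & 0x80) == 0) return v;
      shift += 7;
    }
  }

  public static void main(String[] args) {
    // Writer side: one slot, two meanings.
    ByteArrayOutputStream meta = new ByteArrayOutputStream();
    int docFreq = 1;            // singleton term
    int singletonDocID = 42;
    long docFPDelta = 9_000;    // would be written instead if docFreq > 1
    if (docFreq == 1) {
      writeVLong(meta, singletonDocID);
    } else {
      writeVLong(meta, docFPDelta);
    }

    // Reader side: docFreq (from the term dictionary) selects the meaning.
    ByteArrayInputStream in = new ByteArrayInputStream(meta.toByteArray());
    if (docFreq == 1) {
      System.out.println("pulsed doc id = " + readVLong(in));
    } else {
      System.out.println("doc file pointer delta = " + readVLong(in));
    }
  }
}
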
@@ -277,7 +280,7 @@ import org.apache.lucene.util.packed.PackedInts;
  * <li>VIntBlock --> <PositionDelta[, PayloadLength?], PayloadData?,
  * OffsetDelta?, OffsetLength?><sup>PosVIntCount</sup>
  * <li>PackedPosDeltaBlock --> {@link PackedInts PackedInts}</li>
- * <li>PosVIntCount, PositionDelta, OffsetDelta, OffsetLength -->
+ * <li>PositionDelta, OffsetDelta, OffsetLength -->
  * {@link DataOutput#writeVInt VInt}</li>
  * <li>PayloadData --> {@link DataOutput#writeByte byte}<sup>PayLength</sup></li>
  * </ul>
@@ -148,6 +148,9 @@ public final class Lucene41PostingsReader extends PostingsReaderBase {
     long payStartFP;
     long skipOffset;
     long lastPosBlockOffset;
+    // docid when there is a single pulsed posting, otherwise -1
+    // freq is always implicitly totalTermFreq in this case.
+    int singletonDocID;

     // Only used by the "primary" TermState -- clones don't
     // copy this (basically they are "transient"):
@@ -170,6 +173,7 @@ public final class Lucene41PostingsReader extends PostingsReaderBase {
       payStartFP = other.payStartFP;
       lastPosBlockOffset = other.lastPosBlockOffset;
       skipOffset = other.skipOffset;
+      singletonDocID = other.singletonDocID;

       // Do not copy bytes, bytesReader (else TermState is
       // very heavy, ie drags around the entire block's
@@ -179,7 +183,7 @@ public final class Lucene41PostingsReader extends PostingsReaderBase {

     @Override
     public String toString() {
-      return super.toString() + " docStartFP=" + docStartFP + " posStartFP=" + posStartFP + " payStartFP=" + payStartFP + " lastPosBlockOffset=" + lastPosBlockOffset;
+      return super.toString() + " docStartFP=" + docStartFP + " posStartFP=" + posStartFP + " payStartFP=" + payStartFP + " lastPosBlockOffset=" + lastPosBlockOffset + " singletonDocID=" + singletonDocID;
     }
   }

@@ -223,7 +227,13 @@ public final class Lucene41PostingsReader extends PostingsReaderBase {

     final DataInput in = termState.bytesReader;
     if (isFirstTerm) {
+      if (termState.docFreq == 1) {
+        termState.singletonDocID = in.readVInt();
+        termState.docStartFP = 0;
+      } else {
+        termState.singletonDocID = -1;
         termState.docStartFP = in.readVLong();
+      }
       if (fieldHasPositions) {
         termState.posStartFP = in.readVLong();
         if (termState.totalTermFreq > BLOCK_SIZE) {
@@ -238,7 +248,12 @@ public final class Lucene41PostingsReader extends PostingsReaderBase {
         }
       }
     } else {
+      if (termState.docFreq == 1) {
+        termState.singletonDocID = in.readVInt();
+      } else {
+        termState.singletonDocID = -1;
         termState.docStartFP += in.readVLong();
+      }
       if (fieldHasPositions) {
         termState.posStartFP += in.readVLong();
         if (termState.totalTermFreq > BLOCK_SIZE) {
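
Two details in the decode branches above deserve a note: a pulsed term consumes no file-pointer delta, so on the first term docStartFP is explicitly zeroed, and on later terms it is deliberately left alone so that the next multi-doc term's delta still has a valid base. A condensed, hedged restatement follows, with stand-in types of my own (java.io.DataInput has no readVLong, so the 7-bits-per-byte decoding is spelled out).

// DecodeSketch.java -- illustrative only; TermMeta stands in for Lucene's
// term state.
import java.io.DataInput;
import java.io.IOException;

public class DecodeSketch {

  static class TermMeta {
    int docFreq;
    long docStartFP;
    int singletonDocID;
  }

  // Stands in for Lucene's DataInput.readVLong.
  static long readVLong(DataInput in) throws IOException {
    long v = 0;
    int shift = 0;
    byte b;
    do {
      b = in.readByte();
      v |= (long) (b & 0x7F) << shift;
      shift += 7;
    } while ((b & 0x80) != 0);
    return v;
  }

  static void decodeDocMeta(TermMeta t, DataInput in, boolean isFirstTerm)
      throws IOException {
    if (t.docFreq == 1) {
      t.singletonDocID = (int) readVLong(in); // pulsed doc id
      if (isFirstTerm) {
        t.docStartFP = 0; // no .doc data for this term; reset the base
      }                   // otherwise: leave the running value untouched
    } else {
      t.singletonDocID = -1;
      long delta = readVLong(in);
      t.docStartFP = isFirstTerm ? delta : t.docStartFP + delta;
    }
  }
}
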
@@ -327,13 +342,14 @@ public final class Lucene41PostingsReader extends PostingsReaderBase {

     final IndexInput startDocIn;

-    final IndexInput docIn;
+    IndexInput docIn;
     final boolean indexHasFreq;
     final boolean indexHasPos;
     final boolean indexHasOffsets;
     final boolean indexHasPayloads;

     private int docFreq; // number of docs in this posting list
+    private long totalTermFreq; // sum of freqs in this posting list (or docFreq when omitted)
     private int docUpto; // how many docs we've read
     private int doc; // doc we last read
     private int accum; // accumulator for doc deltas
@@ -354,10 +370,11 @@ public final class Lucene41PostingsReader extends PostingsReaderBase {
     private Bits liveDocs;

     private boolean needsFreq; // true if the caller actually needs frequencies
+    private int singletonDocID; // docid when there is a single pulsed posting, otherwise -1

     public BlockDocsEnum(FieldInfo fieldInfo) throws IOException {
       this.startDocIn = Lucene41PostingsReader.this.docIn;
-      this.docIn = startDocIn.clone();
+      this.docIn = null;
       indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
       indexHasPos = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
       indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
@@ -378,9 +395,17 @@ public final class Lucene41PostingsReader extends PostingsReaderBase {
       // System.out.println("  FPR.reset: termState=" + termState);
       // }
       docFreq = termState.docFreq;
+      totalTermFreq = indexHasFreq ? termState.totalTermFreq : docFreq;
       docTermStartFP = termState.docStartFP;
-      docIn.seek(docTermStartFP);
       skipOffset = termState.skipOffset;
+      singletonDocID = termState.singletonDocID;
+      if (docFreq > 1) {
+        if (docIn == null) {
+          // lazy init
+          docIn = startDocIn.clone();
+        }
+        docIn.seek(docTermStartFP);
+      }

       doc = -1;
       this.needsFreq = (flags & DocsEnum.FLAG_FREQS) != 0;
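
The constructor change pairs with the reset change: the docIn clone is now created only when a term actually has postings in the .doc file. Enums are long-lived and reused across terms, so a consumer that only ever hits singleton terms never pays for the clone. A small sketch of the pattern, with stand-ins of my own for IndexInput and its clone():

// LazyDocInSketch.java -- illustrative pattern only.
public class LazyDocInSketch {

  interface Input {
    Input cloneInput();          // stands in for IndexInput.clone()
    void seek(long filePointer);
  }

  static class DocsEnumSketch {
    private final Input startDocIn; // shared handle, never read directly
    private Input docIn;            // cloned lazily, only when docFreq > 1

    DocsEnumSketch(Input startDocIn) {
      this.startDocIn = startDocIn;
      this.docIn = null; // was: startDocIn.cloneInput() -- eager, often wasted
    }

    void reset(int docFreq, long docStartFP) {
      if (docFreq > 1) {
        if (docIn == null) {
          docIn = startDocIn.cloneInput(); // lazy init
        }
        docIn.seek(docStartFP);
      }
      // docFreq == 1: the posting is served from singletonDocID;
      // no .doc file access happens at all.
    }
  }
}
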
@@ -425,6 +450,9 @@ public final class Lucene41PostingsReader extends PostingsReaderBase {
             forUtil.skipBlock(docIn); // skip over freqs
           }
         }
+      } else if (docFreq == 1) {
+        docDeltaBuffer[0] = singletonDocID;
+        freqBuffer[0] = (int) totalTermFreq;
       } else {
         // Read vInts:
         // if (DEBUG) {
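
In refillDocs, the pulsed term behaves like a pre-decoded block of length one: the doc id goes straight into docDeltaBuffer[0] (an absolute id, there being no previous doc to delta against) and the freq is implied by totalTermFreq. The same three-line branch appears again below in BlockDocsAndPositionsEnum and EverythingEnum. A hedged sketch of the refill shape, with the packed and vInt decode paths elided:

// RefillSketch.java -- illustrative only; buffer names follow the diff,
// ForUtil/vInt decoding is elided.
public class RefillSketch {

  static final int BLOCK_SIZE = 128; // Lucene41's packed block size

  int[] docDeltaBuffer = new int[BLOCK_SIZE];
  int[] freqBuffer = new int[BLOCK_SIZE];

  // Returns how many entries were filled.
  int refillDocs(int docFreq, int docsLeft, int singletonDocID, long totalTermFreq) {
    if (docsLeft >= BLOCK_SIZE) {
      // ... decode a full packed block via ForUtil (elided) ...
      return BLOCK_SIZE;
    } else if (docFreq == 1) {
      docDeltaBuffer[0] = singletonDocID;  // absolute id, not a delta
      freqBuffer[0] = (int) totalTermFreq; // freq was never written
      return 1;
    } else {
      // ... decode the vInt-encoded tail block (elided) ...
      return docsLeft;
    }
  }
}
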
@@ -590,7 +618,7 @@ public final class Lucene41PostingsReader extends PostingsReaderBase {

     final IndexInput startDocIn;

-    final IndexInput docIn;
+    IndexInput docIn;
     final IndexInput posIn;

     final boolean indexHasOffsets;
@@ -635,10 +663,11 @@ public final class Lucene41PostingsReader extends PostingsReaderBase {
     private int nextSkipDoc;

     private Bits liveDocs;
+    private int singletonDocID; // docid when there is a single pulsed posting, otherwise -1

     public BlockDocsAndPositionsEnum(FieldInfo fieldInfo) throws IOException {
       this.startDocIn = Lucene41PostingsReader.this.docIn;
-      this.docIn = startDocIn.clone();
+      this.docIn = null;
       this.posIn = Lucene41PostingsReader.this.posIn.clone();
       encoded = new byte[MAX_ENCODED_SIZE];
       indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
@@ -660,9 +689,16 @@ public final class Lucene41PostingsReader extends PostingsReaderBase {
       docTermStartFP = termState.docStartFP;
       posTermStartFP = termState.posStartFP;
       payTermStartFP = termState.payStartFP;
-      docIn.seek(docTermStartFP);
       skipOffset = termState.skipOffset;
       totalTermFreq = termState.totalTermFreq;
+      singletonDocID = termState.singletonDocID;
+      if (docFreq > 1) {
+        if (docIn == null) {
+          // lazy init
+          docIn = startDocIn.clone();
+        }
+        docIn.seek(docTermStartFP);
+      }
       posPendingFP = posTermStartFP;
       posPendingCount = 0;
       if (termState.totalTermFreq < BLOCK_SIZE) {
@@ -705,6 +741,9 @@ public final class Lucene41PostingsReader extends PostingsReaderBase {
         // System.out.println("  fill freq block from fp=" + docIn.getFilePointer());
         // }
         forUtil.readBlock(docIn, encoded, freqBuffer);
+      } else if (docFreq == 1) {
+        docDeltaBuffer[0] = singletonDocID;
+        freqBuffer[0] = (int) totalTermFreq;
       } else {
         // Read vInts:
         // if (DEBUG) {
@@ -1002,7 +1041,7 @@ public final class Lucene41PostingsReader extends PostingsReaderBase {

     final IndexInput startDocIn;

-    final IndexInput docIn;
+    IndexInput docIn;
     final IndexInput posIn;
     final IndexInput payIn;
     final BytesRef payload;
@@ -1056,10 +1095,11 @@ public final class Lucene41PostingsReader extends PostingsReaderBase {

     private boolean needsOffsets; // true if we actually need offsets
     private boolean needsPayloads; // true if we actually need payloads
+    private int singletonDocID; // docid when there is a single pulsed posting, otherwise -1

     public EverythingEnum(FieldInfo fieldInfo) throws IOException {
       this.startDocIn = Lucene41PostingsReader.this.docIn;
-      this.docIn = startDocIn.clone();
+      this.docIn = null;
       this.posIn = Lucene41PostingsReader.this.posIn.clone();
       this.payIn = Lucene41PostingsReader.this.payIn.clone();
       encoded = new byte[MAX_ENCODED_SIZE];
@@ -1101,9 +1141,16 @@ public final class Lucene41PostingsReader extends PostingsReaderBase {
       docTermStartFP = termState.docStartFP;
       posTermStartFP = termState.posStartFP;
       payTermStartFP = termState.payStartFP;
-      docIn.seek(docTermStartFP);
       skipOffset = termState.skipOffset;
       totalTermFreq = termState.totalTermFreq;
+      singletonDocID = termState.singletonDocID;
+      if (docFreq > 1) {
+        if (docIn == null) {
+          // lazy init
+          docIn = startDocIn.clone();
+        }
+        docIn.seek(docTermStartFP);
+      }
       posPendingFP = posTermStartFP;
       payPendingFP = payTermStartFP;
       posPendingCount = 0;
@@ -1150,6 +1197,9 @@ public final class Lucene41PostingsReader extends PostingsReaderBase {
         // System.out.println("  fill freq block from fp=" + docIn.getFilePointer());
         // }
         forUtil.readBlock(docIn, encoded, freqBuffer);
+      } else if (docFreq == 1) {
+        docDeltaBuffer[0] = singletonDocID;
+        freqBuffer[0] = (int) totalTermFreq;
       } else {
         // if (DEBUG) {
         // System.out.println("  fill last vInt doc block from fp=" + docIn.getFilePointer());
@@ -354,13 +354,15 @@ public final class Lucene41PostingsWriter extends PostingsWriterBase {
     public final long payStartFP;
     public final long skipOffset;
     public final long lastPosBlockOffset;
+    public final int singletonDocID;

-    public PendingTerm(long docStartFP, long posStartFP, long payStartFP, long skipOffset, long lastPosBlockOffset) {
+    public PendingTerm(long docStartFP, long posStartFP, long payStartFP, long skipOffset, long lastPosBlockOffset, int singletonDocID) {
       this.docStartFP = docStartFP;
       this.posStartFP = posStartFP;
       this.payStartFP = payStartFP;
       this.skipOffset = skipOffset;
       this.lastPosBlockOffset = lastPosBlockOffset;
+      this.singletonDocID = singletonDocID;
     }
   }

@@ -385,6 +387,13 @@ public final class Lucene41PostingsWriter extends PostingsWriterBase {
       // }
       // }

+      // docFreq == 1, don't write the single docid/freq to a separate file along with a pointer to it.
+      final int singletonDocID;
+      if (stats.docFreq == 1) {
+        // pulse the singleton docid into the term dictionary, freq is implicitly totalTermFreq
+        singletonDocID = docDeltaBuffer[0];
+      } else {
+        singletonDocID = -1;
         // vInt encode the remaining doc deltas and freqs:
         for(int i=0;i<docBufferUpto;i++) {
           final int docDelta = docDeltaBuffer[i];
@@ -398,6 +407,7 @@ public final class Lucene41PostingsWriter extends PostingsWriterBase {
             docOut.writeVInt(freq);
           }
         }
+      }

       final long lastPosBlockOffset;

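
On the writer side the pulse is free: doc deltas are accumulated against lastDocID, which is reset to 0 when a term starts, so for a docFreq == 1 term docDeltaBuffer[0] already holds the absolute doc id. finishTerm just remembers it in the PendingTerm instead of flushing a vInt block. A simplified sketch (names follow the diff; the buffers and output are stand-ins of mine):

// FinishTermSketch.java -- illustrative only.
import java.util.ArrayList;
import java.util.List;

public class FinishTermSketch {

  record PendingTerm(long docStartFP, int singletonDocID) {}

  int[] docDeltaBuffer = new int[128];
  int docBufferUpto;
  List<Integer> docOut = new ArrayList<>(); // stand-in for the .doc file
  List<PendingTerm> pendingTerms = new ArrayList<>();

  void finishTerm(int docFreq, long docTermStartFP) {
    final int singletonDocID;
    if (docFreq == 1) {
      // pulse: keep the id for the term dictionary, write nothing to .doc
      singletonDocID = docDeltaBuffer[0];
    } else {
      singletonDocID = -1;
      for (int i = 0; i < docBufferUpto; i++) {
        docOut.add(docDeltaBuffer[i]); // vInt-encode tail deltas (simplified)
      }
    }
    pendingTerms.add(new PendingTerm(docTermStartFP, singletonDocID));
    docBufferUpto = 0;
  }
}
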
@@ -507,7 +517,7 @@ public final class Lucene41PostingsWriter extends PostingsWriterBase {
       // System.out.println("  payStartFP=" + payStartFP);
       // }

-      pendingTerms.add(new PendingTerm(docTermStartFP, posTermStartFP, payStartFP, skipOffset, lastPosBlockOffset));
+      pendingTerms.add(new PendingTerm(docTermStartFP, posTermStartFP, payStartFP, skipOffset, lastPosBlockOffset, singletonDocID));
       docBufferUpto = 0;
       posBufferUpto = 0;
       lastDocID = 0;
@@ -535,8 +545,12 @@ public final class Lucene41PostingsWriter extends PostingsWriterBase {
       for(int idx=limit-count; idx<limit; idx++) {
         PendingTerm term = pendingTerms.get(idx);

+        if (term.singletonDocID == -1) {
           bytesWriter.writeVLong(term.docStartFP - lastDocStartFP);
           lastDocStartFP = term.docStartFP;
+        } else {
+          bytesWriter.writeVInt(term.singletonDocID);
+        }

         if (fieldHasPositions) {
           bytesWriter.writeVLong(term.posStartFP - lastPosStartFP);
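
This encoder is the exact mirror of the reader's decode branch earlier in the patch: a singleton term writes its doc id as a VInt and deliberately does not advance lastDocStartFP, which is why the reader leaves its running docStartFP untouched for non-first pulsed terms. A hedged end-to-end round-trip (the VInt helpers and term data are mine; Lucene uses its own DataOutput/DataInput):

// MetadataRoundTrip.java -- illustrative only.
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;

public class MetadataRoundTrip {

  static void writeV(ByteArrayOutputStream out, long v) {
    while ((v & ~0x7FL) != 0) { out.write((int) ((v & 0x7F) | 0x80)); v >>>= 7; }
    out.write((int) v);
  }

  static long readV(ByteArrayInputStream in) {
    long v = 0; int shift = 0;
    while (true) {
      int b = in.read();
      v |= (long) (b & 0x7F) << shift;
      if ((b & 0x80) == 0) return v;
      shift += 7;
    }
  }

  public static void main(String[] args) {
    // Three terms in one block: docFreqs {5, 1, 3}, .doc start FPs for the
    // multi-doc terms, and a pulsed doc id for the singleton.
    int[] docFreq = {5, 1, 3};
    long[] docStartFP = {100, -1, 260};
    int singletonDocID = 7;

    // Encode: the singleton writes its doc id; others write FP deltas,
    // and the singleton does NOT advance lastFP.
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    long lastFP = 0;
    for (int i = 0; i < 3; i++) {
      if (docFreq[i] == 1) {
        writeV(out, singletonDocID);
      } else {
        writeV(out, docStartFP[i] - lastFP);
        lastFP = docStartFP[i];
      }
    }

    // Decode mirrors it exactly: the singleton leaves the running FP alone.
    ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
    long fp = 0;
    for (int i = 0; i < 3; i++) {
      if (docFreq[i] == 1) {
        System.out.println("term " + i + ": pulsed docID=" + readV(in));
      } else {
        fp += readV(in);
        System.out.println("term " + i + ": docStartFP=" + fp);
      }
    }
    // Prints docStartFP=100, pulsed docID=7, docStartFP=260.
  }
}
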
|
|
Loading…
Reference in New Issue