mirror of https://github.com/apache/lucene.git
LUCENE-2862: use the stats instead of an extra byte in pulsing
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1059371 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
bcbf1d462c
commit
d08d464528
|
@ -54,6 +54,7 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
|
|||
/**
 * Reads this reader's header from {@code termsIn} and then initializes the
 * wrapped postings reader from the same input.
 *
 * The reads happen in a fixed order: codec header check, then one VInt stored
 * into {@code maxPositions}, then {@code wrappedPostingsReader.init(termsIn)}.
 *
 * @param termsIn input positioned at the start of the header
 * @throws IOException declared by this method; the reads and the wrapped init
 *         operate on {@code termsIn}
 */
public void init(IndexInput termsIn) throws IOException {
|
||||
// VERSION_START is passed for both version arguments -- presumably the
// minimum and maximum accepted versions; confirm against CodecUtil.checkHeader.
CodecUtil.checkHeader(termsIn, PulsingPostingsWriterImpl.CODEC,
|
||||
PulsingPostingsWriterImpl.VERSION_START, PulsingPostingsWriterImpl.VERSION_START);
|
||||
// Counterpart of the VInt that PulsingPostingsWriterImpl.start writes right
// after its header (pending.length, commented there as maxPositions).
maxPositions = termsIn.readVInt();
|
||||
wrappedPostingsReader.init(termsIn);
|
||||
}
|
||||
|
||||
|
@ -115,8 +116,10 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
|
|||
|
||||
termState.pendingIndexTerm |= isIndexTerm;
|
||||
|
||||
// TODO: wasteful to use whole byte for this (need just a 1 bit);
|
||||
if (termsIn.readByte() == 1) {
|
||||
// total TF, but in the omitTFAP case it's computed based on docFreq.
|
||||
long count = fieldInfo.omitTermFreqAndPositions ? termState.docFreq : termState.totalTermFreq;
|
||||
|
||||
if (count <= maxPositions) {
|
||||
|
||||
// Inlined into terms dict -- just read the byte[] blob in,
|
||||
// but don't decode it now (we only decode when a DocsEnum
|
||||
|
|
|
@ -27,10 +27,10 @@ import org.apache.lucene.store.RAMOutputStream;
|
|||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CodecUtil;
|
||||
|
||||
// TODO: we now pulse entirely according to docFreq of the
|
||||
// term; it might be better to eg pulse by "net bytes used"
|
||||
// so that a term that has only 1 doc but zillions of
|
||||
// positions would not be inlined. Though this is
|
||||
// TODO: we pulse based on total TF of the term,
|
||||
// it might be better to eg pulse by "net bytes used"
|
||||
// so that a term that has only 1 posting but a huge
|
||||
// payload would not be inlined. Though this is
|
||||
// presumably rare in practice...
|
||||
|
||||
/** @lucene.experimental */
|
||||
|
@ -86,6 +86,7 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase {
|
|||
/**
 * Remembers {@code termsOut} and writes this writer's header to it, then starts
 * the wrapped postings writer on the same output.
 *
 * The writes happen in a fixed order: codec header ({@code CODEC},
 * {@code VERSION_CURRENT}), then {@code pending.length} as a VInt, then
 * {@code wrappedPostingsWriter.start(termsOut)}.
 *
 * @param termsOut output that receives the header
 * @throws IOException declared by this method; the writes and the wrapped start
 *         operate on {@code termsOut}
 */
public void start(IndexOutput termsOut) throws IOException {
|
||||
this.termsOut = termsOut;
|
||||
CodecUtil.writeHeader(termsOut, CODEC, VERSION_CURRENT);
|
||||
// PulsingPostingsReaderImpl.init reads this VInt back into maxPositions
// immediately after checking the header, so the two must stay in the same order.
termsOut.writeVInt(pending.length); // encode maxPositions in header
|
||||
wrappedPostingsWriter.start(termsOut);
|
||||
}
|
||||
|
||||
|
@ -186,7 +187,6 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase {
|
|||
pendingIsIndexTerm |= isIndexTerm;
|
||||
|
||||
if (pendingCount == -1) {
|
||||
termsOut.writeByte((byte) 0);
|
||||
wrappedPostingsWriter.finishTerm(stats, pendingIsIndexTerm);
|
||||
pendingIsIndexTerm = false;
|
||||
} else {
|
||||
|
@ -195,8 +195,6 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase {
|
|||
// term, so we fully inline our postings data into
|
||||
// terms dict, now:
|
||||
|
||||
termsOut.writeByte((byte) 1);
|
||||
|
||||
// TODO: it'd be better to share this encoding logic
|
||||
// in some inner codec that knows how to write a
|
||||
// single doc / single position, etc. This way if a
|
||||
|
|
Loading…
Reference in New Issue