mirror of
https://github.com/apache/lucene.git
synced 2025-02-08 11:05:29 +00:00
LUCENE-2862: use the stats instead of an extra byte in pulsing
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1059371 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
bcbf1d462c
commit
d08d464528
@ -54,6 +54,7 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
|
|||||||
public void init(IndexInput termsIn) throws IOException {
|
public void init(IndexInput termsIn) throws IOException {
|
||||||
CodecUtil.checkHeader(termsIn, PulsingPostingsWriterImpl.CODEC,
|
CodecUtil.checkHeader(termsIn, PulsingPostingsWriterImpl.CODEC,
|
||||||
PulsingPostingsWriterImpl.VERSION_START, PulsingPostingsWriterImpl.VERSION_START);
|
PulsingPostingsWriterImpl.VERSION_START, PulsingPostingsWriterImpl.VERSION_START);
|
||||||
|
maxPositions = termsIn.readVInt();
|
||||||
wrappedPostingsReader.init(termsIn);
|
wrappedPostingsReader.init(termsIn);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -115,8 +116,10 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
|
|||||||
|
|
||||||
termState.pendingIndexTerm |= isIndexTerm;
|
termState.pendingIndexTerm |= isIndexTerm;
|
||||||
|
|
||||||
// TODO: wasteful to use whole byte for this (need just a 1 bit);
|
// total TF, but in the omitTFAP case it's computed based on docFreq.
|
||||||
if (termsIn.readByte() == 1) {
|
long count = fieldInfo.omitTermFreqAndPositions ? termState.docFreq : termState.totalTermFreq;
|
||||||
|
|
||||||
|
if (count <= maxPositions) {
|
||||||
|
|
||||||
// Inlined into terms dict -- just read the byte[] blob in,
|
// Inlined into terms dict -- just read the byte[] blob in,
|
||||||
// but don't decode it now (we only decode when a DocsEnum
|
// but don't decode it now (we only decode when a DocsEnum
|
||||||
|
@ -27,10 +27,10 @@ import org.apache.lucene.store.RAMOutputStream;
|
|||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.CodecUtil;
|
import org.apache.lucene.util.CodecUtil;
|
||||||
|
|
||||||
// TODO: we now pulse entirely according to docFreq of the
|
// TODO: we pulse based on total TF of the term,
|
||||||
// term; it might be better to eg pulse by "net bytes used"
|
// it might be better to e.g. pulse by "net bytes used"
|
||||||
// so that a term that has only 1 doc but zillions of
|
// so that a term that has only 1 posting but a huge
|
||||||
// positions would not be inlined. Though this is
|
// payload would not be inlined. Though this is
|
||||||
// presumably rare in practice...
|
// presumably rare in practice...
|
||||||
|
|
||||||
/** @lucene.experimental */
|
/** @lucene.experimental */
|
||||||
@ -86,6 +86,7 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase {
|
|||||||
public void start(IndexOutput termsOut) throws IOException {
|
public void start(IndexOutput termsOut) throws IOException {
|
||||||
this.termsOut = termsOut;
|
this.termsOut = termsOut;
|
||||||
CodecUtil.writeHeader(termsOut, CODEC, VERSION_CURRENT);
|
CodecUtil.writeHeader(termsOut, CODEC, VERSION_CURRENT);
|
||||||
|
termsOut.writeVInt(pending.length); // encode maxPositions in header
|
||||||
wrappedPostingsWriter.start(termsOut);
|
wrappedPostingsWriter.start(termsOut);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -186,7 +187,6 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase {
|
|||||||
pendingIsIndexTerm |= isIndexTerm;
|
pendingIsIndexTerm |= isIndexTerm;
|
||||||
|
|
||||||
if (pendingCount == -1) {
|
if (pendingCount == -1) {
|
||||||
termsOut.writeByte((byte) 0);
|
|
||||||
wrappedPostingsWriter.finishTerm(stats, pendingIsIndexTerm);
|
wrappedPostingsWriter.finishTerm(stats, pendingIsIndexTerm);
|
||||||
pendingIsIndexTerm = false;
|
pendingIsIndexTerm = false;
|
||||||
} else {
|
} else {
|
||||||
@ -195,8 +195,6 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase {
|
|||||||
// term, so we fully inline our postings data into
|
// term, so we fully inline our postings data into
|
||||||
// terms dict, now:
|
// terms dict, now:
|
||||||
|
|
||||||
termsOut.writeByte((byte) 1);
|
|
||||||
|
|
||||||
// TODO: it'd be better to share this encoding logic
|
// TODO: it'd be better to share this encoding logic
|
||||||
// in some inner codec that knows how to write a
|
// in some inner codec that knows how to write a
|
||||||
// single doc / single position, etc. This way if a
|
// single doc / single position, etc. This way if a
|
||||||
|
Loading…
x
Reference in New Issue
Block a user