mirror of https://github.com/apache/lucene.git
LUCENE-2905: make skip variables private to codec, separate skipMinimum from skipInterval, don't skip when close in preflex and sep
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1069829 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e14219e78d
commit
faf8d13086
|
@ -53,20 +53,6 @@ public class SegmentWriteState {
|
|||
* tweaking this is rarely useful.*/
|
||||
public int termIndexInterval; // TODO: this should be private to the codec, not settable here or in IWC
|
||||
|
||||
/** Expert: The fraction of TermDocs entries stored in skip tables,
|
||||
* used to accelerate {@link DocsEnum#advance(int)}. Larger values result in
|
||||
* smaller indexes, greater acceleration, but fewer accelerable cases, while
|
||||
* smaller values result in bigger indexes, less acceleration and more
|
||||
* accelerable cases. More detailed experiments would be useful here. */
|
||||
public final int skipInterval = 16;
|
||||
|
||||
/** Expert: The maximum number of skip levels. Smaller values result in
|
||||
* slightly smaller indexes, but slower skipping in big posting lists.
|
||||
*/
|
||||
public final int maxSkipLevels = 10;
|
||||
|
||||
|
||||
|
||||
public SegmentWriteState(PrintStream infoStream, Directory directory, String segmentName, FieldInfos fieldInfos,
|
||||
int numDocs, int termIndexInterval, SegmentCodecs segmentCodecs, BufferedDeletes segDeletes) {
|
||||
this.infoStream = infoStream;
|
||||
|
|
|
@ -209,7 +209,8 @@ public class SegmentTermDocs {
|
|||
|
||||
/** Optimized implementation. */
|
||||
public boolean skipTo(int target) throws IOException {
|
||||
if (df >= skipInterval) { // optimized case
|
||||
// don't skip if the target is close (within skipInterval docs away)
|
||||
if ((target - skipInterval) >= doc && df >= skipInterval) { // optimized case
|
||||
if (skipListReader == null)
|
||||
skipListReader = new DefaultSkipListReader((IndexInput) freqStream.clone(), maxSkipLevels, skipInterval); // lazily clone
|
||||
|
||||
|
|
|
@ -56,6 +56,7 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
|
|||
|
||||
int skipInterval;
|
||||
int maxSkipLevels;
|
||||
int skipMinimum;
|
||||
|
||||
public SepPostingsReaderImpl(Directory dir, SegmentInfo segmentInfo, int readBufferSize, IntStreamFactory intFactory, String codecId) throws IOException {
|
||||
|
||||
|
@ -102,6 +103,7 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
|
|||
SepPostingsWriterImpl.VERSION_START, SepPostingsWriterImpl.VERSION_START);
|
||||
skipInterval = termsIn.readInt();
|
||||
maxSkipLevels = termsIn.readInt();
|
||||
skipMinimum = termsIn.readInt();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -231,7 +233,7 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
|
|||
//System.out.println(" payloadFP=" + termState.payloadFP);
|
||||
}
|
||||
}
|
||||
if (termState.docFreq >= skipInterval) {
|
||||
if (termState.docFreq >= skipMinimum) {
|
||||
//System.out.println(" readSkip @ " + termState.bytesReader.pos);
|
||||
if (isFirstTerm) {
|
||||
termState.skipFP = termState.bytesReader.readVLong();
|
||||
|
@ -344,7 +346,7 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
|
|||
}
|
||||
|
||||
docFreq = termState.docFreq;
|
||||
// NOTE: unused if docFreq < skipInterval:
|
||||
// NOTE: unused if docFreq < skipMinimum:
|
||||
skipFP = termState.skipFP;
|
||||
count = 0;
|
||||
doc = 0;
|
||||
|
@ -420,13 +422,10 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
|
|||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
|
||||
// TODO: jump right to next() if target is < X away
|
||||
// from where we are now?
|
||||
|
||||
if (docFreq >= skipInterval) {
|
||||
if ((target - skipInterval) >= doc && docFreq >= skipMinimum) {
|
||||
|
||||
// There are enough docs in the posting to have
|
||||
// skip data
|
||||
// skip data, and it's not too close
|
||||
|
||||
if (skipper == null) {
|
||||
// This DocsEnum has never done any skipping
|
||||
|
@ -599,13 +598,10 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
|
|||
public int advance(int target) throws IOException {
|
||||
//System.out.println("SepD&P advance target=" + target + " vs current=" + doc + " this=" + this);
|
||||
|
||||
// TODO: jump right to next() if target is < X away
|
||||
// from where we are now?
|
||||
|
||||
if (docFreq >= skipInterval) {
|
||||
if ((target - skipInterval) >= doc && docFreq >= skipMinimum) {
|
||||
|
||||
// There are enough docs in the posting to have
|
||||
// skip data
|
||||
// skip data, and it's not too close
|
||||
|
||||
if (skipper == null) {
|
||||
//System.out.println(" create skipper");
|
||||
|
|
|
@ -21,6 +21,7 @@ import java.io.IOException;
|
|||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
|
@ -63,8 +64,23 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
|
|||
IndexOutput termsOut;
|
||||
|
||||
final SepSkipListWriter skipListWriter;
|
||||
final int skipInterval;
|
||||
final int maxSkipLevels;
|
||||
/** Expert: The fraction of TermDocs entries stored in skip tables,
|
||||
* used to accelerate {@link DocsEnum#advance(int)}. Larger values result in
|
||||
* smaller indexes, greater acceleration, but fewer accelerable cases, while
|
||||
* smaller values result in bigger indexes, less acceleration and more
|
||||
* accelerable cases. More detailed experiments would be useful here. */
|
||||
final int skipInterval = 16;
|
||||
|
||||
/**
|
||||
* Expert: minimum docFreq to write any skip data at all
|
||||
*/
|
||||
final int skipMinimum = skipInterval;
|
||||
|
||||
/** Expert: The maximum number of skip levels. Smaller values result in
|
||||
* slightly smaller indexes, but slower skipping in big posting lists.
|
||||
*/
|
||||
final int maxSkipLevels = 10;
|
||||
|
||||
final int totalNumDocs;
|
||||
|
||||
boolean storePayloads;
|
||||
|
@ -118,15 +134,11 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
|
|||
|
||||
totalNumDocs = state.numDocs;
|
||||
|
||||
// TODO: -- abstraction violation
|
||||
skipListWriter = new SepSkipListWriter(state.skipInterval,
|
||||
state.maxSkipLevels,
|
||||
skipListWriter = new SepSkipListWriter(skipInterval,
|
||||
maxSkipLevels,
|
||||
state.numDocs,
|
||||
freqOut, docOut,
|
||||
posOut, payloadOut);
|
||||
|
||||
skipInterval = state.skipInterval;
|
||||
maxSkipLevels = state.maxSkipLevels;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -136,6 +148,7 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
|
|||
// TODO: -- just ask skipper to "start" here
|
||||
termsOut.writeInt(skipInterval); // write skipInterval
|
||||
termsOut.writeInt(maxSkipLevels); // write maxSkipLevels
|
||||
termsOut.writeInt(skipMinimum); // write skipMinimum
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -264,7 +277,7 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
|
|||
}
|
||||
}
|
||||
|
||||
if (df >= skipInterval) {
|
||||
if (df >= skipMinimum) {
|
||||
//System.out.println(" skipFP=" + skipStart);
|
||||
final long skipFP = skipOut.getFilePointer();
|
||||
skipListWriter.writeSkip(skipOut);
|
||||
|
|
|
@ -47,6 +47,7 @@ public class StandardPostingsReader extends PostingsReaderBase {
|
|||
|
||||
int skipInterval;
|
||||
int maxSkipLevels;
|
||||
int skipMinimum;
|
||||
|
||||
//private String segment;
|
||||
|
||||
|
@ -86,6 +87,7 @@ public class StandardPostingsReader extends PostingsReaderBase {
|
|||
|
||||
skipInterval = termsIn.readInt();
|
||||
maxSkipLevels = termsIn.readInt();
|
||||
skipMinimum = termsIn.readInt();
|
||||
}
|
||||
|
||||
// Must keep final because we do non-standard clone
|
||||
|
@ -179,7 +181,7 @@ public class StandardPostingsReader extends PostingsReaderBase {
|
|||
//System.out.println(" freqFP=" + termState.freqOffset);
|
||||
assert termState.freqOffset < freqIn.length();
|
||||
|
||||
if (termState.docFreq >= skipInterval) {
|
||||
if (termState.docFreq >= skipMinimum) {
|
||||
termState.skipOffset = termState.bytesReader.readVInt();
|
||||
//System.out.println(" skipOffset=" + termState.skipOffset + " vs freqIn.length=" + freqIn.length());
|
||||
assert termState.freqOffset + termState.skipOffset < freqIn.length();
|
||||
|
@ -378,7 +380,7 @@ public class StandardPostingsReader extends PostingsReaderBase {
|
|||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
|
||||
if ((target - skipInterval) >= doc && limit >= skipInterval) {
|
||||
if ((target - skipInterval) >= doc && limit >= skipMinimum) {
|
||||
|
||||
// There are enough docs in the posting to have
|
||||
// skip data, and it isn't too close.
|
||||
|
@ -528,7 +530,7 @@ public class StandardPostingsReader extends PostingsReaderBase {
|
|||
|
||||
//System.out.println("StandardR.D&PE advance target=" + target);
|
||||
|
||||
if ((target - skipInterval) >= doc && limit >= skipInterval) {
|
||||
if ((target - skipInterval) >= doc && limit >= skipMinimum) {
|
||||
|
||||
// There are enough docs in the posting to have
|
||||
// skip data, and it isn't too close
|
||||
|
@ -725,7 +727,7 @@ public class StandardPostingsReader extends PostingsReaderBase {
|
|||
|
||||
//System.out.println("StandardR.D&PE advance seg=" + segment + " target=" + target + " this=" + this);
|
||||
|
||||
if ((target - skipInterval) >= doc && limit >= skipInterval) {
|
||||
if ((target - skipInterval) >= doc && limit >= skipMinimum) {
|
||||
|
||||
// There are enough docs in the posting to have
|
||||
// skip data, and it isn't too close
|
||||
|
|
|
@ -23,6 +23,7 @@ package org.apache.lucene.index.codecs.standard;
|
|||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
|
@ -44,8 +45,22 @@ public final class StandardPostingsWriter extends PostingsWriterBase {
|
|||
final IndexOutput freqOut;
|
||||
final IndexOutput proxOut;
|
||||
final DefaultSkipListWriter skipListWriter;
|
||||
final int skipInterval;
|
||||
final int maxSkipLevels;
|
||||
/** Expert: The fraction of TermDocs entries stored in skip tables,
|
||||
* used to accelerate {@link DocsEnum#advance(int)}. Larger values result in
|
||||
* smaller indexes, greater acceleration, but fewer accelerable cases, while
|
||||
* smaller values result in bigger indexes, less acceleration and more
|
||||
* accelerable cases. More detailed experiments would be useful here. */
|
||||
final int skipInterval = 16;
|
||||
|
||||
/**
|
||||
* Expert: minimum docFreq to write any skip data at all
|
||||
*/
|
||||
final int skipMinimum = skipInterval;
|
||||
|
||||
/** Expert: The maximum number of skip levels. Smaller values result in
|
||||
* slightly smaller indexes, but slower skipping in big posting lists.
|
||||
*/
|
||||
final int maxSkipLevels = 10;
|
||||
final int totalNumDocs;
|
||||
IndexOutput termsOut;
|
||||
|
||||
|
@ -84,14 +99,11 @@ public final class StandardPostingsWriter extends PostingsWriterBase {
|
|||
|
||||
totalNumDocs = state.numDocs;
|
||||
|
||||
skipListWriter = new DefaultSkipListWriter(state.skipInterval,
|
||||
state.maxSkipLevels,
|
||||
skipListWriter = new DefaultSkipListWriter(skipInterval,
|
||||
maxSkipLevels,
|
||||
state.numDocs,
|
||||
freqOut,
|
||||
proxOut);
|
||||
|
||||
skipInterval = state.skipInterval;
|
||||
maxSkipLevels = state.maxSkipLevels;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -100,6 +112,7 @@ public final class StandardPostingsWriter extends PostingsWriterBase {
|
|||
CodecUtil.writeHeader(termsOut, CODEC, VERSION_CURRENT);
|
||||
termsOut.writeInt(skipInterval); // write skipInterval
|
||||
termsOut.writeInt(maxSkipLevels); // write maxSkipLevels
|
||||
termsOut.writeInt(skipMinimum); // write skipMinimum
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -218,7 +231,7 @@ public final class StandardPostingsWriter extends PostingsWriterBase {
|
|||
}
|
||||
lastFreqStart = freqStart;
|
||||
|
||||
if (df >= skipInterval) {
|
||||
if (df >= skipMinimum) {
|
||||
bytesWriter.writeVInt((int) (skipListWriter.writeSkip(freqOut)-freqStart));
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue