LUCENE-2905: make skip variables private to codec, separate skipMinimum from skipInterval, don't skip when close in preflex and sep

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1069829 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2011-02-11 15:10:50 +00:00
parent e14219e78d
commit faf8d13086
6 changed files with 59 additions and 48 deletions

View File

@ -53,20 +53,6 @@ public class SegmentWriteState {
* tweaking this is rarely useful.*/
public int termIndexInterval; // TODO: this should be private to the codec, not settable here or in IWC
/** Expert: The fraction of TermDocs entries stored in skip tables,
* used to accelerate {@link DocsEnum#advance(int)}. Larger values result in
* smaller indexes, greater acceleration, but fewer accelerable cases, while
* smaller values result in bigger indexes, less acceleration and more
* accelerable cases. More detailed experiments would be useful here. */
public final int skipInterval = 16;
/** Expert: The maximum number of skip levels. Smaller values result in
* slightly smaller indexes, but slower skipping in big posting lists.
*/
public final int maxSkipLevels = 10;
public SegmentWriteState(PrintStream infoStream, Directory directory, String segmentName, FieldInfos fieldInfos,
int numDocs, int termIndexInterval, SegmentCodecs segmentCodecs, BufferedDeletes segDeletes) {
this.infoStream = infoStream;

View File

@ -209,7 +209,8 @@ public class SegmentTermDocs {
/** Optimized implementation. */
public boolean skipTo(int target) throws IOException {
if (df >= skipInterval) { // optimized case
// don't skip if the target is close (within skipInterval docs away)
if ((target - skipInterval) >= doc && df >= skipInterval) { // optimized case
if (skipListReader == null)
skipListReader = new DefaultSkipListReader((IndexInput) freqStream.clone(), maxSkipLevels, skipInterval); // lazily clone

View File

@ -56,6 +56,7 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
int skipInterval;
int maxSkipLevels;
int skipMinimum;
public SepPostingsReaderImpl(Directory dir, SegmentInfo segmentInfo, int readBufferSize, IntStreamFactory intFactory, String codecId) throws IOException {
@ -102,6 +103,7 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
SepPostingsWriterImpl.VERSION_START, SepPostingsWriterImpl.VERSION_START);
skipInterval = termsIn.readInt();
maxSkipLevels = termsIn.readInt();
skipMinimum = termsIn.readInt();
}
@Override
@ -231,7 +233,7 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
//System.out.println(" payloadFP=" + termState.payloadFP);
}
}
if (termState.docFreq >= skipInterval) {
if (termState.docFreq >= skipMinimum) {
//System.out.println(" readSkip @ " + termState.bytesReader.pos);
if (isFirstTerm) {
termState.skipFP = termState.bytesReader.readVLong();
@ -344,7 +346,7 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
}
docFreq = termState.docFreq;
// NOTE: unused if docFreq < skipInterval:
// NOTE: unused if docFreq < skipMinimum:
skipFP = termState.skipFP;
count = 0;
doc = 0;
@ -420,13 +422,10 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
@Override
public int advance(int target) throws IOException {
// TODO: jump right to next() if target is < X away
// from where we are now?
if (docFreq >= skipInterval) {
if ((target - skipInterval) >= doc && docFreq >= skipMinimum) {
// There are enough docs in the posting to have
// skip data
// skip data, and it's not too close
if (skipper == null) {
// This DocsEnum has never done any skipping
@ -599,13 +598,10 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
public int advance(int target) throws IOException {
//System.out.println("SepD&P advance target=" + target + " vs current=" + doc + " this=" + this);
// TODO: jump right to next() if target is < X away
// from where we are now?
if (docFreq >= skipInterval) {
if ((target - skipInterval) >= doc && docFreq >= skipMinimum) {
// There are enough docs in the posting to have
// skip data
// skip data, and it's not too close
if (skipper == null) {
//System.out.println(" create skipper");

View File

@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.Set;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
@ -63,8 +64,23 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
IndexOutput termsOut;
final SepSkipListWriter skipListWriter;
final int skipInterval;
final int maxSkipLevels;
/** Expert: The fraction of TermDocs entries stored in skip tables,
* used to accelerate {@link DocsEnum#advance(int)}. Larger values result in
* smaller indexes, greater acceleration, but fewer accelerable cases, while
* smaller values result in bigger indexes, less acceleration and more
* accelerable cases. More detailed experiments would be useful here. */
final int skipInterval = 16;
/**
* Expert: minimum docFreq to write any skip data at all
*/
final int skipMinimum = skipInterval;
/** Expert: The maximum number of skip levels. Smaller values result in
* slightly smaller indexes, but slower skipping in big posting lists.
*/
final int maxSkipLevels = 10;
final int totalNumDocs;
boolean storePayloads;
@ -118,15 +134,11 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
totalNumDocs = state.numDocs;
// TODO: -- abstraction violation
skipListWriter = new SepSkipListWriter(state.skipInterval,
state.maxSkipLevels,
skipListWriter = new SepSkipListWriter(skipInterval,
maxSkipLevels,
state.numDocs,
freqOut, docOut,
posOut, payloadOut);
skipInterval = state.skipInterval;
maxSkipLevels = state.maxSkipLevels;
}
@Override
@ -136,6 +148,7 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
// TODO: -- just ask skipper to "start" here
termsOut.writeInt(skipInterval); // write skipInterval
termsOut.writeInt(maxSkipLevels); // write maxSkipLevels
termsOut.writeInt(skipMinimum); // write skipMinimum
}
@Override
@ -264,7 +277,7 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
}
}
if (df >= skipInterval) {
if (df >= skipMinimum) {
//System.out.println(" skipFP=" + skipStart);
final long skipFP = skipOut.getFilePointer();
skipListWriter.writeSkip(skipOut);

View File

@ -47,6 +47,7 @@ public class StandardPostingsReader extends PostingsReaderBase {
int skipInterval;
int maxSkipLevels;
int skipMinimum;
//private String segment;
@ -86,6 +87,7 @@ public class StandardPostingsReader extends PostingsReaderBase {
skipInterval = termsIn.readInt();
maxSkipLevels = termsIn.readInt();
skipMinimum = termsIn.readInt();
}
// Must keep final because we do non-standard clone
@ -179,7 +181,7 @@ public class StandardPostingsReader extends PostingsReaderBase {
//System.out.println(" freqFP=" + termState.freqOffset);
assert termState.freqOffset < freqIn.length();
if (termState.docFreq >= skipInterval) {
if (termState.docFreq >= skipMinimum) {
termState.skipOffset = termState.bytesReader.readVInt();
//System.out.println(" skipOffset=" + termState.skipOffset + " vs freqIn.length=" + freqIn.length());
assert termState.freqOffset + termState.skipOffset < freqIn.length();
@ -378,7 +380,7 @@ public class StandardPostingsReader extends PostingsReaderBase {
@Override
public int advance(int target) throws IOException {
if ((target - skipInterval) >= doc && limit >= skipInterval) {
if ((target - skipInterval) >= doc && limit >= skipMinimum) {
// There are enough docs in the posting to have
// skip data, and it isn't too close.
@ -528,7 +530,7 @@ public class StandardPostingsReader extends PostingsReaderBase {
//System.out.println("StandardR.D&PE advance target=" + target);
if ((target - skipInterval) >= doc && limit >= skipInterval) {
if ((target - skipInterval) >= doc && limit >= skipMinimum) {
// There are enough docs in the posting to have
// skip data, and it isn't too close
@ -725,7 +727,7 @@ public class StandardPostingsReader extends PostingsReaderBase {
//System.out.println("StandardR.D&PE advance seg=" + segment + " target=" + target + " this=" + this);
if ((target - skipInterval) >= doc && limit >= skipInterval) {
if ((target - skipInterval) >= doc && limit >= skipMinimum) {
// There are enough docs in the posting to have
// skip data, and it isn't too close

View File

@ -23,6 +23,7 @@ package org.apache.lucene.index.codecs.standard;
import java.io.IOException;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
@ -44,8 +45,22 @@ public final class StandardPostingsWriter extends PostingsWriterBase {
final IndexOutput freqOut;
final IndexOutput proxOut;
final DefaultSkipListWriter skipListWriter;
final int skipInterval;
final int maxSkipLevels;
/** Expert: The fraction of TermDocs entries stored in skip tables,
* used to accelerate {@link DocsEnum#advance(int)}. Larger values result in
* smaller indexes, greater acceleration, but fewer accelerable cases, while
* smaller values result in bigger indexes, less acceleration and more
* accelerable cases. More detailed experiments would be useful here. */
final int skipInterval = 16;
/**
* Expert: minimum docFreq to write any skip data at all
*/
final int skipMinimum = skipInterval;
/** Expert: The maximum number of skip levels. Smaller values result in
* slightly smaller indexes, but slower skipping in big posting lists.
*/
final int maxSkipLevels = 10;
final int totalNumDocs;
IndexOutput termsOut;
@ -84,14 +99,11 @@ public final class StandardPostingsWriter extends PostingsWriterBase {
totalNumDocs = state.numDocs;
skipListWriter = new DefaultSkipListWriter(state.skipInterval,
state.maxSkipLevels,
skipListWriter = new DefaultSkipListWriter(skipInterval,
maxSkipLevels,
state.numDocs,
freqOut,
proxOut);
skipInterval = state.skipInterval;
maxSkipLevels = state.maxSkipLevels;
}
@Override
@ -100,6 +112,7 @@ public final class StandardPostingsWriter extends PostingsWriterBase {
CodecUtil.writeHeader(termsOut, CODEC, VERSION_CURRENT);
termsOut.writeInt(skipInterval); // write skipInterval
termsOut.writeInt(maxSkipLevels); // write maxSkipLevels
termsOut.writeInt(skipMinimum); // write skipMinimum
}
@Override
@ -218,7 +231,7 @@ public final class StandardPostingsWriter extends PostingsWriterBase {
}
lastFreqStart = freqStart;
if (df >= skipInterval) {
if (df >= skipMinimum) {
bytesWriter.writeVInt((int) (skipListWriter.writeSkip(freqOut)-freqStart));
}