Hopefully corrected, or at least improved, version of skipTo

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150296 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Christoph Goller 2004-04-20 13:47:58 +00:00
parent 82dc3d50a3
commit d00487507e
4 changed files with 38 additions and 21 deletions

View File

@ -234,6 +234,7 @@ final class SegmentMerger {
private OutputStream freqOutput = null; private OutputStream freqOutput = null;
private OutputStream proxOutput = null; private OutputStream proxOutput = null;
private TermInfosWriter termInfosWriter = null; private TermInfosWriter termInfosWriter = null;
private int skipInterval;
private SegmentMergeQueue queue = null; private SegmentMergeQueue queue = null;
private final void mergeTerms() throws IOException { private final void mergeTerms() throws IOException {
@ -242,6 +243,8 @@ final class SegmentMerger {
proxOutput = directory.createFile(segment + ".prx"); proxOutput = directory.createFile(segment + ".prx");
termInfosWriter = termInfosWriter =
new TermInfosWriter(directory, segment, fieldInfos); new TermInfosWriter(directory, segment, fieldInfos);
skipInterval = termInfosWriter.skipInterval;
queue = new SegmentMergeQueue(readers.size());
mergeTermInfos(); mergeTermInfos();
@ -254,7 +257,6 @@ final class SegmentMerger {
} }
private final void mergeTermInfos() throws IOException { private final void mergeTermInfos() throws IOException {
queue = new SegmentMergeQueue(readers.size());
int base = 0; int base = 0;
for (int i = 0; i < readers.size(); i++) { for (int i = 0; i < readers.size(); i++) {
IndexReader reader = (IndexReader) readers.elementAt(i); IndexReader reader = (IndexReader) readers.elementAt(i);
@ -327,7 +329,6 @@ final class SegmentMerger {
*/ */
private final int appendPostings(SegmentMergeInfo[] smis, int n) private final int appendPostings(SegmentMergeInfo[] smis, int n)
throws IOException { throws IOException {
final int skipInterval = termInfosWriter.skipInterval;
int lastDoc = 0; int lastDoc = 0;
int df = 0; // number of docs w/ term int df = 0; // number of docs w/ term
resetSkip(); resetSkip();

View File

@ -84,6 +84,8 @@ class SegmentTermDocs implements TermDocs {
public void close() throws IOException { public void close() throws IOException {
freqStream.close(); freqStream.close();
if (skipStream != null)
skipStream.close();
} }
public final int doc() { return doc; } public final int doc() { return doc; }
@ -143,7 +145,7 @@ class SegmentTermDocs implements TermDocs {
/** Optimized implementation. */ /** Optimized implementation. */
public boolean skipTo(int target) throws IOException { public boolean skipTo(int target) throws IOException {
if (df > skipInterval) { // optimized case if (df >= skipInterval) { // optimized case
if (skipStream == null) if (skipStream == null)
skipStream = (InputStream) freqStream.clone(); // lazily clone skipStream = (InputStream) freqStream.clone(); // lazily clone

View File

@ -33,6 +33,7 @@ final class SegmentTermEnum extends TermEnum implements Cloneable {
long indexPointer = 0; long indexPointer = 0;
int indexInterval; int indexInterval;
int skipInterval; int skipInterval;
private int formatM1SkipInterval;
Term prev; Term prev;
private char[] buffer = {}; private char[] buffer = {};
@ -51,7 +52,7 @@ final class SegmentTermEnum extends TermEnum implements Cloneable {
// back-compatible settings // back-compatible settings
indexInterval = 128; indexInterval = 128;
skipInterval = Integer.MAX_VALUE; skipInterval = Integer.MAX_VALUE; // switch off skipTo optimization
} else { } else {
// we have a format version number // we have a format version number
@ -62,8 +63,17 @@ final class SegmentTermEnum extends TermEnum implements Cloneable {
throw new IOException("Unknown format version:" + format); throw new IOException("Unknown format version:" + format);
size = input.readLong(); // read the size size = input.readLong(); // read the size
if (!isIndex) { if(format == -1){
if (!isIndex) {
indexInterval = input.readInt();
formatM1SkipInterval = input.readInt();
}
// switch off skipTo optimization for file format prior to 1.4rc2 in order to avoid a bug in
// skipTo implementation of these versions
skipInterval = Integer.MAX_VALUE;
}
else{
indexInterval = input.readInt(); indexInterval = input.readInt();
skipInterval = input.readInt(); skipInterval = input.readInt();
} }
@ -107,13 +117,21 @@ final class SegmentTermEnum extends TermEnum implements Cloneable {
termInfo.docFreq = input.readVInt(); // read doc freq termInfo.docFreq = input.readVInt(); // read doc freq
termInfo.freqPointer += input.readVLong(); // read freq pointer termInfo.freqPointer += input.readVLong(); // read freq pointer
termInfo.proxPointer += input.readVLong(); // read prox pointer termInfo.proxPointer += input.readVLong(); // read prox pointer
if (!isIndex) { if(format == -1){
if (termInfo.docFreq > skipInterval) { // just read skipOffset in order to increment file pointer;
termInfo.skipOffset = input.readVInt(); // value is never used since skipTo is switched off
if (!isIndex) {
if (termInfo.docFreq > formatM1SkipInterval) {
termInfo.skipOffset = input.readVInt();
}
} }
} }
else{
if (termInfo.docFreq >= skipInterval)
termInfo.skipOffset = input.readVInt();
}
if (isIndex) if (isIndex)
indexPointer += input.readVLong(); // read index pointer indexPointer += input.readVLong(); // read index pointer

View File

@ -27,13 +27,13 @@ import org.apache.lucene.util.StringHelper;
final class TermInfosWriter { final class TermInfosWriter {
/** The file format version, a negative number. */ /** The file format version, a negative number. */
public static final int FORMAT = -1; public static final int FORMAT = -2;
private FieldInfos fieldInfos; private FieldInfos fieldInfos;
private OutputStream output; private OutputStream output;
private Term lastTerm = new Term("", ""); private Term lastTerm = new Term("", "");
private TermInfo lastTi = new TermInfo(); private TermInfo lastTi = new TermInfo();
private int size = 0; private long size = 0;
// TODO: the default values for these two parameters should be settable from // TODO: the default values for these two parameters should be settable from
// IndexWriter. However, once that's done, folks will start setting them to // IndexWriter. However, once that's done, folks will start setting them to
@ -80,10 +80,8 @@ final class TermInfosWriter {
output = directory.createFile(segment + (isIndex ? ".tii" : ".tis")); output = directory.createFile(segment + (isIndex ? ".tii" : ".tis"));
output.writeInt(FORMAT); // write format output.writeInt(FORMAT); // write format
output.writeLong(0); // leave space for size output.writeLong(0); // leave space for size
if (!isIndex) { output.writeInt(indexInterval); // write indexInterval
output.writeInt(indexInterval); // write indexInterval output.writeInt(skipInterval); // write skipInterval
output.writeInt(skipInterval); // write skipInterval
}
} }
/** Adds a new <Term, TermInfo> pair to the set. /** Adds a new <Term, TermInfo> pair to the set.
@ -106,10 +104,8 @@ final class TermInfosWriter {
output.writeVLong(ti.freqPointer - lastTi.freqPointer); // write pointers output.writeVLong(ti.freqPointer - lastTi.freqPointer); // write pointers
output.writeVLong(ti.proxPointer - lastTi.proxPointer); output.writeVLong(ti.proxPointer - lastTi.proxPointer);
if (!isIndex) { if (ti.docFreq >= skipInterval) {
if (ti.docFreq > skipInterval) { output.writeVInt(ti.skipOffset);
output.writeVInt(ti.skipOffset);
}
} }
if (isIndex) { if (isIndex) {