From d00487507e36b0106f140d6b9f4c0cf561aa0842 Mon Sep 17 00:00:00 2001 From: Christoph Goller Date: Tue, 20 Apr 2004 13:47:58 +0000 Subject: [PATCH] hopefully corrected or at least improved version of skipTo git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150296 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/lucene/index/SegmentMerger.java | 5 +-- .../apache/lucene/index/SegmentTermDocs.java | 4 ++- .../apache/lucene/index/SegmentTermEnum.java | 34 ++++++++++++++----- .../apache/lucene/index/TermInfosWriter.java | 16 ++++----- 4 files changed, 38 insertions(+), 21 deletions(-) diff --git a/src/java/org/apache/lucene/index/SegmentMerger.java b/src/java/org/apache/lucene/index/SegmentMerger.java index a94fa39100c..b631cae6450 100644 --- a/src/java/org/apache/lucene/index/SegmentMerger.java +++ b/src/java/org/apache/lucene/index/SegmentMerger.java @@ -234,6 +234,7 @@ final class SegmentMerger { private OutputStream freqOutput = null; private OutputStream proxOutput = null; private TermInfosWriter termInfosWriter = null; + private int skipInterval; private SegmentMergeQueue queue = null; private final void mergeTerms() throws IOException { @@ -242,6 +243,8 @@ final class SegmentMerger { proxOutput = directory.createFile(segment + ".prx"); termInfosWriter = new TermInfosWriter(directory, segment, fieldInfos); + skipInterval = termInfosWriter.skipInterval; + queue = new SegmentMergeQueue(readers.size()); mergeTermInfos(); @@ -254,7 +257,6 @@ final class SegmentMerger { } private final void mergeTermInfos() throws IOException { - queue = new SegmentMergeQueue(readers.size()); int base = 0; for (int i = 0; i < readers.size(); i++) { IndexReader reader = (IndexReader) readers.elementAt(i); @@ -327,7 +329,6 @@ final class SegmentMerger { */ private final int appendPostings(SegmentMergeInfo[] smis, int n) throws IOException { - final int skipInterval = termInfosWriter.skipInterval; int lastDoc = 0; int df = 0; // number of docs w/ term resetSkip(); diff --git a/src/java/org/apache/lucene/index/SegmentTermDocs.java b/src/java/org/apache/lucene/index/SegmentTermDocs.java index fb922b98a9e..eba5bcbafb9 100644 --- a/src/java/org/apache/lucene/index/SegmentTermDocs.java +++ b/src/java/org/apache/lucene/index/SegmentTermDocs.java @@ -84,6 +84,8 @@ class SegmentTermDocs implements TermDocs { public void close() throws IOException { freqStream.close(); + if (skipStream != null) + skipStream.close(); } public final int doc() { return doc; } @@ -143,7 +145,7 @@ class SegmentTermDocs implements TermDocs { /** Optimized implementation. */ public boolean skipTo(int target) throws IOException { - if (df > skipInterval) { // optimized case + if (df >= skipInterval) { // optimized case if (skipStream == null) skipStream = (InputStream) freqStream.clone(); // lazily clone diff --git a/src/java/org/apache/lucene/index/SegmentTermEnum.java b/src/java/org/apache/lucene/index/SegmentTermEnum.java index 36f3e2af881..884bcfddfc7 100644 --- a/src/java/org/apache/lucene/index/SegmentTermEnum.java +++ b/src/java/org/apache/lucene/index/SegmentTermEnum.java @@ -33,6 +33,7 @@ final class SegmentTermEnum extends TermEnum implements Cloneable { long indexPointer = 0; int indexInterval; int skipInterval; + private int formatM1SkipInterval; Term prev; private char[] buffer = {}; @@ -51,7 +52,7 @@ final class SegmentTermEnum extends TermEnum implements Cloneable { // back-compatible settings indexInterval = 128; - skipInterval = Integer.MAX_VALUE; + skipInterval = Integer.MAX_VALUE; // switch off skipTo optimization } else { // we have a format version number @@ -62,8 +63,17 @@ final class SegmentTermEnum extends TermEnum implements Cloneable { throw new IOException("Unknown format version:" + format); size = input.readLong(); // read the size - - if (!isIndex) { + + if(format == -1){ + if (!isIndex) { + indexInterval = input.readInt(); + formatM1SkipInterval = input.readInt(); + } + // switch off skipTo optimization for file format prior to 1.4rc2 in order to avoid a bug in + // skipTo implementation of these versions + skipInterval = Integer.MAX_VALUE; + } + else{ indexInterval = input.readInt(); skipInterval = input.readInt(); } @@ -107,13 +117,21 @@ final class SegmentTermEnum extends TermEnum implements Cloneable { termInfo.docFreq = input.readVInt(); // read doc freq termInfo.freqPointer += input.readVLong(); // read freq pointer termInfo.proxPointer += input.readVLong(); // read prox pointer - - if (!isIndex) { - if (termInfo.docFreq > skipInterval) { - termInfo.skipOffset = input.readVInt(); + + if(format == -1){ + // just read skipOffset in order to increment file pointer; + // value is never used since skipTo is switched off + if (!isIndex) { + if (termInfo.docFreq > formatM1SkipInterval) { + termInfo.skipOffset = input.readVInt(); + } } } - + else{ + if (termInfo.docFreq >= skipInterval) + termInfo.skipOffset = input.readVInt(); + } + if (isIndex) indexPointer += input.readVLong(); // read index pointer diff --git a/src/java/org/apache/lucene/index/TermInfosWriter.java b/src/java/org/apache/lucene/index/TermInfosWriter.java index 06320c5a91c..a5a73e89234 100644 --- a/src/java/org/apache/lucene/index/TermInfosWriter.java +++ b/src/java/org/apache/lucene/index/TermInfosWriter.java @@ -27,13 +27,13 @@ import org.apache.lucene.util.StringHelper; final class TermInfosWriter { /** The file format version, a negative number. */ - public static final int FORMAT = -1; + public static final int FORMAT = -2; private FieldInfos fieldInfos; private OutputStream output; private Term lastTerm = new Term("", ""); private TermInfo lastTi = new TermInfo(); - private int size = 0; + private long size = 0; // TODO: the default values for these two parameters should be settable from // IndexWriter. However, once that's done, folks will start setting them to @@ -80,10 +80,8 @@ final class TermInfosWriter { output = directory.createFile(segment + (isIndex ? ".tii" : ".tis")); output.writeInt(FORMAT); // write format output.writeLong(0); // leave space for size - if (!isIndex) { - output.writeInt(indexInterval); // write indexInterval - output.writeInt(skipInterval); // write skipInterval - } + output.writeInt(indexInterval); // write indexInterval + output.writeInt(skipInterval); // write skipInterval } /** Adds a new pair to the set. @@ -106,10 +104,8 @@ final class TermInfosWriter { output.writeVLong(ti.freqPointer - lastTi.freqPointer); // write pointers output.writeVLong(ti.proxPointer - lastTi.proxPointer); - if (!isIndex) { - if (ti.docFreq > skipInterval) { - output.writeVInt(ti.skipOffset); - } + if (ti.docFreq >= skipInterval) { + output.writeVInt(ti.skipOffset); } if (isIndex) {