hopefully corrected or at least improved version of skipTo

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150296 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Christoph Goller 2004-04-20 13:47:58 +00:00
parent 82dc3d50a3
commit d00487507e
4 changed files with 38 additions and 21 deletions

View File

@ -234,6 +234,7 @@ final class SegmentMerger {
private OutputStream freqOutput = null;
private OutputStream proxOutput = null;
private TermInfosWriter termInfosWriter = null;
private int skipInterval;
private SegmentMergeQueue queue = null;
private final void mergeTerms() throws IOException {
@ -242,6 +243,8 @@ final class SegmentMerger {
proxOutput = directory.createFile(segment + ".prx");
termInfosWriter =
new TermInfosWriter(directory, segment, fieldInfos);
skipInterval = termInfosWriter.skipInterval;
queue = new SegmentMergeQueue(readers.size());
mergeTermInfos();
@ -254,7 +257,6 @@ final class SegmentMerger {
}
private final void mergeTermInfos() throws IOException {
queue = new SegmentMergeQueue(readers.size());
int base = 0;
for (int i = 0; i < readers.size(); i++) {
IndexReader reader = (IndexReader) readers.elementAt(i);
@ -327,7 +329,6 @@ final class SegmentMerger {
*/
private final int appendPostings(SegmentMergeInfo[] smis, int n)
throws IOException {
final int skipInterval = termInfosWriter.skipInterval;
int lastDoc = 0;
int df = 0; // number of docs w/ term
resetSkip();

View File

@ -84,6 +84,8 @@ class SegmentTermDocs implements TermDocs {
public void close() throws IOException {
freqStream.close();
if (skipStream != null)
skipStream.close();
}
public final int doc() { return doc; }
@ -143,7 +145,7 @@ class SegmentTermDocs implements TermDocs {
/** Optimized implementation. */
public boolean skipTo(int target) throws IOException {
if (df > skipInterval) { // optimized case
if (df >= skipInterval) { // optimized case
if (skipStream == null)
skipStream = (InputStream) freqStream.clone(); // lazily clone

View File

@ -33,6 +33,7 @@ final class SegmentTermEnum extends TermEnum implements Cloneable {
long indexPointer = 0;
int indexInterval;
int skipInterval;
private int formatM1SkipInterval;
Term prev;
private char[] buffer = {};
@ -51,7 +52,7 @@ final class SegmentTermEnum extends TermEnum implements Cloneable {
// back-compatible settings
indexInterval = 128;
skipInterval = Integer.MAX_VALUE;
skipInterval = Integer.MAX_VALUE; // switch off skipTo optimization
} else {
// we have a format version number
@ -62,8 +63,17 @@ final class SegmentTermEnum extends TermEnum implements Cloneable {
throw new IOException("Unknown format version:" + format);
size = input.readLong(); // read the size
if (!isIndex) {
if(format == -1){
if (!isIndex) {
indexInterval = input.readInt();
formatM1SkipInterval = input.readInt();
}
// switch off skipTo optimization for file format prior to 1.4rc2 in order to avoid a bug in
// skipTo implementation of these versions
skipInterval = Integer.MAX_VALUE;
}
else{
indexInterval = input.readInt();
skipInterval = input.readInt();
}
@ -107,13 +117,21 @@ final class SegmentTermEnum extends TermEnum implements Cloneable {
termInfo.docFreq = input.readVInt(); // read doc freq
termInfo.freqPointer += input.readVLong(); // read freq pointer
termInfo.proxPointer += input.readVLong(); // read prox pointer
if (!isIndex) {
if (termInfo.docFreq > skipInterval) {
termInfo.skipOffset = input.readVInt();
if(format == -1){
// just read skipOffset in order to increment file pointer;
// value is never used since skipTo is switched off
if (!isIndex) {
if (termInfo.docFreq > formatM1SkipInterval) {
termInfo.skipOffset = input.readVInt();
}
}
}
else{
if (termInfo.docFreq >= skipInterval)
termInfo.skipOffset = input.readVInt();
}
if (isIndex)
indexPointer += input.readVLong(); // read index pointer

View File

@ -27,13 +27,13 @@ import org.apache.lucene.util.StringHelper;
final class TermInfosWriter {
/** The file format version, a negative number. */
public static final int FORMAT = -1;
public static final int FORMAT = -2;
private FieldInfos fieldInfos;
private OutputStream output;
private Term lastTerm = new Term("", "");
private TermInfo lastTi = new TermInfo();
private int size = 0;
private long size = 0;
// TODO: the default values for these two parameters should be settable from
// IndexWriter. However, once that's done, folks will start setting them to
@ -80,10 +80,8 @@ final class TermInfosWriter {
output = directory.createFile(segment + (isIndex ? ".tii" : ".tis"));
output.writeInt(FORMAT); // write format
output.writeLong(0); // leave space for size
if (!isIndex) {
output.writeInt(indexInterval); // write indexInterval
output.writeInt(skipInterval); // write skipInterval
}
output.writeInt(indexInterval); // write indexInterval
output.writeInt(skipInterval); // write skipInterval
}
/** Adds a new <Term, TermInfo> pair to the set.
@ -106,10 +104,8 @@ final class TermInfosWriter {
output.writeVLong(ti.freqPointer - lastTi.freqPointer); // write pointers
output.writeVLong(ti.proxPointer - lastTi.proxPointer);
if (!isIndex) {
if (ti.docFreq > skipInterval) {
output.writeVInt(ti.skipOffset);
}
if (ti.docFreq >= skipInterval) {
output.writeVInt(ti.skipOffset);
}
if (isIndex) {