mirror of https://github.com/apache/lucene.git
hopefully corrected or at least improved version of skipTo
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150296 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
82dc3d50a3
commit
d00487507e
|
@ -234,6 +234,7 @@ final class SegmentMerger {
|
|||
private OutputStream freqOutput = null;
|
||||
private OutputStream proxOutput = null;
|
||||
private TermInfosWriter termInfosWriter = null;
|
||||
private int skipInterval;
|
||||
private SegmentMergeQueue queue = null;
|
||||
|
||||
private final void mergeTerms() throws IOException {
|
||||
|
@ -242,6 +243,8 @@ final class SegmentMerger {
|
|||
proxOutput = directory.createFile(segment + ".prx");
|
||||
termInfosWriter =
|
||||
new TermInfosWriter(directory, segment, fieldInfos);
|
||||
skipInterval = termInfosWriter.skipInterval;
|
||||
queue = new SegmentMergeQueue(readers.size());
|
||||
|
||||
mergeTermInfos();
|
||||
|
||||
|
@ -254,7 +257,6 @@ final class SegmentMerger {
|
|||
}
|
||||
|
||||
private final void mergeTermInfos() throws IOException {
|
||||
queue = new SegmentMergeQueue(readers.size());
|
||||
int base = 0;
|
||||
for (int i = 0; i < readers.size(); i++) {
|
||||
IndexReader reader = (IndexReader) readers.elementAt(i);
|
||||
|
@ -327,7 +329,6 @@ final class SegmentMerger {
|
|||
*/
|
||||
private final int appendPostings(SegmentMergeInfo[] smis, int n)
|
||||
throws IOException {
|
||||
final int skipInterval = termInfosWriter.skipInterval;
|
||||
int lastDoc = 0;
|
||||
int df = 0; // number of docs w/ term
|
||||
resetSkip();
|
||||
|
|
|
@ -84,6 +84,8 @@ class SegmentTermDocs implements TermDocs {
|
|||
|
||||
public void close() throws IOException {
|
||||
freqStream.close();
|
||||
if (skipStream != null)
|
||||
skipStream.close();
|
||||
}
|
||||
|
||||
public final int doc() { return doc; }
|
||||
|
@ -143,7 +145,7 @@ class SegmentTermDocs implements TermDocs {
|
|||
|
||||
/** Optimized implementation. */
|
||||
public boolean skipTo(int target) throws IOException {
|
||||
if (df > skipInterval) { // optimized case
|
||||
if (df >= skipInterval) { // optimized case
|
||||
|
||||
if (skipStream == null)
|
||||
skipStream = (InputStream) freqStream.clone(); // lazily clone
|
||||
|
|
|
@ -33,6 +33,7 @@ final class SegmentTermEnum extends TermEnum implements Cloneable {
|
|||
long indexPointer = 0;
|
||||
int indexInterval;
|
||||
int skipInterval;
|
||||
private int formatM1SkipInterval;
|
||||
Term prev;
|
||||
|
||||
private char[] buffer = {};
|
||||
|
@ -51,7 +52,7 @@ final class SegmentTermEnum extends TermEnum implements Cloneable {
|
|||
|
||||
// back-compatible settings
|
||||
indexInterval = 128;
|
||||
skipInterval = Integer.MAX_VALUE;
|
||||
skipInterval = Integer.MAX_VALUE; // switch off skipTo optimization
|
||||
|
||||
} else {
|
||||
// we have a format version number
|
||||
|
@ -63,7 +64,16 @@ final class SegmentTermEnum extends TermEnum implements Cloneable {
|
|||
|
||||
size = input.readLong(); // read the size
|
||||
|
||||
if (!isIndex) {
|
||||
if(format == -1){
|
||||
if (!isIndex) {
|
||||
indexInterval = input.readInt();
|
||||
formatM1SkipInterval = input.readInt();
|
||||
}
|
||||
// switch off skipTo optimization for file format prior to 1.4rc2 in order to avoid a bug in
|
||||
// skipTo implementation of these versions
|
||||
skipInterval = Integer.MAX_VALUE;
|
||||
}
|
||||
else{
|
||||
indexInterval = input.readInt();
|
||||
skipInterval = input.readInt();
|
||||
}
|
||||
|
@ -108,11 +118,19 @@ final class SegmentTermEnum extends TermEnum implements Cloneable {
|
|||
termInfo.freqPointer += input.readVLong(); // read freq pointer
|
||||
termInfo.proxPointer += input.readVLong(); // read prox pointer
|
||||
|
||||
if (!isIndex) {
|
||||
if (termInfo.docFreq > skipInterval) {
|
||||
termInfo.skipOffset = input.readVInt();
|
||||
if(format == -1){
|
||||
// just read skipOffset in order to increment file pointer;
|
||||
// value is never used since skipTo is switched off
|
||||
if (!isIndex) {
|
||||
if (termInfo.docFreq > formatM1SkipInterval) {
|
||||
termInfo.skipOffset = input.readVInt();
|
||||
}
|
||||
}
|
||||
}
|
||||
else{
|
||||
if (termInfo.docFreq >= skipInterval)
|
||||
termInfo.skipOffset = input.readVInt();
|
||||
}
|
||||
|
||||
if (isIndex)
|
||||
indexPointer += input.readVLong(); // read index pointer
|
||||
|
|
|
@ -27,13 +27,13 @@ import org.apache.lucene.util.StringHelper;
|
|||
|
||||
final class TermInfosWriter {
|
||||
/** The file format version, a negative number. */
|
||||
public static final int FORMAT = -1;
|
||||
public static final int FORMAT = -2;
|
||||
|
||||
private FieldInfos fieldInfos;
|
||||
private OutputStream output;
|
||||
private Term lastTerm = new Term("", "");
|
||||
private TermInfo lastTi = new TermInfo();
|
||||
private int size = 0;
|
||||
private long size = 0;
|
||||
|
||||
// TODO: the default values for these two parameters should be settable from
|
||||
// IndexWriter. However, once that's done, folks will start setting them to
|
||||
|
@ -80,10 +80,8 @@ final class TermInfosWriter {
|
|||
output = directory.createFile(segment + (isIndex ? ".tii" : ".tis"));
|
||||
output.writeInt(FORMAT); // write format
|
||||
output.writeLong(0); // leave space for size
|
||||
if (!isIndex) {
|
||||
output.writeInt(indexInterval); // write indexInterval
|
||||
output.writeInt(skipInterval); // write skipInterval
|
||||
}
|
||||
output.writeInt(indexInterval); // write indexInterval
|
||||
output.writeInt(skipInterval); // write skipInterval
|
||||
}
|
||||
|
||||
/** Adds a new <Term, TermInfo> pair to the set.
|
||||
|
@ -106,10 +104,8 @@ final class TermInfosWriter {
|
|||
output.writeVLong(ti.freqPointer - lastTi.freqPointer); // write pointers
|
||||
output.writeVLong(ti.proxPointer - lastTi.proxPointer);
|
||||
|
||||
if (!isIndex) {
|
||||
if (ti.docFreq > skipInterval) {
|
||||
output.writeVInt(ti.skipOffset);
|
||||
}
|
||||
if (ti.docFreq >= skipInterval) {
|
||||
output.writeVInt(ti.skipOffset);
|
||||
}
|
||||
|
||||
if (isIndex) {
|
||||
|
|
Loading…
Reference in New Issue