mirror of https://github.com/apache/lucene.git
Hopefully corrected, or at least improved, version of skipTo
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150296 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
82dc3d50a3
commit
d00487507e
|
@ -234,6 +234,7 @@ final class SegmentMerger {
|
||||||
private OutputStream freqOutput = null;
|
private OutputStream freqOutput = null;
|
||||||
private OutputStream proxOutput = null;
|
private OutputStream proxOutput = null;
|
||||||
private TermInfosWriter termInfosWriter = null;
|
private TermInfosWriter termInfosWriter = null;
|
||||||
|
private int skipInterval;
|
||||||
private SegmentMergeQueue queue = null;
|
private SegmentMergeQueue queue = null;
|
||||||
|
|
||||||
private final void mergeTerms() throws IOException {
|
private final void mergeTerms() throws IOException {
|
||||||
|
@ -242,6 +243,8 @@ final class SegmentMerger {
|
||||||
proxOutput = directory.createFile(segment + ".prx");
|
proxOutput = directory.createFile(segment + ".prx");
|
||||||
termInfosWriter =
|
termInfosWriter =
|
||||||
new TermInfosWriter(directory, segment, fieldInfos);
|
new TermInfosWriter(directory, segment, fieldInfos);
|
||||||
|
skipInterval = termInfosWriter.skipInterval;
|
||||||
|
queue = new SegmentMergeQueue(readers.size());
|
||||||
|
|
||||||
mergeTermInfos();
|
mergeTermInfos();
|
||||||
|
|
||||||
|
@ -254,7 +257,6 @@ final class SegmentMerger {
|
||||||
}
|
}
|
||||||
|
|
||||||
private final void mergeTermInfos() throws IOException {
|
private final void mergeTermInfos() throws IOException {
|
||||||
queue = new SegmentMergeQueue(readers.size());
|
|
||||||
int base = 0;
|
int base = 0;
|
||||||
for (int i = 0; i < readers.size(); i++) {
|
for (int i = 0; i < readers.size(); i++) {
|
||||||
IndexReader reader = (IndexReader) readers.elementAt(i);
|
IndexReader reader = (IndexReader) readers.elementAt(i);
|
||||||
|
@ -327,7 +329,6 @@ final class SegmentMerger {
|
||||||
*/
|
*/
|
||||||
private final int appendPostings(SegmentMergeInfo[] smis, int n)
|
private final int appendPostings(SegmentMergeInfo[] smis, int n)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
final int skipInterval = termInfosWriter.skipInterval;
|
|
||||||
int lastDoc = 0;
|
int lastDoc = 0;
|
||||||
int df = 0; // number of docs w/ term
|
int df = 0; // number of docs w/ term
|
||||||
resetSkip();
|
resetSkip();
|
||||||
|
|
|
@ -84,6 +84,8 @@ class SegmentTermDocs implements TermDocs {
|
||||||
|
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
freqStream.close();
|
freqStream.close();
|
||||||
|
if (skipStream != null)
|
||||||
|
skipStream.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
public final int doc() { return doc; }
|
public final int doc() { return doc; }
|
||||||
|
@ -143,7 +145,7 @@ class SegmentTermDocs implements TermDocs {
|
||||||
|
|
||||||
/** Optimized implementation. */
|
/** Optimized implementation. */
|
||||||
public boolean skipTo(int target) throws IOException {
|
public boolean skipTo(int target) throws IOException {
|
||||||
if (df > skipInterval) { // optimized case
|
if (df >= skipInterval) { // optimized case
|
||||||
|
|
||||||
if (skipStream == null)
|
if (skipStream == null)
|
||||||
skipStream = (InputStream) freqStream.clone(); // lazily clone
|
skipStream = (InputStream) freqStream.clone(); // lazily clone
|
||||||
|
|
|
@ -33,6 +33,7 @@ final class SegmentTermEnum extends TermEnum implements Cloneable {
|
||||||
long indexPointer = 0;
|
long indexPointer = 0;
|
||||||
int indexInterval;
|
int indexInterval;
|
||||||
int skipInterval;
|
int skipInterval;
|
||||||
|
private int formatM1SkipInterval;
|
||||||
Term prev;
|
Term prev;
|
||||||
|
|
||||||
private char[] buffer = {};
|
private char[] buffer = {};
|
||||||
|
@ -51,7 +52,7 @@ final class SegmentTermEnum extends TermEnum implements Cloneable {
|
||||||
|
|
||||||
// back-compatible settings
|
// back-compatible settings
|
||||||
indexInterval = 128;
|
indexInterval = 128;
|
||||||
skipInterval = Integer.MAX_VALUE;
|
skipInterval = Integer.MAX_VALUE; // switch off skipTo optimization
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
// we have a format version number
|
// we have a format version number
|
||||||
|
@ -63,7 +64,16 @@ final class SegmentTermEnum extends TermEnum implements Cloneable {
|
||||||
|
|
||||||
size = input.readLong(); // read the size
|
size = input.readLong(); // read the size
|
||||||
|
|
||||||
|
if(format == -1){
|
||||||
if (!isIndex) {
|
if (!isIndex) {
|
||||||
|
indexInterval = input.readInt();
|
||||||
|
formatM1SkipInterval = input.readInt();
|
||||||
|
}
|
||||||
|
// switch off skipTo optimization for file formats prior to 1.4rc2 in order to avoid a bug in
|
||||||
|
// the skipTo implementation of those versions
|
||||||
|
skipInterval = Integer.MAX_VALUE;
|
||||||
|
}
|
||||||
|
else{
|
||||||
indexInterval = input.readInt();
|
indexInterval = input.readInt();
|
||||||
skipInterval = input.readInt();
|
skipInterval = input.readInt();
|
||||||
}
|
}
|
||||||
|
@ -108,11 +118,19 @@ final class SegmentTermEnum extends TermEnum implements Cloneable {
|
||||||
termInfo.freqPointer += input.readVLong(); // read freq pointer
|
termInfo.freqPointer += input.readVLong(); // read freq pointer
|
||||||
termInfo.proxPointer += input.readVLong(); // read prox pointer
|
termInfo.proxPointer += input.readVLong(); // read prox pointer
|
||||||
|
|
||||||
|
if(format == -1){
|
||||||
|
// just read skipOffset in order to increment file pointer;
|
||||||
|
// value is never used since skipTo is switched off
|
||||||
if (!isIndex) {
|
if (!isIndex) {
|
||||||
if (termInfo.docFreq > skipInterval) {
|
if (termInfo.docFreq > formatM1SkipInterval) {
|
||||||
termInfo.skipOffset = input.readVInt();
|
termInfo.skipOffset = input.readVInt();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
if (termInfo.docFreq >= skipInterval)
|
||||||
|
termInfo.skipOffset = input.readVInt();
|
||||||
|
}
|
||||||
|
|
||||||
if (isIndex)
|
if (isIndex)
|
||||||
indexPointer += input.readVLong(); // read index pointer
|
indexPointer += input.readVLong(); // read index pointer
|
||||||
|
|
|
@ -27,13 +27,13 @@ import org.apache.lucene.util.StringHelper;
|
||||||
|
|
||||||
final class TermInfosWriter {
|
final class TermInfosWriter {
|
||||||
/** The file format version, a negative number. */
|
/** The file format version, a negative number. */
|
||||||
public static final int FORMAT = -1;
|
public static final int FORMAT = -2;
|
||||||
|
|
||||||
private FieldInfos fieldInfos;
|
private FieldInfos fieldInfos;
|
||||||
private OutputStream output;
|
private OutputStream output;
|
||||||
private Term lastTerm = new Term("", "");
|
private Term lastTerm = new Term("", "");
|
||||||
private TermInfo lastTi = new TermInfo();
|
private TermInfo lastTi = new TermInfo();
|
||||||
private int size = 0;
|
private long size = 0;
|
||||||
|
|
||||||
// TODO: the default values for these two parameters should be settable from
|
// TODO: the default values for these two parameters should be settable from
|
||||||
// IndexWriter. However, once that's done, folks will start setting them to
|
// IndexWriter. However, once that's done, folks will start setting them to
|
||||||
|
@ -80,11 +80,9 @@ final class TermInfosWriter {
|
||||||
output = directory.createFile(segment + (isIndex ? ".tii" : ".tis"));
|
output = directory.createFile(segment + (isIndex ? ".tii" : ".tis"));
|
||||||
output.writeInt(FORMAT); // write format
|
output.writeInt(FORMAT); // write format
|
||||||
output.writeLong(0); // leave space for size
|
output.writeLong(0); // leave space for size
|
||||||
if (!isIndex) {
|
|
||||||
output.writeInt(indexInterval); // write indexInterval
|
output.writeInt(indexInterval); // write indexInterval
|
||||||
output.writeInt(skipInterval); // write skipInterval
|
output.writeInt(skipInterval); // write skipInterval
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
/** Adds a new <Term, TermInfo> pair to the set.
|
/** Adds a new <Term, TermInfo> pair to the set.
|
||||||
Term must be lexicographically greater than all previous Terms added.
|
Term must be lexicographically greater than all previous Terms added.
|
||||||
|
@ -106,11 +104,9 @@ final class TermInfosWriter {
|
||||||
output.writeVLong(ti.freqPointer - lastTi.freqPointer); // write pointers
|
output.writeVLong(ti.freqPointer - lastTi.freqPointer); // write pointers
|
||||||
output.writeVLong(ti.proxPointer - lastTi.proxPointer);
|
output.writeVLong(ti.proxPointer - lastTi.proxPointer);
|
||||||
|
|
||||||
if (!isIndex) {
|
if (ti.docFreq >= skipInterval) {
|
||||||
if (ti.docFreq > skipInterval) {
|
|
||||||
output.writeVInt(ti.skipOffset);
|
output.writeVInt(ti.skipOffset);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (isIndex) {
|
if (isIndex) {
|
||||||
output.writeVLong(other.output.getFilePointer() - lastIndexPointer);
|
output.writeVLong(other.output.getFilePointer() - lastIndexPointer);
|
||||||
|
|
Loading…
Reference in New Issue