mirror of https://github.com/apache/lucene.git
Optimize term dictionary lookup to allocate fewer terms.
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150581 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
1f11f2e937
commit
e56faa3097
|
@ -97,6 +97,11 @@ $Id$
|
||||||
21. Add a serializable Parameter Class to standardize parameter enum
|
21. Add a serializable Parameter Class to standardize parameter enum
|
||||||
classes in BooleanClause and Field. (Christoph)
|
classes in BooleanClause and Field. (Christoph)
|
||||||
|
|
||||||
|
22. Optimize term-dictionary lookup to allocate far fewer terms when
|
||||||
|
scanning for the matching term. This speeds searches involving
|
||||||
|
low-frequency terms, where the cost of dictionary lookup can be
|
||||||
|
significant. (cutting)
|
||||||
|
|
||||||
|
|
||||||
1.4.1
|
1.4.1
|
||||||
|
|
||||||
|
|
|
@ -25,7 +25,10 @@ final class SegmentTermEnum extends TermEnum implements Cloneable {
|
||||||
long size;
|
long size;
|
||||||
long position = -1;
|
long position = -1;
|
||||||
|
|
||||||
private Term term = new Term("", "");
|
private TermBuffer termBuffer = new TermBuffer();
|
||||||
|
private TermBuffer prevBuffer = new TermBuffer();
|
||||||
|
private TermBuffer scratch; // used for scanning
|
||||||
|
|
||||||
private TermInfo termInfo = new TermInfo();
|
private TermInfo termInfo = new TermInfo();
|
||||||
|
|
||||||
private int format;
|
private int format;
|
||||||
|
@ -34,9 +37,6 @@ final class SegmentTermEnum extends TermEnum implements Cloneable {
|
||||||
int indexInterval;
|
int indexInterval;
|
||||||
int skipInterval;
|
int skipInterval;
|
||||||
private int formatM1SkipInterval;
|
private int formatM1SkipInterval;
|
||||||
Term prev;
|
|
||||||
|
|
||||||
private char[] buffer = {};
|
|
||||||
|
|
||||||
SegmentTermEnum(IndexInput i, FieldInfos fis, boolean isi)
|
SegmentTermEnum(IndexInput i, FieldInfos fis, boolean isi)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
@ -89,7 +89,10 @@ final class SegmentTermEnum extends TermEnum implements Cloneable {
|
||||||
|
|
||||||
clone.input = (IndexInput) input.clone();
|
clone.input = (IndexInput) input.clone();
|
||||||
clone.termInfo = new TermInfo(termInfo);
|
clone.termInfo = new TermInfo(termInfo);
|
||||||
if (term != null) clone.growBuffer(term.text.length());
|
|
||||||
|
clone.termBuffer = (TermBuffer)termBuffer.clone();
|
||||||
|
clone.prevBuffer = (TermBuffer)prevBuffer.clone();
|
||||||
|
clone.scratch = null;
|
||||||
|
|
||||||
return clone;
|
return clone;
|
||||||
}
|
}
|
||||||
|
@ -98,21 +101,20 @@ final class SegmentTermEnum extends TermEnum implements Cloneable {
|
||||||
throws IOException {
|
throws IOException {
|
||||||
input.seek(pointer);
|
input.seek(pointer);
|
||||||
position = p;
|
position = p;
|
||||||
term = t;
|
termBuffer.set(t);
|
||||||
prev = null;
|
prevBuffer.reset();
|
||||||
termInfo.set(ti);
|
termInfo.set(ti);
|
||||||
growBuffer(term.text.length()); // copy term text into buffer
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Increments the enumeration to the next element. True if one exists.*/
|
/** Increments the enumeration to the next element. True if one exists.*/
|
||||||
public final boolean next() throws IOException {
|
public final boolean next() throws IOException {
|
||||||
if (position++ >= size - 1) {
|
if (position++ >= size - 1) {
|
||||||
term = null;
|
termBuffer.reset();
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
prev = term;
|
prevBuffer.set(termBuffer);
|
||||||
term = readTerm();
|
termBuffer.read(input, fieldInfos);
|
||||||
|
|
||||||
termInfo.docFreq = input.readVInt(); // read doc freq
|
termInfo.docFreq = input.readVInt(); // read doc freq
|
||||||
termInfo.freqPointer += input.readVLong(); // read freq pointer
|
termInfo.freqPointer += input.readVLong(); // read freq pointer
|
||||||
|
@ -138,28 +140,23 @@ final class SegmentTermEnum extends TermEnum implements Cloneable {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
private final Term readTerm() throws IOException {
|
/** Optimized scan, without allocating new terms. */
|
||||||
int start = input.readVInt();
|
final void scanTo(Term term) throws IOException {
|
||||||
int length = input.readVInt();
|
if (scratch == null)
|
||||||
int totalLength = start + length;
|
scratch = new TermBuffer();
|
||||||
if (buffer.length < totalLength)
|
scratch.set(term);
|
||||||
growBuffer(totalLength);
|
while (scratch.compareTo(termBuffer) > 0 && next()) {}
|
||||||
|
|
||||||
input.readChars(buffer, start, length);
|
|
||||||
return new Term(fieldInfos.fieldName(input.readVInt()),
|
|
||||||
new String(buffer, 0, totalLength), false);
|
|
||||||
}
|
|
||||||
|
|
||||||
private final void growBuffer(int length) {
|
|
||||||
buffer = new char[length];
|
|
||||||
for (int i = 0; i < term.text.length(); i++) // copy contents
|
|
||||||
buffer[i] = term.text.charAt(i);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns the current Term in the enumeration.
|
/** Returns the current Term in the enumeration.
|
||||||
Initially invalid, valid after next() called for the first time.*/
|
Initially invalid, valid after next() called for the first time.*/
|
||||||
public final Term term() {
|
public final Term term() {
|
||||||
return term;
|
return termBuffer.toTerm();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns the previous Term enumerated. Initially null.*/
|
||||||
|
final Term prev() {
|
||||||
|
return prevBuffer.toTerm();
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns the current TermInfo in the enumeration.
|
/** Returns the current TermInfo in the enumeration.
|
||||||
|
|
|
@ -0,0 +1,119 @@
|
||||||
|
package org.apache.lucene.index;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copyright 2004 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import org.apache.lucene.store.IndexInput;
|
||||||
|
|
||||||
|
final class TermBuffer implements Cloneable {
|
||||||
|
private static final char[] NO_CHARS = new char[0];
|
||||||
|
|
||||||
|
private String field;
|
||||||
|
private char[] text = NO_CHARS;
|
||||||
|
private int textLength;
|
||||||
|
private Term term; // cached
|
||||||
|
|
||||||
|
public final int compareTo(TermBuffer other) {
|
||||||
|
if (field == other.field) // fields are interned
|
||||||
|
return compareChars(text, textLength, other.text, other.textLength);
|
||||||
|
else
|
||||||
|
return field.compareTo(other.field);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static final int compareChars(char[] v1, int len1,
|
||||||
|
char[] v2, int len2) {
|
||||||
|
int end = Math.min(len1, len2);
|
||||||
|
for (int k = 0; k < end; k++) {
|
||||||
|
char c1 = v1[k];
|
||||||
|
char c2 = v2[k];
|
||||||
|
if (c1 != c2) {
|
||||||
|
return c1 - c2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return len1 - len2;
|
||||||
|
}
|
||||||
|
|
||||||
|
private final void setTextLength(int newLength) {
|
||||||
|
if (text.length < newLength) {
|
||||||
|
char[] newText = new char[newLength];
|
||||||
|
System.arraycopy(text, 0, newText, 0, textLength);
|
||||||
|
text = newText;
|
||||||
|
}
|
||||||
|
textLength = newLength;
|
||||||
|
}
|
||||||
|
|
||||||
|
public final void read(IndexInput input, FieldInfos fieldInfos)
|
||||||
|
throws IOException {
|
||||||
|
this.term = null; // invalidate cache
|
||||||
|
int start = input.readVInt();
|
||||||
|
int length = input.readVInt();
|
||||||
|
int totalLength = start + length;
|
||||||
|
setTextLength(totalLength);
|
||||||
|
input.readChars(this.text, start, length);
|
||||||
|
this.field = fieldInfos.fieldName(input.readVInt());
|
||||||
|
}
|
||||||
|
|
||||||
|
public final void set(Term term) {
|
||||||
|
if (term == null) {
|
||||||
|
reset();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// copy text into the buffer
|
||||||
|
setTextLength(term.text().length());
|
||||||
|
term.text().getChars(0, term.text().length(), text, 0);
|
||||||
|
|
||||||
|
this.field = term.field();
|
||||||
|
this.term = term;
|
||||||
|
}
|
||||||
|
|
||||||
|
public final void set(TermBuffer other) {
|
||||||
|
setTextLength(other.textLength);
|
||||||
|
System.arraycopy(other.text, 0, text, 0, textLength);
|
||||||
|
|
||||||
|
this.field = other.field;
|
||||||
|
this.term = other.term;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void reset() {
|
||||||
|
this.field = null;
|
||||||
|
this.textLength = 0;
|
||||||
|
this.term = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Term toTerm() {
|
||||||
|
if (field == null) // unset
|
||||||
|
return null;
|
||||||
|
|
||||||
|
if (term == null)
|
||||||
|
term = new Term(field, new String(text, 0, textLength), false);
|
||||||
|
|
||||||
|
return term;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected Object clone() {
|
||||||
|
TermBuffer clone = null;
|
||||||
|
try {
|
||||||
|
clone = (TermBuffer)super.clone();
|
||||||
|
} catch (CloneNotSupportedException e) {}
|
||||||
|
|
||||||
|
clone.text = new char[text.length];
|
||||||
|
System.arraycopy(text, 0, clone.text, 0, textLength);
|
||||||
|
|
||||||
|
return clone;
|
||||||
|
}
|
||||||
|
}
|
|
@ -129,7 +129,7 @@ final class TermInfosReader {
|
||||||
// optimize sequential access: first try scanning cached enum w/o seeking
|
// optimize sequential access: first try scanning cached enum w/o seeking
|
||||||
SegmentTermEnum enumerator = getEnum();
|
SegmentTermEnum enumerator = getEnum();
|
||||||
if (enumerator.term() != null // term is at or past current
|
if (enumerator.term() != null // term is at or past current
|
||||||
&& ((enumerator.prev != null && term.compareTo(enumerator.prev) > 0)
|
&& ((enumerator.prev() != null && term.compareTo(enumerator.prev())> 0)
|
||||||
|| term.compareTo(enumerator.term()) >= 0)) {
|
|| term.compareTo(enumerator.term()) >= 0)) {
|
||||||
int enumOffset = (int)(enumerator.position/enumerator.indexInterval)+1;
|
int enumOffset = (int)(enumerator.position/enumerator.indexInterval)+1;
|
||||||
if (indexTerms.length == enumOffset // but before end of block
|
if (indexTerms.length == enumOffset // but before end of block
|
||||||
|
@ -145,7 +145,7 @@ final class TermInfosReader {
|
||||||
/** Scans within block for matching term. */
|
/** Scans within block for matching term. */
|
||||||
private final TermInfo scanEnum(Term term) throws IOException {
|
private final TermInfo scanEnum(Term term) throws IOException {
|
||||||
SegmentTermEnum enumerator = getEnum();
|
SegmentTermEnum enumerator = getEnum();
|
||||||
while (term.compareTo(enumerator.term()) > 0 && enumerator.next()) {}
|
enumerator.scanTo(term);
|
||||||
if (enumerator.term() != null && term.compareTo(enumerator.term()) == 0)
|
if (enumerator.term() != null && term.compareTo(enumerator.term()) == 0)
|
||||||
return enumerator.termInfo();
|
return enumerator.termInfo();
|
||||||
else
|
else
|
||||||
|
|
Loading…
Reference in New Issue