LUCENE-1119: small optimization to TermInfosWriter.add to take a char[] instead of Term/String

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@609378 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2008-01-06 19:29:45 +00:00
parent 26bc874e62
commit 393a1d0575
2 changed files with 81 additions and 37 deletions

View File

@ -2050,7 +2050,7 @@ final class DocumentsWriter {
IndexOutput proxOut) IndexOutput proxOut)
throws CorruptIndexException, IOException { throws CorruptIndexException, IOException {
final String fieldName = fields[0].fieldInfo.name; final int fieldNumber = fields[0].fieldInfo.number;
int numFields = fields.length; int numFields = fields.length;
final FieldMergeState[] mergeStates = new FieldMergeState[numFields]; final FieldMergeState[] mergeStates = new FieldMergeState[numFields];
@ -2101,9 +2101,6 @@ final class DocumentsWriter {
while(text[pos] != 0xffff) while(text[pos] != 0xffff)
pos++; pos++;
// TODO: can we avoid 2 new objects here?
Term term = new Term(fieldName, new String(text, start, pos-start));
long freqPointer = freqOut.getFilePointer(); long freqPointer = freqOut.getFilePointer();
long proxPointer = proxOut.getFilePointer(); long proxPointer = proxOut.getFilePointer();
@ -2201,7 +2198,7 @@ final class DocumentsWriter {
// Write term // Write term
termInfo.set(df, freqPointer, proxPointer, (int) (skipPointer - freqPointer)); termInfo.set(df, freqPointer, proxPointer, (int) (skipPointer - freqPointer));
termsOut.add(term, termInfo); termsOut.add(fieldNumber, text, start, pos-start, termInfo);
} }
} }

View File

@ -21,7 +21,6 @@ package org.apache.lucene.index;
import java.io.IOException; import java.io.IOException;
import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.util.StringHelper;
/** This stores a monotonically increasing set of <Term, TermInfo> pairs in a /** This stores a monotonically increasing set of <Term, TermInfo> pairs in a
Directory. A TermInfos can be written once, in order. */ Directory. A TermInfos can be written once, in order. */
@ -32,9 +31,8 @@ final class TermInfosWriter {
private FieldInfos fieldInfos; private FieldInfos fieldInfos;
private IndexOutput output; private IndexOutput output;
private Term lastTerm = new Term("", "");
private TermInfo lastTi = new TermInfo(); private TermInfo lastTi = new TermInfo();
private long size = 0; private long size;
// TODO: the default values for these two parameters should be settable from // TODO: the default values for these two parameters should be settable from
// IndexWriter. However, once that's done, folks will start setting them to // IndexWriter. However, once that's done, folks will start setting them to
@ -62,10 +60,15 @@ final class TermInfosWriter {
*/ */
int maxSkipLevels = 10; int maxSkipLevels = 10;
private long lastIndexPointer = 0; private long lastIndexPointer;
private boolean isIndex = false; private boolean isIndex;
private char[] lastTermText = new char[10];
private int lastTermTextLength;
private int lastFieldNumber = -1;
private TermInfosWriter other = null; private char[] termTextBuffer = new char[10];
private TermInfosWriter other;
TermInfosWriter(Directory directory, String segment, FieldInfos fis, TermInfosWriter(Directory directory, String segment, FieldInfos fis,
int interval) int interval)
@ -93,25 +96,59 @@ final class TermInfosWriter {
output.writeInt(maxSkipLevels); // write maxSkipLevels output.writeInt(maxSkipLevels); // write maxSkipLevels
} }
/** Adds a new <Term, TermInfo> pair to the set. void add(Term term, TermInfo ti) throws IOException {
final int length = term.text.length();
if (termTextBuffer.length < length)
termTextBuffer = new char[(int) (length*1.25)];
term.text.getChars(0, length, termTextBuffer, 0);
add(fieldInfos.fieldNumber(term.field), termTextBuffer, 0, length, ti);
}
// Currently used only by assert statement
private int compareToLastTerm(int fieldNumber, char[] termText, int start, int length) {
int pos = 0;
if (lastFieldNumber != fieldNumber)
return fieldInfos.fieldName(lastFieldNumber).compareTo(fieldInfos.fieldName(fieldNumber));
while(pos < length && pos < lastTermTextLength) {
final char c1 = lastTermText[pos];
final char c2 = termText[pos + start];
if (c1 < c2)
return -1;
else if (c1 > c2)
return 1;
pos++;
}
if (pos < lastTermTextLength)
// Last term was longer
return 1;
else if (pos < length)
// Last term was shorter
return -1;
else
return 0;
}
/** Adds a new <<fieldNumber, termText>, TermInfo> pair to the set.
Term must be lexicographically greater than all previous Terms added. Term must be lexicographically greater than all previous Terms added.
TermInfo pointers must be positive and greater than all previous.*/ TermInfo pointers must be positive and greater than all previous.*/
final void add(Term term, TermInfo ti) void add(int fieldNumber, char[] termText, int termTextStart, int termTextLength, TermInfo ti)
throws CorruptIndexException, IOException { throws IOException {
if (!isIndex && term.compareTo(lastTerm) <= 0)
throw new CorruptIndexException("term out of order (\"" + term + assert compareToLastTerm(fieldNumber, termText, termTextStart, termTextLength) < 0 || (isIndex && termTextLength == 0 && lastTermTextLength == 0);
"\".compareTo(\"" + lastTerm + "\") <= 0)"); assert ti.freqPointer >= lastTi.freqPointer: "freqPointer out of order (" + ti.freqPointer + " < " + lastTi.freqPointer + ")";
if (ti.freqPointer < lastTi.freqPointer) assert ti.proxPointer >= lastTi.proxPointer: "proxPointer out of order (" + ti.proxPointer + " < " + lastTi.proxPointer + ")";
throw new CorruptIndexException("freqPointer out of order (" + ti.freqPointer +
" < " + lastTi.freqPointer + ")");
if (ti.proxPointer < lastTi.proxPointer)
throw new CorruptIndexException("proxPointer out of order (" + ti.proxPointer +
" < " + lastTi.proxPointer + ")");
if (!isIndex && size % indexInterval == 0) if (!isIndex && size % indexInterval == 0)
other.add(lastTerm, lastTi); // add an index term other.add(lastFieldNumber, lastTermText, 0, lastTermTextLength, lastTi); // add an index term
writeTerm(fieldNumber, termText, termTextStart, termTextLength); // write term
writeTerm(term); // write term
output.writeVInt(ti.docFreq); // write doc freq output.writeVInt(ti.docFreq); // write doc freq
output.writeVLong(ti.freqPointer - lastTi.freqPointer); // write pointers output.writeVLong(ti.freqPointer - lastTi.freqPointer); // write pointers
output.writeVLong(ti.proxPointer - lastTi.proxPointer); output.writeVLong(ti.proxPointer - lastTi.proxPointer);
@ -125,28 +162,38 @@ final class TermInfosWriter {
lastIndexPointer = other.output.getFilePointer(); // write pointer lastIndexPointer = other.output.getFilePointer(); // write pointer
} }
if (lastTermText.length < termTextLength)
lastTermText = new char[(int) (termTextLength*1.25)];
System.arraycopy(termText, termTextStart, lastTermText, 0, termTextLength);
lastTermTextLength = termTextLength;
lastFieldNumber = fieldNumber;
lastTi.set(ti); lastTi.set(ti);
size++; size++;
} }
private final void writeTerm(Term term) private void writeTerm(int fieldNumber, char[] termText, int termTextStart, int termTextLength)
throws IOException { throws IOException {
int start = StringHelper.stringDifference(lastTerm.text, term.text);
int length = term.text.length() - start;
output.writeVInt(start); // write shared prefix length // Compute prefix in common with last term:
int start = 0;
final int limit = termTextLength < lastTermTextLength ? termTextLength : lastTermTextLength;
while(start < limit) {
if (termText[termTextStart+start] != lastTermText[start])
break;
start++;
}
int length = termTextLength - start;
output.writeVInt(start); // write shared prefix length
output.writeVInt(length); // write delta length output.writeVInt(length); // write delta length
output.writeChars(term.text, start, length); // write delta chars output.writeChars(termText, start+termTextStart, length); // write delta chars
output.writeVInt(fieldNumber); // write field num
output.writeVInt(fieldInfos.fieldNumber(term.field)); // write field num
lastTerm = term;
} }
/** Called to complete TermInfos creation. */ /** Called to complete TermInfos creation. */
final void close() throws IOException { void close() throws IOException {
output.seek(4); // write size after format output.seek(4); // write size after format
output.writeLong(size); output.writeLong(size);
output.close(); output.close();