mirror of https://github.com/apache/lucene.git
LUCENE-1119: small optimization to TermInfosWriter.add to take a char[] instead of Term/String
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@609378 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
26bc874e62
commit
393a1d0575
|
@ -2050,7 +2050,7 @@ final class DocumentsWriter {
|
|||
IndexOutput proxOut)
|
||||
throws CorruptIndexException, IOException {
|
||||
|
||||
final String fieldName = fields[0].fieldInfo.name;
|
||||
final int fieldNumber = fields[0].fieldInfo.number;
|
||||
int numFields = fields.length;
|
||||
|
||||
final FieldMergeState[] mergeStates = new FieldMergeState[numFields];
|
||||
|
@ -2101,9 +2101,6 @@ final class DocumentsWriter {
|
|||
while(text[pos] != 0xffff)
|
||||
pos++;
|
||||
|
||||
// TODO: can we avoid 2 new objects here?
|
||||
Term term = new Term(fieldName, new String(text, start, pos-start));
|
||||
|
||||
long freqPointer = freqOut.getFilePointer();
|
||||
long proxPointer = proxOut.getFilePointer();
|
||||
|
||||
|
@ -2201,7 +2198,7 @@ final class DocumentsWriter {
|
|||
|
||||
// Write term
|
||||
termInfo.set(df, freqPointer, proxPointer, (int) (skipPointer - freqPointer));
|
||||
termsOut.add(term, termInfo);
|
||||
termsOut.add(fieldNumber, text, start, pos-start, termInfo);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -21,7 +21,6 @@ package org.apache.lucene.index;
|
|||
import java.io.IOException;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
|
||||
/** This stores a monotonically increasing set of <Term, TermInfo> pairs in a
|
||||
Directory. A TermInfos can be written once, in order. */
|
||||
|
@ -32,9 +31,8 @@ final class TermInfosWriter {
|
|||
|
||||
private FieldInfos fieldInfos;
|
||||
private IndexOutput output;
|
||||
private Term lastTerm = new Term("", "");
|
||||
private TermInfo lastTi = new TermInfo();
|
||||
private long size = 0;
|
||||
private long size;
|
||||
|
||||
// TODO: the default values for these two parameters should be settable from
|
||||
// IndexWriter. However, once that's done, folks will start setting them to
|
||||
|
@ -62,10 +60,15 @@ final class TermInfosWriter {
|
|||
*/
|
||||
int maxSkipLevels = 10;
|
||||
|
||||
private long lastIndexPointer = 0;
|
||||
private boolean isIndex = false;
|
||||
private long lastIndexPointer;
|
||||
private boolean isIndex;
|
||||
private char[] lastTermText = new char[10];
|
||||
private int lastTermTextLength;
|
||||
private int lastFieldNumber = -1;
|
||||
|
||||
private TermInfosWriter other = null;
|
||||
private char[] termTextBuffer = new char[10];
|
||||
|
||||
private TermInfosWriter other;
|
||||
|
||||
TermInfosWriter(Directory directory, String segment, FieldInfos fis,
|
||||
int interval)
|
||||
|
@ -93,25 +96,59 @@ final class TermInfosWriter {
|
|||
output.writeInt(maxSkipLevels); // write maxSkipLevels
|
||||
}
|
||||
|
||||
/** Adds a new <Term, TermInfo> pair to the set.
|
||||
void add(Term term, TermInfo ti) throws IOException {
|
||||
|
||||
final int length = term.text.length();
|
||||
if (termTextBuffer.length < length)
|
||||
termTextBuffer = new char[(int) (length*1.25)];
|
||||
|
||||
term.text.getChars(0, length, termTextBuffer, 0);
|
||||
|
||||
add(fieldInfos.fieldNumber(term.field), termTextBuffer, 0, length, ti);
|
||||
}
|
||||
|
||||
// Currently used only by assert statement
|
||||
private int compareToLastTerm(int fieldNumber, char[] termText, int start, int length) {
|
||||
int pos = 0;
|
||||
|
||||
if (lastFieldNumber != fieldNumber)
|
||||
return fieldInfos.fieldName(lastFieldNumber).compareTo(fieldInfos.fieldName(fieldNumber));
|
||||
|
||||
while(pos < length && pos < lastTermTextLength) {
|
||||
final char c1 = lastTermText[pos];
|
||||
final char c2 = termText[pos + start];
|
||||
if (c1 < c2)
|
||||
return -1;
|
||||
else if (c1 > c2)
|
||||
return 1;
|
||||
pos++;
|
||||
}
|
||||
|
||||
if (pos < lastTermTextLength)
|
||||
// Last term was longer
|
||||
return 1;
|
||||
else if (pos < length)
|
||||
// Last term was shorter
|
||||
return -1;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
/** Adds a new <<fieldNumber, termText>, TermInfo> pair to the set.
|
||||
Term must be lexicographically greater than all previous Terms added.
|
||||
TermInfo pointers must be positive and greater than all previous.*/
|
||||
final void add(Term term, TermInfo ti)
|
||||
throws CorruptIndexException, IOException {
|
||||
if (!isIndex && term.compareTo(lastTerm) <= 0)
|
||||
throw new CorruptIndexException("term out of order (\"" + term +
|
||||
"\".compareTo(\"" + lastTerm + "\") <= 0)");
|
||||
if (ti.freqPointer < lastTi.freqPointer)
|
||||
throw new CorruptIndexException("freqPointer out of order (" + ti.freqPointer +
|
||||
" < " + lastTi.freqPointer + ")");
|
||||
if (ti.proxPointer < lastTi.proxPointer)
|
||||
throw new CorruptIndexException("proxPointer out of order (" + ti.proxPointer +
|
||||
" < " + lastTi.proxPointer + ")");
|
||||
void add(int fieldNumber, char[] termText, int termTextStart, int termTextLength, TermInfo ti)
|
||||
throws IOException {
|
||||
|
||||
assert compareToLastTerm(fieldNumber, termText, termTextStart, termTextLength) < 0 || (isIndex && termTextLength == 0 && lastTermTextLength == 0);
|
||||
assert ti.freqPointer >= lastTi.freqPointer: "freqPointer out of order (" + ti.freqPointer + " < " + lastTi.freqPointer + ")";
|
||||
assert ti.proxPointer >= lastTi.proxPointer: "proxPointer out of order (" + ti.proxPointer + " < " + lastTi.proxPointer + ")";
|
||||
|
||||
if (!isIndex && size % indexInterval == 0)
|
||||
other.add(lastTerm, lastTi); // add an index term
|
||||
other.add(lastFieldNumber, lastTermText, 0, lastTermTextLength, lastTi); // add an index term
|
||||
|
||||
writeTerm(fieldNumber, termText, termTextStart, termTextLength); // write term
|
||||
|
||||
writeTerm(term); // write term
|
||||
output.writeVInt(ti.docFreq); // write doc freq
|
||||
output.writeVLong(ti.freqPointer - lastTi.freqPointer); // write pointers
|
||||
output.writeVLong(ti.proxPointer - lastTi.proxPointer);
|
||||
|
@ -125,28 +162,38 @@ final class TermInfosWriter {
|
|||
lastIndexPointer = other.output.getFilePointer(); // write pointer
|
||||
}
|
||||
|
||||
if (lastTermText.length < termTextLength)
|
||||
lastTermText = new char[(int) (termTextLength*1.25)];
|
||||
System.arraycopy(termText, termTextStart, lastTermText, 0, termTextLength);
|
||||
lastTermTextLength = termTextLength;
|
||||
lastFieldNumber = fieldNumber;
|
||||
|
||||
lastTi.set(ti);
|
||||
size++;
|
||||
}
|
||||
|
||||
private final void writeTerm(Term term)
|
||||
private void writeTerm(int fieldNumber, char[] termText, int termTextStart, int termTextLength)
|
||||
throws IOException {
|
||||
int start = StringHelper.stringDifference(lastTerm.text, term.text);
|
||||
int length = term.text.length() - start;
|
||||
|
||||
// Compute prefix in common with last term:
|
||||
int start = 0;
|
||||
final int limit = termTextLength < lastTermTextLength ? termTextLength : lastTermTextLength;
|
||||
while(start < limit) {
|
||||
if (termText[termTextStart+start] != lastTermText[start])
|
||||
break;
|
||||
start++;
|
||||
}
|
||||
|
||||
int length = termTextLength - start;
|
||||
|
||||
output.writeVInt(start); // write shared prefix length
|
||||
output.writeVInt(length); // write delta length
|
||||
output.writeChars(term.text, start, length); // write delta chars
|
||||
|
||||
output.writeVInt(fieldInfos.fieldNumber(term.field)); // write field num
|
||||
|
||||
lastTerm = term;
|
||||
output.writeChars(termText, start+termTextStart, length); // write delta chars
|
||||
output.writeVInt(fieldNumber); // write field num
|
||||
}
|
||||
|
||||
|
||||
|
||||
/** Called to complete TermInfos creation. */
|
||||
final void close() throws IOException {
|
||||
void close() throws IOException {
|
||||
output.seek(4); // write size after format
|
||||
output.writeLong(size);
|
||||
output.close();
|
||||
|
|
Loading…
Reference in New Issue