LUCENE-2194: Improve the efficiency of Snowball

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@897449 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2010-01-09 13:34:11 +00:00
parent 4fb13e4a59
commit 5e8e5a0f05
3 changed files with 32 additions and 6 deletions

View File

@ -86,6 +86,9 @@ Optimizations
take advantage of this for faster performance.
(Steven Rowe, Uwe Schindler, Robert Muir)
* LUCENE-2194: Improve the efficiency of Snowball by not creating 2 new Strings
and 1 new StringBuilder for every word. (Robert Muir)
Test Cases
* LUCENE-2115: Cutover contrib tests to use Java5 generics. (Kay Kay

View File

@ -74,13 +74,16 @@ public final class SnowballFilter extends TokenFilter {
@Override
public final boolean incrementToken() throws IOException {
if (input.incrementToken()) {
String originalTerm = termAtt.term();
stemmer.setCurrent(originalTerm);
char termBuffer[] = termAtt.termBuffer();
final int length = termAtt.termLength();
stemmer.setCurrent(termBuffer, 0, length);
stemmer.stem();
String finalTerm = stemmer.getCurrent();
// Don't bother updating if it is unchanged.
if (!originalTerm.equals(finalTerm))
termAtt.setTermBuffer(finalTerm);
final StringBuilder finalTerm = stemmer.getCurrentBuffer();
final int newLength = finalTerm.length();
if (newLength > termBuffer.length)
termBuffer = termAtt.resizeTermBuffer(newLength);
finalTerm.getChars(0, newLength, termBuffer, 0);
termAtt.setTermLength(newLength);
return true;
} else {
return false;

View File

@ -77,6 +77,26 @@ public abstract class SnowballProgram {
current = new StringBuilder();
return result;
}
/**
 * Load a slice of a character array as the current string.
 * <p>
 * The existing {@code current} builder is emptied and refilled rather than
 * replaced, and the positional fields are reset so that they span the
 * newly loaded text.
 *
 * @param text   array holding the characters to load
 * @param offset index in {@code text} of the first character to copy
 * @param length number of characters to copy
 */
public void setCurrent(char text[], int offset, int length) {
  // Reuse the builder: drop the old contents, then copy the slice in.
  current.setLength(0);
  current.append(text, offset, length);

  // Start positions go back to the beginning of the buffer.
  cursor = 0;
  limit_backward = 0;
  bra = cursor;

  // End positions sit at the end of the freshly loaded text.
  final int end = current.length();
  limit = end;
  ket = end;
}
/**
 * Expose the buffer that contains the stem.
 *
 * @return the {@code current} builder itself; no copy is made
 */
public StringBuilder getCurrentBuffer() {
  return this.current;
}
// current string: the buffer that setCurrent(char[], int, int) refills and
// that getCurrentBuffer() hands back to the caller
protected StringBuilder current;