mirror of https://github.com/apache/lucene.git
LUCENE-2194: Improve the efficiency of Snowball
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@897449 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4fb13e4a59
commit
5e8e5a0f05
|
@ -86,6 +86,9 @@ Optimizations
|
|||
take advantage of this for faster performance.
|
||||
(Steven Rowe, Uwe Schindler, Robert Muir)
|
||||
|
||||
* LUCENE-2194: Improve the efficiency of Snowball by not creating 2 new Strings
|
||||
and 1 new StringBuilder for every word. (Robert Muir)
|
||||
|
||||
Test Cases
|
||||
|
||||
* LUCENE-2115: Cutover contrib tests to use Java5 generics. (Kay Kay
|
||||
|
|
|
@ -74,13 +74,16 @@ public final class SnowballFilter extends TokenFilter {
|
|||
@Override
|
||||
public final boolean incrementToken() throws IOException {
|
||||
if (input.incrementToken()) {
|
||||
String originalTerm = termAtt.term();
|
||||
stemmer.setCurrent(originalTerm);
|
||||
char termBuffer[] = termAtt.termBuffer();
|
||||
final int length = termAtt.termLength();
|
||||
stemmer.setCurrent(termBuffer, 0, length);
|
||||
stemmer.stem();
|
||||
String finalTerm = stemmer.getCurrent();
|
||||
// Don't bother updating, if it is unchanged.
|
||||
if (!originalTerm.equals(finalTerm))
|
||||
termAtt.setTermBuffer(finalTerm);
|
||||
final StringBuilder finalTerm = stemmer.getCurrentBuffer();
|
||||
final int newLength = finalTerm.length();
|
||||
if (newLength > termBuffer.length)
|
||||
termBuffer = termAtt.resizeTermBuffer(newLength);
|
||||
finalTerm.getChars(0, newLength, termBuffer, 0);
|
||||
termAtt.setTermLength(newLength);
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
|
|
|
@ -77,6 +77,26 @@ public abstract class SnowballProgram {
|
|||
current = new StringBuilder();
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the current string from a range of a character array.
* <p>
* Empties the internal buffer, appends {@code length} chars of {@code text}
* beginning at {@code offset}, and then resets the position fields:
* {@code cursor} and {@code limit_backward} become 0, {@code limit} becomes
* the new buffer length, and {@code bra}/{@code ket} are set to the cursor
* and the limit respectively.
*
* @param text array holding the characters to load
* @param offset index in {@code text} of the first character to copy
* @param length number of characters to copy
|
||||
*/
|
||||
public void setCurrent(char text[], int offset, int length) {
|
||||
// Empty the existing buffer in place rather than allocating a new StringBuilder.
current.setLength(0);
|
||||
current.append(text, offset, length);
|
||||
cursor = 0;
|
||||
limit = current.length();
|
||||
limit_backward = 0;
|
||||
bra = cursor;
|
||||
ket = limit;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the current buffer containing the stem.
* <p>
* The returned object is the internal {@code current} StringBuilder itself,
* not a copy, so modifications made through the returned reference change
* this object's buffer.
*
* @return the StringBuilder held in {@code current}
|
||||
*/
|
||||
public StringBuilder getCurrentBuffer() {
|
||||
return current;
|
||||
}
|
||||
|
||||
// current string
|
||||
protected StringBuilder current;
|
||||
|
|
Loading…
Reference in New Issue