SOLR-1874: Optimize PatternReplaceFilter

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@932752 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2010-04-10 16:00:45 +00:00
parent d02cbfe3c1
commit 8804fc542d
2 changed files with 13 additions and 11 deletions

View File

@ -146,6 +146,8 @@ Optimizations
are necessary for the current log level.
(Fuad Efendi and hossman)
* SOLR-1874: Optimize PatternReplaceFilter for better performance. (rmuir, uschindler)
Bug Fixes
----------------------

View File

@ -19,12 +19,11 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import java.io.IOException;
import java.nio.CharBuffer;
/**
* A TokenFilter which applies a Pattern to each token in the stream,
@ -43,7 +42,9 @@ public final class PatternReplaceFilter extends TokenFilter {
private final Pattern p;
private final String replacement;
private final boolean all;
private final TermAttribute termAtt;
private final CharTermAttribute termAtt;
private final Matcher m;
/**
* Constructs an instance to replace either the first, or all occurrences
*
@ -63,20 +64,19 @@ public final class PatternReplaceFilter extends TokenFilter {
this.p=p;
this.replacement = (null == replacement) ? "" : replacement;
this.all=all;
this.termAtt = addAttribute(TermAttribute.class);
this.termAtt = addAttribute(CharTermAttribute.class);
this.m = p.matcher(termAtt);
}
@Override
public boolean incrementToken() throws IOException {
if (!input.incrementToken()) return false;
CharSequence text = CharBuffer.wrap(termAtt.termBuffer(), 0, termAtt.termLength());
Matcher m = p.matcher(text);
if (all) {
termAtt.setTermBuffer(m.replaceAll(replacement));
} else {
termAtt.setTermBuffer(m.replaceFirst(replacement));
m.reset();
if (m.find()) {
// replaceAll/replaceFirst will reset() this previous find.
String transformed = all ? m.replaceAll(replacement) : m.replaceFirst(replacement);
termAtt.setEmpty().append(transformed);
}
return true;