Remove Lucene's deprecated PatternAnalyzer

Instead of using the PatternAnalyzer, the functionality was replicated by using Lucene's StopFilter, PatternTokenizer and LowerCaseFilter.

Closes #6717
This commit is contained in:
Areek Zillur 2014-07-08 11:36:58 -04:00
parent 867d88795b
commit 14af0cb0f3
1 changed files with 32 additions and 2 deletions

View File

@ -19,8 +19,12 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.miscellaneous.PatternAnalyzer;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.pattern.PatternTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.Version;
@ -33,15 +37,41 @@ import org.elasticsearch.env.Environment;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
import java.io.Reader;
import java.util.regex.Pattern;
/**
*
*/
public class PatternAnalyzerProvider extends AbstractIndexAnalyzerProvider<PatternAnalyzer> {
public class PatternAnalyzerProvider extends AbstractIndexAnalyzerProvider<Analyzer> {
private final PatternAnalyzer analyzer;
/**
 * Analyzer that tokenizes input with a regular expression and then optionally
 * lowercases the tokens and removes stop words.
 *
 * The chain built for each field is:
 * PatternTokenizer, then LowerCaseFilter (only if enabled), then StopFilter
 * (only if a stop-word set was supplied).
 */
private static final class PatternAnalyzer extends Analyzer {

    /** Lucene version handed to the version-aware filters. */
    private final org.apache.lucene.util.Version version;
    /** Regular expression given to the tokenizer. */
    private final Pattern pattern;
    /** Whether tokens are lowercased before stop-word removal. */
    private final boolean lowercase;
    /** Stop words to drop; may be null, in which case no stop filtering is applied. */
    private final CharArraySet stopWords;

    /**
     * @param version   Lucene version passed to LowerCaseFilter and StopFilter
     * @param pattern   regular expression used by the tokenizer
     * @param lowercase true to lowercase tokens before stop-word removal
     * @param stopWords stop words to remove, or null for none
     */
    PatternAnalyzer(org.apache.lucene.util.Version version, Pattern pattern, boolean lowercase, CharArraySet stopWords) {
        this.version = version;
        this.pattern = pattern;
        this.lowercase = lowercase;
        this.stopWords = stopWords;
    }

    /**
     * Builds the tokenizer/filter chain for one field.
     *
     * @param fieldName name of the field being analyzed (not used here)
     * @param reader    source of the text to tokenize
     * @return components whose source is the pattern tokenizer and whose sink
     *         is the last filter in the chain
     */
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        // Group -1 asks PatternTokenizer to split on the pattern rather than
        // emit a capture group (see the PatternTokenizer Javadoc).
        final PatternTokenizer tokenizer = new PatternTokenizer(reader, pattern, -1);
        TokenStream stream = tokenizer;
        if (lowercase) {
            // Lowercase first so stop-word matching sees normalized tokens.
            stream = new LowerCaseFilter(version, stream);
        }
        if (stopWords != null) {
            // Skip the filter entirely instead of handing it a null set.
            stream = new StopFilter(version, stream, stopWords);
        }
        return new TokenStreamComponents(tokenizer, stream);
    }
}
@Inject
public PatternAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);