From 14af0cb0f3640ac5773b5533f36d2cb0b76c9c8c Mon Sep 17 00:00:00 2001 From: Areek Zillur Date: Tue, 8 Jul 2014 11:36:58 -0400 Subject: [PATCH] Remove Lucene's deprecated PatternAnalyzer Instead of using the PatternAnalyzer, the functionality was replicated by using Lucene's StopFilter, PatternTokenizer and LowerCaseFilter Closes #6717 --- .../analysis/PatternAnalyzerProvider.java | 34 +++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/analysis/PatternAnalyzerProvider.java b/src/main/java/org/elasticsearch/index/analysis/PatternAnalyzerProvider.java index af053b3d47f..5acd1f23fa7 100644 --- a/src/main/java/org/elasticsearch/index/analysis/PatternAnalyzerProvider.java +++ b/src/main/java/org/elasticsearch/index/analysis/PatternAnalyzerProvider.java @@ -19,8 +19,12 @@ package org.elasticsearch.index.analysis; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopAnalyzer; -import org.apache.lucene.analysis.miscellaneous.PatternAnalyzer; +import org.apache.lucene.analysis.core.StopFilter; +import org.apache.lucene.analysis.pattern.PatternTokenizer; import org.apache.lucene.analysis.util.CharArraySet; import org.elasticsearch.ElasticsearchIllegalArgumentException; import org.elasticsearch.Version; @@ -33,15 +37,41 @@ import org.elasticsearch.env.Environment; import org.elasticsearch.index.Index; import org.elasticsearch.index.settings.IndexSettings; +import java.io.Reader; import java.util.regex.Pattern; /** * */ -public class PatternAnalyzerProvider extends AbstractIndexAnalyzerProvider { +public class PatternAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final PatternAnalyzer analyzer; + private static final class PatternAnalyzer extends Analyzer { + private final org.apache.lucene.util.Version version; + private final Pattern pattern; + 
private final boolean lowercase; + private final CharArraySet stopWords; + + PatternAnalyzer(org.apache.lucene.util.Version version, Pattern pattern, boolean lowercase, CharArraySet stopWords) { + this.version = version; + this.pattern = pattern; + this.lowercase = lowercase; + this.stopWords = stopWords; + } + + @Override + protected TokenStreamComponents createComponents(String s, Reader reader) { + final TokenStreamComponents source = new TokenStreamComponents(new PatternTokenizer(reader, pattern, -1)); + TokenStream result = null; + if (lowercase) { + result = new LowerCaseFilter(version, source.getTokenStream()); + } + result = new StopFilter(version, (result == null) ? source.getTokenStream() : result, stopWords); + return new TokenStreamComponents(source.getTokenizer(), result); + } + } + @Inject public PatternAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) { super(index, indexSettings, name, settings);