Remove Lucene's deprecated PatternAnalyzer
Instead of using the deprecated PatternAnalyzer, its functionality is replicated by using Lucene's StopFilter, PatternTokenizer and LowerCaseFilter. Closes #6717
This commit is contained in:
parent
867d88795b
commit
14af0cb0f3
|
@ -19,8 +19,12 @@
|
|||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.core.StopAnalyzer;
|
||||
import org.apache.lucene.analysis.miscellaneous.PatternAnalyzer;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.pattern.PatternTokenizer;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.elasticsearch.ElasticsearchIllegalArgumentException;
|
||||
import org.elasticsearch.Version;
|
||||
|
@ -33,15 +37,41 @@ import org.elasticsearch.env.Environment;
|
|||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
||||
import java.io.Reader;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class PatternAnalyzerProvider extends AbstractIndexAnalyzerProvider<PatternAnalyzer> {
|
||||
public class PatternAnalyzerProvider extends AbstractIndexAnalyzerProvider<Analyzer> {
|
||||
|
||||
private final PatternAnalyzer analyzer;
|
||||
|
||||
private static final class PatternAnalyzer extends Analyzer {
|
||||
private final org.apache.lucene.util.Version version;
|
||||
private final Pattern pattern;
|
||||
private final boolean lowercase;
|
||||
private final CharArraySet stopWords;
|
||||
|
||||
PatternAnalyzer(org.apache.lucene.util.Version version, Pattern pattern, boolean lowercase, CharArraySet stopWords) {
|
||||
this.version = version;
|
||||
this.pattern = pattern;
|
||||
this.lowercase = lowercase;
|
||||
this.stopWords = stopWords;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String s, Reader reader) {
|
||||
final TokenStreamComponents source = new TokenStreamComponents(new PatternTokenizer(reader, pattern, -1));
|
||||
TokenStream result = null;
|
||||
if (lowercase) {
|
||||
result = new LowerCaseFilter(version, source.getTokenStream());
|
||||
}
|
||||
result = new StopFilter(version, (result == null) ? source.getTokenStream() : result, stopWords);
|
||||
return new TokenStreamComponents(source.getTokenizer(), result);
|
||||
}
|
||||
}
|
||||
|
||||
@Inject
|
||||
public PatternAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name, settings);
|
||||
|
|
Loading…
Reference in New Issue