Handle TokenizerFactory TODOs (#32063)
* Don't replace TokenizerFactory with Supplier; this approach was rejected in #32063 * Remove unused parameter from constructor
This commit is contained in:
parent
a7e477126f
commit
ed3b44fb4c
|
@ -39,7 +39,7 @@ public class CharGroupTokenizerFactory extends AbstractTokenizerFactory{
|
||||||
private boolean tokenizeOnSymbol = false;
|
private boolean tokenizeOnSymbol = false;
|
||||||
|
|
||||||
public CharGroupTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
public CharGroupTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
||||||
super(indexSettings, name, settings);
|
super(indexSettings, settings);
|
||||||
|
|
||||||
for (final String c : settings.getAsList("tokenize_on_chars")) {
|
for (final String c : settings.getAsList("tokenize_on_chars")) {
|
||||||
if (c == null || c.length() == 0) {
|
if (c == null || c.length() == 0) {
|
||||||
|
|
|
@ -35,7 +35,7 @@ public class ClassicTokenizerFactory extends AbstractTokenizerFactory {
|
||||||
private final int maxTokenLength;
|
private final int maxTokenLength;
|
||||||
|
|
||||||
ClassicTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
ClassicTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
||||||
super(indexSettings, name, settings);
|
super(indexSettings, settings);
|
||||||
maxTokenLength = settings.getAsInt("max_token_length", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
|
maxTokenLength = settings.getAsInt("max_token_length", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -36,7 +36,7 @@ public class EdgeNGramTokenizerFactory extends AbstractTokenizerFactory {
|
||||||
private final CharMatcher matcher;
|
private final CharMatcher matcher;
|
||||||
|
|
||||||
EdgeNGramTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
EdgeNGramTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
||||||
super(indexSettings, name, settings);
|
super(indexSettings, settings);
|
||||||
this.minGram = settings.getAsInt("min_gram", NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE);
|
this.minGram = settings.getAsInt("min_gram", NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE);
|
||||||
this.maxGram = settings.getAsInt("max_gram", NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);
|
this.maxGram = settings.getAsInt("max_gram", NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);
|
||||||
this.matcher = parseTokenChars(settings.getAsList("token_chars"));
|
this.matcher = parseTokenChars(settings.getAsList("token_chars"));
|
||||||
|
|
|
@ -31,7 +31,7 @@ public class KeywordTokenizerFactory extends AbstractTokenizerFactory {
|
||||||
private final int bufferSize;
|
private final int bufferSize;
|
||||||
|
|
||||||
KeywordTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
KeywordTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
||||||
super(indexSettings, name, settings);
|
super(indexSettings, settings);
|
||||||
bufferSize = settings.getAsInt("buffer_size", 256);
|
bufferSize = settings.getAsInt("buffer_size", 256);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -29,7 +29,7 @@ import org.elasticsearch.index.analysis.AbstractTokenizerFactory;
|
||||||
public class LetterTokenizerFactory extends AbstractTokenizerFactory {
|
public class LetterTokenizerFactory extends AbstractTokenizerFactory {
|
||||||
|
|
||||||
LetterTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
LetterTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
||||||
super(indexSettings, name, settings);
|
super(indexSettings, settings);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -30,7 +30,7 @@ import org.elasticsearch.index.analysis.MultiTermAwareComponent;
|
||||||
public class LowerCaseTokenizerFactory extends AbstractTokenizerFactory implements MultiTermAwareComponent {
|
public class LowerCaseTokenizerFactory extends AbstractTokenizerFactory implements MultiTermAwareComponent {
|
||||||
|
|
||||||
LowerCaseTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
LowerCaseTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
||||||
super(indexSettings, name, settings);
|
super(indexSettings, settings);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -85,7 +85,7 @@ public class NGramTokenizerFactory extends AbstractTokenizerFactory {
|
||||||
}
|
}
|
||||||
|
|
||||||
NGramTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
NGramTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
||||||
super(indexSettings, name, settings);
|
super(indexSettings, settings);
|
||||||
int maxAllowedNgramDiff = indexSettings.getMaxNgramDiff();
|
int maxAllowedNgramDiff = indexSettings.getMaxNgramDiff();
|
||||||
this.minGram = settings.getAsInt("min_gram", NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE);
|
this.minGram = settings.getAsInt("min_gram", NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE);
|
||||||
this.maxGram = settings.getAsInt("max_gram", NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);
|
this.maxGram = settings.getAsInt("max_gram", NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);
|
||||||
|
|
|
@ -37,7 +37,7 @@ public class PathHierarchyTokenizerFactory extends AbstractTokenizerFactory {
|
||||||
private final boolean reverse;
|
private final boolean reverse;
|
||||||
|
|
||||||
PathHierarchyTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
PathHierarchyTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
||||||
super(indexSettings, name, settings);
|
super(indexSettings, settings);
|
||||||
bufferSize = settings.getAsInt("buffer_size", 1024);
|
bufferSize = settings.getAsInt("buffer_size", 1024);
|
||||||
String delimiter = settings.get("delimiter");
|
String delimiter = settings.get("delimiter");
|
||||||
if (delimiter == null) {
|
if (delimiter == null) {
|
||||||
|
|
|
@ -35,7 +35,7 @@ public class PatternTokenizerFactory extends AbstractTokenizerFactory {
|
||||||
private final int group;
|
private final int group;
|
||||||
|
|
||||||
PatternTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
PatternTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
||||||
super(indexSettings, name, settings);
|
super(indexSettings, settings);
|
||||||
|
|
||||||
String sPattern = settings.get("pattern", "\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/);
|
String sPattern = settings.get("pattern", "\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/);
|
||||||
if (sPattern == null) {
|
if (sPattern == null) {
|
||||||
|
|
|
@ -31,7 +31,7 @@ public class SimplePatternSplitTokenizerFactory extends AbstractTokenizerFactory
|
||||||
private final String pattern;
|
private final String pattern;
|
||||||
|
|
||||||
public SimplePatternSplitTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
public SimplePatternSplitTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
||||||
super(indexSettings, name, settings);
|
super(indexSettings, settings);
|
||||||
|
|
||||||
pattern = settings.get("pattern", "");
|
pattern = settings.get("pattern", "");
|
||||||
}
|
}
|
||||||
|
|
|
@ -31,7 +31,7 @@ public class SimplePatternTokenizerFactory extends AbstractTokenizerFactory {
|
||||||
private final String pattern;
|
private final String pattern;
|
||||||
|
|
||||||
public SimplePatternTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
public SimplePatternTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
||||||
super(indexSettings, name, settings);
|
super(indexSettings, settings);
|
||||||
|
|
||||||
pattern = settings.get("pattern", "");
|
pattern = settings.get("pattern", "");
|
||||||
}
|
}
|
||||||
|
|
|
@ -32,7 +32,7 @@ import org.elasticsearch.index.analysis.AbstractTokenizerFactory;
|
||||||
public class ThaiTokenizerFactory extends AbstractTokenizerFactory {
|
public class ThaiTokenizerFactory extends AbstractTokenizerFactory {
|
||||||
|
|
||||||
ThaiTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
ThaiTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
||||||
super(indexSettings, name, settings);
|
super(indexSettings, settings);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -32,7 +32,7 @@ public class UAX29URLEmailTokenizerFactory extends AbstractTokenizerFactory {
|
||||||
private final int maxTokenLength;
|
private final int maxTokenLength;
|
||||||
|
|
||||||
UAX29URLEmailTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
UAX29URLEmailTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
||||||
super(indexSettings, name, settings);
|
super(indexSettings, settings);
|
||||||
maxTokenLength = settings.getAsInt("max_token_length", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
|
maxTokenLength = settings.getAsInt("max_token_length", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -34,7 +34,7 @@ public class WhitespaceTokenizerFactory extends AbstractTokenizerFactory {
|
||||||
private Integer maxTokenLength;
|
private Integer maxTokenLength;
|
||||||
|
|
||||||
WhitespaceTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
WhitespaceTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
||||||
super(indexSettings, name, settings);
|
super(indexSettings, settings);
|
||||||
maxTokenLength = settings.getAsInt(MAX_TOKEN_LENGTH, StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
|
maxTokenLength = settings.getAsInt(MAX_TOKEN_LENGTH, StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -47,7 +47,7 @@ public class IcuTokenizerFactory extends AbstractTokenizerFactory {
|
||||||
private static final String RULE_FILES = "rule_files";
|
private static final String RULE_FILES = "rule_files";
|
||||||
|
|
||||||
public IcuTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
public IcuTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
||||||
super(indexSettings, name, settings);
|
super(indexSettings, settings);
|
||||||
config = getIcuConfig(environment, settings);
|
config = getIcuConfig(environment, settings);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -45,7 +45,7 @@ public class KuromojiTokenizerFactory extends AbstractTokenizerFactory {
|
||||||
private boolean discartPunctuation;
|
private boolean discartPunctuation;
|
||||||
|
|
||||||
public KuromojiTokenizerFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
|
public KuromojiTokenizerFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
|
||||||
super(indexSettings, name, settings);
|
super(indexSettings, settings);
|
||||||
mode = getMode(settings);
|
mode = getMode(settings);
|
||||||
userDictionary = getUserDictionary(env, settings);
|
userDictionary = getUserDictionary(env, settings);
|
||||||
discartPunctuation = settings.getAsBoolean("discard_punctuation", true);
|
discartPunctuation = settings.getAsBoolean("discard_punctuation", true);
|
||||||
|
|
|
@ -38,7 +38,7 @@ public class NoriTokenizerFactory extends AbstractTokenizerFactory {
|
||||||
private final KoreanTokenizer.DecompoundMode decompoundMode;
|
private final KoreanTokenizer.DecompoundMode decompoundMode;
|
||||||
|
|
||||||
public NoriTokenizerFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
|
public NoriTokenizerFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
|
||||||
super(indexSettings, name, settings);
|
super(indexSettings, settings);
|
||||||
decompoundMode = getMode(settings);
|
decompoundMode = getMode(settings);
|
||||||
userDictionary = getUserDictionary(env, settings);
|
userDictionary = getUserDictionary(env, settings);
|
||||||
}
|
}
|
||||||
|
|
|
@ -28,7 +28,7 @@ import org.elasticsearch.index.IndexSettings;
|
||||||
public class SmartChineseTokenizerTokenizerFactory extends AbstractTokenizerFactory {
|
public class SmartChineseTokenizerTokenizerFactory extends AbstractTokenizerFactory {
|
||||||
|
|
||||||
public SmartChineseTokenizerTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
public SmartChineseTokenizerTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
||||||
super(indexSettings, name, settings);
|
super(indexSettings, settings);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -27,8 +27,7 @@ import org.elasticsearch.index.IndexSettings;
|
||||||
public abstract class AbstractTokenizerFactory extends AbstractIndexComponent implements TokenizerFactory {
|
public abstract class AbstractTokenizerFactory extends AbstractIndexComponent implements TokenizerFactory {
|
||||||
protected final Version version;
|
protected final Version version;
|
||||||
|
|
||||||
// TODO drop `String ignored` in a followup
|
public AbstractTokenizerFactory(IndexSettings indexSettings, Settings settings) {
|
||||||
public AbstractTokenizerFactory(IndexSettings indexSettings, String ignored, Settings settings) {
|
|
||||||
super(indexSettings);
|
super(indexSettings);
|
||||||
this.version = Analysis.parseAnalysisVersion(this.indexSettings.getSettings(), settings, logger);
|
this.version = Analysis.parseAnalysisVersion(this.indexSettings.getSettings(), settings, logger);
|
||||||
}
|
}
|
||||||
|
|
|
@ -31,7 +31,7 @@ public class StandardTokenizerFactory extends AbstractTokenizerFactory {
|
||||||
private final int maxTokenLength;
|
private final int maxTokenLength;
|
||||||
|
|
||||||
public StandardTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
public StandardTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
||||||
super(indexSettings, name, settings);
|
super(indexSettings, settings);
|
||||||
maxTokenLength = settings.getAsInt("max_token_length", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
|
maxTokenLength = settings.getAsInt("max_token_length", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -21,6 +21,6 @@ package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
|
|
||||||
public interface TokenizerFactory { // TODO replace with Supplier<Tokenizer>
|
public interface TokenizerFactory {
|
||||||
Tokenizer create();
|
Tokenizer create();
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,7 +20,7 @@ import org.elasticsearch.index.analysis.AbstractTokenizerFactory;
|
||||||
public class MlClassicTokenizerFactory extends AbstractTokenizerFactory {
|
public class MlClassicTokenizerFactory extends AbstractTokenizerFactory {
|
||||||
|
|
||||||
public MlClassicTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
public MlClassicTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
||||||
super(indexSettings, name, settings);
|
super(indexSettings, settings);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
Loading…
Reference in New Issue