Add name() method to TokenizerFactory (#43909)

This brings TokenizerFactory into line with CharFilterFactory and TokenFilterFactory,
and removes the need to pass around tokenizer names when building custom analyzers.

Because TokenizerFactory is consequently no longer a functional interface, the commit also
adds a static factory method, TokenizerFactory.newFactory(name, supplier), to keep construction simple.
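
For illustration, a minimal sketch (not part of the commit) of how the reshaped interface is used; the wrapper class and main method are invented for the example, while newFactory, name() and create() are the members this commit introduces:

import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.elasticsearch.index.analysis.TokenizerFactory;

// Illustrative sketch only, not code from this commit.
public class TokenizerFactoryNameExample {
    public static void main(String[] args) {
        // newFactory() exists because TokenizerFactory now declares both name() and
        // create(), so it can no longer be instantiated from a single lambda.
        TokenizerFactory factory = TokenizerFactory.newFactory("whitespace", WhitespaceTokenizer::new);

        // The name now travels with the factory, so callers such as CustomAnalyzer
        // no longer need a separate tokenizerName argument.
        System.out.println(factory.name());                        // whitespace
        Tokenizer tokenizer = factory.create();                     // a new WhitespaceTokenizer
        System.out.println(tokenizer.getClass().getSimpleName());   // WhitespaceTokenizer
    }
}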
Alan Woodward 2019-07-04 11:23:27 +01:00
parent 1b6109517a
commit 4b99255fed
43 changed files with 130 additions and 106 deletions

View File

@ -0,0 +1,30 @@
[[breaking-changes-7.4]]
== Breaking changes in 7.4
++++
<titleabbrev>7.4</titleabbrev>
++++
This section discusses the changes that you need to be aware of when migrating
your application to Elasticsearch 7.4.
See also <<release-highlights>> and <<es-release-notes>>.
coming[7.4.0]
//NOTE: The notable-breaking-changes tagged regions are re-used in the
//Installation and Upgrade Guide
//tag::notable-breaking-changes[]
// end::notable-breaking-changes[]
[[breaking_74_plugin_changes]]
=== Plugins changes
[float]
==== TokenizerFactory changes
`TokenizerFactory` now has a `name()` method that must be implemented. Most
plugin-provided implementations extend `AbstractTokenizerFactory`, whose
constructor now takes an additional `name` parameter.
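
As a rough migration sketch (the factory below is hypothetical, not taken from any
plugin), typically the only required change for such implementations is to forward the
`name` argument to the superclass constructor:

[source,java]
----
public class MyTokenizerFactory extends AbstractTokenizerFactory {

    public MyTokenizerFactory(IndexSettings indexSettings, Environment environment,
                              String name, Settings settings) {
        super(indexSettings, settings, name); // was: super(indexSettings, settings)
    }

    @Override
    public Tokenizer create() {
        return new WhitespaceTokenizer(); // illustrative tokenizer
    }
}
----

Plugins that implement `TokenizerFactory` directly must either implement `name()`
themselves or build instances with the new `TokenizerFactory.newFactory(name, supplier)`
helper.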

View File

@ -39,7 +39,7 @@ public class CharGroupTokenizerFactory extends AbstractTokenizerFactory{
private boolean tokenizeOnSymbol = false;
public CharGroupTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, settings);
super(indexSettings, settings, name);
for (final String c : settings.getAsList("tokenize_on_chars")) {
if (c == null || c.length() == 0) {

View File

@ -35,7 +35,7 @@ public class ClassicTokenizerFactory extends AbstractTokenizerFactory {
private final int maxTokenLength;
ClassicTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, settings);
super(indexSettings, settings, name);
maxTokenLength = settings.getAsInt("max_token_length", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
}

View File

@ -36,7 +36,7 @@ public class EdgeNGramTokenizerFactory extends AbstractTokenizerFactory {
private final CharMatcher matcher;
EdgeNGramTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, settings);
super(indexSettings, settings, name);
this.minGram = settings.getAsInt("min_gram", NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE);
this.maxGram = settings.getAsInt("max_gram", NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);
this.matcher = parseTokenChars(settings.getAsList("token_chars"));

View File

@ -31,7 +31,7 @@ public class KeywordTokenizerFactory extends AbstractTokenizerFactory {
private final int bufferSize;
KeywordTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, settings);
super(indexSettings, settings, name);
bufferSize = settings.getAsInt("buffer_size", 256);
}

View File

@ -29,7 +29,7 @@ import org.elasticsearch.index.analysis.AbstractTokenizerFactory;
public class LetterTokenizerFactory extends AbstractTokenizerFactory {
LetterTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, settings);
super(indexSettings, settings, name);
}
@Override

View File

@ -85,7 +85,7 @@ public class NGramTokenizerFactory extends AbstractTokenizerFactory {
}
NGramTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, settings);
super(indexSettings, settings, name);
int maxAllowedNgramDiff = indexSettings.getMaxNgramDiff();
this.minGram = settings.getAsInt("min_gram", NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE);
this.maxGram = settings.getAsInt("max_gram", NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);

View File

@ -37,7 +37,7 @@ public class PathHierarchyTokenizerFactory extends AbstractTokenizerFactory {
private final boolean reverse;
PathHierarchyTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, settings);
super(indexSettings, settings, name);
bufferSize = settings.getAsInt("buffer_size", 1024);
String delimiter = settings.get("delimiter");
if (delimiter == null) {

View File

@ -35,7 +35,7 @@ public class PatternTokenizerFactory extends AbstractTokenizerFactory {
private final int group;
PatternTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, settings);
super(indexSettings, settings, name);
String sPattern = settings.get("pattern", "\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/);
if (sPattern == null) {

View File

@ -31,7 +31,7 @@ public class SimplePatternSplitTokenizerFactory extends AbstractTokenizerFactory
private final String pattern;
public SimplePatternSplitTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, settings);
super(indexSettings, settings, name);
pattern = settings.get("pattern", "");
}

View File

@ -31,7 +31,7 @@ public class SimplePatternTokenizerFactory extends AbstractTokenizerFactory {
private final String pattern;
public SimplePatternTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, settings);
super(indexSettings, settings, name);
pattern = settings.get("pattern", "");
}

View File

@ -116,7 +116,7 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
Analyzer buildSynonymAnalyzer(TokenizerFactory tokenizer, List<CharFilterFactory> charFilters,
List<TokenFilterFactory> tokenFilters, Function<String, TokenFilterFactory> allFilters) {
return new CustomAnalyzer("synonyms", tokenizer, charFilters.toArray(new CharFilterFactory[0]),
return new CustomAnalyzer(tokenizer, charFilters.toArray(new CharFilterFactory[0]),
tokenFilters.stream()
.map(TokenFilterFactory::getSynonymFilter)
.toArray(TokenFilterFactory[]::new));

View File

@ -32,7 +32,7 @@ import org.elasticsearch.index.analysis.AbstractTokenizerFactory;
public class ThaiTokenizerFactory extends AbstractTokenizerFactory {
ThaiTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, settings);
super(indexSettings, settings, name);
}
@Override

View File

@ -32,7 +32,7 @@ public class UAX29URLEmailTokenizerFactory extends AbstractTokenizerFactory {
private final int maxTokenLength;
UAX29URLEmailTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, settings);
super(indexSettings, settings, name);
maxTokenLength = settings.getAsInt("max_token_length", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
}

View File

@ -34,7 +34,7 @@ public class WhitespaceTokenizerFactory extends AbstractTokenizerFactory {
private Integer maxTokenLength;
WhitespaceTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, settings);
super(indexSettings, settings, name);
maxTokenLength = settings.getAsInt(MAX_TOKEN_LENGTH, StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
}

View File

@ -30,7 +30,7 @@ import org.elasticsearch.index.analysis.AbstractTokenizerFactory;
public class XLowerCaseTokenizerFactory extends AbstractTokenizerFactory {
public XLowerCaseTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, settings);
super(indexSettings, settings, name);
}
@Override

View File

@ -19,7 +19,7 @@
tokenizer:
type: keyword
- length: { detail.tokenizer.tokens: 1 }
- match: { detail.tokenizer.name: _anonymous_tokenizer }
- match: { detail.tokenizer.name: __anonymous__keyword }
- match: { detail.tokenizer.tokens.0.token: Foo Bar! }
---
@ -48,7 +48,7 @@
type: simple_pattern
pattern: "[abcdef0123456789]{4}"
- length: { detail.tokenizer.tokens: 2 }
- match: { detail.tokenizer.name: _anonymous_tokenizer }
- match: { detail.tokenizer.name: __anonymous__simple_pattern }
- match: { detail.tokenizer.tokens.0.token: a6bf }
- match: { detail.tokenizer.tokens.1.token: ff61 }
@ -63,7 +63,7 @@
type: simple_pattern_split
pattern: ==
- length: { detail.tokenizer.tokens: 2 }
- match: { detail.tokenizer.name: _anonymous_tokenizer }
- match: { detail.tokenizer.name: __anonymous__simple_pattern_split }
- match: { detail.tokenizer.tokens.0.token: foo }
- match: { detail.tokenizer.tokens.1.token: bar }
@ -77,7 +77,7 @@
tokenizer:
type: thai
- length: { detail.tokenizer.tokens: 2 }
- match: { detail.tokenizer.name: _anonymous_tokenizer }
- match: { detail.tokenizer.name: __anonymous__thai }
- match: { detail.tokenizer.tokens.0.token: ภาษา }
- match: { detail.tokenizer.tokens.1.token: ไทย }
@ -104,7 +104,7 @@
min_gram: 3
max_gram: 3
- length: { detail.tokenizer.tokens: 4 }
- match: { detail.tokenizer.name: _anonymous_tokenizer }
- match: { detail.tokenizer.name: __anonymous__ngram }
- match: { detail.tokenizer.tokens.0.token: foo }
- match: { detail.tokenizer.tokens.1.token: oob }
- match: { detail.tokenizer.tokens.2.token: oba }
@ -120,7 +120,7 @@
min_gram: 3
max_gram: 3
- length: { detail.tokenizer.tokens: 4 }
- match: { detail.tokenizer.name: _anonymous_tokenizer }
- match: { detail.tokenizer.name: __anonymous__ngram }
- match: { detail.tokenizer.tokens.0.token: foo }
- match: { detail.tokenizer.tokens.1.token: oob }
- match: { detail.tokenizer.tokens.2.token: oba }
@ -166,7 +166,7 @@
min_gram: 1
max_gram: 3
- length: { detail.tokenizer.tokens: 3 }
- match: { detail.tokenizer.name: _anonymous_tokenizer }
- match: { detail.tokenizer.name: __anonymous__edge_ngram }
- match: { detail.tokenizer.tokens.0.token: f }
- match: { detail.tokenizer.tokens.1.token: fo }
- match: { detail.tokenizer.tokens.2.token: foo }
@ -181,7 +181,7 @@
min_gram: 1
max_gram: 3
- length: { detail.tokenizer.tokens: 3 }
- match: { detail.tokenizer.name: _anonymous_tokenizer }
- match: { detail.tokenizer.name: __anonymous__edge_ngram }
- match: { detail.tokenizer.tokens.0.token: f }
- match: { detail.tokenizer.tokens.1.token: fo }
- match: { detail.tokenizer.tokens.2.token: foo }
@ -218,7 +218,7 @@
tokenizer:
type: classic
- length: { detail.tokenizer.tokens: 4 }
- match: { detail.tokenizer.name: _anonymous_tokenizer }
- match: { detail.tokenizer.name: __anonymous__classic }
- match: { detail.tokenizer.tokens.0.token: Brown }
- match: { detail.tokenizer.tokens.1.token: Foxes }
- match: { detail.tokenizer.tokens.2.token: don't }
@ -247,7 +247,7 @@
tokenizer:
type: letter
- length: { detail.tokenizer.tokens: 5 }
- match: { detail.tokenizer.name: _anonymous_tokenizer }
- match: { detail.tokenizer.name: __anonymous__letter }
- match: { detail.tokenizer.tokens.0.token: Brown }
- match: { detail.tokenizer.tokens.1.token: Foxes }
- match: { detail.tokenizer.tokens.2.token: don }
@ -278,7 +278,7 @@
tokenizer:
type: lowercase
- length: { detail.tokenizer.tokens: 5 }
- match: { detail.tokenizer.name: _anonymous_tokenizer }
- match: { detail.tokenizer.name: __anonymous__lowercase }
- match: { detail.tokenizer.tokens.0.token: brown }
- match: { detail.tokenizer.tokens.1.token: foxes }
- match: { detail.tokenizer.tokens.2.token: don }
@ -309,7 +309,7 @@
tokenizer:
type: path_hierarchy
- length: { detail.tokenizer.tokens: 3 }
- match: { detail.tokenizer.name: _anonymous_tokenizer }
- match: { detail.tokenizer.name: __anonymous__path_hierarchy }
- match: { detail.tokenizer.tokens.0.token: a }
- match: { detail.tokenizer.tokens.1.token: a/b }
- match: { detail.tokenizer.tokens.2.token: a/b/c }
@ -322,7 +322,7 @@
tokenizer:
type: PathHierarchy
- length: { detail.tokenizer.tokens: 3 }
- match: { detail.tokenizer.name: _anonymous_tokenizer }
- match: { detail.tokenizer.name: __anonymous__PathHierarchy }
- match: { detail.tokenizer.tokens.0.token: a }
- match: { detail.tokenizer.tokens.1.token: a/b }
- match: { detail.tokenizer.tokens.2.token: a/b/c }
@ -361,7 +361,7 @@
tokenizer:
type: pattern
- length: { detail.tokenizer.tokens: 5 }
- match: { detail.tokenizer.name: _anonymous_tokenizer }
- match: { detail.tokenizer.name: __anonymous__pattern }
- match: { detail.tokenizer.tokens.0.token: split }
- match: { detail.tokenizer.tokens.1.token: by }
- match: { detail.tokenizer.tokens.2.token: whitespace }
@ -392,7 +392,7 @@
tokenizer:
type: uax_url_email
- length: { detail.tokenizer.tokens: 4 }
- match: { detail.tokenizer.name: _anonymous_tokenizer }
- match: { detail.tokenizer.name: __anonymous__uax_url_email }
- match: { detail.tokenizer.tokens.0.token: Email }
- match: { detail.tokenizer.tokens.1.token: me }
- match: { detail.tokenizer.tokens.2.token: at }
@ -421,7 +421,7 @@
tokenizer:
type: whitespace
- length: { detail.tokenizer.tokens: 3 }
- match: { detail.tokenizer.name: _anonymous_tokenizer }
- match: { detail.tokenizer.name: __anonymous__whitespace }
- match: { detail.tokenizer.tokens.0.token: split }
- match: { detail.tokenizer.tokens.1.token: by }
- match: { detail.tokenizer.tokens.2.token: whitespace }

View File

@ -106,7 +106,7 @@
- length: { detail.tokenizer.tokens: 1 }
- length: { detail.tokenfilters.0.tokens: 1 }
- match: { detail.tokenizer.name: keyword_for_normalizer }
- match: { detail.tokenizer.name: keyword }
- match: { detail.tokenizer.tokens.0.token: ABc }
- match: { detail.tokenfilters.0.name: lowercase }
- match: { detail.tokenfilters.0.tokens.0.token: abc }

View File

@ -47,7 +47,7 @@ public class IcuTokenizerFactory extends AbstractTokenizerFactory {
private static final String RULE_FILES = "rule_files";
public IcuTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, settings);
super(indexSettings, settings, name);
config = getIcuConfig(environment, settings);
}

View File

@ -45,7 +45,7 @@ public class KuromojiTokenizerFactory extends AbstractTokenizerFactory {
private boolean discartPunctuation;
public KuromojiTokenizerFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, settings);
super(indexSettings, settings, name);
mode = getMode(settings);
userDictionary = getUserDictionary(env, settings);
discartPunctuation = settings.getAsBoolean("discard_punctuation", true);

View File

@ -41,7 +41,7 @@ public class NoriTokenizerFactory extends AbstractTokenizerFactory {
private final KoreanTokenizer.DecompoundMode decompoundMode;
public NoriTokenizerFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, settings);
super(indexSettings, settings, name);
decompoundMode = getMode(settings);
userDictionary = getUserDictionary(env, settings);
}

View File

@ -28,7 +28,7 @@ import org.elasticsearch.index.IndexSettings;
public class SmartChineseTokenizerTokenizerFactory extends AbstractTokenizerFactory {
public SmartChineseTokenizerTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, settings);
super(indexSettings, settings, name);
}
@Override

View File

@ -278,7 +278,6 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeAc
CharFilterFactory[] charFilterFactories = components.getCharFilters();
TokenizerFactory tokenizerFactory = components.getTokenizerFactory();
TokenFilterFactory[] tokenFilterFactories = components.getTokenFilters();
String tokenizerName = components.getTokenizerName();
String[][] charFiltersTexts = new String[charFilterFactories != null ? charFilterFactories.length : 0][request.text().length];
TokenListCreator[] tokenFiltersTokenListCreator = new TokenListCreator[tokenFilterFactories != null ?
@ -338,7 +337,8 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeAc
}
}
detailResponse = new AnalyzeAction.DetailAnalyzeResponse(charFilteredLists,
new AnalyzeAction.AnalyzeTokenList(tokenizerName, tokenizerTokenListCreator.getArrayTokens()), tokenFilterLists);
new AnalyzeAction.AnalyzeTokenList(tokenizerFactory.name(), tokenizerTokenListCreator.getArrayTokens()),
tokenFilterLists);
} else {
String name;
if (analyzer instanceof NamedAnalyzer) {

View File

@ -26,13 +26,20 @@ import org.elasticsearch.index.IndexSettings;
public abstract class AbstractTokenizerFactory extends AbstractIndexComponent implements TokenizerFactory {
protected final Version version;
private final String name;
public AbstractTokenizerFactory(IndexSettings indexSettings, Settings settings) {
public AbstractTokenizerFactory(IndexSettings indexSettings, Settings settings, String name) {
super(indexSettings);
this.version = Analysis.parseAnalysisVersion(this.indexSettings.getSettings(), settings, logger);
this.name = name;
}
public final Version version() {
return version;
}
@Override
public String name() {
return name;
}
}

View File

@ -19,6 +19,7 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.Version;
@ -251,11 +252,7 @@ public final class AnalysisRegistry implements Closeable {
tokenFilterFactories.add(tff);
}
String tokenizerName = tokenizer.name == null ? "_anonymous_tokenizer" : tokenizer.name;
if (normalizer) {
tokenizerName = "keyword_for_normalizer";
}
Analyzer analyzer = new CustomAnalyzer(tokenizerName, tokenizerFactory,
Analyzer analyzer = new CustomAnalyzer(tokenizerFactory,
charFilterFactories.toArray(new CharFilterFactory[]{}),
tokenFilterFactories.toArray(new TokenFilterFactory[]{}));
return produceAnalyzer("__custom__", new AnalyzerProvider<Analyzer>() {
@ -537,10 +534,12 @@ public final class AnalysisRegistry implements Closeable {
});
}
for (Map.Entry<String, AnalyzerProvider<?>> entry : normalizerProviders.entrySet()) {
processNormalizerFactory(entry.getKey(), entry.getValue(), normalizers, "keyword",
tokenizerFactoryFactories.get("keyword"), tokenFilterFactoryFactories, charFilterFactoryFactories);
processNormalizerFactory(entry.getKey(), entry.getValue(), normalizers,
TokenizerFactory.newFactory("keyword", KeywordTokenizer::new),
tokenFilterFactoryFactories, charFilterFactoryFactories);
processNormalizerFactory(entry.getKey(), entry.getValue(), whitespaceNormalizers,
"whitespace", () -> new WhitespaceTokenizer(), tokenFilterFactoryFactories, charFilterFactoryFactories);
TokenizerFactory.newFactory("whitespace", WhitespaceTokenizer::new),
tokenFilterFactoryFactories, charFilterFactoryFactories);
}
if (!analyzers.containsKey(DEFAULT_ANALYZER_NAME)) {
@ -613,7 +612,6 @@ public final class AnalysisRegistry implements Closeable {
String name,
AnalyzerProvider<?> normalizerFactory,
Map<String, NamedAnalyzer> normalizers,
String tokenizerName,
TokenizerFactory tokenizerFactory,
Map<String, TokenFilterFactory> tokenFilters,
Map<String, CharFilterFactory> charFilters) {
@ -622,7 +620,7 @@ public final class AnalysisRegistry implements Closeable {
}
if (normalizerFactory instanceof CustomNormalizerProvider) {
((CustomNormalizerProvider) normalizerFactory).build(tokenizerName, tokenizerFactory, charFilters, tokenFilters);
((CustomNormalizerProvider) normalizerFactory).build(tokenizerFactory, charFilters, tokenFilters);
}
if (normalizers.containsKey(name)) {
throw new IllegalStateException("already registered analyzer with name: " + name);

View File

@ -30,15 +30,15 @@ import java.util.Map;
* See {@link ReloadableCustomAnalyzer} for an example usage.
*/
public final class AnalyzerComponents {
private final String tokenizerName;
private final TokenizerFactory tokenizerFactory;
private final CharFilterFactory[] charFilters;
private final TokenFilterFactory[] tokenFilters;
private final AnalysisMode analysisMode;
AnalyzerComponents(String tokenizerName, TokenizerFactory tokenizerFactory, CharFilterFactory[] charFilters,
AnalyzerComponents(TokenizerFactory tokenizerFactory, CharFilterFactory[] charFilters,
TokenFilterFactory[] tokenFilters) {
this.tokenizerName = tokenizerName;
this.tokenizerFactory = tokenizerFactory;
this.charFilters = charFilters;
this.tokenFilters = tokenFilters;
@ -85,14 +85,10 @@ public final class AnalyzerComponents {
tokenFilterList.add(tokenFilter);
}
return new AnalyzerComponents(tokenizerName, tokenizer, charFiltersList.toArray(new CharFilterFactory[charFiltersList.size()]),
return new AnalyzerComponents(tokenizer, charFiltersList.toArray(new CharFilterFactory[charFiltersList.size()]),
tokenFilterList.toArray(new TokenFilterFactory[tokenFilterList.size()]));
}
public String getTokenizerName() {
return tokenizerName;
}
public TokenizerFactory getTokenizerFactory() {
return tokenizerFactory;
}
@ -108,4 +104,4 @@ public final class AnalyzerComponents {
public AnalysisMode analysisMode() {
return this.analysisMode;
}
}
}

View File

@ -32,14 +32,14 @@ public final class CustomAnalyzer extends Analyzer implements AnalyzerComponents
private final int offsetGap;
private final AnalysisMode analysisMode;
public CustomAnalyzer(String tokenizerName, TokenizerFactory tokenizerFactory, CharFilterFactory[] charFilters,
public CustomAnalyzer(TokenizerFactory tokenizerFactory, CharFilterFactory[] charFilters,
TokenFilterFactory[] tokenFilters) {
this(tokenizerName, tokenizerFactory, charFilters, tokenFilters, 0, -1);
this(tokenizerFactory, charFilters, tokenFilters, 0, -1);
}
public CustomAnalyzer(String tokenizerName, TokenizerFactory tokenizerFactory, CharFilterFactory[] charFilters,
public CustomAnalyzer(TokenizerFactory tokenizerFactory, CharFilterFactory[] charFilters,
TokenFilterFactory[] tokenFilters, int positionIncrementGap, int offsetGap) {
this.components = new AnalyzerComponents(tokenizerName, tokenizerFactory, charFilters, tokenFilters);
this.components = new AnalyzerComponents(tokenizerFactory, charFilters, tokenFilters);
this.positionIncrementGap = positionIncrementGap;
this.offsetGap = offsetGap;
// merge and transfer token filter analysis modes with analyzer
@ -50,13 +50,6 @@ public final class CustomAnalyzer extends Analyzer implements AnalyzerComponents
this.analysisMode = mode;
}
/**
* The name of the tokenizer as configured by the user.
*/
public String getTokenizerName() {
return this.components.getTokenizerName();
}
public TokenizerFactory tokenizerFactory() {
return this.components.getTokenizerFactory();
}

View File

@ -64,7 +64,7 @@ public class CustomAnalyzerProvider extends AbstractIndexAnalyzerProvider<Analyz
if (components.analysisMode().equals(AnalysisMode.SEARCH_TIME)) {
return new ReloadableCustomAnalyzer(components, positionIncrementGap, offsetGap);
} else {
return new CustomAnalyzer(components.getTokenizerName(), components.getTokenizerFactory(), components.getCharFilters(),
return new CustomAnalyzer(components.getTokenizerFactory(), components.getCharFilters(),
components.getTokenFilters(), positionIncrementGap, offsetGap);
}
}

View File

@ -43,7 +43,7 @@ public final class CustomNormalizerProvider extends AbstractIndexAnalyzerProvide
this.analyzerSettings = settings;
}
public void build(final String tokenizerName, final TokenizerFactory tokenizerFactory, final Map<String, CharFilterFactory> charFilters,
public void build(final TokenizerFactory tokenizerFactory, final Map<String, CharFilterFactory> charFilters,
final Map<String, TokenFilterFactory> tokenFilters) {
if (analyzerSettings.get("tokenizer") != null) {
throw new IllegalArgumentException("Custom normalizer [" + name() + "] cannot configure a tokenizer");
@ -79,7 +79,6 @@ public final class CustomNormalizerProvider extends AbstractIndexAnalyzerProvide
}
this.customAnalyzer = new CustomAnalyzer(
tokenizerName,
tokenizerFactory,
charFiltersList.toArray(new CharFilterFactory[charFiltersList.size()]),
tokenFilterList.toArray(new TokenFilterFactory[tokenFilterList.size()])

View File

@ -32,7 +32,7 @@ import java.io.IOException;
* Shared implementation for pre-configured analysis components.
*/
public abstract class PreConfiguredAnalysisComponent<T> implements AnalysisModule.AnalysisProvider<T> {
private final String name;
protected final String name;
protected final PreBuiltCacheFactory.PreBuiltCache<T> cache;
protected PreConfiguredAnalysisComponent(String name, PreBuiltCacheFactory.CachingStrategy cache) {

View File

@ -70,6 +70,6 @@ public final class PreConfiguredTokenizer extends PreConfiguredAnalysisComponent
@Override
protected TokenizerFactory create(Version version) {
return () -> create.apply(version);
return TokenizerFactory.newFactory(name, () -> create.apply(version));
}
}

View File

@ -31,7 +31,7 @@ public class StandardTokenizerFactory extends AbstractTokenizerFactory {
private final int maxTokenLength;
public StandardTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, settings);
super(indexSettings, settings, name);
maxTokenLength = settings.getAsInt("max_token_length", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
}

View File

@ -21,6 +21,25 @@ package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.Tokenizer;
import java.util.function.Supplier;
public interface TokenizerFactory {
String name();
Tokenizer create();
static TokenizerFactory newFactory(String name, Supplier<Tokenizer> supplier) {
return new TokenizerFactory() {
@Override
public String name() {
return name;
}
@Override
public Tokenizer create() {
return supplier.get();
}
};
}
}

View File

@ -132,7 +132,7 @@ public class TransportAnalyzeActionTests extends ESTestCase {
@Override
public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
return singletonMap("keyword", (indexSettings, environment, name, settings) ->
() -> new MockTokenizer(MockTokenizer.KEYWORD, false));
TokenizerFactory.newFactory(name, () -> new MockTokenizer(MockTokenizer.KEYWORD, false)));
}
@Override

View File

@ -120,7 +120,7 @@ public class AnalysisRegistryTests extends ESTestCase {
return null;
}
};
Analyzer analyzer = new CustomAnalyzer("tokenizerName", null, new CharFilterFactory[0], new TokenFilterFactory[] { tokenFilter });
Analyzer analyzer = new CustomAnalyzer(null, new CharFilterFactory[0], new TokenFilterFactory[] { tokenFilter });
MapperException ex = expectThrows(MapperException.class,
() -> emptyRegistry.build(IndexSettingsModule.newIndexSettings("index", settings),
singletonMap("default", new PreBuiltAnalyzerProvider("default", AnalyzerScope.INDEX, analyzer)), emptyMap(),

View File

@ -170,7 +170,7 @@ public class CustomNormalizerTests extends ESTokenStreamTestCase {
@Override
public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
return singletonMap("keyword", (indexSettings, environment, name, settings) ->
() -> new MockTokenizer(MockTokenizer.KEYWORD, false));
TokenizerFactory.newFactory(name, () -> new MockTokenizer(MockTokenizer.KEYWORD, false)));
}
}
}

View File

@ -73,7 +73,7 @@ public class NamedAnalyzerTests extends ESTestCase {
return mode;
}
};
return new CustomAnalyzer("tokenizerName", null, new CharFilterFactory[0],
return new CustomAnalyzer(null, new CharFilterFactory[0],
new TokenFilterFactory[] { tokenFilter });
}
}

View File

@ -94,7 +94,7 @@ public class ReloadableCustomAnalyzerTests extends ESTestCase {
try (ReloadableCustomAnalyzer analyzer = new ReloadableCustomAnalyzer(components, positionIncrementGap, offsetGap)) {
assertEquals(positionIncrementGap, analyzer.getPositionIncrementGap(randomAlphaOfLength(5)));
assertEquals(offsetGap >= 0 ? offsetGap : 1, analyzer.getOffsetGap(randomAlphaOfLength(5)));
assertEquals("standard", analyzer.getComponents().getTokenizerName());
assertEquals("standard", analyzer.getComponents().getTokenizerFactory().name());
assertEquals(0, analyzer.getComponents().getCharFilters().length);
assertSame(testAnalysis.tokenizer.get("standard"), analyzer.getComponents().getTokenizerFactory());
assertEquals(1, analyzer.getComponents().getTokenFilters().length);

View File

@ -21,7 +21,6 @@ package org.elasticsearch.index.mapper;
import org.apache.lucene.analysis.MockLowerCaseFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
@ -69,16 +68,8 @@ public class KeywordFieldMapperTests extends ESSingleNodeTestCase {
@Override
public Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> getTokenizers() {
return singletonMap("keyword", (indexSettings, environment, name, settings) -> {
class Factory implements TokenizerFactory {
@Override
public Tokenizer create() {
return new MockTokenizer(MockTokenizer.KEYWORD, false);
}
}
return new Factory();
});
return singletonMap("keyword", (indexSettings, environment, name, settings) ->
TokenizerFactory.newFactory(name, () -> new MockTokenizer(MockTokenizer.KEYWORD, false)));
}
}

View File

@ -206,7 +206,7 @@ public class TypeParsersTests extends ESTestCase {
return null;
}
};
return new CustomAnalyzer("tokenizerName", null, new CharFilterFactory[0],
return new CustomAnalyzer(null, new CharFilterFactory[0],
new TokenFilterFactory[] { tokenFilter });
}
}

View File

@ -281,8 +281,8 @@ public class AnalysisModuleTests extends ESTestCase {
@Override
public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
// Need mock keyword tokenizer here, because alpha / beta versions are broken up by the dash.
return singletonMap("keyword", (indexSettings, environment, name, settings) ->
() -> new MockTokenizer(MockTokenizer.KEYWORD, false));
return singletonMap("keyword", (indexSettings, environment, name, settings)
-> TokenizerFactory.newFactory(name, () -> new MockTokenizer(MockTokenizer.KEYWORD, false)));
}
})).getAnalysisRegistry();

View File

@ -19,7 +19,6 @@
package org.elasticsearch.test;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.elasticsearch.index.analysis.TokenizerFactory;
import org.elasticsearch.indices.analysis.AnalysisModule;
import org.elasticsearch.plugins.AnalysisPlugin;
@ -40,15 +39,7 @@ public class MockKeywordPlugin extends Plugin implements AnalysisPlugin {
@Override
public Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> getTokenizers() {
return singletonMap("keyword", (indexSettings, environment, name, settings) -> {
class Factory implements TokenizerFactory {
@Override
public Tokenizer create() {
return new MockTokenizer(MockTokenizer.KEYWORD, false);
}
}
return new Factory();
});
return singletonMap("keyword", (indexSettings, environment, name, settings) ->
TokenizerFactory.newFactory(name, () -> new MockTokenizer(MockTokenizer.KEYWORD, false)));
}
}

View File

@ -20,7 +20,7 @@ import org.elasticsearch.index.analysis.AbstractTokenizerFactory;
public class MlClassicTokenizerFactory extends AbstractTokenizerFactory {
public MlClassicTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, settings);
super(indexSettings, settings, name);
}
@Override