Drop name from TokenizerFactory (#24869)

Drops `TokenizerFactory#name`, replacing it with
`CustomAnalyzer#getTokenizerName` which is much better targeted at
its single use case inside the analysis API.

Drops a test that I would have had to refactor which is duplicated by
`AnalysisModuleTests`.

To keep this change from blowing up in size I've left two mostly
mechanical changes to be done in followups:
1. `TokenizerFactory` can now be entirely dropped and replaced with
`Supplier<Tokenizer>`.
2. `AbstractTokenizerFactory`'s ctor still takes a `String` parameter
where the name once was.
This commit is contained in:
Nik Everett 2017-05-30 10:39:22 -04:00 committed by GitHub
parent b8d7b83f8e
commit 6d9ce957d4
15 changed files with 32 additions and 323 deletions

View File

@ -39,6 +39,7 @@ import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
import org.elasticsearch.cluster.routing.ShardsIterator;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.UUIDs;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.io.FastStringReader;
import org.elasticsearch.common.settings.Settings;
@ -179,7 +180,8 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
} else if (request.tokenizer() != null) {
final IndexSettings indexSettings = indexAnalyzers == null ? null : indexAnalyzers.getIndexSettings();
TokenizerFactory tokenizerFactory = parseTokenizerFactory(request, indexAnalyzers, analysisRegistry, environment);
Tuple<String, TokenizerFactory> tokenizerFactory = parseTokenizerFactory(request, indexAnalyzers,
analysisRegistry, environment);
TokenFilterFactory[] tokenFilterFactories = new TokenFilterFactory[0];
tokenFilterFactories = getTokenFilterFactories(request, indexSettings, analysisRegistry, environment, tokenFilterFactories);
@ -187,7 +189,7 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
CharFilterFactory[] charFilterFactories = new CharFilterFactory[0];
charFilterFactories = getCharFilterFactories(request, indexSettings, analysisRegistry, environment, charFilterFactories);
analyzer = new CustomAnalyzer(tokenizerFactory, charFilterFactories, tokenFilterFactories);
analyzer = new CustomAnalyzer(tokenizerFactory.v1(), tokenizerFactory.v2(), charFilterFactories, tokenFilterFactories);
closeAnalyzer = true;
} else if (analyzer == null) {
if (indexAnalyzers == null) {
@ -325,7 +327,8 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
tokenFilterFactories[tokenFilterIndex].name(), tokenFiltersTokenListCreator[tokenFilterIndex].getArrayTokens());
}
}
detailResponse = new DetailAnalyzeResponse(charFilteredLists, new DetailAnalyzeResponse.AnalyzeTokenList(tokenizerFactory.name(), tokenizerTokenListCreator.getArrayTokens()), tokenFilterLists);
detailResponse = new DetailAnalyzeResponse(charFilteredLists, new DetailAnalyzeResponse.AnalyzeTokenList(
customAnalyzer.getTokenizerName(), tokenizerTokenListCreator.getArrayTokens()), tokenFilterLists);
} else {
String name;
if (analyzer instanceof NamedAnalyzer) {
@ -551,8 +554,9 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
return tokenFilterFactories;
}
private static TokenizerFactory parseTokenizerFactory(AnalyzeRequest request, IndexAnalyzers indexAnalzyers,
private static Tuple<String, TokenizerFactory> parseTokenizerFactory(AnalyzeRequest request, IndexAnalyzers indexAnalzyers,
AnalysisRegistry analysisRegistry, Environment environment) throws IOException {
String name;
TokenizerFactory tokenizerFactory;
final AnalyzeRequest.NameOrDefinition tokenizer = request.tokenizer();
// parse anonymous settings
@ -568,6 +572,7 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
throw new IllegalArgumentException("failed to find global tokenizer under [" + tokenizerTypeName + "]");
}
// Need to set anonymous "name" of tokenizer
name = "_anonymous_tokenizer";
tokenizerFactory = tokenizerFactoryFactory.get(getNaIndexSettings(settings), environment, "_anonymous_tokenizer", settings);
} else {
AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory;
@ -576,18 +581,20 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
if (tokenizerFactoryFactory == null) {
throw new IllegalArgumentException("failed to find global tokenizer under [" + tokenizer.name + "]");
}
name = tokenizer.name;
tokenizerFactory = tokenizerFactoryFactory.get(environment, tokenizer.name);
} else {
tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizer.name, indexAnalzyers.getIndexSettings());
if (tokenizerFactoryFactory == null) {
throw new IllegalArgumentException("failed to find tokenizer under [" + tokenizer.name + "]");
}
name = tokenizer.name;
tokenizerFactory = tokenizerFactoryFactory.get(indexAnalzyers.getIndexSettings(), environment, tokenizer.name,
AnalysisRegistry.getSettingsFromIndexSettings(indexAnalzyers.getIndexSettings(),
AnalysisRegistry.INDEX_ANALYSIS_TOKENIZER + "." + tokenizer.name));
}
}
return tokenizerFactory;
return new Tuple<>(name, tokenizerFactory);
}
private static IndexSettings getNaIndexSettings(Settings settings) {

View File

@ -25,23 +25,14 @@ import org.elasticsearch.index.AbstractIndexComponent;
import org.elasticsearch.index.IndexSettings;
public abstract class AbstractTokenizerFactory extends AbstractIndexComponent implements TokenizerFactory {
private final String name;
protected final Version version;
public AbstractTokenizerFactory(IndexSettings indexSettings, String name, Settings settings) {
// TODO drop `String ignored` in a followup
public AbstractTokenizerFactory(IndexSettings indexSettings, String ignored, Settings settings) {
super(indexSettings);
this.name = name;
this.version = Analysis.parseAnalysisVersion(this.indexSettings.getSettings(), settings, logger);
}
@Override
public String name() {
return this.name;
}
public final Version version() {
return version;
}

View File

@ -27,6 +27,7 @@ import java.io.Reader;
public final class CustomAnalyzer extends Analyzer {
private final String tokenizerName;
private final TokenizerFactory tokenizerFactory;
private final CharFilterFactory[] charFilters;
@ -36,12 +37,14 @@ public final class CustomAnalyzer extends Analyzer {
private final int positionIncrementGap;
private final int offsetGap;
public CustomAnalyzer(TokenizerFactory tokenizerFactory, CharFilterFactory[] charFilters, TokenFilterFactory[] tokenFilters) {
this(tokenizerFactory, charFilters, tokenFilters, 0, -1);
public CustomAnalyzer(String tokenizerName, TokenizerFactory tokenizerFactory, CharFilterFactory[] charFilters,
TokenFilterFactory[] tokenFilters) {
this(tokenizerName, tokenizerFactory, charFilters, tokenFilters, 0, -1);
}
public CustomAnalyzer(TokenizerFactory tokenizerFactory, CharFilterFactory[] charFilters, TokenFilterFactory[] tokenFilters,
int positionIncrementGap, int offsetGap) {
public CustomAnalyzer(String tokenizerName, TokenizerFactory tokenizerFactory, CharFilterFactory[] charFilters,
TokenFilterFactory[] tokenFilters, int positionIncrementGap, int offsetGap) {
this.tokenizerName = tokenizerName;
this.tokenizerFactory = tokenizerFactory;
this.charFilters = charFilters;
this.tokenFilters = tokenFilters;
@ -49,6 +52,12 @@ public final class CustomAnalyzer extends Analyzer {
this.offsetGap = offsetGap;
}
/**
* The name of the tokenizer as configured by the user.
*/
public String getTokenizerName() {
return tokenizerName;
}
public TokenizerFactory tokenizerFactory() {
return tokenizerFactory;

View File

@ -80,7 +80,7 @@ public class CustomAnalyzerProvider extends AbstractIndexAnalyzerProvider<Custom
positionIncrementGap = analyzerSettings.getAsInt("position_increment_gap", positionIncrementGap);
int offsetGap = analyzerSettings.getAsInt("offset_gap", -1);;
this.customAnalyzer = new CustomAnalyzer(tokenizer,
this.customAnalyzer = new CustomAnalyzer(tokenizerName, tokenizer,
charFiltersList.toArray(new CharFilterFactory[charFiltersList.size()]),
tokenFilterList.toArray(new TokenFilterFactory[tokenFilterList.size()]),
positionIncrementGap,

View File

@ -82,6 +82,7 @@ public final class CustomNormalizerProvider extends AbstractIndexAnalyzerProvide
}
this.customAnalyzer = new CustomAnalyzer(
"keyword",
PreBuiltTokenizers.KEYWORD.getTokenizerFactory(indexSettings.getIndexVersionCreated()),
charFiltersList.toArray(new CharFilterFactory[charFiltersList.size()]),
tokenFilterList.toArray(new TokenFilterFactory[tokenFilterList.size()])

View File

@ -96,11 +96,6 @@ public final class PreConfiguredTokenizer extends PreConfiguredAnalysisComponent
protected TokenizerFactory create(Version version) {
if (multiTermComponent != null) {
return new MultiTermAwareTokenizerFactory() {
@Override
public String name() {
return getName();
}
@Override
public Tokenizer create() {
return create.apply(version);
@ -112,17 +107,7 @@ public final class PreConfiguredTokenizer extends PreConfiguredAnalysisComponent
}
};
} else {
return new TokenizerFactory() {
@Override
public String name() {
return getName();
}
@Override
public Tokenizer create() {
return create.apply(version);
}
};
return () -> create.apply(version);
}
}
}

View File

@ -21,9 +21,6 @@ package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.Tokenizer;
public interface TokenizerFactory {
String name();
public interface TokenizerFactory { // TODO replace with Supplier<Tokenizer>
Tokenizer create();
}

View File

@ -38,8 +38,6 @@ import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.analysis.TokenizerFactory;
import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;
import java.util.Locale;
public enum PreBuiltTokenizers {
STANDARD(CachingStrategy.ONE) {
@ -148,14 +146,8 @@ public enum PreBuiltTokenizers {
public synchronized TokenizerFactory getTokenizerFactory(final Version version) {
TokenizerFactory tokenizerFactory = cache.get(version);
if (tokenizerFactory == null) {
final String finalName = name().toLowerCase(Locale.ROOT);
if (getMultiTermComponent(version) != null) {
tokenizerFactory = new MultiTermAwareTokenizerFactory() {
@Override
public String name() {
return finalName;
}
@Override
public Tokenizer create() {
return PreBuiltTokenizers.this.create(version);
@ -168,11 +160,6 @@ public enum PreBuiltTokenizers {
};
} else {
tokenizerFactory = new TokenizerFactory() {
@Override
public String name() {
return finalName;
}
@Override
public Tokenizer create() {
return PreBuiltTokenizers.this.create(version);

View File

@ -1,56 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.indices.analysis;
import org.apache.lucene.analysis.Analyzer;
import org.elasticsearch.index.analysis.AnalyzerProvider;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.analysis.TokenizerFactory;
import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.plugins.Plugin;
import java.util.Map;
import static java.util.Collections.singletonMap;
/**
 * Test-only analysis plugin that registers stub analysis components under
 * fixed names ("dummy_char_filter", "dummy_token_filter", "dummy_tokenizer",
 * "dummy") so tests can configure analyzers backed by plugin-provided parts.
 */
public class DummyAnalysisPlugin extends Plugin implements AnalysisPlugin {
@Override
public Map<String, AnalysisProvider<CharFilterFactory>> getCharFilters() {
// The (a, b, c, d) lambda ignores index settings, environment, name and settings.
return singletonMap("dummy_char_filter", (a, b, c, d) -> new DummyCharFilterFactory());
}
@Override
public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
return singletonMap("dummy_token_filter", (a, b, c, d) -> new DummyTokenFilterFactory());
}
@Override
public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
return singletonMap("dummy_tokenizer", (a, b, c, d) -> new DummyTokenizerFactory());
}
@Override
public Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAnalyzers() {
return singletonMap("dummy", (a, b, c, d) -> new DummyAnalyzerProvider());
}
}

View File

@ -1,33 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.indices.analysis;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
/**
 * Test-only analyzer stub. Never used to actually analyze text:
 * {@link #createComponents(String)} returns {@code null}.
 */
public class DummyAnalyzer extends StopwordAnalyzerBase {
protected DummyAnalyzer() {
}
@Override
protected TokenStreamComponents createComponents(String fieldName) {
// Stub: no token stream components are ever built.
return null;
}
}

View File

@ -1,40 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.indices.analysis;
import org.elasticsearch.index.analysis.AnalyzerProvider;
import org.elasticsearch.index.analysis.AnalyzerScope;
/**
 * Test-only provider that supplies a fresh {@link DummyAnalyzer} under the
 * fixed name "dummy" at the INDICES scope.
 */
public class DummyAnalyzerProvider implements AnalyzerProvider<DummyAnalyzer> {
@Override
public String name() {
return "dummy";
}
@Override
public AnalyzerScope scope() {
return AnalyzerScope.INDICES;
}
@Override
public DummyAnalyzer get() {
// A new instance per call; the stub analyzer holds no state.
return new DummyAnalyzer();
}
}

View File

@ -1,36 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.indices.analysis;
import org.elasticsearch.index.analysis.CharFilterFactory;
import java.io.Reader;
/**
 * Test-only char filter factory stub named "dummy_char_filter";
 * {@link #create(Reader)} returns {@code null} (never actually filters).
 */
public class DummyCharFilterFactory implements CharFilterFactory {
@Override
public String name() {
return "dummy_char_filter";
}
@Override
public Reader create(Reader reader) {
// Stub: no filtering is performed.
return null;
}
}

View File

@ -1,33 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.indices.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.elasticsearch.index.analysis.TokenFilterFactory;
/**
 * Test-only token filter factory stub named "dummy_token_filter";
 * {@link #create(TokenStream)} returns {@code null} (never actually filters).
 */
public class DummyTokenFilterFactory implements TokenFilterFactory {
@Override public String name() {
return "dummy_token_filter";
}
@Override public TokenStream create(TokenStream tokenStream) {
// Stub: no filtering is performed.
return null;
}
}

View File

@ -1,35 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.indices.analysis;
import org.apache.lucene.analysis.Tokenizer;
import org.elasticsearch.index.analysis.TokenizerFactory;
/**
 * Test-only tokenizer factory stub named "dummy_tokenizer";
 * {@link #create()} returns {@code null} (never actually tokenizes).
 */
public class DummyTokenizerFactory implements TokenizerFactory {
@Override
public String name() {
return "dummy_tokenizer";
}
@Override
public Tokenizer create() {
// Stub: no tokenizer is ever constructed.
return null;
}
}

View File

@ -46,7 +46,7 @@ import static org.hamcrest.Matchers.notNullValue;
public class PreBuiltAnalyzerIntegrationIT extends ESIntegTestCase {
@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
return Arrays.asList(DummyAnalysisPlugin.class, InternalSettingsPlugin.class);
return Arrays.asList(InternalSettingsPlugin.class);
}
public void testThatPreBuiltAnalyzersAreNotClosedOnIndexClose() throws Exception {
@ -114,41 +114,6 @@ public class PreBuiltAnalyzerIntegrationIT extends ESIntegTestCase {
assertLuceneAnalyzersAreNotClosed(loadedAnalyzers);
}
/**
* Test case for #5030: Upgrading analysis plugins fails
* See https://github.com/elastic/elasticsearch/issues/5030
*/
public void testThatPluginAnalyzersCanBeUpdated() throws Exception {
final XContentBuilder mapping = jsonBuilder().startObject()
.startObject("type")
.startObject("properties")
.startObject("foo")
.field("type", "text")
.field("analyzer", "dummy")
.endObject()
.startObject("bar")
.field("type", "text")
.field("analyzer", "my_dummy")
.endObject()
.endObject()
.endObject()
.endObject();
Settings versionSettings = settings(randomVersion(random()))
.put("index.analysis.analyzer.my_dummy.type", "custom")
.put("index.analysis.analyzer.my_dummy.filter", "my_dummy_token_filter")
.put("index.analysis.analyzer.my_dummy.char_filter", "my_dummy_char_filter")
.put("index.analysis.analyzer.my_dummy.tokenizer", "my_dummy_tokenizer")
.put("index.analysis.tokenizer.my_dummy_tokenizer.type", "dummy_tokenizer")
.put("index.analysis.filter.my_dummy_token_filter.type", "dummy_token_filter")
.put("index.analysis.char_filter.my_dummy_char_filter.type", "dummy_char_filter")
.build();
client().admin().indices().prepareCreate("test-analysis-dummy").addMapping("type", mapping).setSettings(versionSettings).get();
ensureGreen();
}
private void assertThatAnalyzersHaveBeenLoaded(Map<PreBuiltAnalyzers, List<Version>> expectedLoadedAnalyzers) {
for (Map.Entry<PreBuiltAnalyzers, List<Version>> entry : expectedLoadedAnalyzers.entrySet()) {
for (Version version : entry.getValue()) {