From b6340658f41dead30a5ded6d03506032a3eb8100 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Thu, 17 May 2018 12:52:22 +0200 Subject: [PATCH] Deprecate `nGram` and `edgeNGram` names for ngram filters (#30209) The camel case name `nGram` should be removed in favour of `ngram` and similar for `edgeNGram` and `edge_ngram`. Before removal, we need to deprecate the camel case names first. This change adds deprecation warnings for indices with versions 6.4.0 and higher and logs deprecation warnings. --- .../analysis/common/CommonAnalysisPlugin.java | 21 +++- .../common/CommonAnalysisPluginTests.java | 119 ++++++++++++++++++ .../common/NGramTokenizerFactoryTests.java | 21 +--- sdfhksldj | 0 .../analysis/PreConfiguredTokenFilter.java | 9 ++ 5 files changed, 146 insertions(+), 24 deletions(-) create mode 100644 modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java create mode 100644 sdfhksldj diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java index c9b48f0c865..624194092a0 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java @@ -237,9 +237,14 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin { filters.add(PreConfiguredTokenFilter.singleton("dutch_stem", false, input -> new SnowballFilter(input, new DutchStemmer()))); filters.add(PreConfiguredTokenFilter.singleton("edge_ngram", false, input -> new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE))); - // TODO deprecate edgeNGram - filters.add(PreConfiguredTokenFilter.singleton("edgeNGram", false, input -> - new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE))); + filters.add(PreConfiguredTokenFilter.singletonWithVersion("edgeNGram", false, (reader, version) -> { + if (version.onOrAfter(org.elasticsearch.Version.V_6_4_0)) { + DEPRECATION_LOGGER.deprecatedAndMaybeLog("edgeNGram_deprecation", + "The [edgeNGram] token filter name is deprecated and will be removed in a future version. " + + "Please change the filter name to [edge_ngram] instead."); + } + return new EdgeNGramTokenFilter(reader, EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE); + })); filters.add(PreConfiguredTokenFilter.singleton("elision", true, input -> new ElisionFilter(input, FrenchAnalyzer.DEFAULT_ARTICLES))); filters.add(PreConfiguredTokenFilter.singleton("french_stem", false, input -> new SnowballFilter(input, new FrenchStemmer()))); @@ -256,8 +261,14 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin { LimitTokenCountFilterFactory.DEFAULT_MAX_TOKEN_COUNT, LimitTokenCountFilterFactory.DEFAULT_CONSUME_ALL_TOKENS))); filters.add(PreConfiguredTokenFilter.singleton("ngram", false, NGramTokenFilter::new)); - // TODO deprecate nGram - filters.add(PreConfiguredTokenFilter.singleton("nGram", false, NGramTokenFilter::new)); + filters.add(PreConfiguredTokenFilter.singletonWithVersion("nGram", false, (reader, version) -> { + if (version.onOrAfter(org.elasticsearch.Version.V_6_4_0)) { + DEPRECATION_LOGGER.deprecatedAndMaybeLog("nGram_deprecation", + "The [nGram] token filter name is deprecated and will be removed in a future version. " + + "Please change the filter name to [ngram] instead."); + } + return new NGramTokenFilter(reader); + })); filters.add(PreConfiguredTokenFilter.singleton("persian_normalization", true, PersianNormalizationFilter::new)); filters.add(PreConfiguredTokenFilter.singleton("porter_stem", false, PorterStemFilter::new)); filters.add(PreConfiguredTokenFilter.singleton("reverse", false, ReverseStringFilter::new)); diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java new file mode 100644 index 00000000000..1d2b8a36810 --- /dev/null +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java @@ -0,0 +1,119 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.analysis.common; + +import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.analysis.Tokenizer; +import org.elasticsearch.Version; +import org.elasticsearch.cluster.metadata.IndexMetaData; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.env.Environment; +import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.TokenFilterFactory; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.IndexSettingsModule; +import org.elasticsearch.test.VersionUtils; + +import java.io.IOException; +import java.io.StringReader; +import java.util.Map; + +public class CommonAnalysisPluginTests extends ESTestCase { + + /** + * Check that the deprecated name "nGram" issues a deprecation warning for indices created since 6.3.0 + */ + public void testNGramDeprecationWarning() throws IOException { + Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) + .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_6_4_0, Version.CURRENT)) + .build(); + + IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings); + try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) { + Map tokenFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).tokenFilter; + TokenFilterFactory tokenFilterFactory = tokenFilters.get("nGram"); + Tokenizer tokenizer = new MockTokenizer(); + tokenizer.setReader(new StringReader("foo bar")); + assertNotNull(tokenFilterFactory.create(tokenizer)); + assertWarnings( + "The [nGram] token filter name is deprecated and will be removed in a future version. " + + "Please change the filter name to [ngram] instead."); + } + } + + /** + * Check that the deprecated name "nGram" does NOT issues a deprecation warning for indices created before 6.4.0 + */ + public void testNGramNoDeprecationWarningPre6_4() throws IOException { + Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) + .put(IndexMetaData.SETTING_VERSION_CREATED, + VersionUtils.randomVersionBetween(random(), Version.V_5_0_0, Version.V_6_3_0)) + .build(); + + IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings); + try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) { + Map tokenFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).tokenFilter; + TokenFilterFactory tokenFilterFactory = tokenFilters.get("nGram"); + Tokenizer tokenizer = new MockTokenizer(); + tokenizer.setReader(new StringReader("foo bar")); + assertNotNull(tokenFilterFactory.create(tokenizer)); + } + } + + /** + * Check that the deprecated name "edgeNGram" issues a deprecation warning for indices created since 6.3.0 + */ + public void testEdgeNGramDeprecationWarning() throws IOException { + Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) + .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_6_4_0, Version.CURRENT)) + .build(); + + IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings); + try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) { + Map tokenFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).tokenFilter; + TokenFilterFactory tokenFilterFactory = tokenFilters.get("edgeNGram"); + Tokenizer tokenizer = new MockTokenizer(); + tokenizer.setReader(new StringReader("foo bar")); + assertNotNull(tokenFilterFactory.create(tokenizer)); + assertWarnings( + "The [edgeNGram] token filter name is deprecated and will be removed in a future version. " + + "Please change the filter name to [edge_ngram] instead."); + } + } + + /** + * Check that the deprecated name "edgeNGram" does NOT issues a deprecation warning for indices created before 6.4.0 + */ + public void testEdgeNGramNoDeprecationWarningPre6_4() throws IOException { + Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) + .put(IndexMetaData.SETTING_VERSION_CREATED, + VersionUtils.randomVersionBetween(random(), Version.V_5_0_0, Version.V_6_3_0)) + .build(); + + IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings); + try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) { + Map tokenFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).tokenFilter; + TokenFilterFactory tokenFilterFactory = tokenFilters.get("edgeNGram"); + Tokenizer tokenizer = new MockTokenizer(); + tokenizer.setReader(new StringReader("foo bar")); + assertNotNull(tokenFilterFactory.create(tokenizer)); + } + } +} diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/NGramTokenizerFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/NGramTokenizerFactoryTests.java index 078e0a9cb9e..1cf6ef4696d 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/NGramTokenizerFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/NGramTokenizerFactoryTests.java @@ -32,15 +32,11 @@ import org.elasticsearch.index.Index; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.test.ESTokenStreamTestCase; import org.elasticsearch.test.IndexSettingsModule; +import org.elasticsearch.test.VersionUtils; import java.io.IOException; import java.io.StringReader; -import java.lang.reflect.Field; -import java.lang.reflect.Modifier; -import java.util.ArrayList; import java.util.Arrays; -import java.util.List; -import java.util.Random; import static com.carrotsearch.randomizedtesting.RandomizedTest.scaledRandomIntBetween; import static org.hamcrest.Matchers.instanceOf; @@ -129,7 +125,7 @@ public class NGramTokenizerFactoryTests extends ESTokenStreamTestCase { for (int i = 0; i < iters; i++) { final Index index = new Index("test", "_na_"); final String name = "ngr"; - Version v = randomVersion(random()); + Version v = VersionUtils.randomVersion(random()); Builder builder = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3); boolean reverse = random().nextBoolean(); if (reverse) { @@ -150,7 +146,6 @@ public class NGramTokenizerFactoryTests extends ESTokenStreamTestCase { } } - /*` * test that throws an error when trying to get a NGramTokenizer where difference between max_gram and min_gram * is greater than the allowed value of max_ngram_diff @@ -175,16 +170,4 @@ public class NGramTokenizerFactoryTests extends ESTokenStreamTestCase { + IndexSettings.MAX_NGRAM_DIFF_SETTING.getKey() + "] index level setting.", ex.getMessage()); } - - private Version randomVersion(Random random) throws IllegalArgumentException, IllegalAccessException { - Field[] declaredFields = Version.class.getFields(); - List versionFields = new ArrayList<>(); - for (Field field : declaredFields) { - if ((field.getModifiers() & Modifier.STATIC) != 0 && field.getName().startsWith("V_") && field.getType() == Version.class) { - versionFields.add(field); - } - } - return (Version) versionFields.get(random.nextInt(versionFields.size())).get(Version.class); - } - } diff --git a/sdfhksldj b/sdfhksldj new file mode 100644 index 00000000000..e69de29bb2d diff --git a/server/src/main/java/org/elasticsearch/index/analysis/PreConfiguredTokenFilter.java b/server/src/main/java/org/elasticsearch/index/analysis/PreConfiguredTokenFilter.java index 777fb589c9d..12130e856f3 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/PreConfiguredTokenFilter.java +++ b/server/src/main/java/org/elasticsearch/index/analysis/PreConfiguredTokenFilter.java @@ -41,6 +41,15 @@ public final class PreConfiguredTokenFilter extends PreConfiguredAnalysisCompone (tokenStream, version) -> create.apply(tokenStream)); } + /** + * Create a pre-configured token filter that may not vary at all. + */ + public static PreConfiguredTokenFilter singletonWithVersion(String name, boolean useFilterForMultitermQueries, + BiFunction create) { + return new PreConfiguredTokenFilter(name, useFilterForMultitermQueries, CachingStrategy.ONE, + (tokenStream, version) -> create.apply(tokenStream, version)); + } + /** * Create a pre-configured token filter that may vary based on the Lucene version. */