From 4b77d0434aac3a28f76c3ce09d371353119f3846 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Thu, 21 Feb 2019 16:47:02 +0100 Subject: [PATCH] Remove `nGram` and `edgeNGram` token filter names (#39070) In #30209 we deprecated the camel case `nGram` filter name in favour of `ngram` and did the same for `edgeNGram` and `edge_ngram` and we are removing those names in 8.0. This change disallows using the deprecated names for new indices created in 7.0 by throwing an error if these filters are used. Relates to #38911 --- .../edgengram-tokenfilter.asciidoc | 4 +- .../tokenfilters/ngram-tokenfilter.asciidoc | 4 +- .../migration/migrate_7_0/analysis.asciidoc | 11 +++++- .../analysis/common/CommonAnalysisPlugin.java | 11 +++++- .../common/CommonAnalysisPluginTests.java | 37 +++++++++++-------- .../common/HighlighterWithAnalyzersTests.java | 2 +- .../test/analysis-common/30_tokenizers.yml | 33 ++++------------- .../test/indices.analyze/10_analyze.yml | 2 +- 8 files changed, 55 insertions(+), 49 deletions(-) diff --git a/docs/reference/analysis/tokenfilters/edgengram-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/edgengram-tokenfilter.asciidoc index be37d24f7dd..e460725523c 100644 --- a/docs/reference/analysis/tokenfilters/edgengram-tokenfilter.asciidoc +++ b/docs/reference/analysis/tokenfilters/edgengram-tokenfilter.asciidoc @@ -1,9 +1,9 @@ [[analysis-edgengram-tokenfilter]] === Edge NGram Token Filter -A token filter of type `edgeNGram`. +A token filter of type `edge_ngram`. -The following are settings that can be set for a `edgeNGram` token +The following are settings that can be set for a `edge_ngram` token filter type: [cols="<,<",options="header",] diff --git a/docs/reference/analysis/tokenfilters/ngram-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/ngram-tokenfilter.asciidoc index acc178a2741..53bda23d12b 100644 --- a/docs/reference/analysis/tokenfilters/ngram-tokenfilter.asciidoc +++ b/docs/reference/analysis/tokenfilters/ngram-tokenfilter.asciidoc @@ -1,9 +1,9 @@ [[analysis-ngram-tokenfilter]] === NGram Token Filter -A token filter of type `nGram`. +A token filter of type `ngram`. -The following are settings that can be set for a `nGram` token filter +The following are settings that can be set for a `ngram` token filter type: [cols="<,<",options="header",] diff --git a/docs/reference/migration/migrate_7_0/analysis.asciidoc b/docs/reference/migration/migrate_7_0/analysis.asciidoc index 36ad41be09a..3e047e001f1 100644 --- a/docs/reference/migration/migrate_7_0/analysis.asciidoc +++ b/docs/reference/migration/migrate_7_0/analysis.asciidoc @@ -38,4 +38,13 @@ The `standard` token filter has been removed because it doesn't change anything The `standard_html_strip` analyzer has been deprecated, and should be replaced with a combination of the `standard` tokenizer and `html_strip` char_filter. Indexes created using this analyzer will still be readable in elasticsearch 7.0, -but it will not be possible to create new indexes using it. \ No newline at end of file +but it will not be possible to create new indexes using it. + +[float] +==== The deprecated `nGram` and `edgeNGram` token filter cannot be used on new indices + +The `nGram` and `edgeNGram` token filter names have been deprecated in an earlier 6.x version. +Indexes created using these token filters will still be readable in elasticsearch 7.0 but indexing +documents using those filter names will issue a deprecation warning. Using the deprecated names on +new indices starting with version 7.0.0 will be prohibited and throw an error when indexing +or analyzing documents. Both names should be replaces by `ngram` or `edge_ngram` respectively. diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java index 0059f7460a8..c2886408437 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java @@ -415,7 +415,11 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, Scri filters.add(PreConfiguredTokenFilter.singleton("edge_ngram", false, input -> new EdgeNGramTokenFilter(input, 1))); filters.add(PreConfiguredTokenFilter.singletonWithVersion("edgeNGram", false, (reader, version) -> { - if (version.onOrAfter(org.elasticsearch.Version.V_6_4_0)) { + if (version.onOrAfter(org.elasticsearch.Version.V_7_0_0)) { + throw new IllegalArgumentException( + "The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. " + + "Please change the filter name to [edge_ngram] instead."); + } else { deprecationLogger.deprecatedAndMaybeLog("edgeNGram_deprecation", "The [edgeNGram] token filter name is deprecated and will be removed in a future version. " + "Please change the filter name to [edge_ngram] instead."); @@ -439,7 +443,10 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, Scri LimitTokenCountFilterFactory.DEFAULT_CONSUME_ALL_TOKENS))); filters.add(PreConfiguredTokenFilter.singleton("ngram", false, reader -> new NGramTokenFilter(reader, 1, 2, false))); filters.add(PreConfiguredTokenFilter.singletonWithVersion("nGram", false, (reader, version) -> { - if (version.onOrAfter(org.elasticsearch.Version.V_6_4_0)) { + if (version.onOrAfter(org.elasticsearch.Version.V_7_0_0)) { + throw new IllegalArgumentException("The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. " + + "Please change the filter name to [ngram] instead."); + } else { deprecationLogger.deprecatedAndMaybeLog("nGram_deprecation", "The [nGram] token filter name is deprecated and will be removed in a future version. " + "Please change the filter name to [ngram] instead."); diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java index c52c78ffe27..ce25646050a 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java @@ -41,11 +41,12 @@ import java.util.Map; public class CommonAnalysisPluginTests extends ESTestCase { /** - * Check that the deprecated name "nGram" issues a deprecation warning for indices created since 6.3.0 + * Check that the deprecated name "nGram" issues a deprecation warning for indices created since 6.0.0 */ public void testNGramDeprecationWarning() throws IOException { Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) - .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_6_4_0, Version.CURRENT)) + .put(IndexMetaData.SETTING_VERSION_CREATED, + VersionUtils.randomVersionBetween(random(), Version.V_6_0_0, VersionUtils.getPreviousVersion(Version.V_7_0_0))) .build(); IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings); @@ -62,12 +63,11 @@ public class CommonAnalysisPluginTests extends ESTestCase { } /** - * Check that the deprecated name "nGram" does NOT issues a deprecation warning for indices created before 6.4.0 + * Check that the deprecated name "nGram" throws an error since 7.0.0 */ - public void testNGramNoDeprecationWarningPre6_4() throws IOException { + public void testNGramDeprecationError() throws IOException { Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) - .put(IndexMetaData.SETTING_VERSION_CREATED, - VersionUtils.randomVersionBetween(random(), Version.V_6_0_0, Version.V_6_3_0)) + .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_7_0_0, null)) .build(); IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings); @@ -76,16 +76,21 @@ public class CommonAnalysisPluginTests extends ESTestCase { TokenFilterFactory tokenFilterFactory = tokenFilters.get("nGram"); Tokenizer tokenizer = new MockTokenizer(); tokenizer.setReader(new StringReader("foo bar")); - assertNotNull(tokenFilterFactory.create(tokenizer)); + IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> tokenFilterFactory.create(tokenizer)); + assertEquals( + "The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. Please change the filter" + + " name to [ngram] instead.", + ex.getMessage()); } } /** - * Check that the deprecated name "edgeNGram" issues a deprecation warning for indices created since 6.3.0 + * Check that the deprecated name "edgeNGram" issues a deprecation warning for indices created since 6.0.0 */ public void testEdgeNGramDeprecationWarning() throws IOException { Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) - .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_6_4_0, Version.CURRENT)) + .put(IndexMetaData.SETTING_VERSION_CREATED, + VersionUtils.randomVersionBetween(random(), Version.V_6_4_0, VersionUtils.getPreviousVersion(Version.V_7_0_0))) .build(); IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings); @@ -102,12 +107,11 @@ public class CommonAnalysisPluginTests extends ESTestCase { } /** - * Check that the deprecated name "edgeNGram" does NOT issues a deprecation warning for indices created before 6.4.0 + * Check that the deprecated name "edgeNGram" throws an error for indices created since 7.0.0 */ - public void testEdgeNGramNoDeprecationWarningPre6_4() throws IOException { + public void testEdgeNGramDeprecationError() throws IOException { Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) - .put(IndexMetaData.SETTING_VERSION_CREATED, - VersionUtils.randomVersionBetween(random(), Version.V_6_0_0, Version.V_6_3_0)) + .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_7_0_0, null)) .build(); IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings); @@ -116,11 +120,14 @@ public class CommonAnalysisPluginTests extends ESTestCase { TokenFilterFactory tokenFilterFactory = tokenFilters.get("edgeNGram"); Tokenizer tokenizer = new MockTokenizer(); tokenizer.setReader(new StringReader("foo bar")); - assertNotNull(tokenFilterFactory.create(tokenizer)); + IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> tokenFilterFactory.create(tokenizer)); + assertEquals( + "The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. Please change the filter" + + " name to [edge_ngram] instead.", + ex.getMessage()); } } - /** * Check that the deprecated analyzer name "standard_html_strip" throws exception for indices created since 7.0.0 */ diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/HighlighterWithAnalyzersTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/HighlighterWithAnalyzersTests.java index e96243efc42..8f58a074cf1 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/HighlighterWithAnalyzersTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/HighlighterWithAnalyzersTests.java @@ -81,7 +81,7 @@ public class HighlighterWithAnalyzersTests extends ESIntegTestCase { .put("analysis.tokenizer.autocomplete.max_gram", 20) .put("analysis.tokenizer.autocomplete.min_gram", 1) .put("analysis.tokenizer.autocomplete.token_chars", "letter,digit") - .put("analysis.tokenizer.autocomplete.type", "nGram") + .put("analysis.tokenizer.autocomplete.type", "ngram") .put("analysis.filter.wordDelimiter.type", "word_delimiter") .putList("analysis.filter.wordDelimiter.type_table", "& => ALPHANUM", "| => ALPHANUM", "! => ALPHANUM", diff --git a/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/30_tokenizers.yml b/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/30_tokenizers.yml index 9a7c158fc47..460bc8ecf83 100644 --- a/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/30_tokenizers.yml +++ b/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/30_tokenizers.yml @@ -23,24 +23,7 @@ - match: { detail.tokenizer.tokens.0.token: Foo Bar! } --- -"nGram": - - do: - indices.analyze: - body: - text: good - explain: true - tokenizer: - type: nGram - min_gram: 2 - max_gram: 2 - - length: { detail.tokenizer.tokens: 3 } - - match: { detail.tokenizer.name: _anonymous_tokenizer } - - match: { detail.tokenizer.tokens.0.token: go } - - match: { detail.tokenizer.tokens.1.token: oo } - - match: { detail.tokenizer.tokens.2.token: od } - ---- -"nGram_exception": +"ngram_exception": - skip: version: " - 6.99.99" reason: only starting from version 7.x this throws an error @@ -51,7 +34,7 @@ text: good explain: true tokenizer: - type: nGram + type: ngram min_gram: 2 max_gram: 4 --- @@ -133,7 +116,7 @@ text: "foobar" explain: true tokenizer: - type: nGram + type: ngram min_gram: 3 max_gram: 3 - length: { detail.tokenizer.tokens: 4 } @@ -162,9 +145,9 @@ body: text: "foo" explain: true - tokenizer: nGram + tokenizer: ngram - length: { detail.tokenizer.tokens: 5 } - - match: { detail.tokenizer.name: nGram } + - match: { detail.tokenizer.name: ngram } - match: { detail.tokenizer.tokens.0.token: f } - match: { detail.tokenizer.tokens.1.token: fo } - match: { detail.tokenizer.tokens.2.token: o } @@ -194,7 +177,7 @@ text: "foo" explain: true tokenizer: - type: edgeNGram + type: edge_ngram min_gram: 1 max_gram: 3 - length: { detail.tokenizer.tokens: 3 } @@ -219,9 +202,9 @@ body: text: "foo" explain: true - tokenizer: edgeNGram + tokenizer: edge_ngram - length: { detail.tokenizer.tokens: 2 } - - match: { detail.tokenizer.name: edgeNGram } + - match: { detail.tokenizer.name: edge_ngram } - match: { detail.tokenizer.tokens.0.token: f } - match: { detail.tokenizer.tokens.1.token: fo } diff --git a/modules/analysis-common/src/test/resources/rest-api-spec/test/indices.analyze/10_analyze.yml b/modules/analysis-common/src/test/resources/rest-api-spec/test/indices.analyze/10_analyze.yml index ec00b6d41f1..56bbed7044e 100644 --- a/modules/analysis-common/src/test/resources/rest-api-spec/test/indices.analyze/10_analyze.yml +++ b/modules/analysis-common/src/test/resources/rest-api-spec/test/indices.analyze/10_analyze.yml @@ -76,7 +76,7 @@ analysis: tokenizer: trigram: - type: nGram + type: ngram min_gram: 3 max_gram: 3 filter: