diff --git a/docs/reference/analysis/tokenfilters/edgengram-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/edgengram-tokenfilter.asciidoc index be37d24f7dd..e460725523c 100644 --- a/docs/reference/analysis/tokenfilters/edgengram-tokenfilter.asciidoc +++ b/docs/reference/analysis/tokenfilters/edgengram-tokenfilter.asciidoc @@ -1,9 +1,9 @@ [[analysis-edgengram-tokenfilter]] === Edge NGram Token Filter -A token filter of type `edgeNGram`. +A token filter of type `edge_ngram`. -The following are settings that can be set for a `edgeNGram` token +The following are settings that can be set for an `edge_ngram` token filter type: [cols="<,<",options="header",] diff --git a/docs/reference/analysis/tokenfilters/ngram-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/ngram-tokenfilter.asciidoc index acc178a2741..53bda23d12b 100644 --- a/docs/reference/analysis/tokenfilters/ngram-tokenfilter.asciidoc +++ b/docs/reference/analysis/tokenfilters/ngram-tokenfilter.asciidoc @@ -1,9 +1,9 @@ [[analysis-ngram-tokenfilter]] === NGram Token Filter -A token filter of type `nGram`. +A token filter of type `ngram`. -The following are settings that can be set for a `nGram` token filter +The following are settings that can be set for an `ngram` token filter type: [cols="<,<",options="header",] diff --git a/docs/reference/migration/migrate_7_0/analysis.asciidoc b/docs/reference/migration/migrate_7_0/analysis.asciidoc index 36ad41be09a..3e047e001f1 100644 --- a/docs/reference/migration/migrate_7_0/analysis.asciidoc +++ b/docs/reference/migration/migrate_7_0/analysis.asciidoc @@ -38,4 +38,13 @@ The `standard` token filter has been removed because it doesn't change anything The `standard_html_strip` analyzer has been deprecated, and should be replaced with a combination of the `standard` tokenizer and `html_strip` char_filter. Indexes created using this analyzer will still be readable in elasticsearch 7.0, -but it will not be possible to create new indexes using it. 
\ No newline at end of file +but it will not be possible to create new indexes using it. + +[float] +==== The deprecated `nGram` and `edgeNGram` token filters cannot be used on new indices + +The `nGram` and `edgeNGram` token filter names were deprecated in an earlier 6.x version. +Indexes created using these token filters will still be readable in elasticsearch 7.0 but indexing +documents using those filter names will issue a deprecation warning. Using the deprecated names on +new indices starting with version 7.0.0 will be prohibited and throw an error when indexing +or analyzing documents. The names should be replaced by `ngram` and `edge_ngram`, respectively. diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java index 0059f7460a8..c2886408437 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java @@ -415,7 +415,11 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, Scri filters.add(PreConfiguredTokenFilter.singleton("edge_ngram", false, input -> new EdgeNGramTokenFilter(input, 1))); filters.add(PreConfiguredTokenFilter.singletonWithVersion("edgeNGram", false, (reader, version) -> { - if (version.onOrAfter(org.elasticsearch.Version.V_6_4_0)) { + if (version.onOrAfter(org.elasticsearch.Version.V_7_0_0)) { + throw new IllegalArgumentException( + "The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. " + + "Please change the filter name to [edge_ngram] instead."); + } else { deprecationLogger.deprecatedAndMaybeLog("edgeNGram_deprecation", "The [edgeNGram] token filter name is deprecated and will be removed in a future version. 
" + "Please change the filter name to [edge_ngram] instead."); @@ -439,7 +443,10 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, Scri LimitTokenCountFilterFactory.DEFAULT_CONSUME_ALL_TOKENS))); filters.add(PreConfiguredTokenFilter.singleton("ngram", false, reader -> new NGramTokenFilter(reader, 1, 2, false))); filters.add(PreConfiguredTokenFilter.singletonWithVersion("nGram", false, (reader, version) -> { - if (version.onOrAfter(org.elasticsearch.Version.V_6_4_0)) { + if (version.onOrAfter(org.elasticsearch.Version.V_7_0_0)) { + throw new IllegalArgumentException("The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. " + + "Please change the filter name to [ngram] instead."); + } else { deprecationLogger.deprecatedAndMaybeLog("nGram_deprecation", "The [nGram] token filter name is deprecated and will be removed in a future version. " + "Please change the filter name to [ngram] instead."); diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java index c52c78ffe27..ce25646050a 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java @@ -41,11 +41,12 @@ import java.util.Map; public class CommonAnalysisPluginTests extends ESTestCase { /** - * Check that the deprecated name "nGram" issues a deprecation warning for indices created since 6.3.0 + * Check that the deprecated name "nGram" issues a deprecation warning for indices created since 6.0.0 */ public void testNGramDeprecationWarning() throws IOException { Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) - .put(IndexMetaData.SETTING_VERSION_CREATED, 
VersionUtils.randomVersionBetween(random(), Version.V_6_4_0, Version.CURRENT)) + .put(IndexMetaData.SETTING_VERSION_CREATED, + VersionUtils.randomVersionBetween(random(), Version.V_6_0_0, VersionUtils.getPreviousVersion(Version.V_7_0_0))) .build(); IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings); @@ -62,12 +63,11 @@ public class CommonAnalysisPluginTests extends ESTestCase { } /** - * Check that the deprecated name "nGram" does NOT issues a deprecation warning for indices created before 6.4.0 + * Check that the deprecated name "nGram" throws an error since 7.0.0 */ - public void testNGramNoDeprecationWarningPre6_4() throws IOException { + public void testNGramDeprecationError() throws IOException { Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) - .put(IndexMetaData.SETTING_VERSION_CREATED, - VersionUtils.randomVersionBetween(random(), Version.V_6_0_0, Version.V_6_3_0)) + .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_7_0_0, null)) .build(); IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings); @@ -76,16 +76,21 @@ public class CommonAnalysisPluginTests extends ESTestCase { TokenFilterFactory tokenFilterFactory = tokenFilters.get("nGram"); Tokenizer tokenizer = new MockTokenizer(); tokenizer.setReader(new StringReader("foo bar")); - assertNotNull(tokenFilterFactory.create(tokenizer)); + IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> tokenFilterFactory.create(tokenizer)); + assertEquals( + "The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. 
Please change the filter" + + " name to [ngram] instead.", + ex.getMessage()); } } /** - * Check that the deprecated name "edgeNGram" issues a deprecation warning for indices created since 6.3.0 + * Check that the deprecated name "edgeNGram" issues a deprecation warning for indices created since 6.0.0 */ public void testEdgeNGramDeprecationWarning() throws IOException { Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) - .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_6_4_0, Version.CURRENT)) + .put(IndexMetaData.SETTING_VERSION_CREATED, + VersionUtils.randomVersionBetween(random(), Version.V_6_4_0, VersionUtils.getPreviousVersion(Version.V_7_0_0))) .build(); IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings); @@ -102,12 +107,11 @@ public class CommonAnalysisPluginTests extends ESTestCase { } /** - * Check that the deprecated name "edgeNGram" does NOT issues a deprecation warning for indices created before 6.4.0 + * Check that the deprecated name "edgeNGram" throws an error for indices created since 7.0.0 */ - public void testEdgeNGramNoDeprecationWarningPre6_4() throws IOException { + public void testEdgeNGramDeprecationError() throws IOException { Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) - .put(IndexMetaData.SETTING_VERSION_CREATED, - VersionUtils.randomVersionBetween(random(), Version.V_6_0_0, Version.V_6_3_0)) + .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_7_0_0, null)) .build(); IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings); @@ -116,11 +120,14 @@ public class CommonAnalysisPluginTests extends ESTestCase { TokenFilterFactory tokenFilterFactory = tokenFilters.get("edgeNGram"); Tokenizer tokenizer = new MockTokenizer(); tokenizer.setReader(new StringReader("foo bar")); - 
assertNotNull(tokenFilterFactory.create(tokenizer)); + IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> tokenFilterFactory.create(tokenizer)); + assertEquals( + "The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. Please change the filter" + + " name to [edge_ngram] instead.", + ex.getMessage()); } } - /** * Check that the deprecated analyzer name "standard_html_strip" throws exception for indices created since 7.0.0 */ diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/HighlighterWithAnalyzersTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/HighlighterWithAnalyzersTests.java index e96243efc42..8f58a074cf1 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/HighlighterWithAnalyzersTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/HighlighterWithAnalyzersTests.java @@ -81,7 +81,7 @@ public class HighlighterWithAnalyzersTests extends ESIntegTestCase { .put("analysis.tokenizer.autocomplete.max_gram", 20) .put("analysis.tokenizer.autocomplete.min_gram", 1) .put("analysis.tokenizer.autocomplete.token_chars", "letter,digit") - .put("analysis.tokenizer.autocomplete.type", "nGram") + .put("analysis.tokenizer.autocomplete.type", "ngram") .put("analysis.filter.wordDelimiter.type", "word_delimiter") .putList("analysis.filter.wordDelimiter.type_table", "& => ALPHANUM", "| => ALPHANUM", "! 
=> ALPHANUM", diff --git a/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/30_tokenizers.yml b/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/30_tokenizers.yml index 9a7c158fc47..460bc8ecf83 100644 --- a/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/30_tokenizers.yml +++ b/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/30_tokenizers.yml @@ -23,24 +23,7 @@ - match: { detail.tokenizer.tokens.0.token: Foo Bar! } --- -"nGram": - - do: - indices.analyze: - body: - text: good - explain: true - tokenizer: - type: nGram - min_gram: 2 - max_gram: 2 - - length: { detail.tokenizer.tokens: 3 } - - match: { detail.tokenizer.name: _anonymous_tokenizer } - - match: { detail.tokenizer.tokens.0.token: go } - - match: { detail.tokenizer.tokens.1.token: oo } - - match: { detail.tokenizer.tokens.2.token: od } - ---- -"nGram_exception": +"ngram_exception": - skip: version: " - 6.99.99" reason: only starting from version 7.x this throws an error @@ -51,7 +34,7 @@ text: good explain: true tokenizer: - type: nGram + type: ngram min_gram: 2 max_gram: 4 --- @@ -133,7 +116,7 @@ text: "foobar" explain: true tokenizer: - type: nGram + type: ngram min_gram: 3 max_gram: 3 - length: { detail.tokenizer.tokens: 4 } @@ -162,9 +145,9 @@ body: text: "foo" explain: true - tokenizer: nGram + tokenizer: ngram - length: { detail.tokenizer.tokens: 5 } - - match: { detail.tokenizer.name: nGram } + - match: { detail.tokenizer.name: ngram } - match: { detail.tokenizer.tokens.0.token: f } - match: { detail.tokenizer.tokens.1.token: fo } - match: { detail.tokenizer.tokens.2.token: o } @@ -194,7 +177,7 @@ text: "foo" explain: true tokenizer: - type: edgeNGram + type: edge_ngram min_gram: 1 max_gram: 3 - length: { detail.tokenizer.tokens: 3 } @@ -219,9 +202,9 @@ body: text: "foo" explain: true - tokenizer: edgeNGram + tokenizer: edge_ngram - length: { detail.tokenizer.tokens: 2 } - - match: 
{ detail.tokenizer.name: edgeNGram } + - match: { detail.tokenizer.name: edge_ngram } - match: { detail.tokenizer.tokens.0.token: f } - match: { detail.tokenizer.tokens.1.token: fo } diff --git a/modules/analysis-common/src/test/resources/rest-api-spec/test/indices.analyze/10_analyze.yml b/modules/analysis-common/src/test/resources/rest-api-spec/test/indices.analyze/10_analyze.yml index ec00b6d41f1..56bbed7044e 100644 --- a/modules/analysis-common/src/test/resources/rest-api-spec/test/indices.analyze/10_analyze.yml +++ b/modules/analysis-common/src/test/resources/rest-api-spec/test/indices.analyze/10_analyze.yml @@ -76,7 +76,7 @@ analysis: tokenizer: trigram: - type: nGram + type: ngram min_gram: 3 max_gram: 3 filter: