Remove nGram and edgeNGram token filter names (#39070)

In #30209 we deprecated the camel case `nGram` filter name in favour of `ngram` and
did the same for `edgeNGram` in favour of `edge_ngram`; those names will be removed in
8.0. This change disallows the deprecated names for new indices created in 7.0 by
throwing an error when these filters are used.

Relates to #38911
Christoph Büscher 2019-02-21 16:47:02 +01:00
parent 08ad740d48
commit 4b77d0434a
8 changed files with 55 additions and 49 deletions

View File

@@ -1,9 +1,9 @@
 [[analysis-edgengram-tokenfilter]]
 === Edge NGram Token Filter
 
-A token filter of type `edgeNGram`.
+A token filter of type `edge_ngram`.
 
-The following are settings that can be set for a `edgeNGram` token
+The following are settings that can be set for a `edge_ngram` token
 filter type:
 
 [cols="<,<",options="header",]

View File

@@ -1,9 +1,9 @@
 [[analysis-ngram-tokenfilter]]
 === NGram Token Filter
 
-A token filter of type `nGram`.
+A token filter of type `ngram`.
 
-The following are settings that can be set for a `nGram` token filter
+The following are settings that can be set for a `ngram` token filter
 type:
 
 [cols="<,<",options="header",]

View File

@@ -39,3 +39,12 @@ The `standard_html_strip` analyzer has been deprecated, and should be replaced
 with a combination of the `standard` tokenizer and `html_strip` char_filter.
 Indexes created using this analyzer will still be readable in elasticsearch 7.0,
 but it will not be possible to create new indexes using it.
+
+[float]
+==== The deprecated `nGram` and `edgeNGram` token filters cannot be used on new indices
+
+The `nGram` and `edgeNGram` token filter names have been deprecated in an earlier 6.x version.
+Indexes created using these token filters will still be readable in elasticsearch 7.0, but indexing
+documents using those filter names will issue a deprecation warning. Using the deprecated names on
+new indices starting with version 7.0.0 is prohibited and throws an error when indexing
+or analyzing documents. Both names should be replaced by `ngram` and `edge_ngram`, respectively.
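To illustrate what this means on the index-settings side, here is a hedged sketch written in the style of the Java code in this commit. The analyzer name and setting keys are assumptions for the example and not part of the change; the exception wording in the comment is paraphrased.

```java
// Sketch only: hypothetical settings for an index created on 7.0.0 or later.
// Referencing the pre-built "nGram" / "edgeNGram" filters by their camel case names
// now fails with an IllegalArgumentException when documents are indexed or analyzed;
// the snake_case names keep working.
Settings indexSettings = Settings.builder()
        .put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_7_0_0)
        .put("analysis.analyzer.autocomplete.tokenizer", "standard")
        // was: "lowercase", "edgeNGram"
        .putList("analysis.analyzer.autocomplete.filter", "lowercase", "edge_ngram")
        .build();
```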

View File

@@ -415,7 +415,11 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, Scri
         filters.add(PreConfiguredTokenFilter.singleton("edge_ngram", false, input ->
             new EdgeNGramTokenFilter(input, 1)));
         filters.add(PreConfiguredTokenFilter.singletonWithVersion("edgeNGram", false, (reader, version) -> {
-            if (version.onOrAfter(org.elasticsearch.Version.V_6_4_0)) {
+            if (version.onOrAfter(org.elasticsearch.Version.V_7_0_0)) {
+                throw new IllegalArgumentException(
+                    "The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
+                        + "Please change the filter name to [edge_ngram] instead.");
+            } else {
                 deprecationLogger.deprecatedAndMaybeLog("edgeNGram_deprecation",
                     "The [edgeNGram] token filter name is deprecated and will be removed in a future version. "
                         + "Please change the filter name to [edge_ngram] instead.");
@@ -439,7 +443,10 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, Scri
             LimitTokenCountFilterFactory.DEFAULT_CONSUME_ALL_TOKENS)));
         filters.add(PreConfiguredTokenFilter.singleton("ngram", false, reader -> new NGramTokenFilter(reader, 1, 2, false)));
         filters.add(PreConfiguredTokenFilter.singletonWithVersion("nGram", false, (reader, version) -> {
-            if (version.onOrAfter(org.elasticsearch.Version.V_6_4_0)) {
+            if (version.onOrAfter(org.elasticsearch.Version.V_7_0_0)) {
+                throw new IllegalArgumentException("The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
+                    + "Please change the filter name to [ngram] instead.");
+            } else {
                 deprecationLogger.deprecatedAndMaybeLog("nGram_deprecation",
                     "The [nGram] token filter name is deprecated and will be removed in a future version. "
                         + "Please change the filter name to [ngram] instead.");

View File

@@ -41,11 +41,12 @@ import java.util.Map;
 public class CommonAnalysisPluginTests extends ESTestCase {
 
     /**
-     * Check that the deprecated name "nGram" issues a deprecation warning for indices created since 6.3.0
+     * Check that the deprecated name "nGram" issues a deprecation warning for indices created since 6.0.0
      */
     public void testNGramDeprecationWarning() throws IOException {
         Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
-            .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_6_4_0, Version.CURRENT))
+            .put(IndexMetaData.SETTING_VERSION_CREATED,
+                VersionUtils.randomVersionBetween(random(), Version.V_6_0_0, VersionUtils.getPreviousVersion(Version.V_7_0_0)))
             .build();
         IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
@@ -62,12 +63,11 @@ public class CommonAnalysisPluginTests extends ESTestCase {
     }
 
     /**
-     * Check that the deprecated name "nGram" does NOT issues a deprecation warning for indices created before 6.4.0
+     * Check that the deprecated name "nGram" throws an error since 7.0.0
      */
-    public void testNGramNoDeprecationWarningPre6_4() throws IOException {
+    public void testNGramDeprecationError() throws IOException {
         Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
-            .put(IndexMetaData.SETTING_VERSION_CREATED,
-                VersionUtils.randomVersionBetween(random(), Version.V_6_0_0, Version.V_6_3_0))
+            .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_7_0_0, null))
             .build();
         IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
@@ -76,16 +76,21 @@ public class CommonAnalysisPluginTests extends ESTestCase {
             TokenFilterFactory tokenFilterFactory = tokenFilters.get("nGram");
             Tokenizer tokenizer = new MockTokenizer();
             tokenizer.setReader(new StringReader("foo bar"));
-            assertNotNull(tokenFilterFactory.create(tokenizer));
+            IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> tokenFilterFactory.create(tokenizer));
+            assertEquals(
+                "The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. Please change the filter"
+                    + " name to [ngram] instead.",
+                ex.getMessage());
         }
     }
 
     /**
-     * Check that the deprecated name "edgeNGram" issues a deprecation warning for indices created since 6.3.0
+     * Check that the deprecated name "edgeNGram" issues a deprecation warning for indices created since 6.0.0
      */
     public void testEdgeNGramDeprecationWarning() throws IOException {
         Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
-            .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_6_4_0, Version.CURRENT))
+            .put(IndexMetaData.SETTING_VERSION_CREATED,
+                VersionUtils.randomVersionBetween(random(), Version.V_6_4_0, VersionUtils.getPreviousVersion(Version.V_7_0_0)))
             .build();
         IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
@@ -102,12 +107,11 @@ public class CommonAnalysisPluginTests extends ESTestCase {
     }
 
     /**
-     * Check that the deprecated name "edgeNGram" does NOT issues a deprecation warning for indices created before 6.4.0
+     * Check that the deprecated name "edgeNGram" throws an error for indices created since 7.0.0
      */
-    public void testEdgeNGramNoDeprecationWarningPre6_4() throws IOException {
+    public void testEdgeNGramDeprecationError() throws IOException {
         Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
-            .put(IndexMetaData.SETTING_VERSION_CREATED,
-                VersionUtils.randomVersionBetween(random(), Version.V_6_0_0, Version.V_6_3_0))
+            .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_7_0_0, null))
             .build();
         IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
@@ -116,11 +120,14 @@ public class CommonAnalysisPluginTests extends ESTestCase {
             TokenFilterFactory tokenFilterFactory = tokenFilters.get("edgeNGram");
             Tokenizer tokenizer = new MockTokenizer();
             tokenizer.setReader(new StringReader("foo bar"));
-            assertNotNull(tokenFilterFactory.create(tokenizer));
+            IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> tokenFilterFactory.create(tokenizer));
+            assertEquals(
+                "The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. Please change the filter"
+                    + " name to [edge_ngram] instead.",
+                ex.getMessage());
         }
     }
 
     /**
      * Check that the deprecated analyzer name "standard_html_strip" throws exception for indices created since 7.0.0
      */

View File

@@ -81,7 +81,7 @@ public class HighlighterWithAnalyzersTests extends ESIntegTestCase {
                 .put("analysis.tokenizer.autocomplete.max_gram", 20)
                 .put("analysis.tokenizer.autocomplete.min_gram", 1)
                 .put("analysis.tokenizer.autocomplete.token_chars", "letter,digit")
-                .put("analysis.tokenizer.autocomplete.type", "nGram")
+                .put("analysis.tokenizer.autocomplete.type", "ngram")
                 .put("analysis.filter.wordDelimiter.type", "word_delimiter")
                 .putList("analysis.filter.wordDelimiter.type_table",
                     "& => ALPHANUM", "| => ALPHANUM", "! => ALPHANUM",

View File

@@ -23,24 +23,7 @@
     - match: { detail.tokenizer.tokens.0.token: Foo Bar! }
 
 ---
-"nGram":
-    - do:
-        indices.analyze:
-          body:
-            text: good
-            explain: true
-            tokenizer:
-              type: nGram
-              min_gram: 2
-              max_gram: 2
-    - length: { detail.tokenizer.tokens: 3 }
-    - match: { detail.tokenizer.name: _anonymous_tokenizer }
-    - match: { detail.tokenizer.tokens.0.token: go }
-    - match: { detail.tokenizer.tokens.1.token: oo }
-    - match: { detail.tokenizer.tokens.2.token: od }
-
----
-"nGram_exception":
+"ngram_exception":
     - skip:
         version: " - 6.99.99"
         reason: only starting from version 7.x this throws an error
@@ -51,7 +34,7 @@
             text: good
             explain: true
            tokenizer:
-              type: nGram
+              type: ngram
               min_gram: 2
               max_gram: 4
 ---
@@ -133,7 +116,7 @@
             text: "foobar"
             explain: true
             tokenizer:
-              type: nGram
+              type: ngram
               min_gram: 3
               max_gram: 3
     - length: { detail.tokenizer.tokens: 4 }
@@ -162,9 +145,9 @@
           body:
             text: "foo"
             explain: true
-            tokenizer: nGram
+            tokenizer: ngram
     - length: { detail.tokenizer.tokens: 5 }
-    - match: { detail.tokenizer.name: nGram }
+    - match: { detail.tokenizer.name: ngram }
     - match: { detail.tokenizer.tokens.0.token: f }
     - match: { detail.tokenizer.tokens.1.token: fo }
     - match: { detail.tokenizer.tokens.2.token: o }
@@ -194,7 +177,7 @@
             text: "foo"
             explain: true
             tokenizer:
-              type: edgeNGram
+              type: edge_ngram
               min_gram: 1
               max_gram: 3
     - length: { detail.tokenizer.tokens: 3 }
@@ -219,9 +202,9 @@
           body:
             text: "foo"
             explain: true
-            tokenizer: edgeNGram
+            tokenizer: edge_ngram
     - length: { detail.tokenizer.tokens: 2 }
-    - match: { detail.tokenizer.name: edgeNGram }
+    - match: { detail.tokenizer.name: edge_ngram }
     - match: { detail.tokenizer.tokens.0.token: f }
     - match: { detail.tokenizer.tokens.1.token: fo }

View File

@@ -76,7 +76,7 @@
           analysis:
             tokenizer:
               trigram:
-                type: nGram
+                type: ngram
                 min_gram: 3
                 max_gram: 3
             filter: