[Remove] Analyzer Deprecations (#1741)

This commit removes deprecated analyzer instantiation that is no longer
permitted in OpenSearch 2.0.0.

Signed-off-by: Nicholas Walter Knize <nknize@apache.org>
Nick Knize 2021-12-16 13:51:49 -05:00, committed by GitHub
parent 5966cc03bf
commit 5550f8d7e2
21 changed files with 22 additions and 834 deletions
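
Every hunk below applies the same mechanical change: delete the LegacyESVersion gate that chose between a deprecation warning and a hard failure, and keep only the strict behavior. A minimal standalone sketch of the shape (hypothetical class and field names, not the project's real types):

// Before: behavior switched on the index's creation version.
class BeforeFactory {
    private final String name = "example_filter";

    Object getSynonymFilter(boolean createdOnOrAfter7) {
        if (createdOnOrAfter7) {
            throw new IllegalArgumentException("Token filter [" + name + "] cannot be used to parse synonyms");
        } else {
            // legacy path: emit a deprecation warning and keep working
            return this;
        }
    }
}

// After: the version check and the legacy branch are deleted outright.
class AfterFactory {
    private final String name = "example_filter";

    Object getSynonymFilter() {
        throw new IllegalArgumentException("Token filter [" + name + "] cannot be used to parse synonyms");
    }
}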

File: CJKBigramFilterFactory.java

@@ -35,8 +35,6 @@ package org.opensearch.analysis.common;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cjk.CJKBigramFilter;
import org.apache.lucene.analysis.miscellaneous.DisableGraphAttribute;
import org.opensearch.LegacyESVersion;
import org.opensearch.common.logging.DeprecationLogger;
import org.opensearch.common.settings.Settings;
import org.opensearch.env.Environment;
import org.opensearch.index.IndexSettings;
@@ -63,9 +61,6 @@ import java.util.Set;
* In all cases, all non-CJK input is passed thru unmodified.
*/
public final class CJKBigramFilterFactory extends AbstractTokenFilterFactory {
private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(CJKBigramFilterFactory.class);
private final int flags;
private final boolean outputUnigrams;
@@ -110,14 +105,7 @@ public final class CJKBigramFilterFactory extends AbstractTokenFilterFactory {
@Override
public TokenFilterFactory getSynonymFilter() {
if (outputUnigrams) {
if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_0_0)) {
throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
} else {
DEPRECATION_LOGGER.deprecate(
name() + "_synonym_tokenfilters",
"Token filter [" + name() + "] will not be usable to parse synonyms after v7.0"
);
}
throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
}
return this;
}
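
As a concrete consequence, index settings that chain a synonym filter after cjk_bigram with output_unigrams enabled now fail unconditionally. A hedged sketch using the Settings builder seen in the tests below (filter and analyzer names are hypothetical):

import org.opensearch.common.settings.Settings;

public class CjkSynonymSettingsSketch {
    public static void main(String[] args) {
        Settings settings = Settings.builder()
            .put("index.analysis.filter.my_bigrams.type", "cjk_bigram")
            .put("index.analysis.filter.my_bigrams.output_unigrams", true)
            .put("index.analysis.filter.my_synonyms.type", "synonym_graph")
            .putList("index.analysis.filter.my_synonyms.synonyms", "foo, bar")
            .put("index.analysis.analyzer.my_cjk.tokenizer", "standard")
            .putList("index.analysis.analyzer.my_cjk.filter", "my_bigrams", "my_synonyms")
            .build();
        // Creating an index with these settings is expected to fail with:
        // "Token filter [my_bigrams] cannot be used to parse synonyms"
        System.out.println(settings);
    }
}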

File: CommonAnalysisPlugin.java

@@ -200,9 +200,6 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, Scri
public Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAnalyzers() {
Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> analyzers = new TreeMap<>();
analyzers.put("fingerprint", FingerprintAnalyzerProvider::new);
// TODO remove in 8.0
analyzers.put("standard_html_strip", StandardHtmlStripAnalyzerProvider::new);
analyzers.put("pattern", PatternAnalyzerProvider::new);
analyzers.put("snowball", SnowballAnalyzerProvider::new);
@@ -265,7 +262,6 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, Scri
requiresAnalysisSettings((i, e, n, s) -> new ScriptedConditionTokenFilterFactory(i, n, s, scriptService.get()))
);
filters.put("decimal_digit", DecimalDigitFilterFactory::new);
filters.put("delimited_payload_filter", LegacyDelimitedPayloadTokenFilterFactory::new);
filters.put("delimited_payload", DelimitedPayloadTokenFilterFactory::new);
filters.put("dictionary_decompounder", requiresAnalysisSettings(DictionaryCompoundWordTokenFilterFactory::new));
filters.put("dutch_stem", DutchStemTokenFilterFactory::new);
@@ -388,14 +384,6 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, Scri
@Override
public List<PreBuiltAnalyzerProviderFactory> getPreBuiltAnalyzerProviderFactories() {
List<PreBuiltAnalyzerProviderFactory> analyzers = new ArrayList<>();
// TODO remove in 8.0
analyzers.add(
new PreBuiltAnalyzerProviderFactory(
"standard_html_strip",
CachingStrategy.OPENSEARCH,
() -> new StandardHtmlStripAnalyzer(CharArraySet.EMPTY_SET)
)
);
analyzers.add(
new PreBuiltAnalyzerProviderFactory(
"pattern",
@@ -462,16 +450,6 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, Scri
public List<PreConfiguredCharFilter> getPreConfiguredCharFilters() {
List<PreConfiguredCharFilter> filters = new ArrayList<>();
filters.add(PreConfiguredCharFilter.singleton("html_strip", false, HTMLStripCharFilter::new));
filters.add(PreConfiguredCharFilter.openSearchVersion("htmlStrip", false, (reader, version) -> {
if (version.onOrAfter(LegacyESVersion.V_6_3_0)) {
deprecationLogger.deprecate(
"htmlStrip_deprecation",
"The [htmpStrip] char filter name is deprecated and will be removed in a future version. "
+ "Please change the filter name to [html_strip] instead."
);
}
return new HTMLStripCharFilter(reader);
}));
return filters;
}
@@ -492,18 +470,6 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, Scri
);
filters.add(PreConfiguredTokenFilter.singleton("czech_stem", false, CzechStemFilter::new));
filters.add(PreConfiguredTokenFilter.singleton("decimal_digit", true, DecimalDigitFilter::new));
filters.add(PreConfiguredTokenFilter.openSearchVersion("delimited_payload_filter", false, (input, version) -> {
if (version.onOrAfter(LegacyESVersion.V_7_0_0)) {
throw new IllegalArgumentException(
"[delimited_payload_filter] is not supported for new indices, use [delimited_payload] instead"
);
}
return new DelimitedPayloadTokenFilter(
input,
DelimitedPayloadTokenFilterFactory.DEFAULT_DELIMITER,
DelimitedPayloadTokenFilterFactory.DEFAULT_ENCODER
);
}));
filters.add(
PreConfiguredTokenFilter.singleton(
"delimited_payload",

File: CommonGramsTokenFilterFactory.java

@@ -36,8 +36,6 @@ import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.commongrams.CommonGramsFilter;
import org.apache.lucene.analysis.commongrams.CommonGramsQueryFilter;
import org.opensearch.LegacyESVersion;
import org.opensearch.common.logging.DeprecationLogger;
import org.opensearch.common.settings.Settings;
import org.opensearch.env.Environment;
import org.opensearch.index.IndexSettings;
@@ -46,9 +44,6 @@ import org.opensearch.index.analysis.Analysis;
import org.opensearch.index.analysis.TokenFilterFactory;
public class CommonGramsTokenFilterFactory extends AbstractTokenFilterFactory {
private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(CommonGramsTokenFilterFactory.class);
private final CharArraySet words;
private final boolean ignoreCase;
@@ -80,15 +75,6 @@ public class CommonGramsTokenFilterFactory extends AbstractTokenFilterFactory {
@Override
public TokenFilterFactory getSynonymFilter() {
if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_0_0)) {
throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
} else {
DEPRECATION_LOGGER.deprecate(
name() + "_synonym_tokenfilters",
"Token filter [" + name() + "] will not be usable to parse synonyms after v7.0"
);
}
return this;
throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
}
}

File: EdgeNGramTokenFilterFactory.java

@@ -35,8 +35,6 @@ package org.opensearch.analysis.common;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
import org.apache.lucene.analysis.reverse.ReverseStringFilter;
import org.opensearch.LegacyESVersion;
import org.opensearch.common.logging.DeprecationLogger;
import org.opensearch.common.settings.Settings;
import org.opensearch.env.Environment;
import org.opensearch.index.IndexSettings;
@@ -44,9 +42,6 @@ import org.opensearch.index.analysis.AbstractTokenFilterFactory;
import org.opensearch.index.analysis.TokenFilterFactory;
public class EdgeNGramTokenFilterFactory extends AbstractTokenFilterFactory {
private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(EdgeNGramTokenFilterFactory.class);
private final int minGram;
private final int maxGram;
@@ -102,14 +97,6 @@ public class EdgeNGramTokenFilterFactory extends AbstractTokenFilterFactory {
@Override
public TokenFilterFactory getSynonymFilter() {
if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_0_0)) {
throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
} else {
DEPRECATION_LOGGER.deprecate(
name() + "_synonym_tokenfilters",
"Token filter [" + name() + "] will not be usable to parse synonyms after v7.0"
);
return this;
}
throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
}
}

File: FingerprintTokenFilterFactory.java

@@ -34,8 +34,6 @@ package org.opensearch.analysis.common;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.FingerprintFilter;
import org.opensearch.LegacyESVersion;
import org.opensearch.common.logging.DeprecationLogger;
import org.opensearch.common.settings.Settings;
import org.opensearch.env.Environment;
import org.opensearch.index.IndexSettings;
@@ -46,9 +44,6 @@ import static org.opensearch.analysis.common.FingerprintAnalyzerProvider.DEFAULT
import static org.opensearch.analysis.common.FingerprintAnalyzerProvider.MAX_OUTPUT_SIZE;
public class FingerprintTokenFilterFactory extends AbstractTokenFilterFactory {
private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(FingerprintTokenFilterFactory.class);
private final char separator;
private final int maxOutputSize;
@@ -67,15 +62,7 @@ public class FingerprintTokenFilterFactory extends AbstractTokenFilterFactory {
@Override
public TokenFilterFactory getSynonymFilter() {
if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_0_0)) {
throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
} else {
DEPRECATION_LOGGER.deprecate(
name() + "_synonym_tokenfilters",
"Token filter [" + name() + "] will not be usable to parse synonyms after v7.0"
);
return this;
}
throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
}
}

File: LegacyDelimitedPayloadTokenFilterFactory.java (deleted)

@@ -1,50 +0,0 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Modifications Copyright OpenSearch Contributors. See
* GitHub history for details.
*/
package org.opensearch.analysis.common;
import org.opensearch.LegacyESVersion;
import org.opensearch.common.settings.Settings;
import org.opensearch.env.Environment;
import org.opensearch.index.IndexSettings;
public class LegacyDelimitedPayloadTokenFilterFactory extends DelimitedPayloadTokenFilterFactory {
LegacyDelimitedPayloadTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, env, name, settings);
if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_0_0)) {
throw new IllegalArgumentException(
"[delimited_payload_filter] is not supported for new indices, use [delimited_payload] instead"
);
}
}
}
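
Migration is a one-word rename: the delimited_payload filter type replaces delimited_payload_filter, with the delimiter and encoding options unchanged. A hedged sketch (filter name hypothetical; option values taken from the YAML test removed below):

import org.opensearch.common.settings.Settings;

public class DelimitedPayloadMigrationSketch {
    public static void main(String[] args) {
        Settings settings = Settings.builder()
            // was: .put("index.analysis.filter.my_payloads.type", "delimited_payload_filter")
            .put("index.analysis.filter.my_payloads.type", "delimited_payload")
            .put("index.analysis.filter.my_payloads.delimiter", "^")
            .put("index.analysis.filter.my_payloads.encoding", "identity")
            .build();
        System.out.println(settings);
    }
}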

File: MultiplexerTokenFilterFactory.java

@@ -37,9 +37,7 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.ConditionalTokenFilter;
import org.apache.lucene.analysis.miscellaneous.RemoveDuplicatesTokenFilter;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.opensearch.LegacyESVersion;
import org.opensearch.common.Strings;
import org.opensearch.common.logging.DeprecationLogger;
import org.opensearch.common.settings.Settings;
import org.opensearch.env.Environment;
import org.opensearch.index.IndexSettings;
@@ -55,9 +53,6 @@ import java.util.function.Function;
import java.util.function.Function;
public class MultiplexerTokenFilterFactory extends AbstractTokenFilterFactory {
private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(MultiplexerTokenFilterFactory.class);
private List<String> filterNames;
private final boolean preserveOriginal;
@@ -74,20 +69,7 @@ public class MultiplexerTokenFilterFactory extends AbstractTokenFilterFactory {
@Override
public TokenFilterFactory getSynonymFilter() {
if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_0_0)) {
throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
} else {
if (preserveOriginal) {
DEPRECATION_LOGGER.deprecate(
name() + "_synonym_tokenfilters",
"Token filter [" + name() + "] will not be usable to parse synonyms after v7.0"
);
return IDENTITY_FILTER;
}
throw new IllegalArgumentException(
"Token filter [" + name() + "] cannot be used to parse synonyms unless [preserve_original] is [true]"
);
}
throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
}
@Override
@@ -142,20 +124,7 @@ public class MultiplexerTokenFilterFactory extends AbstractTokenFilterFactory {
@Override
public TokenFilterFactory getSynonymFilter() {
if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_0_0)) {
throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
} else {
if (preserveOriginal) {
DEPRECATION_LOGGER.deprecate(
name() + "_synonym_tokenfilters",
"Token filter [" + name() + "] will not be usable to parse synonyms after v7.0"
);
return IDENTITY_FILTER;
}
throw new IllegalArgumentException(
"Token filter [" + name() + "] cannot be used to parse synonyms unless [preserve_original] is [true]"
);
}
throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
}
@Override
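
Note that preserve_original previously exempted the multiplexer from the synonym restriction on pre-7.0 indices; both inner factories above now throw regardless. A hedged sketch of an affected configuration (names hypothetical):

import org.opensearch.common.settings.Settings;

public class MultiplexerSettingsSketch {
    public static void main(String[] args) {
        Settings settings = Settings.builder()
            .put("index.analysis.filter.my_multi.type", "multiplexer")
            .putList("index.analysis.filter.my_multi.filters", "lowercase", "porter_stem")
            // preserve_original no longer permits use inside a synonym chain
            .put("index.analysis.filter.my_multi.preserve_original", true)
            .build();
        System.out.println(settings);
    }
}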

File: NGramTokenFilterFactory.java

@@ -35,7 +35,6 @@ package org.opensearch.analysis.common;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ngram.NGramTokenFilter;
import org.opensearch.LegacyESVersion;
import org.opensearch.common.logging.DeprecationLogger;
import org.opensearch.common.settings.Settings;
import org.opensearch.env.Environment;
import org.opensearch.index.IndexSettings;
@@ -43,9 +42,6 @@ import org.opensearch.index.analysis.AbstractTokenFilterFactory;
import org.opensearch.index.analysis.TokenFilterFactory;
public class NGramTokenFilterFactory extends AbstractTokenFilterFactory {
private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(NGramTokenFilterFactory.class);
private final int minGram;
private final int maxGram;
private final boolean preserveOriginal;
@@ -88,14 +84,6 @@ public class NGramTokenFilterFactory extends AbstractTokenFilterFactory {
@Override
public TokenFilterFactory getSynonymFilter() {
if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_0_0)) {
throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
} else {
DEPRECATION_LOGGER.deprecate(
name() + "_synonym_tokenfilters",
"Token filter [" + name() + "] will not be usable to parse synonyms after v7.0"
);
return this;
}
throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
}
}

File: NGramTokenizerFactory.java

@@ -34,7 +34,6 @@ package org.opensearch.analysis.common;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ngram.NGramTokenizer;
import org.opensearch.LegacyESVersion;
import org.opensearch.common.settings.Settings;
import org.opensearch.env.Environment;
import org.opensearch.index.IndexSettings;
@@ -131,25 +130,15 @@ public class NGramTokenizerFactory extends AbstractTokenizerFactory {
this.maxGram = settings.getAsInt("max_gram", NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);
int ngramDiff = maxGram - minGram;
if (ngramDiff > maxAllowedNgramDiff) {
if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_0_0)) {
throw new IllegalArgumentException(
"The difference between max_gram and min_gram in NGram Tokenizer must be less than or equal to: ["
+ maxAllowedNgramDiff
+ "] but was ["
+ ngramDiff
+ "]. This limit can be set by changing the ["
+ IndexSettings.MAX_NGRAM_DIFF_SETTING.getKey()
+ "] index level setting."
);
} else {
deprecationLogger.deprecate(
"ngram_big_difference",
"Deprecated big difference between max_gram and min_gram in NGram Tokenizer,"
+ "expected difference must be less than or equal to: ["
+ maxAllowedNgramDiff
+ "]"
);
}
throw new IllegalArgumentException(
"The difference between max_gram and min_gram in NGram Tokenizer must be less than or equal to: ["
+ maxAllowedNgramDiff
+ "] but was ["
+ ngramDiff
+ "]. This limit can be set by changing the ["
+ IndexSettings.MAX_NGRAM_DIFF_SETTING.getKey()
+ "] index level setting."
);
}
this.matcher = parseTokenChars(settings);
}
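
The gram-size check is now unconditional: any min_gram/max_gram spread beyond index.max_ngram_diff fails at tokenizer creation. A hedged sketch, assuming the default limit of 1 (tokenizer name hypothetical):

import org.opensearch.common.settings.Settings;

public class NGramDiffSketch {
    public static void main(String[] args) {
        // min_gram=2, max_gram=6 gives a diff of 4, so the index-level
        // limit must be raised explicitly or tokenizer creation throws.
        Settings settings = Settings.builder()
            .put("index.max_ngram_diff", 4)
            .put("index.analysis.tokenizer.my_ngrams.type", "ngram")
            .put("index.analysis.tokenizer.my_ngrams.min_gram", 2)
            .put("index.analysis.tokenizer.my_ngrams.max_gram", 6)
            .build();
        System.out.println(settings);
    }
}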

File: StandardHtmlStripAnalyzerProvider.java (deleted)

@@ -1,78 +0,0 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Modifications Copyright OpenSearch Contributors. See
* GitHub history for details.
*/
package org.opensearch.analysis.common;
import org.apache.lucene.analysis.CharArraySet;
import org.opensearch.LegacyESVersion;
import org.opensearch.common.logging.DeprecationLogger;
import org.opensearch.common.settings.Settings;
import org.opensearch.env.Environment;
import org.opensearch.index.IndexSettings;
import org.opensearch.index.analysis.AbstractIndexAnalyzerProvider;
import org.opensearch.index.analysis.Analysis;
public class StandardHtmlStripAnalyzerProvider extends AbstractIndexAnalyzerProvider<StandardHtmlStripAnalyzer> {
private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(StandardHtmlStripAnalyzerProvider.class);
private final StandardHtmlStripAnalyzer analyzer;
/**
* @deprecated in 6.5, can not create in 7.0, and we remove this in 8.0
*/
@Deprecated
StandardHtmlStripAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
final CharArraySet defaultStopwords = CharArraySet.EMPTY_SET;
CharArraySet stopWords = Analysis.parseStopWords(env, settings, defaultStopwords);
analyzer = new StandardHtmlStripAnalyzer(stopWords);
analyzer.setVersion(version);
if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_0_0)) {
throw new IllegalArgumentException(
"[standard_html_strip] analyzer is not supported for new indices, "
+ "use a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter"
);
} else {
DEPRECATION_LOGGER.deprecate(
"standard_html_strip_deprecation",
"Deprecated analyzer [standard_html_strip] used, "
+ "replace it with a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter"
);
}
}
@Override
public StandardHtmlStripAnalyzer get() {
return this.analyzer;
}
}
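
The replacement named in the exception message is a custom analyzer. A hedged sketch (analyzer name hypothetical; add a stop filter if the old stopwords setting was in use):

import org.opensearch.common.settings.Settings;

public class StandardHtmlStripReplacementSketch {
    public static void main(String[] args) {
        Settings settings = Settings.builder()
            .put("index.analysis.analyzer.my_html.type", "custom")
            .put("index.analysis.analyzer.my_html.tokenizer", "standard")
            .putList("index.analysis.analyzer.my_html.char_filter", "html_strip")
            .putList("index.analysis.analyzer.my_html.filter", "lowercase")
            .build();
        System.out.println(settings);
    }
}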

File: WordDelimiterGraphTokenFilterFactory.java

@@ -36,8 +36,6 @@ import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterIterator;
import org.opensearch.LegacyESVersion;
import org.opensearch.common.logging.DeprecationLogger;
import org.opensearch.common.settings.Settings;
import org.opensearch.env.Environment;
import org.opensearch.index.IndexSettings;
@@ -61,9 +59,6 @@ import static org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter.
import static org.opensearch.analysis.common.WordDelimiterTokenFilterFactory.parseTypes;
public class WordDelimiterGraphTokenFilterFactory extends AbstractTokenFilterFactory {
private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(WordDelimiterGraphTokenFilterFactory.class);
private final byte[] charTypeTable;
private final int flags;
private final CharArraySet protoWords;
@@ -119,15 +114,7 @@ public class WordDelimiterGraphTokenFilterFactory extends AbstractTokenFilterFac
@Override
public TokenFilterFactory getSynonymFilter() {
if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_0_0)) {
throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
} else {
DEPRECATION_LOGGER.deprecate(
name() + "_synonym_tokenfilters",
"Token filter [" + name() + "] will not be usable to parse synonyms after v7.0"
);
return this;
}
throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
}
private int getFlag(int flag, Settings settings, String key, boolean defaultValue) {

File: WordDelimiterTokenFilterFactory.java

@@ -36,8 +36,6 @@ import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterIterator;
import org.opensearch.LegacyESVersion;
import org.opensearch.common.logging.DeprecationLogger;
import org.opensearch.common.settings.Settings;
import org.opensearch.env.Environment;
import org.opensearch.index.IndexSettings;
@@ -65,9 +63,6 @@ import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.SPLIT
import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE;
public class WordDelimiterTokenFilterFactory extends AbstractTokenFilterFactory {
private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(WordDelimiterTokenFilterFactory.class);
private final byte[] charTypeTable;
private final int flags;
private final CharArraySet protoWords;
@@ -119,15 +114,7 @@ public class WordDelimiterTokenFilterFactory extends AbstractTokenFilterFactory
@Override
public TokenFilterFactory getSynonymFilter() {
if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_0_0)) {
throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
} else {
DEPRECATION_LOGGER.deprecate(
name() + "_synonym_tokenfilters",
"Token filter [" + name() + "] will not be usable to parse synonyms after v7.0"
);
return this;
}
throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
}
public int getFlag(int flag, Settings settings, String key, boolean defaultValue) {

File: CommonAnalysisPluginTests.java (deleted)

@@ -1,274 +0,0 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Modifications Copyright OpenSearch Contributors. See
* GitHub history for details.
*/
package org.opensearch.analysis.common;
import org.apache.lucene.analysis.Tokenizer;
import org.opensearch.LegacyESVersion;
import org.opensearch.Version;
import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.common.settings.Settings;
import org.opensearch.env.Environment;
import org.opensearch.index.IndexSettings;
import org.opensearch.index.analysis.TokenizerFactory;
import org.opensearch.test.OpenSearchTestCase;
import org.opensearch.test.IndexSettingsModule;
import org.opensearch.test.VersionUtils;
import java.io.IOException;
import java.util.Map;
public class CommonAnalysisPluginTests extends OpenSearchTestCase {
/**
* Check that the deprecated name "nGram" throws an error since 7.0.0
*/
public void testNGramDeprecationError() throws IOException {
Settings settings = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
.put(IndexMetadata.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_7_0_0, null))
.put("index.analysis.analyzer.custom_analyzer.type", "custom")
.put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard")
.putList("index.analysis.analyzer.custom_analyzer.filter", "nGram")
.build();
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
IllegalArgumentException e = expectThrows(
IllegalArgumentException.class,
() -> createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin)
);
assertEquals(
"The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
+ "Please change the filter name to [ngram] instead.",
e.getMessage()
);
}
}
/**
* Check that the deprecated name "edgeNGram" throws an error for indices created since 7.0.0
*/
public void testEdgeNGramDeprecationError() throws IOException {
Settings settings = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
.put(
IndexMetadata.SETTING_VERSION_CREATED,
VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_7_0_0, Version.CURRENT)
)
.put("index.analysis.analyzer.custom_analyzer.type", "custom")
.put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard")
.putList("index.analysis.analyzer.custom_analyzer.filter", "edgeNGram")
.build();
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
IllegalArgumentException ex = expectThrows(
IllegalArgumentException.class,
() -> createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin)
);
assertEquals(
"The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
+ "Please change the filter name to [edge_ngram] instead.",
ex.getMessage()
);
}
}
/**
* Check that the deprecated analyzer name "standard_html_strip" throws exception for indices created since 7.0.0
*/
public void testStandardHtmlStripAnalyzerDeprecationError() throws IOException {
Settings settings = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
.put(
IndexMetadata.SETTING_VERSION_CREATED,
VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_7_0_0, Version.CURRENT)
)
.put("index.analysis.analyzer.custom_analyzer.type", "standard_html_strip")
.putList("index.analysis.analyzer.custom_analyzer.stopwords", "a", "b")
.build();
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin();
IllegalArgumentException ex = expectThrows(
IllegalArgumentException.class,
() -> createTestAnalysis(idxSettings, settings, commonAnalysisPlugin)
);
assertEquals(
"[standard_html_strip] analyzer is not supported for new indices, "
+ "use a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter",
ex.getMessage()
);
}
/**
* Check that the deprecated "nGram" filter logs a warning when the filter is used as a custom filter
*/
public void testnGramFilterInCustomAnalyzerDeprecationError() throws IOException {
final Settings settings = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
.put(
IndexMetadata.SETTING_VERSION_CREATED,
VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_7_0_0, Version.CURRENT)
)
.put("index.analysis.analyzer.custom_analyzer.type", "custom")
.put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard")
.putList("index.analysis.analyzer.custom_analyzer.filter", "my_ngram")
.put("index.analysis.filter.my_ngram.type", "nGram")
.build();
final CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin();
createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin);
assertWarnings(
"The [nGram] token filter name is deprecated and will be removed in a future version. "
+ "Please change the filter name to [ngram] instead."
);
}
/**
* Check that the deprecated "edgeNGram" filter logs a warning when the filter is used as a custom filter
*/
public void testEdgeNGramFilterInCustomAnalyzerDeprecationError() throws IOException {
final Settings settings = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
.put(
IndexMetadata.SETTING_VERSION_CREATED,
VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_7_0_0, Version.CURRENT)
)
.put("index.analysis.analyzer.custom_analyzer.type", "custom")
.put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard")
.putList("index.analysis.analyzer.custom_analyzer.filter", "my_ngram")
.put("index.analysis.filter.my_ngram.type", "edgeNGram")
.build();
final CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin();
createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin);
assertWarnings(
"The [edgeNGram] token filter name is deprecated and will be removed in a future version. "
+ "Please change the filter name to [edge_ngram] instead."
);
}
/**
* Check that we log a deprecation warning for "nGram" and "edgeNGram" tokenizer names with 7.6 and
* disallow usages for indices created after 8.0
*/
public void testNGramTokenizerDeprecation() throws IOException {
// tests for prebuilt tokenizer
doTestPrebuiltTokenizerDeprecation(
"nGram",
"ngram",
VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_7_0_0, LegacyESVersion.V_7_5_2),
false
);
doTestPrebuiltTokenizerDeprecation(
"edgeNGram",
"edge_ngram",
VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_7_0_0, LegacyESVersion.V_7_5_2),
false
);
doTestPrebuiltTokenizerDeprecation("nGram", "ngram", LegacyESVersion.V_7_6_0, true);
doTestPrebuiltTokenizerDeprecation("edgeNGram", "edge_ngram", LegacyESVersion.V_7_6_0, true);
// same batch of tests for custom tokenizer definition in the settings
doTestCustomTokenizerDeprecation(
"nGram",
"ngram",
VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_7_0_0, LegacyESVersion.V_7_5_2),
false
);
doTestCustomTokenizerDeprecation(
"edgeNGram",
"edge_ngram",
VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_7_0_0, LegacyESVersion.V_7_5_2),
false
);
doTestCustomTokenizerDeprecation("nGram", "ngram", LegacyESVersion.V_7_6_0, false);
doTestCustomTokenizerDeprecation("edgeNGram", "edge_ngram", LegacyESVersion.V_7_6_0, false);
}
public void doTestPrebuiltTokenizerDeprecation(String deprecatedName, String replacement, Version version, boolean expectWarning)
throws IOException {
final Settings settings = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
.put(IndexMetadata.SETTING_VERSION_CREATED, version)
.build();
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
Map<String, TokenizerFactory> tokenizers = createTestAnalysis(
IndexSettingsModule.newIndexSettings("index", settings),
settings,
commonAnalysisPlugin
).tokenizer;
TokenizerFactory tokenizerFactory = tokenizers.get(deprecatedName);
Tokenizer tokenizer = tokenizerFactory.create();
assertNotNull(tokenizer);
if (expectWarning) {
assertWarnings(
"The ["
+ deprecatedName
+ "] tokenizer name is deprecated and will be removed in a future version. "
+ "Please change the tokenizer name to ["
+ replacement
+ "] instead."
);
}
}
}
public void doTestCustomTokenizerDeprecation(String deprecatedName, String replacement, Version version, boolean expectWarning)
throws IOException {
final Settings settings = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
.put(IndexMetadata.SETTING_VERSION_CREATED, version)
.put("index.analysis.analyzer.custom_analyzer.type", "custom")
.put("index.analysis.analyzer.custom_analyzer.tokenizer", "my_tokenizer")
.put("index.analysis.tokenizer.my_tokenizer.type", deprecatedName)
.build();
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin);
if (expectWarning) {
assertWarnings(
"The ["
+ deprecatedName
+ "] tokenizer name is deprecated and will be removed in a future version. "
+ "Please change the tokenizer name to ["
+ replacement
+ "] instead."
);
}
}
}
}

File: HtmlStripCharFilterFactoryTests.java (deleted)

@@ -1,75 +0,0 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Modifications Copyright OpenSearch Contributors. See
* GitHub history for details.
*/
package org.opensearch.analysis.common;
import org.opensearch.LegacyESVersion;
import org.opensearch.Version;
import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.common.settings.Settings;
import org.opensearch.env.Environment;
import org.opensearch.index.IndexSettings;
import org.opensearch.index.analysis.CharFilterFactory;
import org.opensearch.test.OpenSearchTestCase;
import org.opensearch.test.IndexSettingsModule;
import org.opensearch.test.VersionUtils;
import java.io.IOException;
import java.io.StringReader;
import java.util.Map;
public class HtmlStripCharFilterFactoryTests extends OpenSearchTestCase {
/**
* Check that the deprecated name "htmlStrip" issues a deprecation warning for indices created since 6.3.0
*/
public void testDeprecationWarning() throws IOException {
Settings settings = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
.put(
IndexMetadata.SETTING_VERSION_CREATED,
VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_6_3_0, Version.CURRENT)
)
.build();
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
Map<String, CharFilterFactory> charFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).charFilter;
CharFilterFactory charFilterFactory = charFilters.get("htmlStrip");
assertNotNull(charFilterFactory.create(new StringReader("input")));
assertWarnings(
"The [htmpStrip] char filter name is deprecated and will be removed in a future version. "
+ "Please change the filter name to [html_strip] instead."
);
}
}
}

File: REST test YAML (analyzers)

@@ -67,18 +67,6 @@
- length: { tokens: 1 }
- match: { tokens.0.token: a1 b2 c3 d4 }
---
"standard_html_strip":
- skip:
version: " - 6.99.99"
reason: only starting from version 7.x this throws an error
- do:
catch: /\[standard_html_strip\] analyzer is not supported for new indices, use a custom analyzer using \[standard\] tokenizer and \[html_strip\] char_filter, plus \[lowercase\] filter/
indices.analyze:
body:
text: <bold/> <italic/>
analyzer: standard_html_strip
---
"pattern":
- do:

File: REST test YAML (token filters)

@@ -1164,34 +1164,6 @@
- match: { tokens.9.token: 落ち }
- match: { tokens.10.token: ちた }
---
"delimited_payload_filter_error":
- skip:
version: " - 6.99.99"
reason: using delimited_payload_filter throws error from 7.0 on
- do:
catch: /\[delimited_payload_filter\] is not supported for new indices, use \[delimited_payload\] instead/
indices.create:
index: test
body:
settings:
analysis:
filter:
my_delimited_payload_filter:
type: delimited_payload_filter
delimiter: ^
encoding: identity
# Test pre-configured token filter too:
- do:
catch: /\[delimited_payload_filter\] is not supported for new indices, use \[delimited_payload\] instead/
indices.analyze:
body:
text: foo|5
tokenizer: keyword
filter: [delimited_payload_filter]
---
"delimited_payload":
- do:

File: REST test YAML (normalizers and char filters)

@@ -14,43 +14,6 @@
- match: { error.type: illegal_argument_exception }
- match: { error.reason: "Custom normalizer may not use filter [word_delimiter]" }
---
"htmlStrip_deprecated":
- skip:
version: " - 6.2.99"
reason: deprecated in 6.3
features: "allowed_warnings"
- do:
allowed_warnings:
- 'The [htmpStrip] char filter name is deprecated and will be removed in a future version. Please change the filter name to [html_strip] instead.'
indices.create:
index: test_deprecated_htmlstrip
body:
settings:
index:
analysis:
analyzer:
my_htmlStripWithCharfilter:
tokenizer: keyword
char_filter: ["htmlStrip"]
mappings:
properties:
name:
type: text
analyzer: my_htmlStripWithCharfilter
- do:
allowed_warnings:
- 'The [htmpStrip] char filter name is deprecated and will be removed in a future version. Please change the filter name to [html_strip] instead.'
indices.analyze:
index: test_deprecated_htmlstrip
body:
analyzer: "my_htmlStripWithCharfilter"
text: "<html>foo</html>"
- length: { tokens: 1 }
- match: { tokens.0.token: "\nfoo\n" }
---
"Synonym filter with tokenizer":
- do:

File: IcuNormalizerTokenFilterFactory.java

@@ -36,8 +36,6 @@ import com.ibm.icu.text.FilteredNormalizer2;
import com.ibm.icu.text.Normalizer2;
import com.ibm.icu.text.UnicodeSet;
import org.apache.lucene.analysis.TokenStream;
import org.opensearch.LegacyESVersion;
import org.opensearch.common.logging.DeprecationLogger;
import org.opensearch.common.settings.Settings;
import org.opensearch.env.Environment;
import org.opensearch.index.IndexSettings;
@@ -48,9 +46,6 @@ import org.opensearch.index.IndexSettings;
* <p>The {@code unicodeSetFilter} attribute can be used to provide the UniCodeSet for filtering.</p>
*/
public class IcuNormalizerTokenFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(IcuNormalizerTokenFilterFactory.class);
private final Normalizer2 normalizer;
public IcuNormalizerTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
@@ -66,17 +61,7 @@ public class IcuNormalizerTokenFilterFactory extends AbstractTokenFilterFactory
}
static Normalizer2 wrapWithUnicodeSetFilter(final IndexSettings indexSettings, final Normalizer2 normalizer, final Settings settings) {
String unicodeSetFilter = settings.get("unicodeSetFilter");
if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_0_0)) {
if (unicodeSetFilter != null) {
deprecationLogger.deprecate(
"icu_normalizer_unicode_set_filter",
"[unicodeSetFilter] has been deprecated in favor of [unicode_set_filter]"
);
} else {
unicodeSetFilter = settings.get("unicode_set_filter");
}
}
String unicodeSetFilter = settings.get("unicode_set_filter");
if (unicodeSetFilter != null) {
UnicodeSet unicodeSet = new UnicodeSet(unicodeSetFilter);
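
Only the snake_case key survives: the camelCase unicodeSetFilter form, which the removed branch still read with a deprecation warning, is no longer consulted. A hedged sketch (filter name hypothetical; the value mirrors the YAML test removed below):

import org.opensearch.common.settings.Settings;

public class IcuUnicodeSetSketch {
    public static void main(String[] args) {
        Settings settings = Settings.builder()
            .put("index.analysis.filter.my_icu.type", "icu_normalizer")
            // was: "unicodeSetFilter" (camelCase, now ignored)
            .put("index.analysis.filter.my_icu.unicode_set_filter", "[^ß]")
            .build();
        System.out.println(settings);
    }
}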

File: REST test YAML (ICU analysis)

@@ -103,46 +103,3 @@
- match: { tokens.1.token: foo }
- match: { tokens.2.token: bâr }
- match: { tokens.3.token: russ }
---
"Normalization with deprecated unicodeSetFilter":
- skip:
version: " - 6.99.99"
reason: unicodeSetFilter deprecated in 7.0.0, replaced by unicode_set_filter
features: "allowed_warnings"
- do:
allowed_warnings:
- "[unicodeSetFilter] has been deprecated in favor of [unicode_set_filter]"
indices.create:
index: test
body:
settings:
index:
analysis:
char_filter:
charfilter_icu_normalizer:
type: icu_normalizer
unicodeSetFilter: "[^ß]"
filter:
tokenfilter_icu_normalizer:
type: icu_normalizer
unicodeSetFilter: "[^ßB]"
tokenfilter_icu_folding:
type: icu_folding
unicodeSetFilter: "[^â]"
- do:
allowed_warnings:
- "[unicodeSetFilter] has been deprecated in favor of [unicode_set_filter]"
indices.analyze:
index: test
body:
char_filter: ["charfilter_icu_normalizer"]
tokenizer: standard
text: charfilter Föo Bâr Ruß
- length: { tokens: 4 }
- match: { tokens.0.token: charfilter }
- match: { tokens.1.token: föo }
- match: { tokens.2.token: bâr }
- match: { tokens.3.token: ruß }

File: PhoneticTokenFilterFactory.java

@@ -48,8 +48,6 @@ import org.apache.lucene.analysis.phonetic.BeiderMorseFilter;
import org.apache.lucene.analysis.phonetic.DaitchMokotoffSoundexFilter;
import org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilter;
import org.apache.lucene.analysis.phonetic.PhoneticFilter;
import org.opensearch.LegacyESVersion;
import org.opensearch.common.logging.DeprecationLogger;
import org.opensearch.common.settings.Settings;
import org.opensearch.env.Environment;
import org.opensearch.index.IndexSettings;
@@ -61,9 +59,6 @@ import java.util.List;
import java.util.List;
public class PhoneticTokenFilterFactory extends AbstractTokenFilterFactory {
private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(PhoneticTokenFilterFactory.class);
private final Encoder encoder;
private final boolean replace;
private int maxcodelength;
@@ -158,14 +153,6 @@ public class PhoneticTokenFilterFactory extends AbstractTokenFilterFactory {
@Override
public TokenFilterFactory getSynonymFilter() {
if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_0_0)) {
throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
} else {
DEPRECATION_LOGGER.deprecate(
name() + "_synonym_tokenfilters",
"Token filter [" + name() + "] will not be usable to parse synonyms after v7.0"
);
return this;
}
throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
}
}

File: PreConfiguredTokenFilter.java

@@ -34,9 +34,7 @@ package org.opensearch.index.analysis;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.opensearch.LegacyESVersion;
import org.opensearch.Version;
import org.opensearch.common.logging.DeprecationLogger;
import org.opensearch.indices.analysis.PreBuiltCacheFactory;
import org.opensearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;
@@ -47,9 +45,6 @@ import java.util.function.Function;
* Provides pre-configured, shared {@link TokenFilter}s.
*/
public final class PreConfiguredTokenFilter extends PreConfiguredAnalysisComponent<TokenFilterFactory> {
private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(PreConfiguredTokenFilter.class);
/**
* Create a pre-configured token filter that may not vary at all.
*/
@@ -180,15 +175,7 @@ public final class PreConfiguredTokenFilter extends PreConfiguredAnalysisCompone
if (allowForSynonymParsing) {
return this;
}
if (version.onOrAfter(LegacyESVersion.V_7_0_0)) {
throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
} else {
DEPRECATION_LOGGER.deprecate(
name() + "_synonym_tokenfilters",
"Token filter [" + name() + "] will not be usable to parse synonyms after v7.0"
);
return this;
}
throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
}
};
}
@@ -208,15 +195,7 @@ public final class PreConfiguredTokenFilter extends PreConfiguredAnalysisCompone
if (allowForSynonymParsing) {
return this;
}
if (version.onOrAfter(LegacyESVersion.V_7_0_0)) {
throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
} else {
DEPRECATION_LOGGER.deprecate(
name() + "_synonym_tokenfilters",
"Token filter [" + name() + "] will not be usable to parse synonyms after v7.0"
);
return this;
}
throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
}
};
}