[Remove] Analyzer Deprecations (#1741)
This commit removes deprecated analyzer instantiation that is no longer permitted in OpenSearch 2.0.0.

Signed-off-by: Nicholas Walter Knize <nknize@apache.org>
parent 5966cc03bf
commit 5550f8d7e2
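For context on the migration path: the removed standard_html_strip analyzer's own error message names its replacement, a custom analyzer built from the [standard] tokenizer, the [html_strip] char_filter, and the [lowercase] filter. A minimal sketch in the REST-test YAML style used elsewhere in this change (the index and analyzer names are illustrative, not from the commit):

- do:
    indices.create:
        index: test_html                # hypothetical index name
        body:
            settings:
                analysis:
                    analyzer:
                        my_html_analyzer:   # stand-in for the removed standard_html_strip
                            type: custom
                            tokenizer: standard
                            char_filter: [html_strip]
                            filter: [lowercase]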
@@ -35,8 +35,6 @@ package org.opensearch.analysis.common;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.cjk.CJKBigramFilter;
 import org.apache.lucene.analysis.miscellaneous.DisableGraphAttribute;
-import org.opensearch.LegacyESVersion;
-import org.opensearch.common.logging.DeprecationLogger;
 import org.opensearch.common.settings.Settings;
 import org.opensearch.env.Environment;
 import org.opensearch.index.IndexSettings;
@@ -63,9 +61,6 @@ import java.util.Set;
  * In all cases, all non-CJK input is passed thru unmodified.
  */
 public final class CJKBigramFilterFactory extends AbstractTokenFilterFactory {
-
-    private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(CJKBigramFilterFactory.class);
-
     private final int flags;
     private final boolean outputUnigrams;

@@ -110,14 +105,7 @@ public final class CJKBigramFilterFactory extends AbstractTokenFilterFactory {
     @Override
     public TokenFilterFactory getSynonymFilter() {
         if (outputUnigrams) {
-            if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_0_0)) {
-                throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
-            } else {
-                DEPRECATION_LOGGER.deprecate(
-                    name() + "_synonym_tokenfilters",
-                    "Token filter [" + name() + "] will not be usable to parse synonyms after v7.0"
-                );
-            }
+            throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
         }
         return this;
     }

@@ -200,9 +200,6 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, Scri
     public Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAnalyzers() {
         Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> analyzers = new TreeMap<>();
         analyzers.put("fingerprint", FingerprintAnalyzerProvider::new);
-
-        // TODO remove in 8.0
-        analyzers.put("standard_html_strip", StandardHtmlStripAnalyzerProvider::new);
         analyzers.put("pattern", PatternAnalyzerProvider::new);
         analyzers.put("snowball", SnowballAnalyzerProvider::new);

@@ -265,7 +262,6 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, Scri
             requiresAnalysisSettings((i, e, n, s) -> new ScriptedConditionTokenFilterFactory(i, n, s, scriptService.get()))
         );
         filters.put("decimal_digit", DecimalDigitFilterFactory::new);
-        filters.put("delimited_payload_filter", LegacyDelimitedPayloadTokenFilterFactory::new);
         filters.put("delimited_payload", DelimitedPayloadTokenFilterFactory::new);
         filters.put("dictionary_decompounder", requiresAnalysisSettings(DictionaryCompoundWordTokenFilterFactory::new));
         filters.put("dutch_stem", DutchStemTokenFilterFactory::new);
@@ -388,14 +384,6 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, Scri
     @Override
     public List<PreBuiltAnalyzerProviderFactory> getPreBuiltAnalyzerProviderFactories() {
         List<PreBuiltAnalyzerProviderFactory> analyzers = new ArrayList<>();
-        // TODO remove in 8.0
-        analyzers.add(
-            new PreBuiltAnalyzerProviderFactory(
-                "standard_html_strip",
-                CachingStrategy.OPENSEARCH,
-                () -> new StandardHtmlStripAnalyzer(CharArraySet.EMPTY_SET)
-            )
-        );
         analyzers.add(
             new PreBuiltAnalyzerProviderFactory(
                 "pattern",
@@ -462,16 +450,6 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, Scri
     public List<PreConfiguredCharFilter> getPreConfiguredCharFilters() {
         List<PreConfiguredCharFilter> filters = new ArrayList<>();
         filters.add(PreConfiguredCharFilter.singleton("html_strip", false, HTMLStripCharFilter::new));
-        filters.add(PreConfiguredCharFilter.openSearchVersion("htmlStrip", false, (reader, version) -> {
-            if (version.onOrAfter(LegacyESVersion.V_6_3_0)) {
-                deprecationLogger.deprecate(
-                    "htmlStrip_deprecation",
-                    "The [htmpStrip] char filter name is deprecated and will be removed in a future version. "
-                        + "Please change the filter name to [html_strip] instead."
-                );
-            }
-            return new HTMLStripCharFilter(reader);
-        }));
         return filters;
     }

@@ -492,18 +470,6 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, Scri
         );
         filters.add(PreConfiguredTokenFilter.singleton("czech_stem", false, CzechStemFilter::new));
         filters.add(PreConfiguredTokenFilter.singleton("decimal_digit", true, DecimalDigitFilter::new));
-        filters.add(PreConfiguredTokenFilter.openSearchVersion("delimited_payload_filter", false, (input, version) -> {
-            if (version.onOrAfter(LegacyESVersion.V_7_0_0)) {
-                throw new IllegalArgumentException(
-                    "[delimited_payload_filter] is not supported for new indices, use [delimited_payload] instead"
-                );
-            }
-            return new DelimitedPayloadTokenFilter(
-                input,
-                DelimitedPayloadTokenFilterFactory.DEFAULT_DELIMITER,
-                DelimitedPayloadTokenFilterFactory.DEFAULT_ENCODER
-            );
-        }));
         filters.add(
             PreConfiguredTokenFilter.singleton(
                 "delimited_payload",

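With the hunks above, the camelCase htmlStrip pre-configured char filter is gone entirely; only the snake_case html_strip name remains registered. A minimal request sketch against the surviving name (the sample text is illustrative):

- do:
    indices.analyze:
        body:
            text: "<b>foo</b>"
            tokenizer: keyword
            char_filter: [html_strip]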
@@ -36,8 +36,6 @@ import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.commongrams.CommonGramsFilter;
 import org.apache.lucene.analysis.commongrams.CommonGramsQueryFilter;
-import org.opensearch.LegacyESVersion;
-import org.opensearch.common.logging.DeprecationLogger;
 import org.opensearch.common.settings.Settings;
 import org.opensearch.env.Environment;
 import org.opensearch.index.IndexSettings;
@@ -46,9 +44,6 @@ import org.opensearch.index.analysis.Analysis;
 import org.opensearch.index.analysis.TokenFilterFactory;

 public class CommonGramsTokenFilterFactory extends AbstractTokenFilterFactory {
-
-    private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(CommonGramsTokenFilterFactory.class);
-
     private final CharArraySet words;

     private final boolean ignoreCase;
@@ -80,15 +75,6 @@ public class CommonGramsTokenFilterFactory extends AbstractTokenFilterFactory {

     @Override
     public TokenFilterFactory getSynonymFilter() {
-        if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_0_0)) {
-            throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
-        } else {
-            DEPRECATION_LOGGER.deprecate(
-                name() + "_synonym_tokenfilters",
-                "Token filter [" + name() + "] will not be usable to parse synonyms after v7.0"
-            );
-        }
-
-        return this;
+        throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
     }
 }

@@ -35,8 +35,6 @@ package org.opensearch.analysis.common;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
 import org.apache.lucene.analysis.reverse.ReverseStringFilter;
-import org.opensearch.LegacyESVersion;
-import org.opensearch.common.logging.DeprecationLogger;
 import org.opensearch.common.settings.Settings;
 import org.opensearch.env.Environment;
 import org.opensearch.index.IndexSettings;
@@ -44,9 +42,6 @@ import org.opensearch.index.analysis.AbstractTokenFilterFactory;
 import org.opensearch.index.analysis.TokenFilterFactory;

 public class EdgeNGramTokenFilterFactory extends AbstractTokenFilterFactory {
-
-    private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(EdgeNGramTokenFilterFactory.class);
-
     private final int minGram;

     private final int maxGram;
@@ -102,14 +97,6 @@ public class EdgeNGramTokenFilterFactory extends AbstractTokenFilterFactory {

     @Override
     public TokenFilterFactory getSynonymFilter() {
-        if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_0_0)) {
-            throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
-        } else {
-            DEPRECATION_LOGGER.deprecate(
-                name() + "_synonym_tokenfilters",
-                "Token filter [" + name() + "] will not be usable to parse synonyms after v7.0"
-            );
-            return this;
-        }
+        throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
     }
 }

@@ -34,8 +34,6 @@ package org.opensearch.analysis.common;

 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.miscellaneous.FingerprintFilter;
-import org.opensearch.LegacyESVersion;
-import org.opensearch.common.logging.DeprecationLogger;
 import org.opensearch.common.settings.Settings;
 import org.opensearch.env.Environment;
 import org.opensearch.index.IndexSettings;
@@ -46,9 +44,6 @@ import static org.opensearch.analysis.common.FingerprintAnalyzerProvider.DEFAULT
 import static org.opensearch.analysis.common.FingerprintAnalyzerProvider.MAX_OUTPUT_SIZE;

 public class FingerprintTokenFilterFactory extends AbstractTokenFilterFactory {
-
-    private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(FingerprintTokenFilterFactory.class);
-
     private final char separator;
     private final int maxOutputSize;

@@ -67,15 +62,7 @@ public class FingerprintTokenFilterFactory extends AbstractTokenFilterFactory {

     @Override
     public TokenFilterFactory getSynonymFilter() {
-        if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_0_0)) {
-            throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
-        } else {
-            DEPRECATION_LOGGER.deprecate(
-                name() + "_synonym_tokenfilters",
-                "Token filter [" + name() + "] will not be usable to parse synonyms after v7.0"
-            );
-            return this;
-        }
+        throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
     }

 }

@@ -1,50 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- *
- * The OpenSearch Contributors require contributions made to
- * this file be licensed under the Apache-2.0 license or a
- * compatible open source license.
- */
-
-/*
- * Licensed to Elasticsearch under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. Elasticsearch licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*
- * Modifications Copyright OpenSearch Contributors. See
- * GitHub history for details.
- */
-
-package org.opensearch.analysis.common;
-
-import org.opensearch.LegacyESVersion;
-import org.opensearch.common.settings.Settings;
-import org.opensearch.env.Environment;
-import org.opensearch.index.IndexSettings;
-
-public class LegacyDelimitedPayloadTokenFilterFactory extends DelimitedPayloadTokenFilterFactory {
-
-    LegacyDelimitedPayloadTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
-        super(indexSettings, env, name, settings);
-        if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_0_0)) {
-            throw new IllegalArgumentException(
-                "[delimited_payload_filter] is not supported for new indices, use [delimited_payload] instead"
-            );
-        }
-    }
-}

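With this factory deleted, delimited_payload is the only registered name for the filter, as the removed constructor's error message itself directs. A minimal sketch of an equivalent filter definition under the new name (index and filter names are illustrative; the delimiter and encoding values mirror the parameters the removed YAML test exercised):

- do:
    indices.create:
        index: test_payloads
        body:
            settings:
                analysis:
                    filter:
                        my_delimited_payload:
                            type: delimited_payload
                            delimiter: ^
                            encoding: identity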
@@ -37,9 +37,7 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.miscellaneous.ConditionalTokenFilter;
 import org.apache.lucene.analysis.miscellaneous.RemoveDuplicatesTokenFilter;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.opensearch.LegacyESVersion;
 import org.opensearch.common.Strings;
-import org.opensearch.common.logging.DeprecationLogger;
 import org.opensearch.common.settings.Settings;
 import org.opensearch.env.Environment;
 import org.opensearch.index.IndexSettings;
@@ -55,9 +53,6 @@ import java.util.List;
 import java.util.function.Function;

 public class MultiplexerTokenFilterFactory extends AbstractTokenFilterFactory {
-
-    private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(MultiplexerTokenFilterFactory.class);
-
     private List<String> filterNames;
     private final boolean preserveOriginal;

@@ -74,20 +69,7 @@ public class MultiplexerTokenFilterFactory extends AbstractTokenFilterFactory {

     @Override
     public TokenFilterFactory getSynonymFilter() {
-        if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_0_0)) {
-            throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
-        } else {
-            if (preserveOriginal) {
-                DEPRECATION_LOGGER.deprecate(
-                    name() + "_synonym_tokenfilters",
-                    "Token filter [" + name() + "] will not be usable to parse synonyms after v7.0"
-                );
-                return IDENTITY_FILTER;
-            }
-            throw new IllegalArgumentException(
-                "Token filter [" + name() + "] cannot be used to parse synonyms unless [preserve_original] is [true]"
-            );
-        }
+        throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
     }

     @Override
@@ -142,20 +124,7 @@ public class MultiplexerTokenFilterFactory extends AbstractTokenFilterFactory {

     @Override
     public TokenFilterFactory getSynonymFilter() {
-        if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_0_0)) {
-            throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
-        } else {
-            if (preserveOriginal) {
-                DEPRECATION_LOGGER.deprecate(
-                    name() + "_synonym_tokenfilters",
-                    "Token filter [" + name() + "] will not be usable to parse synonyms after v7.0"
-                );
-                return IDENTITY_FILTER;
-            }
-            throw new IllegalArgumentException(
-                "Token filter [" + name() + "] cannot be used to parse synonyms unless [preserve_original] is [true]"
-            );
-        }
+        throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
     }

     @Override

@@ -35,7 +35,6 @@ package org.opensearch.analysis.common;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.ngram.NGramTokenFilter;
-import org.opensearch.LegacyESVersion;
 import org.opensearch.common.logging.DeprecationLogger;
 import org.opensearch.common.settings.Settings;
 import org.opensearch.env.Environment;
 import org.opensearch.index.IndexSettings;
@@ -43,9 +42,6 @@ import org.opensearch.index.analysis.AbstractTokenFilterFactory;
 import org.opensearch.index.analysis.TokenFilterFactory;

 public class NGramTokenFilterFactory extends AbstractTokenFilterFactory {
-
-    private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(NGramTokenFilterFactory.class);
-
     private final int minGram;
     private final int maxGram;
     private final boolean preserveOriginal;
@@ -88,14 +84,6 @@ public class NGramTokenFilterFactory extends AbstractTokenFilterFactory {

     @Override
     public TokenFilterFactory getSynonymFilter() {
-        if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_0_0)) {
-            throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
-        } else {
-            DEPRECATION_LOGGER.deprecate(
-                name() + "_synonym_tokenfilters",
-                "Token filter [" + name() + "] will not be usable to parse synonyms after v7.0"
-            );
-            return this;
-        }
+        throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
     }
 }

@@ -34,7 +34,6 @@ package org.opensearch.analysis.common;

 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.ngram.NGramTokenizer;
-import org.opensearch.LegacyESVersion;
 import org.opensearch.common.settings.Settings;
 import org.opensearch.env.Environment;
 import org.opensearch.index.IndexSettings;
@@ -131,25 +130,15 @@ public class NGramTokenizerFactory extends AbstractTokenizerFactory {
         this.maxGram = settings.getAsInt("max_gram", NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);
         int ngramDiff = maxGram - minGram;
         if (ngramDiff > maxAllowedNgramDiff) {
-            if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_0_0)) {
-                throw new IllegalArgumentException(
-                    "The difference between max_gram and min_gram in NGram Tokenizer must be less than or equal to: ["
-                        + maxAllowedNgramDiff
-                        + "] but was ["
-                        + ngramDiff
-                        + "]. This limit can be set by changing the ["
-                        + IndexSettings.MAX_NGRAM_DIFF_SETTING.getKey()
-                        + "] index level setting."
-                );
-            } else {
-                deprecationLogger.deprecate(
-                    "ngram_big_difference",
-                    "Deprecated big difference between max_gram and min_gram in NGram Tokenizer,"
-                        + "expected difference must be less than or equal to: ["
-                        + maxAllowedNgramDiff
-                        + "]"
-                );
-            }
+            throw new IllegalArgumentException(
+                "The difference between max_gram and min_gram in NGram Tokenizer must be less than or equal to: ["
+                    + maxAllowedNgramDiff
+                    + "] but was ["
+                    + ngramDiff
+                    + "]. This limit can be set by changing the ["
+                    + IndexSettings.MAX_NGRAM_DIFF_SETTING.getKey()
+                    + "] index level setting."
+            );
         }
         this.matcher = parseTokenChars(settings);
     }

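The tokenizer above now rejects a max_gram/min_gram spread larger than the limit unconditionally, regardless of the index's created version. The error message names the index-level setting via IndexSettings.MAX_NGRAM_DIFF_SETTING.getKey(); assuming that key is index.max_ngram_diff, raising the limit would look roughly like this sketch (index and tokenizer names are illustrative):

- do:
    indices.create:
        index: test_ngram
        body:
            settings:
                index:
                    max_ngram_diff: 2     # assumed setting key, per MAX_NGRAM_DIFF_SETTING
                analysis:
                    tokenizer:
                        my_tokenizer:
                            type: ngram
                            min_gram: 1
                            max_gram: 3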
@@ -1,78 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- *
- * The OpenSearch Contributors require contributions made to
- * this file be licensed under the Apache-2.0 license or a
- * compatible open source license.
- */
-
-/*
- * Licensed to Elasticsearch under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. Elasticsearch licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*
- * Modifications Copyright OpenSearch Contributors. See
- * GitHub history for details.
- */
-
-package org.opensearch.analysis.common;
-
-import org.apache.lucene.analysis.CharArraySet;
-import org.opensearch.LegacyESVersion;
-import org.opensearch.common.logging.DeprecationLogger;
-import org.opensearch.common.settings.Settings;
-import org.opensearch.env.Environment;
-import org.opensearch.index.IndexSettings;
-import org.opensearch.index.analysis.AbstractIndexAnalyzerProvider;
-import org.opensearch.index.analysis.Analysis;
-
-public class StandardHtmlStripAnalyzerProvider extends AbstractIndexAnalyzerProvider<StandardHtmlStripAnalyzer> {
-
-    private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(StandardHtmlStripAnalyzerProvider.class);
-
-    private final StandardHtmlStripAnalyzer analyzer;
-
-    /**
-     * @deprecated in 6.5, can not create in 7.0, and we remove this in 8.0
-     */
-    @Deprecated
-    StandardHtmlStripAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
-        super(indexSettings, name, settings);
-        final CharArraySet defaultStopwords = CharArraySet.EMPTY_SET;
-        CharArraySet stopWords = Analysis.parseStopWords(env, settings, defaultStopwords);
-        analyzer = new StandardHtmlStripAnalyzer(stopWords);
-        analyzer.setVersion(version);
-        if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_0_0)) {
-            throw new IllegalArgumentException(
-                "[standard_html_strip] analyzer is not supported for new indices, "
-                    + "use a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter"
-            );
-        } else {
-            DEPRECATION_LOGGER.deprecate(
-                "standard_html_strip_deprecation",
-                "Deprecated analyzer [standard_html_strip] used, "
-                    + "replace it with a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter"
-            );
-        }
-    }
-
-    @Override
-    public StandardHtmlStripAnalyzer get() {
-        return this.analyzer;
-    }
-}

@@ -36,8 +36,6 @@ import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter;
 import org.apache.lucene.analysis.miscellaneous.WordDelimiterIterator;
-import org.opensearch.LegacyESVersion;
-import org.opensearch.common.logging.DeprecationLogger;
 import org.opensearch.common.settings.Settings;
 import org.opensearch.env.Environment;
 import org.opensearch.index.IndexSettings;
@@ -61,9 +59,6 @@ import static org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter.
 import static org.opensearch.analysis.common.WordDelimiterTokenFilterFactory.parseTypes;

 public class WordDelimiterGraphTokenFilterFactory extends AbstractTokenFilterFactory {
-
-    private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(WordDelimiterGraphTokenFilterFactory.class);
-
     private final byte[] charTypeTable;
     private final int flags;
     private final CharArraySet protoWords;
@@ -119,15 +114,7 @@ public class WordDelimiterGraphTokenFilterFac

     @Override
     public TokenFilterFactory getSynonymFilter() {
-        if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_0_0)) {
-            throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
-        } else {
-            DEPRECATION_LOGGER.deprecate(
-                name() + "_synonym_tokenfilters",
-                "Token filter [" + name() + "] will not be usable to parse synonyms after v7.0"
-            );
-            return this;
-        }
+        throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
     }

     private int getFlag(int flag, Settings settings, String key, boolean defaultValue) {

@@ -36,8 +36,6 @@ import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
 import org.apache.lucene.analysis.miscellaneous.WordDelimiterIterator;
-import org.opensearch.LegacyESVersion;
-import org.opensearch.common.logging.DeprecationLogger;
 import org.opensearch.common.settings.Settings;
 import org.opensearch.env.Environment;
 import org.opensearch.index.IndexSettings;
@@ -65,9 +63,6 @@ import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.SPLIT
 import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE;

 public class WordDelimiterTokenFilterFactory extends AbstractTokenFilterFactory {
-
-    private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(WordDelimiterTokenFilterFactory.class);
-
     private final byte[] charTypeTable;
     private final int flags;
     private final CharArraySet protoWords;
@@ -119,15 +114,7 @@ public class WordDelimiterTokenFilterFactory

     @Override
     public TokenFilterFactory getSynonymFilter() {
-        if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_0_0)) {
-            throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
-        } else {
-            DEPRECATION_LOGGER.deprecate(
-                name() + "_synonym_tokenfilters",
-                "Token filter [" + name() + "] will not be usable to parse synonyms after v7.0"
-            );
-            return this;
-        }
+        throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
     }

     public int getFlag(int flag, Settings settings, String key, boolean defaultValue) {

@@ -1,274 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- *
- * The OpenSearch Contributors require contributions made to
- * this file be licensed under the Apache-2.0 license or a
- * compatible open source license.
- */
-
-/*
- * Licensed to Elasticsearch under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. Elasticsearch licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*
- * Modifications Copyright OpenSearch Contributors. See
- * GitHub history for details.
- */
-
-package org.opensearch.analysis.common;
-
-import org.apache.lucene.analysis.Tokenizer;
-import org.opensearch.LegacyESVersion;
-import org.opensearch.Version;
-import org.opensearch.cluster.metadata.IndexMetadata;
-import org.opensearch.common.settings.Settings;
-import org.opensearch.env.Environment;
-import org.opensearch.index.IndexSettings;
-import org.opensearch.index.analysis.TokenizerFactory;
-import org.opensearch.test.OpenSearchTestCase;
-import org.opensearch.test.IndexSettingsModule;
-import org.opensearch.test.VersionUtils;
-
-import java.io.IOException;
-import java.util.Map;
-
-public class CommonAnalysisPluginTests extends OpenSearchTestCase {
-
-    /**
-     * Check that the deprecated name "nGram" throws an error since 7.0.0
-     */
-    public void testNGramDeprecationError() throws IOException {
-        Settings settings = Settings.builder()
-            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
-            .put(IndexMetadata.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_7_0_0, null))
-            .put("index.analysis.analyzer.custom_analyzer.type", "custom")
-            .put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard")
-            .putList("index.analysis.analyzer.custom_analyzer.filter", "nGram")
-            .build();
-
-        try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
-            IllegalArgumentException e = expectThrows(
-                IllegalArgumentException.class,
-                () -> createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin)
-            );
-            assertEquals(
-                "The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
-                    + "Please change the filter name to [ngram] instead.",
-                e.getMessage()
-            );
-        }
-    }
-
-    /**
-     * Check that the deprecated name "edgeNGram" throws an error for indices created since 7.0.0
-     */
-    public void testEdgeNGramDeprecationError() throws IOException {
-        Settings settings = Settings.builder()
-            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
-            .put(
-                IndexMetadata.SETTING_VERSION_CREATED,
-                VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_7_0_0, Version.CURRENT)
-            )
-            .put("index.analysis.analyzer.custom_analyzer.type", "custom")
-            .put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard")
-            .putList("index.analysis.analyzer.custom_analyzer.filter", "edgeNGram")
-            .build();
-
-        try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
-            IllegalArgumentException ex = expectThrows(
-                IllegalArgumentException.class,
-                () -> createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin)
-            );
-            assertEquals(
-                "The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
-                    + "Please change the filter name to [edge_ngram] instead.",
-                ex.getMessage()
-            );
-        }
-    }
-
-    /**
-     * Check that the deprecated analyzer name "standard_html_strip" throws exception for indices created since 7.0.0
-     */
-    public void testStandardHtmlStripAnalyzerDeprecationError() throws IOException {
-        Settings settings = Settings.builder()
-            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
-            .put(
-                IndexMetadata.SETTING_VERSION_CREATED,
-                VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_7_0_0, Version.CURRENT)
-            )
-            .put("index.analysis.analyzer.custom_analyzer.type", "standard_html_strip")
-            .putList("index.analysis.analyzer.custom_analyzer.stopwords", "a", "b")
-            .build();
-
-        IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
-        CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin();
-        IllegalArgumentException ex = expectThrows(
-            IllegalArgumentException.class,
-            () -> createTestAnalysis(idxSettings, settings, commonAnalysisPlugin)
-        );
-        assertEquals(
-            "[standard_html_strip] analyzer is not supported for new indices, "
-                + "use a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter",
-            ex.getMessage()
-        );
-    }
-
-    /**
-     * Check that the deprecated "nGram" filter logs a warning when the filter is used as a custom filter
-     */
-    public void testnGramFilterInCustomAnalyzerDeprecationError() throws IOException {
-        final Settings settings = Settings.builder()
-            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
-            .put(
-                IndexMetadata.SETTING_VERSION_CREATED,
-                VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_7_0_0, Version.CURRENT)
-            )
-            .put("index.analysis.analyzer.custom_analyzer.type", "custom")
-            .put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard")
-            .putList("index.analysis.analyzer.custom_analyzer.filter", "my_ngram")
-            .put("index.analysis.filter.my_ngram.type", "nGram")
-            .build();
-
-        final CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin();
-
-        createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin);
-        assertWarnings(
-            "The [nGram] token filter name is deprecated and will be removed in a future version. "
-                + "Please change the filter name to [ngram] instead."
-        );
-    }
-
-    /**
-     * Check that the deprecated "edgeNGram" filter logs a warning when the filter is used as a custom filter
-     */
-    public void testEdgeNGramFilterInCustomAnalyzerDeprecationError() throws IOException {
-        final Settings settings = Settings.builder()
-            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
-            .put(
-                IndexMetadata.SETTING_VERSION_CREATED,
-                VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_7_0_0, Version.CURRENT)
-            )
-            .put("index.analysis.analyzer.custom_analyzer.type", "custom")
-            .put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard")
-            .putList("index.analysis.analyzer.custom_analyzer.filter", "my_ngram")
-            .put("index.analysis.filter.my_ngram.type", "edgeNGram")
-            .build();
-        final CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin();
-
-        createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin);
-        assertWarnings(
-            "The [edgeNGram] token filter name is deprecated and will be removed in a future version. "
-                + "Please change the filter name to [edge_ngram] instead."
-        );
-    }
-
-    /**
-     * Check that we log a deprecation warning for "nGram" and "edgeNGram" tokenizer names with 7.6 and
-     * disallow usages for indices created after 8.0
-     */
-    public void testNGramTokenizerDeprecation() throws IOException {
-        // tests for prebuilt tokenizer
-        doTestPrebuiltTokenizerDeprecation(
-            "nGram",
-            "ngram",
-            VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_7_0_0, LegacyESVersion.V_7_5_2),
-            false
-        );
-        doTestPrebuiltTokenizerDeprecation(
-            "edgeNGram",
-            "edge_ngram",
-            VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_7_0_0, LegacyESVersion.V_7_5_2),
-            false
-        );
-        doTestPrebuiltTokenizerDeprecation("nGram", "ngram", LegacyESVersion.V_7_6_0, true);
-        doTestPrebuiltTokenizerDeprecation("edgeNGram", "edge_ngram", LegacyESVersion.V_7_6_0, true);
-
-        // same batch of tests for custom tokenizer definition in the settings
-        doTestCustomTokenizerDeprecation(
-            "nGram",
-            "ngram",
-            VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_7_0_0, LegacyESVersion.V_7_5_2),
-            false
-        );
-        doTestCustomTokenizerDeprecation(
-            "edgeNGram",
-            "edge_ngram",
-            VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_7_0_0, LegacyESVersion.V_7_5_2),
-            false
-        );
-        doTestCustomTokenizerDeprecation("nGram", "ngram", LegacyESVersion.V_7_6_0, false);
-        doTestCustomTokenizerDeprecation("edgeNGram", "edge_ngram", LegacyESVersion.V_7_6_0, false);
-    }
-
-    public void doTestPrebuiltTokenizerDeprecation(String deprecatedName, String replacement, Version version, boolean expectWarning)
-        throws IOException {
-        final Settings settings = Settings.builder()
-            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
-            .put(IndexMetadata.SETTING_VERSION_CREATED, version)
-            .build();
-
-        try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
-            Map<String, TokenizerFactory> tokenizers = createTestAnalysis(
-                IndexSettingsModule.newIndexSettings("index", settings),
-                settings,
-                commonAnalysisPlugin
-            ).tokenizer;
-            TokenizerFactory tokenizerFactory = tokenizers.get(deprecatedName);
-
-            Tokenizer tokenizer = tokenizerFactory.create();
-            assertNotNull(tokenizer);
-            if (expectWarning) {
-                assertWarnings(
-                    "The ["
-                        + deprecatedName
-                        + "] tokenizer name is deprecated and will be removed in a future version. "
-                        + "Please change the tokenizer name to ["
-                        + replacement
-                        + "] instead."
-                );
-            }
-        }
-    }
-
-    public void doTestCustomTokenizerDeprecation(String deprecatedName, String replacement, Version version, boolean expectWarning)
-        throws IOException {
-        final Settings settings = Settings.builder()
-            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
-            .put(IndexMetadata.SETTING_VERSION_CREATED, version)
-            .put("index.analysis.analyzer.custom_analyzer.type", "custom")
-            .put("index.analysis.analyzer.custom_analyzer.tokenizer", "my_tokenizer")
-            .put("index.analysis.tokenizer.my_tokenizer.type", deprecatedName)
-            .build();
-
-        try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
-            createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin);
-
-            if (expectWarning) {
-                assertWarnings(
-                    "The ["
-                        + deprecatedName
-                        + "] tokenizer name is deprecated and will be removed in a future version. "
-                        + "Please change the tokenizer name to ["
-                        + replacement
-                        + "] instead."
-                );
-            }
-        }
-    }
-}

@@ -1,75 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- *
- * The OpenSearch Contributors require contributions made to
- * this file be licensed under the Apache-2.0 license or a
- * compatible open source license.
- */
-
-/*
- * Licensed to Elasticsearch under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. Elasticsearch licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*
- * Modifications Copyright OpenSearch Contributors. See
- * GitHub history for details.
- */
-
-package org.opensearch.analysis.common;
-
-import org.opensearch.LegacyESVersion;
-import org.opensearch.Version;
-import org.opensearch.cluster.metadata.IndexMetadata;
-import org.opensearch.common.settings.Settings;
-import org.opensearch.env.Environment;
-import org.opensearch.index.IndexSettings;
-import org.opensearch.index.analysis.CharFilterFactory;
-import org.opensearch.test.OpenSearchTestCase;
-import org.opensearch.test.IndexSettingsModule;
-import org.opensearch.test.VersionUtils;
-
-import java.io.IOException;
-import java.io.StringReader;
-import java.util.Map;
-
-public class HtmlStripCharFilterFactoryTests extends OpenSearchTestCase {
-
-    /**
-     * Check that the deprecated name "htmlStrip" issues a deprecation warning for indices created since 6.3.0
-     */
-    public void testDeprecationWarning() throws IOException {
-        Settings settings = Settings.builder()
-            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
-            .put(
-                IndexMetadata.SETTING_VERSION_CREATED,
-                VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_6_3_0, Version.CURRENT)
-            )
-            .build();
-
-        IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
-        try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
-            Map<String, CharFilterFactory> charFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).charFilter;
-            CharFilterFactory charFilterFactory = charFilters.get("htmlStrip");
-            assertNotNull(charFilterFactory.create(new StringReader("input")));
-            assertWarnings(
-                "The [htmpStrip] char filter name is deprecated and will be removed in a future version. "
-                    + "Please change the filter name to [html_strip] instead."
-            );
-        }
-    }
-}

@@ -67,18 +67,6 @@
     - length: { tokens: 1 }
     - match: { tokens.0.token: a1 b2 c3 d4 }

----
-"standard_html_strip":
-    - skip:
-        version: " - 6.99.99"
-        reason: only starting from version 7.x this throws an error
-    - do:
-        catch: /\[standard_html_strip\] analyzer is not supported for new indices, use a custom analyzer using \[standard\] tokenizer and \[html_strip\] char_filter, plus \[lowercase\] filter/
-        indices.analyze:
-            body:
-                text: <bold/> <italic/>
-                analyzer: standard_html_strip
-
 ---
 "pattern":
     - do:

@@ -1164,34 +1164,6 @@
     - match: { tokens.9.token: 落ち }
     - match: { tokens.10.token: ちた }

----
-"delimited_payload_filter_error":
-    - skip:
-        version: " - 6.99.99"
-        reason: using delimited_payload_filter throws error from 7.0 on
-
-    - do:
-        catch: /\[delimited_payload_filter\] is not supported for new indices, use \[delimited_payload\] instead/
-        indices.create:
-            index: test
-            body:
-                settings:
-                    analysis:
-                        filter:
-                            my_delimited_payload_filter:
-                                type: delimited_payload_filter
-                                delimiter: ^
-                                encoding: identity
-
-    # Test pre-configured token filter too:
-    - do:
-        catch: /\[delimited_payload_filter\] is not supported for new indices, use \[delimited_payload\] instead/
-        indices.analyze:
-            body:
-                text: foo|5
-                tokenizer: keyword
-                filter: [delimited_payload_filter]
-
 ---
 "delimited_payload":
     - do:

@@ -14,43 +14,6 @@
     - match: { error.type: illegal_argument_exception }
     - match: { error.reason: "Custom normalizer may not use filter [word_delimiter]" }

----
-"htmlStrip_deprecated":
-    - skip:
-        version: " - 6.2.99"
-        reason: deprecated in 6.3
-        features: "allowed_warnings"
-
-    - do:
-        allowed_warnings:
-            - 'The [htmpStrip] char filter name is deprecated and will be removed in a future version. Please change the filter name to [html_strip] instead.'
-        indices.create:
-            index: test_deprecated_htmlstrip
-            body:
-                settings:
-                    index:
-                        analysis:
-                            analyzer:
-                                my_htmlStripWithCharfilter:
-                                    tokenizer: keyword
-                                    char_filter: ["htmlStrip"]
-                mappings:
-                    properties:
-                        name:
-                            type: text
-                            analyzer: my_htmlStripWithCharfilter
-
-    - do:
-        allowed_warnings:
-            - 'The [htmpStrip] char filter name is deprecated and will be removed in a future version. Please change the filter name to [html_strip] instead.'
-        indices.analyze:
-            index: test_deprecated_htmlstrip
-            body:
-                analyzer: "my_htmlStripWithCharfilter"
-                text: "<html>foo</html>"
-    - length: { tokens: 1 }
-    - match: { tokens.0.token: "\nfoo\n" }
-
 ---
 "Synonym filter with tokenizer":
     - do:

@@ -36,8 +36,6 @@ import com.ibm.icu.text.FilteredNormalizer2;
 import com.ibm.icu.text.Normalizer2;
 import com.ibm.icu.text.UnicodeSet;
 import org.apache.lucene.analysis.TokenStream;
-import org.opensearch.LegacyESVersion;
-import org.opensearch.common.logging.DeprecationLogger;
 import org.opensearch.common.settings.Settings;
 import org.opensearch.env.Environment;
 import org.opensearch.index.IndexSettings;
@@ -48,9 +46,6 @@ import org.opensearch.index.IndexSettings;
  * <p>The {@code unicodeSetFilter} attribute can be used to provide the UniCodeSet for filtering.</p>
  */
 public class IcuNormalizerTokenFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
-
-    private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(IcuNormalizerTokenFilterFactory.class);
-
     private final Normalizer2 normalizer;

     public IcuNormalizerTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
@@ -66,17 +61,7 @@ public class IcuNormalizerTokenFilterFactory extends AbstractTokenFilterFactory
     }

     static Normalizer2 wrapWithUnicodeSetFilter(final IndexSettings indexSettings, final Normalizer2 normalizer, final Settings settings) {
-        String unicodeSetFilter = settings.get("unicodeSetFilter");
-        if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_0_0)) {
-            if (unicodeSetFilter != null) {
-                deprecationLogger.deprecate(
-                    "icu_normalizer_unicode_set_filter",
-                    "[unicodeSetFilter] has been deprecated in favor of [unicode_set_filter]"
-                );
-            } else {
-                unicodeSetFilter = settings.get("unicode_set_filter");
-            }
-        }
+        String unicodeSetFilter = settings.get("unicode_set_filter");
         if (unicodeSetFilter != null) {
             UnicodeSet unicodeSet = new UnicodeSet(unicodeSetFilter);

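After this hunk, wrapWithUnicodeSetFilter reads only the snake_case unicode_set_filter key; the camelCase unicodeSetFilter spelling is no longer consulted. A minimal sketch of a filter definition using the surviving key, mirroring the YAML test removed below (filter name and character set are illustrative):

- do:
    indices.create:
        index: test
        body:
            settings:
                index:
                    analysis:
                        filter:
                            tokenfilter_icu_normalizer:
                                type: icu_normalizer
                                unicode_set_filter: "[^ß]"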
@@ -103,46 +103,3 @@
     - match: { tokens.1.token: foo }
     - match: { tokens.2.token: bâr }
     - match: { tokens.3.token: russ }
-
----
-"Normalization with deprecated unicodeSetFilter":
-    - skip:
-        version: " - 6.99.99"
-        reason: unicodeSetFilter deprecated in 7.0.0, replaced by unicode_set_filter
-        features: "allowed_warnings"
-
-    - do:
-        allowed_warnings:
-            - "[unicodeSetFilter] has been deprecated in favor of [unicode_set_filter]"
-        indices.create:
-            index: test
-            body:
-                settings:
-                    index:
-                        analysis:
-                            char_filter:
-                                charfilter_icu_normalizer:
-                                    type: icu_normalizer
-                                    unicodeSetFilter: "[^ß]"
-                            filter:
-                                tokenfilter_icu_normalizer:
-                                    type: icu_normalizer
-                                    unicodeSetFilter: "[^ßB]"
-                                tokenfilter_icu_folding:
-                                    type: icu_folding
-                                    unicodeSetFilter: "[^â]"
-    - do:
-        allowed_warnings:
-            - "[unicodeSetFilter] has been deprecated in favor of [unicode_set_filter]"
-        indices.analyze:
-            index: test
-            body:
-                char_filter: ["charfilter_icu_normalizer"]
-                tokenizer: standard
-                text: charfilter Föo Bâr Ruß
-    - length: { tokens: 4 }
-    - match: { tokens.0.token: charfilter }
-    - match: { tokens.1.token: föo }
-    - match: { tokens.2.token: bâr }
-    - match: { tokens.3.token: ruß }
-

@@ -48,8 +48,6 @@ import org.apache.lucene.analysis.phonetic.BeiderMorseFilter;
 import org.apache.lucene.analysis.phonetic.DaitchMokotoffSoundexFilter;
 import org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilter;
 import org.apache.lucene.analysis.phonetic.PhoneticFilter;
-import org.opensearch.LegacyESVersion;
-import org.opensearch.common.logging.DeprecationLogger;
 import org.opensearch.common.settings.Settings;
 import org.opensearch.env.Environment;
 import org.opensearch.index.IndexSettings;
@@ -61,9 +59,6 @@ import java.util.HashSet;
 import java.util.List;

 public class PhoneticTokenFilterFactory extends AbstractTokenFilterFactory {
-
-    private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(PhoneticTokenFilterFactory.class);
-
     private final Encoder encoder;
     private final boolean replace;
     private int maxcodelength;
@@ -158,14 +153,6 @@ public class PhoneticTokenFilterFactory extends AbstractTokenFilterFactory {

     @Override
     public TokenFilterFactory getSynonymFilter() {
-        if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_0_0)) {
-            throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
-        } else {
-            DEPRECATION_LOGGER.deprecate(
-                name() + "_synonym_tokenfilters",
-                "Token filter [" + name() + "] will not be usable to parse synonyms after v7.0"
-            );
-            return this;
-        }
+        throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
     }
 }

@@ -34,9 +34,7 @@ package org.opensearch.index.analysis;

 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.opensearch.LegacyESVersion;
 import org.opensearch.Version;
-import org.opensearch.common.logging.DeprecationLogger;
 import org.opensearch.indices.analysis.PreBuiltCacheFactory;
 import org.opensearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;

@@ -47,9 +45,6 @@ import java.util.function.Function;
  * Provides pre-configured, shared {@link TokenFilter}s.
  */
 public final class PreConfiguredTokenFilter extends PreConfiguredAnalysisComponent<TokenFilterFactory> {
-
-    private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(PreConfiguredTokenFilter.class);
-
     /**
      * Create a pre-configured token filter that may not vary at all.
     */
@@ -180,15 +175,7 @@ public final class PreConfiguredTokenFilter extends PreConfiguredAnalysisCompone
                 if (allowForSynonymParsing) {
                     return this;
                 }
-                if (version.onOrAfter(LegacyESVersion.V_7_0_0)) {
-                    throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
-                } else {
-                    DEPRECATION_LOGGER.deprecate(
-                        name() + "_synonym_tokenfilters",
-                        "Token filter [" + name() + "] will not be usable to parse synonyms after v7.0"
-                    );
-                    return this;
-                }
+                throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
             }
         };
     }
@@ -208,15 +195,7 @@ public final class PreConfiguredTokenFilter extends PreConfiguredAnalysisCompone
                 if (allowForSynonymParsing) {
                     return this;
                 }
-                if (version.onOrAfter(LegacyESVersion.V_7_0_0)) {
-                    throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
-                } else {
-                    DEPRECATION_LOGGER.deprecate(
-                        name() + "_synonym_tokenfilters",
-                        "Token filter [" + name() + "] will not be usable to parse synonyms after v7.0"
-                    );
-                    return this;
-                }
+                throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
             }
         };
     }