[Analysis] Deprecate Standard Html Strip Analyzer in master (#26719)
* [Analysis] Deprecate Standard Html Strip Analyzer. Deprecate only the Standard Html Strip Analyzer. If a user creates an index with this analyzer on or after 7.0, ES throws an exception. If the index was created before 7.0, ES issues a deprecation log. We will remove it in 8.0. Related #4704
This commit is contained in:
parent
ec32e66088
commit
38b698d455
|
@ -31,3 +31,11 @@ instead.
|
||||||
==== `standard` filter has been removed
|
==== `standard` filter has been removed
|
||||||
|
|
||||||
The `standard` token filter has been removed because it doesn't change anything in the stream.
|
The `standard` token filter has been removed because it doesn't change anything in the stream.
|
||||||
|
|
||||||
|
[float]
|
||||||
|
==== Deprecated standard_html_strip analyzer
|
||||||
|
|
||||||
|
The `standard_html_strip` analyzer has been deprecated, and should be replaced
|
||||||
|
with a combination of the `standard` tokenizer, the `html_strip` char_filter and the `lowercase` token filter.
|
||||||
|
Indices created using this analyzer will still be readable in Elasticsearch 7.0,
|
||||||
|
but it will not be possible to create new indexes using it.
|
|
@ -171,6 +171,8 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, Scri
|
||||||
public Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAnalyzers() {
|
public Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAnalyzers() {
|
||||||
Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> analyzers = new TreeMap<>();
|
Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> analyzers = new TreeMap<>();
|
||||||
analyzers.put("fingerprint", FingerprintAnalyzerProvider::new);
|
analyzers.put("fingerprint", FingerprintAnalyzerProvider::new);
|
||||||
|
|
||||||
|
// TODO remove in 8.0
|
||||||
analyzers.put("standard_html_strip", StandardHtmlStripAnalyzerProvider::new);
|
analyzers.put("standard_html_strip", StandardHtmlStripAnalyzerProvider::new);
|
||||||
analyzers.put("pattern", PatternAnalyzerProvider::new);
|
analyzers.put("pattern", PatternAnalyzerProvider::new);
|
||||||
analyzers.put("snowball", SnowballAnalyzerProvider::new);
|
analyzers.put("snowball", SnowballAnalyzerProvider::new);
|
||||||
|
@ -320,6 +322,7 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, Scri
|
||||||
@Override
|
@Override
|
||||||
public List<PreBuiltAnalyzerProviderFactory> getPreBuiltAnalyzerProviderFactories() {
|
public List<PreBuiltAnalyzerProviderFactory> getPreBuiltAnalyzerProviderFactories() {
|
||||||
List<PreBuiltAnalyzerProviderFactory> analyzers = new ArrayList<>();
|
List<PreBuiltAnalyzerProviderFactory> analyzers = new ArrayList<>();
|
||||||
|
// TODO remove in 8.0
|
||||||
analyzers.add(new PreBuiltAnalyzerProviderFactory("standard_html_strip", CachingStrategy.ELASTICSEARCH,
|
analyzers.add(new PreBuiltAnalyzerProviderFactory("standard_html_strip", CachingStrategy.ELASTICSEARCH,
|
||||||
() -> new StandardHtmlStripAnalyzer(CharArraySet.EMPTY_SET)));
|
() -> new StandardHtmlStripAnalyzer(CharArraySet.EMPTY_SET)));
|
||||||
analyzers.add(new PreBuiltAnalyzerProviderFactory("pattern", CachingStrategy.ELASTICSEARCH,
|
analyzers.add(new PreBuiltAnalyzerProviderFactory("pattern", CachingStrategy.ELASTICSEARCH,
|
||||||
|
|
|
@ -37,7 +37,10 @@ public class StandardHtmlStripAnalyzer extends StopwordAnalyzerBase {
|
||||||
public StandardHtmlStripAnalyzer() {
|
public StandardHtmlStripAnalyzer() {
|
||||||
super(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET);
|
super(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET);
|
||||||
}
|
}
|
||||||
|
/**
|
||||||
|
* @deprecated in 6.5; cannot be used for indices created on or after 7.0, and will be removed in 8.0
|
||||||
|
*/
|
||||||
|
@Deprecated
|
||||||
StandardHtmlStripAnalyzer(CharArraySet stopwords) {
|
StandardHtmlStripAnalyzer(CharArraySet stopwords) {
|
||||||
super(stopwords);
|
super(stopwords);
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,7 +19,10 @@
|
||||||
|
|
||||||
package org.elasticsearch.analysis.common;
|
package org.elasticsearch.analysis.common;
|
||||||
|
|
||||||
|
import org.apache.logging.log4j.LogManager;
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
|
import org.elasticsearch.Version;
|
||||||
|
import org.elasticsearch.common.logging.DeprecationLogger;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
import org.elasticsearch.env.Environment;
|
import org.elasticsearch.env.Environment;
|
||||||
import org.elasticsearch.index.IndexSettings;
|
import org.elasticsearch.index.IndexSettings;
|
||||||
|
@ -28,14 +31,29 @@ import org.elasticsearch.index.analysis.Analysis;
|
||||||
|
|
||||||
public class StandardHtmlStripAnalyzerProvider extends AbstractIndexAnalyzerProvider<StandardHtmlStripAnalyzer> {
|
public class StandardHtmlStripAnalyzerProvider extends AbstractIndexAnalyzerProvider<StandardHtmlStripAnalyzer> {
|
||||||
|
|
||||||
|
private static final DeprecationLogger DEPRECATION_LOGGER =
|
||||||
|
new DeprecationLogger(LogManager.getLogger(StandardHtmlStripAnalyzerProvider.class));
|
||||||
|
|
||||||
private final StandardHtmlStripAnalyzer analyzer;
|
private final StandardHtmlStripAnalyzer analyzer;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @deprecated in 6.5; cannot be used for indices created on or after 7.0, and will be removed in 8.0
|
||||||
|
*/
|
||||||
|
@Deprecated
|
||||||
StandardHtmlStripAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
|
StandardHtmlStripAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
|
||||||
super(indexSettings, name, settings);
|
super(indexSettings, name, settings);
|
||||||
final CharArraySet defaultStopwords = CharArraySet.EMPTY_SET;
|
final CharArraySet defaultStopwords = CharArraySet.EMPTY_SET;
|
||||||
CharArraySet stopWords = Analysis.parseStopWords(env, settings, defaultStopwords);
|
CharArraySet stopWords = Analysis.parseStopWords(env, settings, defaultStopwords);
|
||||||
analyzer = new StandardHtmlStripAnalyzer(stopWords);
|
analyzer = new StandardHtmlStripAnalyzer(stopWords);
|
||||||
analyzer.setVersion(version);
|
analyzer.setVersion(version);
|
||||||
|
if (indexSettings.getIndexVersionCreated().onOrAfter(Version.V_7_0_0)) {
|
||||||
|
throw new IllegalArgumentException("[standard_html_strip] analyzer is not supported for new indices, " +
|
||||||
|
"use a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter");
|
||||||
|
} else {
|
||||||
|
DEPRECATION_LOGGER.deprecatedAndMaybeLog("standard_html_strip_deprecation",
|
||||||
|
"Deprecated analyzer [standard_html_strip] used, " +
|
||||||
|
"replace it with a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -19,6 +19,7 @@
|
||||||
|
|
||||||
package org.elasticsearch.analysis.common;
|
package org.elasticsearch.analysis.common;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.MockTokenizer;
|
import org.apache.lucene.analysis.MockTokenizer;
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
import org.elasticsearch.Version;
|
import org.elasticsearch.Version;
|
||||||
|
@ -26,6 +27,8 @@ import org.elasticsearch.cluster.metadata.IndexMetaData;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
import org.elasticsearch.env.Environment;
|
import org.elasticsearch.env.Environment;
|
||||||
import org.elasticsearch.index.IndexSettings;
|
import org.elasticsearch.index.IndexSettings;
|
||||||
|
import org.elasticsearch.index.analysis.IndexAnalyzers;
|
||||||
|
import org.elasticsearch.index.analysis.NamedAnalyzer;
|
||||||
import org.elasticsearch.index.analysis.TokenFilterFactory;
|
import org.elasticsearch.index.analysis.TokenFilterFactory;
|
||||||
import org.elasticsearch.test.ESTestCase;
|
import org.elasticsearch.test.ESTestCase;
|
||||||
import org.elasticsearch.test.IndexSettingsModule;
|
import org.elasticsearch.test.IndexSettingsModule;
|
||||||
|
@ -116,4 +119,47 @@ public class CommonAnalysisPluginTests extends ESTestCase {
|
||||||
assertNotNull(tokenFilterFactory.create(tokenizer));
|
assertNotNull(tokenFilterFactory.create(tokenizer));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check that the deprecated analyzer name "standard_html_strip" throws an exception for indices created on or after 7.0.0
|
||||||
|
*/
|
||||||
|
public void testStandardHtmlStripAnalyzerDeprecationError() throws IOException {
|
||||||
|
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
|
||||||
|
.put(IndexMetaData.SETTING_VERSION_CREATED,
|
||||||
|
VersionUtils.randomVersionBetween(random(), Version.V_7_0_0, Version.CURRENT))
|
||||||
|
.put("index.analysis.analyzer.custom_analyzer.type", "standard_html_strip")
|
||||||
|
.putList("index.analysis.analyzer.custom_analyzer.stopwords", "a", "b")
|
||||||
|
.build();
|
||||||
|
|
||||||
|
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
|
||||||
|
CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin();
|
||||||
|
IllegalArgumentException ex = expectThrows(IllegalArgumentException.class,
|
||||||
|
() -> createTestAnalysis(idxSettings, settings, commonAnalysisPlugin));
|
||||||
|
assertEquals("[standard_html_strip] analyzer is not supported for new indices, " +
|
||||||
|
"use a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter", ex.getMessage());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check that the deprecated analyzer name "standard_html_strip" issues a deprecation warning for indices created from 6.0.0 up to (but not including) 7.0.0
|
||||||
|
*/
|
||||||
|
public void testStandardHtmlStripAnalyzerDeprecationWarning() throws IOException {
|
||||||
|
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
|
||||||
|
.put(IndexMetaData.SETTING_VERSION_CREATED,
|
||||||
|
VersionUtils.randomVersionBetween(random(), Version.V_6_0_0,
|
||||||
|
VersionUtils.getPreviousVersion(Version.V_7_0_0)))
|
||||||
|
.put("index.analysis.analyzer.custom_analyzer.type", "standard_html_strip")
|
||||||
|
.putList("index.analysis.analyzer.custom_analyzer.stopwords", "a", "b")
|
||||||
|
.build();
|
||||||
|
|
||||||
|
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
|
||||||
|
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
|
||||||
|
IndexAnalyzers analyzers = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).indexAnalyzers;
|
||||||
|
Analyzer analyzer = analyzers.get("custom_analyzer");
|
||||||
|
assertNotNull(((NamedAnalyzer) analyzer).analyzer());
|
||||||
|
assertWarnings(
|
||||||
|
"Deprecated analyzer [standard_html_strip] used, " +
|
||||||
|
"replace it with a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter");
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -69,14 +69,15 @@
|
||||||
|
|
||||||
---
|
---
|
||||||
"standard_html_strip":
|
"standard_html_strip":
|
||||||
|
- skip:
|
||||||
|
version: " - 6.99.99"
|
||||||
|
reason: only starting from version 7.x this throws an error
|
||||||
- do:
|
- do:
|
||||||
|
catch: /\[standard_html_strip\] analyzer is not supported for new indices, use a custom analyzer using \[standard\] tokenizer and \[html_strip\] char_filter, plus \[lowercase\] filter/
|
||||||
indices.analyze:
|
indices.analyze:
|
||||||
body:
|
body:
|
||||||
text: <bold/> <italic/>
|
text: <bold/> <italic/>
|
||||||
analyzer: standard_html_strip
|
analyzer: standard_html_strip
|
||||||
- length: { tokens: 2 }
|
|
||||||
- match: { tokens.0.token: bold }
|
|
||||||
- match: { tokens.1.token: italic }
|
|
||||||
|
|
||||||
---
|
---
|
||||||
"pattern":
|
"pattern":
|
||||||
|
|
|
@ -20,6 +20,7 @@ package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
||||||
|
import org.elasticsearch.Version;
|
||||||
import org.elasticsearch.core.internal.io.IOUtils;
|
import org.elasticsearch.core.internal.io.IOUtils;
|
||||||
import org.elasticsearch.ElasticsearchException;
|
import org.elasticsearch.ElasticsearchException;
|
||||||
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
||||||
|
@ -130,7 +131,13 @@ public final class AnalysisRegistry implements Closeable {
|
||||||
throw new ElasticsearchException("failed to load analyzer for name " + key, ex);
|
throw new ElasticsearchException("failed to load analyzer for name " + key, ex);
|
||||||
}}
|
}}
|
||||||
);
|
);
|
||||||
|
} else if ("standard_html_strip".equals(analyzer)) {
|
||||||
|
if (Version.CURRENT.onOrAfter(Version.V_7_0_0)) {
|
||||||
|
throw new IllegalArgumentException("[standard_html_strip] analyzer is not supported for new indices, " +
|
||||||
|
"use a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return analyzerProvider.get(environment, analyzer).get();
|
return analyzerProvider.get(environment, analyzer).get();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue