[Analysis] Deprecate Standard Html Strip Analyzer in master (#26719)

* [Analysis] Deprecate Standard Html Strip Analyzer

Deprecate only Standard Html Strip Analyzer
If user create index with the analyzer since 7.0, es throws an exception.
If an index was created before 7.0, es issue deprecation log
We will remove it in 8.0

Related #4704
This commit is contained in:
Jun Ohtani 2019-01-09 12:42:00 +09:00 committed by GitHub
parent ec32e66088
commit 38b698d455
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 90 additions and 4 deletions

View File

@ -31,3 +31,11 @@ instead.
==== `standard` filter has been removed ==== `standard` filter has been removed
The `standard` token filter has been removed because it doesn't change anything in the stream. The `standard` token filter has been removed because it doesn't change anything in the stream.
[float]
==== Deprecated standard_html_strip analyzer
The `standard_html_strip` analyzer has been deprecated, and should be replaced
with a combination of the `standard` tokenizer and `html_strip` char_filter.
Indexes created using this analyzer will still be readable in elasticsearch 7.0,
but it will not be possible to create new indexes using it.

View File

@ -171,6 +171,8 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, Scri
public Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAnalyzers() { public Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAnalyzers() {
Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> analyzers = new TreeMap<>(); Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> analyzers = new TreeMap<>();
analyzers.put("fingerprint", FingerprintAnalyzerProvider::new); analyzers.put("fingerprint", FingerprintAnalyzerProvider::new);
// TODO remove in 8.0
analyzers.put("standard_html_strip", StandardHtmlStripAnalyzerProvider::new); analyzers.put("standard_html_strip", StandardHtmlStripAnalyzerProvider::new);
analyzers.put("pattern", PatternAnalyzerProvider::new); analyzers.put("pattern", PatternAnalyzerProvider::new);
analyzers.put("snowball", SnowballAnalyzerProvider::new); analyzers.put("snowball", SnowballAnalyzerProvider::new);
@ -320,6 +322,7 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, Scri
@Override @Override
public List<PreBuiltAnalyzerProviderFactory> getPreBuiltAnalyzerProviderFactories() { public List<PreBuiltAnalyzerProviderFactory> getPreBuiltAnalyzerProviderFactories() {
List<PreBuiltAnalyzerProviderFactory> analyzers = new ArrayList<>(); List<PreBuiltAnalyzerProviderFactory> analyzers = new ArrayList<>();
// TODO remove in 8.0
analyzers.add(new PreBuiltAnalyzerProviderFactory("standard_html_strip", CachingStrategy.ELASTICSEARCH, analyzers.add(new PreBuiltAnalyzerProviderFactory("standard_html_strip", CachingStrategy.ELASTICSEARCH,
() -> new StandardHtmlStripAnalyzer(CharArraySet.EMPTY_SET))); () -> new StandardHtmlStripAnalyzer(CharArraySet.EMPTY_SET)));
analyzers.add(new PreBuiltAnalyzerProviderFactory("pattern", CachingStrategy.ELASTICSEARCH, analyzers.add(new PreBuiltAnalyzerProviderFactory("pattern", CachingStrategy.ELASTICSEARCH,

View File

@ -37,7 +37,10 @@ public class StandardHtmlStripAnalyzer extends StopwordAnalyzerBase {
public StandardHtmlStripAnalyzer() { public StandardHtmlStripAnalyzer() {
super(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET); super(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET);
} }
/**
* @deprecated in 6.5, can not create in 7.0, and we remove this in 8.0
*/
@Deprecated
StandardHtmlStripAnalyzer(CharArraySet stopwords) { StandardHtmlStripAnalyzer(CharArraySet stopwords) {
super(stopwords); super(stopwords);
} }

View File

@ -19,7 +19,10 @@
package org.elasticsearch.analysis.common; package org.elasticsearch.analysis.common;
import org.apache.logging.log4j.LogManager;
import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.CharArraySet;
import org.elasticsearch.Version;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment; import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.IndexSettings;
@ -28,14 +31,29 @@ import org.elasticsearch.index.analysis.Analysis;
public class StandardHtmlStripAnalyzerProvider extends AbstractIndexAnalyzerProvider<StandardHtmlStripAnalyzer> { public class StandardHtmlStripAnalyzerProvider extends AbstractIndexAnalyzerProvider<StandardHtmlStripAnalyzer> {
private static final DeprecationLogger DEPRECATION_LOGGER =
new DeprecationLogger(LogManager.getLogger(StandardHtmlStripAnalyzerProvider.class));
private final StandardHtmlStripAnalyzer analyzer; private final StandardHtmlStripAnalyzer analyzer;
/**
* @deprecated in 6.5, can not create in 7.0, and we remove this in 8.0
*/
@Deprecated
StandardHtmlStripAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { StandardHtmlStripAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings); super(indexSettings, name, settings);
final CharArraySet defaultStopwords = CharArraySet.EMPTY_SET; final CharArraySet defaultStopwords = CharArraySet.EMPTY_SET;
CharArraySet stopWords = Analysis.parseStopWords(env, settings, defaultStopwords); CharArraySet stopWords = Analysis.parseStopWords(env, settings, defaultStopwords);
analyzer = new StandardHtmlStripAnalyzer(stopWords); analyzer = new StandardHtmlStripAnalyzer(stopWords);
analyzer.setVersion(version); analyzer.setVersion(version);
if (indexSettings.getIndexVersionCreated().onOrAfter(Version.V_7_0_0)) {
throw new IllegalArgumentException("[standard_html_strip] analyzer is not supported for new indices, " +
"use a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter");
} else {
DEPRECATION_LOGGER.deprecatedAndMaybeLog("standard_html_strip_deprecation",
"Deprecated analyzer [standard_html_strip] used, " +
"replace it with a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter");
}
} }
@Override @Override

View File

@ -19,6 +19,7 @@
package org.elasticsearch.analysis.common; package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.Tokenizer;
import org.elasticsearch.Version; import org.elasticsearch.Version;
@ -26,6 +27,8 @@ import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment; import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.analysis.TokenFilterFactory; import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.IndexSettingsModule; import org.elasticsearch.test.IndexSettingsModule;
@ -116,4 +119,47 @@ public class CommonAnalysisPluginTests extends ESTestCase {
assertNotNull(tokenFilterFactory.create(tokenizer)); assertNotNull(tokenFilterFactory.create(tokenizer));
} }
} }
/**
* Check that the deprecated analyzer name "standard_html_strip" throws exception for indices created since 7.0.0
*/
public void testStandardHtmlStripAnalyzerDeprecationError() throws IOException {
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
.put(IndexMetaData.SETTING_VERSION_CREATED,
VersionUtils.randomVersionBetween(random(), Version.V_7_0_0, Version.CURRENT))
.put("index.analysis.analyzer.custom_analyzer.type", "standard_html_strip")
.putList("index.analysis.analyzer.custom_analyzer.stopwords", "a", "b")
.build();
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin();
IllegalArgumentException ex = expectThrows(IllegalArgumentException.class,
() -> createTestAnalysis(idxSettings, settings, commonAnalysisPlugin));
assertEquals("[standard_html_strip] analyzer is not supported for new indices, " +
"use a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter", ex.getMessage());
}
/**
* Check that the deprecated analyzer name "standard_html_strip" issues a deprecation warning for indices created since 6.5.0 until 7
*/
public void testStandardHtmlStripAnalyzerDeprecationWarning() throws IOException {
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
.put(IndexMetaData.SETTING_VERSION_CREATED,
VersionUtils.randomVersionBetween(random(), Version.V_6_0_0,
VersionUtils.getPreviousVersion(Version.V_7_0_0)))
.put("index.analysis.analyzer.custom_analyzer.type", "standard_html_strip")
.putList("index.analysis.analyzer.custom_analyzer.stopwords", "a", "b")
.build();
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
IndexAnalyzers analyzers = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).indexAnalyzers;
Analyzer analyzer = analyzers.get("custom_analyzer");
assertNotNull(((NamedAnalyzer) analyzer).analyzer());
assertWarnings(
"Deprecated analyzer [standard_html_strip] used, " +
"replace it with a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter");
}
}
} }

View File

@ -69,14 +69,15 @@
--- ---
"standard_html_strip": "standard_html_strip":
- skip:
version: " - 6.99.99"
reason: only starting from version 7.x this throws an error
- do: - do:
catch: /\[standard_html_strip\] analyzer is not supported for new indices, use a custom analyzer using \[standard\] tokenizer and \[html_strip\] char_filter, plus \[lowercase\] filter/
indices.analyze: indices.analyze:
body: body:
text: <bold/> <italic/> text: <bold/> <italic/>
analyzer: standard_html_strip analyzer: standard_html_strip
- length: { tokens: 2 }
- match: { tokens.0.token: bold }
- match: { tokens.1.token: italic }
--- ---
"pattern": "pattern":

View File

@ -20,6 +20,7 @@ package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer; import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.elasticsearch.Version;
import org.elasticsearch.core.internal.io.IOUtils; import org.elasticsearch.core.internal.io.IOUtils;
import org.elasticsearch.ElasticsearchException; import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.cluster.metadata.IndexMetaData;
@ -130,7 +131,13 @@ public final class AnalysisRegistry implements Closeable {
throw new ElasticsearchException("failed to load analyzer for name " + key, ex); throw new ElasticsearchException("failed to load analyzer for name " + key, ex);
}} }}
); );
} else if ("standard_html_strip".equals(analyzer)) {
if (Version.CURRENT.onOrAfter(Version.V_7_0_0)) {
throw new IllegalArgumentException("[standard_html_strip] analyzer is not supported for new indices, " +
"use a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter");
} }
}
return analyzerProvider.get(environment, analyzer).get(); return analyzerProvider.get(environment, analyzer).get();
} }