[Analysis] Deprecate Standard Html Strip Analyzer in master (#26719)

* [Analysis] Deprecate Standard Html Strip Analyzer

Deprecate only Standard Html Strip Analyzer
If user create index with the analyzer since 7.0, es throws an exception.
If an index was created before 7.0, es issue deprecation log
We will remove it in 8.0

Related #4704
This commit is contained in:
Jun Ohtani 2019-01-09 12:42:00 +09:00 committed by GitHub
parent ec32e66088
commit 38b698d455
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 90 additions and 4 deletions

View File

@ -31,3 +31,11 @@ instead.
==== `standard` filter has been removed
The `standard` token filter has been removed because it doesn't change anything in the stream.
[float]
==== Deprecated standard_html_strip analyzer
The `standard_html_strip` analyzer has been deprecated, and should be replaced
with a combination of the `standard` tokenizer and `html_strip` char_filter.
Indexes created using this analyzer will still be readable in elasticsearch 7.0,
but it will not be possible to create new indexes using it.

View File

@ -171,6 +171,8 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, Scri
public Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAnalyzers() {
Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> analyzers = new TreeMap<>();
analyzers.put("fingerprint", FingerprintAnalyzerProvider::new);
// TODO remove in 8.0
analyzers.put("standard_html_strip", StandardHtmlStripAnalyzerProvider::new);
analyzers.put("pattern", PatternAnalyzerProvider::new);
analyzers.put("snowball", SnowballAnalyzerProvider::new);
@ -320,6 +322,7 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, Scri
@Override
public List<PreBuiltAnalyzerProviderFactory> getPreBuiltAnalyzerProviderFactories() {
List<PreBuiltAnalyzerProviderFactory> analyzers = new ArrayList<>();
// TODO remove in 8.0
analyzers.add(new PreBuiltAnalyzerProviderFactory("standard_html_strip", CachingStrategy.ELASTICSEARCH,
() -> new StandardHtmlStripAnalyzer(CharArraySet.EMPTY_SET)));
analyzers.add(new PreBuiltAnalyzerProviderFactory("pattern", CachingStrategy.ELASTICSEARCH,

View File

@ -37,7 +37,10 @@ public class StandardHtmlStripAnalyzer extends StopwordAnalyzerBase {
public StandardHtmlStripAnalyzer() {
super(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET);
}
/**
* @deprecated in 6.5, can not create in 7.0, and we remove this in 8.0
*/
@Deprecated
StandardHtmlStripAnalyzer(CharArraySet stopwords) {
super(stopwords);
}

View File

@ -19,7 +19,10 @@
package org.elasticsearch.analysis.common;
import org.apache.logging.log4j.LogManager;
import org.apache.lucene.analysis.CharArraySet;
import org.elasticsearch.Version;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
@ -28,14 +31,29 @@ import org.elasticsearch.index.analysis.Analysis;
public class StandardHtmlStripAnalyzerProvider extends AbstractIndexAnalyzerProvider<StandardHtmlStripAnalyzer> {
private static final DeprecationLogger DEPRECATION_LOGGER =
new DeprecationLogger(LogManager.getLogger(StandardHtmlStripAnalyzerProvider.class));
private final StandardHtmlStripAnalyzer analyzer;
/**
* @deprecated in 6.5, can not create in 7.0, and we remove this in 8.0
*/
@Deprecated
StandardHtmlStripAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
final CharArraySet defaultStopwords = CharArraySet.EMPTY_SET;
CharArraySet stopWords = Analysis.parseStopWords(env, settings, defaultStopwords);
analyzer = new StandardHtmlStripAnalyzer(stopWords);
analyzer.setVersion(version);
if (indexSettings.getIndexVersionCreated().onOrAfter(Version.V_7_0_0)) {
throw new IllegalArgumentException("[standard_html_strip] analyzer is not supported for new indices, " +
"use a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter");
} else {
DEPRECATION_LOGGER.deprecatedAndMaybeLog("standard_html_strip_deprecation",
"Deprecated analyzer [standard_html_strip] used, " +
"replace it with a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter");
}
}
@Override

View File

@ -19,6 +19,7 @@
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.elasticsearch.Version;
@ -26,6 +27,8 @@ import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.IndexSettingsModule;
@ -116,4 +119,47 @@ public class CommonAnalysisPluginTests extends ESTestCase {
assertNotNull(tokenFilterFactory.create(tokenizer));
}
}
/**
* Check that the deprecated analyzer name "standard_html_strip" throws exception for indices created since 7.0.0
*/
public void testStandardHtmlStripAnalyzerDeprecationError() throws IOException {
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
.put(IndexMetaData.SETTING_VERSION_CREATED,
VersionUtils.randomVersionBetween(random(), Version.V_7_0_0, Version.CURRENT))
.put("index.analysis.analyzer.custom_analyzer.type", "standard_html_strip")
.putList("index.analysis.analyzer.custom_analyzer.stopwords", "a", "b")
.build();
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin();
IllegalArgumentException ex = expectThrows(IllegalArgumentException.class,
() -> createTestAnalysis(idxSettings, settings, commonAnalysisPlugin));
assertEquals("[standard_html_strip] analyzer is not supported for new indices, " +
"use a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter", ex.getMessage());
}
/**
* Check that the deprecated analyzer name "standard_html_strip" issues a deprecation warning for indices created since 6.5.0 until 7
*/
public void testStandardHtmlStripAnalyzerDeprecationWarning() throws IOException {
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
.put(IndexMetaData.SETTING_VERSION_CREATED,
VersionUtils.randomVersionBetween(random(), Version.V_6_0_0,
VersionUtils.getPreviousVersion(Version.V_7_0_0)))
.put("index.analysis.analyzer.custom_analyzer.type", "standard_html_strip")
.putList("index.analysis.analyzer.custom_analyzer.stopwords", "a", "b")
.build();
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
IndexAnalyzers analyzers = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).indexAnalyzers;
Analyzer analyzer = analyzers.get("custom_analyzer");
assertNotNull(((NamedAnalyzer) analyzer).analyzer());
assertWarnings(
"Deprecated analyzer [standard_html_strip] used, " +
"replace it with a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter");
}
}
}

View File

@ -69,14 +69,15 @@
---
"standard_html_strip":
- skip:
version: " - 6.99.99"
reason: only starting from version 7.x this throws an error
- do:
catch: /\[standard_html_strip\] analyzer is not supported for new indices, use a custom analyzer using \[standard\] tokenizer and \[html_strip\] char_filter, plus \[lowercase\] filter/
indices.analyze:
body:
text: <bold/> <italic/>
analyzer: standard_html_strip
- length: { tokens: 2 }
- match: { tokens.0.token: bold }
- match: { tokens.1.token: italic }
---
"pattern":

View File

@ -20,6 +20,7 @@ package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.elasticsearch.Version;
import org.elasticsearch.core.internal.io.IOUtils;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.cluster.metadata.IndexMetaData;
@ -130,7 +131,13 @@ public final class AnalysisRegistry implements Closeable {
throw new ElasticsearchException("failed to load analyzer for name " + key, ex);
}}
);
} else if ("standard_html_strip".equals(analyzer)) {
if (Version.CURRENT.onOrAfter(Version.V_7_0_0)) {
throw new IllegalArgumentException("[standard_html_strip] analyzer is not supported for new indices, " +
"use a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter");
}
}
return analyzerProvider.get(environment, analyzer).get();
}