From c1c447a4cfd827923cf35bd63fdfeee8710778a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Fri, 19 Oct 2018 12:59:23 +0200 Subject: [PATCH] Check stemmer language setting early (#34601) Currently, the StemmerTokenFilterFactory checks the validity of the language setting only when the first TokenStream is processed. Instead, we should throw an error earlier, at mapping creation time. This change adds a check to the StemmerTokenFilterFactory constructor that validates the `language` setting by trying to create a new TokenStream from an empty input stream. As a result, errors about invalid language settings are thrown early on. Closes #34170 --- .../analysis/common/StemmerTokenFilterFactory.java | 9 ++++++++- .../common/StemmerTokenFilterFactoryTests.java | 12 ++++++++++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactory.java index 829d9746399..b94f7f6499a 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactory.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactory.java @@ -44,6 +44,7 @@ import org.apache.lucene.analysis.hu.HungarianLightStemFilter; import org.apache.lucene.analysis.id.IndonesianStemFilter; import org.apache.lucene.analysis.it.ItalianLightStemFilter; import org.apache.lucene.analysis.lv.LatvianStemFilter; +import org.apache.lucene.analysis.miscellaneous.EmptyTokenStream; import org.apache.lucene.analysis.no.NorwegianLightStemFilter; import org.apache.lucene.analysis.no.NorwegianLightStemmer; import org.apache.lucene.analysis.no.NorwegianMinimalStemFilter; @@ -82,13 +83,19 @@ import org.tartarus.snowball.ext.SpanishStemmer; import org.tartarus.snowball.ext.SwedishStemmer; import 
org.tartarus.snowball.ext.TurkishStemmer; +import java.io.IOException; + public class StemmerTokenFilterFactory extends AbstractTokenFilterFactory { + private static final TokenStream EMPTY_TOKEN_STREAM = new EmptyTokenStream(); + private String language; - StemmerTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) { + StemmerTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) throws IOException { super(indexSettings, name, settings); this.language = Strings.capitalize(settings.get("language", settings.get("name", "porter"))); + // check that we have a valid language by trying to create a TokenStream + create(EMPTY_TOKEN_STREAM).close(); } @Override diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactoryTests.java index 10f7653c52c..8e3e862f462 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactoryTests.java @@ -69,7 +69,6 @@ public class StemmerTokenFilterFactoryTests extends ESTokenStreamTestCase { assertThat(create, instanceOf(PorterStemFilter.class)); assertAnalyzesTo(analyzer, "consolingly", new String[]{"consolingli"}); } - } public void testPorter2FilterFactory() throws IOException { @@ -97,7 +96,16 @@ public class StemmerTokenFilterFactoryTests extends ESTokenStreamTestCase { assertThat(create, instanceOf(SnowballFilter.class)); assertAnalyzesTo(analyzer, "possibly", new String[]{"possibl"}); } - } + public void testMultipleLanguagesThrowsException() throws IOException { + Version v = VersionUtils.randomVersion(random()); + Settings settings = Settings.builder().put("index.analysis.filter.my_english.type", "stemmer") + 
.putList("index.analysis.filter.my_english.language", "english", "light_english").put(SETTING_VERSION_CREATED, v) + .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()).build(); + + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, + () -> AnalysisTestsHelper.createTestAnalysisFromSettings(settings, PLUGIN)); + assertEquals("Invalid stemmer class specified: [english, light_english]", e.getMessage()); + } }