diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactory.java index 829d9746399..b94f7f6499a 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactory.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactory.java @@ -44,6 +44,7 @@ import org.apache.lucene.analysis.hu.HungarianLightStemFilter; import org.apache.lucene.analysis.id.IndonesianStemFilter; import org.apache.lucene.analysis.it.ItalianLightStemFilter; import org.apache.lucene.analysis.lv.LatvianStemFilter; +import org.apache.lucene.analysis.miscellaneous.EmptyTokenStream; import org.apache.lucene.analysis.no.NorwegianLightStemFilter; import org.apache.lucene.analysis.no.NorwegianLightStemmer; import org.apache.lucene.analysis.no.NorwegianMinimalStemFilter; @@ -82,13 +83,19 @@ import org.tartarus.snowball.ext.SpanishStemmer; import org.tartarus.snowball.ext.SwedishStemmer; import org.tartarus.snowball.ext.TurkishStemmer; +import java.io.IOException; + public class StemmerTokenFilterFactory extends AbstractTokenFilterFactory { + private static final TokenStream EMPTY_TOKEN_STREAM = new EmptyTokenStream(); + private String language; - StemmerTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) { + StemmerTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) throws IOException { super(indexSettings, name, settings); this.language = Strings.capitalize(settings.get("language", settings.get("name", "porter"))); + // check that we have a valid language by trying to create a TokenStream + create(EMPTY_TOKEN_STREAM).close(); } @Override diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactoryTests.java index 10f7653c52c..8e3e862f462 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactoryTests.java @@ -69,7 +69,6 @@ public class StemmerTokenFilterFactoryTests extends ESTokenStreamTestCase { assertThat(create, instanceOf(PorterStemFilter.class)); assertAnalyzesTo(analyzer, "consolingly", new String[]{"consolingli"}); } - } public void testPorter2FilterFactory() throws IOException { @@ -97,7 +96,16 @@ public class StemmerTokenFilterFactoryTests extends ESTokenStreamTestCase { assertThat(create, instanceOf(SnowballFilter.class)); assertAnalyzesTo(analyzer, "possibly", new String[]{"possibl"}); } - } + public void testMultipleLanguagesThrowsException() throws IOException { + Version v = VersionUtils.randomVersion(random()); + Settings settings = Settings.builder().put("index.analysis.filter.my_english.type", "stemmer") + .putList("index.analysis.filter.my_english.language", "english", "light_english").put(SETTING_VERSION_CREATED, v) + .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()).build(); + + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, + () -> AnalysisTestsHelper.createTestAnalysisFromSettings(settings, PLUGIN)); + assertEquals("Invalid stemmer class specified: [english, light_english]", e.getMessage()); + } }