Check stemmer language setting early (#34601)

Currently the StemmerTokenFilterFactory checks the validity of the language
setting only when the first TokenStream is processed. Instead we should throw an
error earlier at mapping creation time. This change adds a check to the
StemmerTokenFilterFactory constructor that checks for a valid `language` setting
by trying to create a new TokenStream from an empty input stream. This will
throw errors about wrong language settings early on.

Closes #34170
This commit is contained in:
Christoph Büscher 2018-10-19 12:59:23 +02:00 committed by GitHub
parent 59033e0e45
commit c1c447a4cf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 18 additions and 3 deletions

View File

@ -44,6 +44,7 @@ import org.apache.lucene.analysis.hu.HungarianLightStemFilter;
import org.apache.lucene.analysis.id.IndonesianStemFilter; import org.apache.lucene.analysis.id.IndonesianStemFilter;
import org.apache.lucene.analysis.it.ItalianLightStemFilter; import org.apache.lucene.analysis.it.ItalianLightStemFilter;
import org.apache.lucene.analysis.lv.LatvianStemFilter; import org.apache.lucene.analysis.lv.LatvianStemFilter;
import org.apache.lucene.analysis.miscellaneous.EmptyTokenStream;
import org.apache.lucene.analysis.no.NorwegianLightStemFilter; import org.apache.lucene.analysis.no.NorwegianLightStemFilter;
import org.apache.lucene.analysis.no.NorwegianLightStemmer; import org.apache.lucene.analysis.no.NorwegianLightStemmer;
import org.apache.lucene.analysis.no.NorwegianMinimalStemFilter; import org.apache.lucene.analysis.no.NorwegianMinimalStemFilter;
@ -82,13 +83,19 @@ import org.tartarus.snowball.ext.SpanishStemmer;
import org.tartarus.snowball.ext.SwedishStemmer; import org.tartarus.snowball.ext.SwedishStemmer;
import org.tartarus.snowball.ext.TurkishStemmer; import org.tartarus.snowball.ext.TurkishStemmer;
import java.io.IOException;
public class StemmerTokenFilterFactory extends AbstractTokenFilterFactory { public class StemmerTokenFilterFactory extends AbstractTokenFilterFactory {
private static final TokenStream EMPTY_TOKEN_STREAM = new EmptyTokenStream();
private String language; private String language;
StemmerTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) { StemmerTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) throws IOException {
super(indexSettings, name, settings); super(indexSettings, name, settings);
this.language = Strings.capitalize(settings.get("language", settings.get("name", "porter"))); this.language = Strings.capitalize(settings.get("language", settings.get("name", "porter")));
// check that we have a valid language by trying to create a TokenStream
create(EMPTY_TOKEN_STREAM).close();
} }
@Override @Override

View File

@ -69,7 +69,6 @@ public class StemmerTokenFilterFactoryTests extends ESTokenStreamTestCase {
assertThat(create, instanceOf(PorterStemFilter.class)); assertThat(create, instanceOf(PorterStemFilter.class));
assertAnalyzesTo(analyzer, "consolingly", new String[]{"consolingli"}); assertAnalyzesTo(analyzer, "consolingly", new String[]{"consolingli"});
} }
} }
public void testPorter2FilterFactory() throws IOException { public void testPorter2FilterFactory() throws IOException {
@ -97,7 +96,16 @@ public class StemmerTokenFilterFactoryTests extends ESTokenStreamTestCase {
assertThat(create, instanceOf(SnowballFilter.class)); assertThat(create, instanceOf(SnowballFilter.class));
assertAnalyzesTo(analyzer, "possibly", new String[]{"possibl"}); assertAnalyzesTo(analyzer, "possibly", new String[]{"possibl"});
} }
} }
/**
 * Configures a {@code stemmer} filter whose {@code language} setting is a list of two
 * values and asserts that building the test analysis from those settings fails with an
 * {@link IllegalArgumentException} carrying the expected message.
 */
public void testMultipleLanguagesThrowsException() throws IOException {
    final Version createdVersion = VersionUtils.randomVersion(random());

    // Assemble the index settings step by step, in the same order as a fluent chain would.
    final Settings.Builder builder = Settings.builder();
    builder.put("index.analysis.filter.my_english.type", "stemmer");
    builder.putList("index.analysis.filter.my_english.language", "english", "light_english");
    builder.put(SETTING_VERSION_CREATED, createdVersion);
    builder.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString());
    final Settings indexSettings = builder.build();

    // The failure is expected while the analysis is being created, not when a stream is consumed.
    final IllegalArgumentException thrown = expectThrows(
        IllegalArgumentException.class,
        () -> AnalysisTestsHelper.createTestAnalysisFromSettings(indexSettings, PLUGIN));

    assertEquals("Invalid stemmer class specified: [english, light_english]", thrown.getMessage());
}
} }