Check stemmer language setting early (#34601)
Currently the StemmerTokenFilterFactory checks the validity of the language setting only when the first TokenStream is processed. Instead, we should throw an error earlier, at mapping creation time. This change makes the StemmerTokenFilterFactory constructor validate the `language` setting by trying to create a new TokenStream from an empty input stream, so that errors about wrong language settings are thrown early on. Closes #34170
This commit is contained in:
parent
59033e0e45
commit
c1c447a4cf
|
@ -44,6 +44,7 @@ import org.apache.lucene.analysis.hu.HungarianLightStemFilter;
|
||||||
import org.apache.lucene.analysis.id.IndonesianStemFilter;
|
import org.apache.lucene.analysis.id.IndonesianStemFilter;
|
||||||
import org.apache.lucene.analysis.it.ItalianLightStemFilter;
|
import org.apache.lucene.analysis.it.ItalianLightStemFilter;
|
||||||
import org.apache.lucene.analysis.lv.LatvianStemFilter;
|
import org.apache.lucene.analysis.lv.LatvianStemFilter;
|
||||||
|
import org.apache.lucene.analysis.miscellaneous.EmptyTokenStream;
|
||||||
import org.apache.lucene.analysis.no.NorwegianLightStemFilter;
|
import org.apache.lucene.analysis.no.NorwegianLightStemFilter;
|
||||||
import org.apache.lucene.analysis.no.NorwegianLightStemmer;
|
import org.apache.lucene.analysis.no.NorwegianLightStemmer;
|
||||||
import org.apache.lucene.analysis.no.NorwegianMinimalStemFilter;
|
import org.apache.lucene.analysis.no.NorwegianMinimalStemFilter;
|
||||||
|
@ -82,13 +83,19 @@ import org.tartarus.snowball.ext.SpanishStemmer;
|
||||||
import org.tartarus.snowball.ext.SwedishStemmer;
|
import org.tartarus.snowball.ext.SwedishStemmer;
|
||||||
import org.tartarus.snowball.ext.TurkishStemmer;
|
import org.tartarus.snowball.ext.TurkishStemmer;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
public class StemmerTokenFilterFactory extends AbstractTokenFilterFactory {
|
public class StemmerTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||||
|
|
||||||
|
private static final TokenStream EMPTY_TOKEN_STREAM = new EmptyTokenStream();
|
||||||
|
|
||||||
private String language;
|
private String language;
|
||||||
|
|
||||||
StemmerTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
StemmerTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) throws IOException {
|
||||||
super(indexSettings, name, settings);
|
super(indexSettings, name, settings);
|
||||||
this.language = Strings.capitalize(settings.get("language", settings.get("name", "porter")));
|
this.language = Strings.capitalize(settings.get("language", settings.get("name", "porter")));
|
||||||
|
// check that we have a valid language by trying to create a TokenStream
|
||||||
|
create(EMPTY_TOKEN_STREAM).close();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -69,7 +69,6 @@ public class StemmerTokenFilterFactoryTests extends ESTokenStreamTestCase {
|
||||||
assertThat(create, instanceOf(PorterStemFilter.class));
|
assertThat(create, instanceOf(PorterStemFilter.class));
|
||||||
assertAnalyzesTo(analyzer, "consolingly", new String[]{"consolingli"});
|
assertAnalyzesTo(analyzer, "consolingly", new String[]{"consolingli"});
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testPorter2FilterFactory() throws IOException {
|
public void testPorter2FilterFactory() throws IOException {
|
||||||
|
@ -97,7 +96,16 @@ public class StemmerTokenFilterFactoryTests extends ESTokenStreamTestCase {
|
||||||
assertThat(create, instanceOf(SnowballFilter.class));
|
assertThat(create, instanceOf(SnowballFilter.class));
|
||||||
assertAnalyzesTo(analyzer, "possibly", new String[]{"possibl"});
|
assertAnalyzesTo(analyzer, "possibly", new String[]{"possibl"});
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testMultipleLanguagesThrowsException() throws IOException {
|
||||||
|
Version v = VersionUtils.randomVersion(random());
|
||||||
|
Settings settings = Settings.builder().put("index.analysis.filter.my_english.type", "stemmer")
|
||||||
|
.putList("index.analysis.filter.my_english.language", "english", "light_english").put(SETTING_VERSION_CREATED, v)
|
||||||
|
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()).build();
|
||||||
|
|
||||||
|
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
|
||||||
|
() -> AnalysisTestsHelper.createTestAnalysisFromSettings(settings, PLUGIN));
|
||||||
|
assertEquals("Invalid stemmer class specified: [english, light_english]", e.getMessage());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue