Fix thread safety of Stempel's token filter factory (#22610)
Closes #21911
This commit is contained in:
parent
2791c69960
commit
eea4db5512
|
@ -35,20 +35,11 @@ import java.io.IOException;
|
|||
|
||||
public class PolishStemTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||
|
||||
private final StempelStemmer stemmer;
|
||||
|
||||
public PolishStemTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
||||
super(indexSettings, name, settings);
|
||||
Trie tire;
|
||||
try {
|
||||
tire = StempelStemmer.load(PolishAnalyzer.class.getResourceAsStream(PolishAnalyzer.DEFAULT_STEMMER_FILE));
|
||||
} catch (IOException ex) {
|
||||
throw new RuntimeException("Unable to load default stemming tables", ex);
|
||||
}
|
||||
stemmer = new StempelStemmer(tire);
|
||||
}
|
||||
|
||||
@Override public TokenStream create(TokenStream tokenStream) {
|
||||
return new StempelFilter(tokenStream, stemmer);
|
||||
return new StempelFilter(tokenStream, new StempelStemmer(PolishAnalyzer.getDefaultTable()));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,12 +19,24 @@
|
|||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.elasticsearch.AnalysisFactoryTestCase;
|
||||
import org.elasticsearch.index.analysis.pl.PolishStemTokenFilterFactory;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.elasticsearch.AnalysisFactoryTestCase;
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
||||
import org.elasticsearch.common.UUIDs;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.env.Environment;
|
||||
import org.elasticsearch.index.IndexSettings;
|
||||
import org.elasticsearch.index.analysis.pl.PolishStemTokenFilterFactory;
|
||||
|
||||
public class AnalysisPolishFactoryTests extends AnalysisFactoryTestCase {
|
||||
|
||||
@Override
|
||||
|
@ -34,4 +46,31 @@ public class AnalysisPolishFactoryTests extends AnalysisFactoryTestCase {
|
|||
return filters;
|
||||
}
|
||||
|
||||
public void testThreadSafety() throws IOException {
|
||||
// TODO: is this the right boilerplate? I forked this out of TransportAnalyzeAction.java:
|
||||
Settings settings = Settings.builder()
|
||||
// for _na_
|
||||
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
|
||||
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)
|
||||
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
|
||||
.put(IndexMetaData.SETTING_INDEX_UUID, UUIDs.randomBase64UUID())
|
||||
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
||||
.build();
|
||||
Environment environment = new Environment(settings);
|
||||
IndexMetaData metaData = IndexMetaData.builder(IndexMetaData.INDEX_UUID_NA_VALUE).settings(settings).build();
|
||||
IndexSettings indexSettings = new IndexSettings(metaData, Settings.EMPTY);
|
||||
testThreadSafety(new PolishStemTokenFilterFactory(indexSettings, environment, "stempelpolishstem", settings));
|
||||
}
|
||||
|
||||
// TODO: move to AnalysisFactoryTestCase so we can more easily test thread safety for all factories
|
||||
private void testThreadSafety(TokenFilterFactory factory) throws IOException {
|
||||
final Analyzer analyzer = new Analyzer() {
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName) {
|
||||
Tokenizer tokenizer = new MockTokenizer();
|
||||
return new TokenStreamComponents(tokenizer, factory.create(tokenizer));
|
||||
}
|
||||
};
|
||||
BaseTokenStreamTestCase.checkRandomData(random(), analyzer, 100);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue