Fix thread safety of Stempel's token filter factory (#22610)

Closes #21911
This commit is contained in:
Michael McCandless 2017-01-16 10:34:47 -05:00 committed by Mike McCandless
parent 2791c69960
commit eea4db5512
2 changed files with 43 additions and 13 deletions

View File

@ -35,20 +35,11 @@ import java.io.IOException;
public class PolishStemTokenFilterFactory extends AbstractTokenFilterFactory { public class PolishStemTokenFilterFactory extends AbstractTokenFilterFactory {
private final StempelStemmer stemmer;
public PolishStemTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) { public PolishStemTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings); super(indexSettings, name, settings);
Trie tire;
try {
tire = StempelStemmer.load(PolishAnalyzer.class.getResourceAsStream(PolishAnalyzer.DEFAULT_STEMMER_FILE));
} catch (IOException ex) {
throw new RuntimeException("Unable to load default stemming tables", ex);
}
stemmer = new StempelStemmer(tire);
} }
@Override public TokenStream create(TokenStream tokenStream) { @Override public TokenStream create(TokenStream tokenStream) {
return new StempelFilter(tokenStream, stemmer); return new StempelFilter(tokenStream, new StempelStemmer(PolishAnalyzer.getDefaultTable()));
} }
} }

View File

@ -19,12 +19,24 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.elasticsearch.AnalysisFactoryTestCase; import java.io.IOException;
import org.elasticsearch.index.analysis.pl.PolishStemTokenFilterFactory;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer;
import org.elasticsearch.AnalysisFactoryTestCase;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.UUIDs;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.pl.PolishStemTokenFilterFactory;
public class AnalysisPolishFactoryTests extends AnalysisFactoryTestCase { public class AnalysisPolishFactoryTests extends AnalysisFactoryTestCase {
@Override @Override
@ -34,4 +46,31 @@ public class AnalysisPolishFactoryTests extends AnalysisFactoryTestCase {
return filters; return filters;
} }
public void testThreadSafety() throws IOException {
// TODO: is this the right boilerplate? I forked this out of TransportAnalyzeAction.java:
Settings settings = Settings.builder()
// for _na_
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
.put(IndexMetaData.SETTING_INDEX_UUID, UUIDs.randomBase64UUID())
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
Environment environment = new Environment(settings);
IndexMetaData metaData = IndexMetaData.builder(IndexMetaData.INDEX_UUID_NA_VALUE).settings(settings).build();
IndexSettings indexSettings = new IndexSettings(metaData, Settings.EMPTY);
testThreadSafety(new PolishStemTokenFilterFactory(indexSettings, environment, "stempelpolishstem", settings));
}
// TODO: move to AnalysisFactoryTestCase so we can more easily test thread safety for all factories
private void testThreadSafety(TokenFilterFactory factory) throws IOException {
final Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new MockTokenizer();
return new TokenStreamComponents(tokenizer, factory.create(tokenizer));
}
};
BaseTokenStreamTestCase.checkRandomData(random(), analyzer, 100);
}
} }