Fix thread safety of Stempel's token filter factory (#22610)

Closes #21911
2017-01-16 10:34:47 -05:00 · 2017-01-16 10:34:47 -05:00 · eea4db5512
parent 2791c69960
commit eea4db5512
2 changed files with 43 additions and 13 deletions
--- a/plugins/analysis-stempel/src/main/java/org/elasticsearch/index/analysis/pl/PolishStemTokenFilterFactory.java
+++ b/plugins/analysis-stempel/src/main/java/org/elasticsearch/index/analysis/pl/PolishStemTokenFilterFactory.java
@ -35,20 +35,11 @@ import java.io.IOException;
 public class PolishStemTokenFilterFactory extends AbstractTokenFilterFactory {
    private final StempelStemmer stemmer;
    public PolishStemTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
        super(indexSettings, name, settings);
        Trie tire;
        try {
            tire = StempelStemmer.load(PolishAnalyzer.class.getResourceAsStream(PolishAnalyzer.DEFAULT_STEMMER_FILE));
        } catch (IOException ex) {
            throw new RuntimeException("Unable to load default stemming tables", ex);
        }
        stemmer = new StempelStemmer(tire);
    }
    @Override public TokenStream create(TokenStream tokenStream) {
-        return new StempelFilter(tokenStream, stemmer);
+        return new StempelFilter(tokenStream, new StempelStemmer(PolishAnalyzer.getDefaultTable()));
    }
 }
--- a/plugins/analysis-stempel/src/test/java/org/elasticsearch/index/analysis/AnalysisPolishFactoryTests.java
+++ b/plugins/analysis-stempel/src/test/java/org/elasticsearch/index/analysis/AnalysisPolishFactoryTests.java
@ -19,12 +19,24 @@
 package org.elasticsearch.index.analysis;
-import org.elasticsearch.AnalysisFactoryTestCase;
+import java.io.IOException;
 import org.elasticsearch.index.analysis.pl.PolishStemTokenFilterFactory;
 import java.util.HashMap;
 import java.util.Map;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.Tokenizer;
 import org.elasticsearch.AnalysisFactoryTestCase;
 import org.elasticsearch.Version;
 import org.elasticsearch.cluster.metadata.IndexMetaData;
 import org.elasticsearch.common.UUIDs;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.analysis.pl.PolishStemTokenFilterFactory;
 public class AnalysisPolishFactoryTests extends AnalysisFactoryTestCase {
    @Override
@ -34,4 +46,31 @@ public class AnalysisPolishFactoryTests extends AnalysisFactoryTestCase {
        return filters;
    }
    public void testThreadSafety() throws IOException {
        // TODO: is this the right boilerplate?  I forked this out of TransportAnalyzeAction.java:
        Settings settings = Settings.builder()
            // for _na_
            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
            .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)
            .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
            .put(IndexMetaData.SETTING_INDEX_UUID, UUIDs.randomBase64UUID())
            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
            .build();
        Environment environment = new Environment(settings);
        IndexMetaData metaData = IndexMetaData.builder(IndexMetaData.INDEX_UUID_NA_VALUE).settings(settings).build();
        IndexSettings indexSettings = new IndexSettings(metaData, Settings.EMPTY);
        testThreadSafety(new PolishStemTokenFilterFactory(indexSettings, environment, "stempelpolishstem", settings));
    }
    // TODO: move to AnalysisFactoryTestCase so we can more easily test thread safety for all factories
    private void testThreadSafety(TokenFilterFactory factory) throws IOException {
        final Analyzer analyzer = new Analyzer() {
            @Override
            protected TokenStreamComponents createComponents(String fieldName) {
                Tokenizer tokenizer = new MockTokenizer();
                return new TokenStreamComponents(tokenizer, factory.create(tokenizer));
            }
        };
        BaseTokenStreamTestCase.checkRandomData(random(), analyzer, 100);
    }
 }