Keep word filter threw an error if `keep_words_path` was specified.

Closes #4073
Vojtech Hyza 2013-09-13 14:29:22 +02:00 committed by Boaz Leskes
parent 555ececaa9
commit 47969efae9
2 changed files with 55 additions and 36 deletions
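
For context on the fix: the diff below swaps `settings.getAsArray(KEEP_WORDS_KEY)` for `settings.getAsArray(KEEP_WORDS_KEY, null)` and replaces an XOR validation with two explicit clauses. Reading the change, the old overload evidently fell back to an empty array rather than null when `keep_words` was unset, so the XOR treated a valid path-only configuration as invalid. A minimal sketch of the two checks (the literal values are illustrative, not taken from the commit):

    // Illustrative scenario: only keep_words_path is configured.
    String keepWordsPath = "analysis/keep.txt"; // hypothetical path
    String[] oldArray = new String[0];          // old getAsArray(key): empty array when unset (assumption)
    String[] newArray = null;                   // new getAsArray(key, null): null when unset

    // Old check: !(false ^ false) == true, so a valid path-only setup threw.
    boolean oldRejects = !(oldArray == null ^ keepWordsPath == null);

    // New check: rejects only "both set" or "neither set".
    boolean newRejects = (newArray == null && keepWordsPath == null)
            || (newArray != null && keepWordsPath != null);

    System.out.println(oldRejects + " / " + newRejects); // prints: true / false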

org/elasticsearch/index/analysis/KeepWordFilterFactory.java

@@ -17,14 +17,11 @@ package org.elasticsearch.index.analysis;
* specific language governing permissions and limitations
* under the License.
*/
-import org.apache.lucene.util.Version;
-import java.util.Arrays;
-import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.KeepWordFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
@@ -32,33 +29,31 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
-import org.elasticsearch.indices.analysis.IndicesAnalysisService;
/**
 * A {@link TokenFilterFactory} for {@link KeepWordFilter}. This filter only
 * keeps tokens that are contained in the term set configured via the
 * {@value #KEEP_WORDS_KEY} setting. This filter acts like an inverse stop
 * filter.
 * <p/>
 * Configuration options:
 * <ul>
 * <li>{@value #KEEP_WORDS_KEY} the array of words / tokens to keep.</li>
 * <li>{@value #KEEP_WORDS_PATH_KEY} a reference to a file containing the words
 * / tokens to keep. Note: this is an alternative to {@value #KEEP_WORDS_KEY};
 * if both are set, an exception will be thrown.</li>
 * <li>{@value #ENABLE_POS_INC_KEY} <code>true</code> iff the filter should
 * maintain position increments for dropped tokens. The default is
 * <code>true</code>.</li>
 * <li>{@value #KEEP_WORDS_CASE_KEY} set to <code>true</code> to match the keep
 * words case-insensitively. The default is <code>false</code>, which
 * corresponds to case-sensitive matching.</li>
 * </ul>
 *
 * @see StopTokenFilterFactory
 */
@AnalysisSettingsRequired
public class KeepWordFilterFactory extends AbstractTokenFilterFactory {
@@ -68,30 +63,29 @@ public class KeepWordFilterFactory extends AbstractTokenFilterFactory {
private static final String KEEP_WORDS_PATH_KEY = KEEP_WORDS_KEY + "_path";
private static final String KEEP_WORDS_CASE_KEY = KEEP_WORDS_KEY + "_case"; // for javadoc
private static final String ENABLE_POS_INC_KEY = "enable_position_increments";
@Inject
-public KeepWordFilterFactory(Index index, @IndexSettings Settings indexSettings,
-Environment env, IndicesAnalysisService indicesAnalysisService, Map<String, TokenizerFactoryFactory> tokenizerFactories,
-@Assisted String name, @Assisted Settings settings) {
+public KeepWordFilterFactory(Index index, @IndexSettings Settings indexSettings,
+Environment env, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
-final String[] arrayKeepWords = settings.getAsArray(KEEP_WORDS_KEY);
+final String[] arrayKeepWords = settings.getAsArray(KEEP_WORDS_KEY, null);
final String keepWordsPath = settings.get(KEEP_WORDS_PATH_KEY, null);
-if (!(arrayKeepWords == null ^ keepWordsPath == null)) {
-// we don't allow both or non
+if ((arrayKeepWords == null && keepWordsPath == null) || (arrayKeepWords != null && keepWordsPath != null)) {
+// we don't allow both or none
throw new ElasticSearchIllegalArgumentException("keep requires either `" + KEEP_WORDS_KEY + "` or `"
+ KEEP_WORDS_PATH_KEY + "` to be configured");
}
if (version.onOrAfter(Version.LUCENE_44) && settings.get(ENABLE_POS_INC_KEY) != null) {
throw new ElasticSearchIllegalArgumentException(ENABLE_POS_INC_KEY + " is not supported anymore. Please fix your analysis chain or use"
+ " an older compatibility version (<=4.3) but beware that it might cause highlighting bugs.");
}
enablePositionIncrements = version.onOrAfter(Version.LUCENE_44) ? true : settings.getAsBoolean(ENABLE_POS_INC_KEY, true);
this.keepWords = Analysis.getWordSet(env, settings, KEEP_WORDS_KEY, version);
}
@Override
public TokenStream create(TokenStream tokenStream) {
if (version.onOrAfter(Version.LUCENE_44)) {
@@ -100,8 +94,5 @@ public class KeepWordFilterFactory extends AbstractTokenFilterFactory {
return new KeepWordFilter(version, enablePositionIncrements, tokenStream, keepWords);
}
}
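
For reference, the options documented in the javadoc above are wired up through index settings. A sketch in the style of this commit's tests (the filter name `my_keep`, the word list, and the path are made up; per the validation above, `keep_words` and `keep_words_path` are mutually exclusive):

    Settings settings = ImmutableSettings.settingsBuilder()
            .put("index.analysis.filter.my_keep.type", "keep")
            .putArray("index.analysis.filter.my_keep.keep_words", "hello", "world")
            // alternative, mutually exclusive with keep_words:
            // .put("index.analysis.filter.my_keep.keep_words_path", "analysis/keep.txt")
            .put("index.analysis.filter.my_keep.keep_words_case", true) // true = case-insensitive matching (assumption from the javadoc and tests)
            .build();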

org/elasticsearch/index/analysis/KeepFilterFactoryTests.java

@@ -24,6 +24,7 @@ import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.env.FailedToResolveConfigException;
import org.elasticsearch.test.ElasticsearchTokenStreamTestCase;
import org.junit.Assert;
import org.junit.Test;
@@ -36,22 +37,22 @@ import static org.hamcrest.Matchers.instanceOf;
public class KeepFilterFactoryTests extends ElasticsearchTokenStreamTestCase {
private static final String RESOURCE = "org/elasticsearch/index/analysis/keep_analysis.json";
@Test
public void testLoadWithoutSettings() {
AnalysisService analysisService = AnalysisTestsHelper.createAnalysisServiceFromClassPath(RESOURCE);
TokenFilterFactory tokenFilter = analysisService.tokenFilter("keep");
Assert.assertNull(tokenFilter);
}
@Test
public void testLoadOverConfiguredSettings() {
Settings settings = ImmutableSettings.settingsBuilder()
.put("index.analysis.filter.broken_keep_filter.type", "keep")
.put("index.analysis.filter.broken_keep_filter.keep_words_path", "does/not/exists.txt")
.put("index.analysis.filter.broken_keep_filter.keep_words", "[\"Hello\", \"worlD\"]")
.build();
.put("index.analysis.filter.broken_keep_filter.type", "keep")
.put("index.analysis.filter.broken_keep_filter.keep_words_path", "does/not/exists.txt")
.put("index.analysis.filter.broken_keep_filter.keep_words", "[\"Hello\", \"worlD\"]")
.build();
try {
AnalysisTestsHelper.createAnalysisServiceFromSettings(settings);
Assert.fail("path and array are configured");
@@ -60,6 +61,33 @@ public class KeepFilterFactoryTests extends ElasticsearchTokenStreamTestCase {
}
}
+@Test
+public void testKeepWordsPathSettings() {
+Settings settings = ImmutableSettings.settingsBuilder()
+.put("index.analysis.filter.non_broken_keep_filter.type", "keep")
+.put("index.analysis.filter.non_broken_keep_filter.keep_words_path", "does/not/exists.txt")
+.build();
+try {
+// check that the non-existent keep_words_path is picked up
+AnalysisTestsHelper.createAnalysisServiceFromSettings(settings);
+fail("expected an exception due to non-existent keep_words_path");
+} catch (Throwable e) {
+assertThat(e.getCause(), instanceOf(FailedToResolveConfigException.class));
+}
+settings = ImmutableSettings.settingsBuilder().put(settings)
+.put("index.analysis.filter.non_broken_keep_filter.keep_words", new String[]{"test"})
+.build();
+try {
+// check that keep_words and keep_words_path can't be combined
+AnalysisTestsHelper.createAnalysisServiceFromSettings(settings);
+fail("expected an exception indicating that you can't use [keep_words_path] with [keep_words]");
+} catch (Throwable e) {
+assertThat(e.getCause(), instanceOf(ElasticSearchIllegalArgumentException.class));
+}
+}
@Test
public void testCaseInsensitiveMapping() throws IOException {
AnalysisService analysisService = AnalysisTestsHelper.createAnalysisServiceFromClassPath(RESOURCE);
@@ -68,7 +96,7 @@ public class KeepFilterFactoryTests extends ElasticsearchTokenStreamTestCase {
String source = "hello small world";
String[] expected = new String[]{"hello", "world"};
Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(source));
-assertTokenStreamContents(tokenFilter.create(tokenizer), expected, new int[] {1,2});
+assertTokenStreamContents(tokenFilter.create(tokenizer), expected, new int[]{1, 2});
}
@Test
@@ -79,7 +107,7 @@ public class KeepFilterFactoryTests extends ElasticsearchTokenStreamTestCase {
String source = "Hello small world";
String[] expected = new String[]{"Hello"};
Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(source));
-assertTokenStreamContents(tokenFilter.create(tokenizer), expected, new int[] {1});
+assertTokenStreamContents(tokenFilter.create(tokenizer), expected, new int[]{1});
}
}
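
A note on the `new int[]{1, 2}` assertion above: with position increments maintained (the pre-4.4 default, and forced on from Lucene 4.4 by the factory change in this commit), a dropped token leaves a positional gap, so the surviving token after it carries an increment of 2. A standalone sketch against the Lucene 4.x-era API used here (the version constant and word list are illustrative; exact constructor signatures vary by Lucene version):

    import java.io.StringReader;
    import java.util.Arrays;

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;
    import org.apache.lucene.analysis.miscellaneous.KeepWordFilter;
    import org.apache.lucene.analysis.util.CharArraySet;
    import org.apache.lucene.util.Version;

    // "hello small world" with keep set {hello, world}: "small" is dropped but
    // its position is preserved, so "world" is emitted with posInc 2.
    CharArraySet keepWords = new CharArraySet(Version.LUCENE_44, Arrays.asList("hello", "world"), true);
    TokenStream stream = new KeepWordFilter(Version.LUCENE_44,
            new WhitespaceTokenizer(Version.LUCENE_44, new StringReader("hello small world")),
            keepWords);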