diff --git a/docs/reference/analysis/tokenfilters/elision-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/elision-tokenfilter.asciidoc index 2ff19cebe89..34646a0413e 100644 --- a/docs/reference/analysis/tokenfilters/elision-tokenfilter.asciidoc +++ b/docs/reference/analysis/tokenfilters/elision-tokenfilter.asciidoc @@ -4,8 +4,9 @@ A token filter which removes elisions. For example, "l'avion" (the plane) will tokenized as "avion" (plane). -Accepts `articles` parameter which is a set of stop words articles. Also accepts -`articles_case`, which indicates whether the filter treats those articles as +Requires either an `articles` parameter which is a set of stop word articles, or +`articles_path` which points to a text file containing the stop set. Also optionally +accepts `articles_case`, which indicates whether the filter treats those articles as case sensitive. For example: diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java index c2886408437..b438ca52e05 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java @@ -239,7 +239,7 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, Scri filters.put("dutch_stem", DutchStemTokenFilterFactory::new); filters.put("edge_ngram", EdgeNGramTokenFilterFactory::new); filters.put("edgeNGram", EdgeNGramTokenFilterFactory::new); - filters.put("elision", ElisionTokenFilterFactory::new); + filters.put("elision", requiresAnalysisSettings(ElisionTokenFilterFactory::new)); filters.put("fingerprint", FingerprintTokenFilterFactory::new); filters.put("flatten_graph", FlattenGraphTokenFilterFactory::new); filters.put("french_stem", FrenchStemTokenFilterFactory::new); diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ElisionTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ElisionTokenFilterFactory.java index 52cb69952b8..39d042caa8c 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ElisionTokenFilterFactory.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ElisionTokenFilterFactory.java @@ -36,6 +36,9 @@ public class ElisionTokenFilterFactory extends AbstractTokenFilterFactory implem ElisionTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); this.articles = Analysis.parseArticles(env, settings); + if (this.articles == null) { + throw new IllegalArgumentException("elision filter requires [articles] or [articles_path] setting"); + } } @Override diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ElisionFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ElisionFilterFactoryTests.java new file mode 100644 index 00000000000..dbfd49d5649 --- /dev/null +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ElisionFilterFactoryTests.java @@ -0,0 +1,43 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.analysis.common; + +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.env.Environment; +import org.elasticsearch.index.analysis.AnalysisTestsHelper; +import org.elasticsearch.test.ESTokenStreamTestCase; + +import java.io.IOException; + +public class ElisionFilterFactoryTests extends ESTokenStreamTestCase { + + public void testElisionFilterWithNoArticles() throws IOException { + Settings settings = Settings.builder() + .put("index.analysis.filter.elision.type", "elision") + .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) + .build(); + + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, + () -> AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new CommonAnalysisPlugin())); + + assertEquals("elision filter requires [articles] or [articles_path] setting", e.getMessage()); + } + +} diff --git a/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml b/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml index d6fe6b9a980..63658d486a1 100644 --- a/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml +++ b/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml @@ -587,6 +587,20 @@ - length: { tokens: 1 } - match: { tokens.0.token: avion } + - do: + catch: bad_request + indices.create: + index: test2 + body: + settings: + analysis: + filter: + my_elision: + type: elision + - match: { status: 400 } + - match: { error.type: illegal_argument_exception } + - match: { error.reason: "elision filter requires [articles] or [articles_path] setting" } + --- "stemmer": - do: