Require [articles] setting in elision filter (#43083)

We should throw an exception at construction time if a list of
articles is not provided, otherwise we can get random NPEs during
indexing.

Relates to #43002
This commit is contained in:
Alan Woodward 2019-06-27 08:56:26 +01:00
parent bed7e68014
commit 05a7333eca
5 changed files with 64 additions and 3 deletions

View File

@ -4,8 +4,9 @@
A token filter which removes elisions. For example, "l'avion" (the
plane) will be tokenized as "avion" (plane).
Accepts `articles` parameter which is a set of stop words articles. Also accepts
`articles_case`, which indicates whether the filter treats those articles as
Requires either an `articles` parameter which is a set of stop word articles, or
`articles_path` which points to a text file containing the stop set. Also optionally
accepts `articles_case`, which indicates whether the filter treats those articles as
case sensitive.
For example:

View File

@ -239,7 +239,7 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, Scri
filters.put("dutch_stem", DutchStemTokenFilterFactory::new);
filters.put("edge_ngram", EdgeNGramTokenFilterFactory::new);
filters.put("edgeNGram", EdgeNGramTokenFilterFactory::new);
filters.put("elision", ElisionTokenFilterFactory::new);
filters.put("elision", requiresAnalysisSettings(ElisionTokenFilterFactory::new));
filters.put("fingerprint", FingerprintTokenFilterFactory::new);
filters.put("flatten_graph", FlattenGraphTokenFilterFactory::new);
filters.put("french_stem", FrenchStemTokenFilterFactory::new);

View File

@ -36,6 +36,9 @@ public class ElisionTokenFilterFactory extends AbstractTokenFilterFactory implem
/**
 * Creates the elision filter factory and resolves its article set up front.
 *
 * The article set is obtained from {@code Analysis.parseArticles(env, settings)}.
 * A {@code null} result is rejected here, at construction time, rather than
 * being stored and dereferenced later.
 *
 * @param indexSettings index-level settings, forwarded to the superclass
 * @param env           environment passed to {@code Analysis.parseArticles}
 * @param name          name of this filter, forwarded to the superclass
 * @param settings      filter settings, forwarded to the superclass and to
 *                      {@code Analysis.parseArticles}
 * @throws IllegalArgumentException if {@code Analysis.parseArticles} returns {@code null}
 */
ElisionTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
this.articles = Analysis.parseArticles(env, settings);
// Fail fast: a missing article set should surface as a configuration error now,
// not as a NullPointerException when the filter is eventually used.
if (this.articles == null) {
throw new IllegalArgumentException("elision filter requires [articles] or [articles_path] setting");
}
}
@Override

View File

@ -0,0 +1,43 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.analysis.common;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.analysis.AnalysisTestsHelper;
import org.elasticsearch.test.ESTokenStreamTestCase;
import java.io.IOException;
public class ElisionFilterFactoryTests extends ESTokenStreamTestCase {
public void testElisionFilterWithNoArticles() throws IOException {
Settings settings = Settings.builder()
.put("index.analysis.filter.elision.type", "elision")
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
() -> AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new CommonAnalysisPlugin()));
assertEquals("elision filter requires [articles] or [articles_path] setting", e.getMessage());
}
}

View File

@ -587,6 +587,20 @@
- length: { tokens: 1 }
- match: { tokens.0.token: avion }
- do:
catch: bad_request
indices.create:
index: test2
body:
settings:
analysis:
filter:
my_elision:
type: elision
- match: { status: 400 }
- match: { error.type: illegal_argument_exception }
- match: { error.reason: "elision filter requires [articles] or [articles_path] setting" }
---
"stemmer":
- do: