Enable reloading of synonym_graph filters (#45135)

Reloading of synonym_graph filter doesn't work currently because the search time
AnalysisMode doesn't get propagated to the TokenFilterFactory emitted by the
graph filters getChainAwareTokenFilterFactory() method. This change fixes that.

Closes #45127
This commit is contained in:
Christoph Büscher 2019-08-02 14:34:22 +02:00
parent f657f3914d
commit 3366726ad1
3 changed files with 56 additions and 29 deletions

View File

@ -26,6 +26,7 @@ import org.apache.lucene.analysis.synonym.SynonymMap;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment; import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AnalysisMode;
import org.elasticsearch.index.analysis.CharFilterFactory; import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.TokenFilterFactory; import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.analysis.TokenizerFactory; import org.elasticsearch.index.analysis.TokenizerFactory;
@ -62,6 +63,11 @@ public class SynonymGraphTokenFilterFactory extends SynonymTokenFilterFactory {
public TokenStream create(TokenStream tokenStream) { public TokenStream create(TokenStream tokenStream) {
return synonyms.fst == null ? tokenStream : new SynonymGraphFilter(tokenStream, synonyms, false); return synonyms.fst == null ? tokenStream : new SynonymGraphFilter(tokenStream, synonyms, false);
} }
@Override
public AnalysisMode getAnalysisMode() {
return analysisMode;
}
}; };
} }

View File

@ -51,7 +51,7 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
private final boolean lenient; private final boolean lenient;
protected final Settings settings; protected final Settings settings;
protected final Environment environment; protected final Environment environment;
private final boolean updateable; protected final AnalysisMode analysisMode;
SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env,
String name, Settings settings) { String name, Settings settings) {
@ -67,13 +67,14 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
this.expand = settings.getAsBoolean("expand", true); this.expand = settings.getAsBoolean("expand", true);
this.lenient = settings.getAsBoolean("lenient", false); this.lenient = settings.getAsBoolean("lenient", false);
this.format = settings.get("format", ""); this.format = settings.get("format", "");
this.updateable = settings.getAsBoolean("updateable", false); boolean updateable = settings.getAsBoolean("updateable", false);
this.analysisMode = updateable ? AnalysisMode.SEARCH_TIME : AnalysisMode.ALL;
this.environment = env; this.environment = env;
} }
@Override @Override
public AnalysisMode getAnalysisMode() { public AnalysisMode getAnalysisMode() {
return this.updateable ? AnalysisMode.SEARCH_TIME : AnalysisMode.ALL; return this.analysisMode;
} }
@Override @Override
@ -109,7 +110,7 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
@Override @Override
public AnalysisMode getAnalysisMode() { public AnalysisMode getAnalysisMode() {
return updateable ? AnalysisMode.SEARCH_TIME : AnalysisMode.ALL; return analysisMode;
} }
}; };
} }

View File

@ -57,16 +57,22 @@ public class ReloadSynonymAnalyzerTests extends ESSingleNodeTestCase {
} }
final String indexName = "test"; final String indexName = "test";
final String analyzerName = "my_synonym_analyzer"; final String synonymAnalyzerName = "synonym_analyzer";
final String synonymGraphAnalyzerName = "synonym_graph_analyzer";
assertAcked(client().admin().indices().prepareCreate(indexName).setSettings(Settings.builder() assertAcked(client().admin().indices().prepareCreate(indexName).setSettings(Settings.builder()
.put("index.number_of_shards", 5) .put("index.number_of_shards", 5)
.put("index.number_of_replicas", 0) .put("index.number_of_replicas", 0)
.put("analysis.analyzer." + analyzerName + ".tokenizer", "standard") .put("analysis.analyzer." + synonymAnalyzerName + ".tokenizer", "standard")
.putList("analysis.analyzer." + analyzerName + ".filter", "lowercase", "my_synonym_filter") .putList("analysis.analyzer." + synonymAnalyzerName + ".filter", "lowercase", "synonym_filter")
.put("analysis.filter.my_synonym_filter.type", "synonym") .put("analysis.analyzer." + synonymGraphAnalyzerName + ".tokenizer", "standard")
.put("analysis.filter.my_synonym_filter.updateable", "true") .putList("analysis.analyzer." + synonymGraphAnalyzerName + ".filter", "lowercase", "synonym_graph_filter")
.put("analysis.filter.my_synonym_filter.synonyms_path", synonymsFileName)) .put("analysis.filter.synonym_filter.type", "synonym")
.addMapping("_doc", "field", "type=text,analyzer=standard,search_analyzer=" + analyzerName)); .put("analysis.filter.synonym_filter.updateable", "true")
.put("analysis.filter.synonym_filter.synonyms_path", synonymsFileName)
.put("analysis.filter.synonym_graph_filter.type", "synonym_graph")
.put("analysis.filter.synonym_graph_filter.updateable", "true")
.put("analysis.filter.synonym_graph_filter.synonyms_path", synonymsFileName))
.addMapping("_doc", "field", "type=text,analyzer=standard,search_analyzer=" + synonymAnalyzerName));
client().prepareIndex(indexName, "_doc", "1").setSource("field", "Foo").get(); client().prepareIndex(indexName, "_doc", "1").setSource("field", "Foo").get();
assertNoFailures(client().admin().indices().prepareRefresh(indexName).execute().actionGet()); assertNoFailures(client().admin().indices().prepareRefresh(indexName).execute().actionGet());
@ -75,10 +81,18 @@ public class ReloadSynonymAnalyzerTests extends ESSingleNodeTestCase {
assertHitCount(response, 1L); assertHitCount(response, 1L);
response = client().prepareSearch(indexName).setQuery(QueryBuilders.matchQuery("field", "buzz")).get(); response = client().prepareSearch(indexName).setQuery(QueryBuilders.matchQuery("field", "buzz")).get();
assertHitCount(response, 0L); assertHitCount(response, 0L);
Response analyzeResponse = client().admin().indices().prepareAnalyze(indexName, "foo").setAnalyzer(analyzerName).get();
assertEquals(2, analyzeResponse.getTokens().size()); {
assertEquals("foo", analyzeResponse.getTokens().get(0).getTerm()); for (String analyzerName : new String[] { synonymAnalyzerName, synonymGraphAnalyzerName }) {
assertEquals("baz", analyzeResponse.getTokens().get(1).getTerm()); Response analyzeResponse = client().admin().indices().prepareAnalyze(indexName, "foo").setAnalyzer(analyzerName)
.get();
assertEquals(2, analyzeResponse.getTokens().size());
Set<String> tokens = new HashSet<>();
analyzeResponse.getTokens().stream().map(AnalyzeToken::getTerm).forEach(t -> tokens.add(t));
assertTrue(tokens.contains("foo"));
assertTrue(tokens.contains("baz"));
}
}
// now update synonyms file and trigger reloading // now update synonyms file and trigger reloading
try (PrintWriter out = new PrintWriter( try (PrintWriter out = new PrintWriter(
@ -89,16 +103,22 @@ public class ReloadSynonymAnalyzerTests extends ESSingleNodeTestCase {
.actionGet(); .actionGet();
assertNoFailures(reloadResponse); assertNoFailures(reloadResponse);
Set<String> reloadedAnalyzers = reloadResponse.getReloadDetails().get(indexName).getReloadedAnalyzers(); Set<String> reloadedAnalyzers = reloadResponse.getReloadDetails().get(indexName).getReloadedAnalyzers();
assertEquals(1, reloadedAnalyzers.size()); assertEquals(2, reloadedAnalyzers.size());
assertTrue(reloadedAnalyzers.contains(analyzerName)); assertTrue(reloadedAnalyzers.contains(synonymAnalyzerName));
assertTrue(reloadedAnalyzers.contains(synonymGraphAnalyzerName));
analyzeResponse = client().admin().indices().prepareAnalyze(indexName, "Foo").setAnalyzer(analyzerName).get(); {
assertEquals(3, analyzeResponse.getTokens().size()); for (String analyzerName : new String[] { synonymAnalyzerName, synonymGraphAnalyzerName }) {
Set<String> tokens = new HashSet<>(); Response analyzeResponse = client().admin().indices().prepareAnalyze(indexName, "foo").setAnalyzer(analyzerName)
analyzeResponse.getTokens().stream().map(AnalyzeToken::getTerm).forEach(t -> tokens.add(t)); .get();
assertTrue(tokens.contains("foo")); assertEquals(3, analyzeResponse.getTokens().size());
assertTrue(tokens.contains("baz")); Set<String> tokens = new HashSet<>();
assertTrue(tokens.contains("buzz")); analyzeResponse.getTokens().stream().map(AnalyzeToken::getTerm).forEach(t -> tokens.add(t));
assertTrue(tokens.contains("foo"));
assertTrue(tokens.contains("baz"));
assertTrue(tokens.contains("buzz"));
}
}
response = client().prepareSearch(indexName).setQuery(QueryBuilders.matchQuery("field", "baz")).get(); response = client().prepareSearch(indexName).setQuery(QueryBuilders.matchQuery("field", "baz")).get();
assertHitCount(response, 1L); assertHitCount(response, 1L);
@ -128,15 +148,15 @@ public class ReloadSynonymAnalyzerTests extends ESSingleNodeTestCase {
.put("index.number_of_shards", 5) .put("index.number_of_shards", 5)
.put("index.number_of_replicas", 0) .put("index.number_of_replicas", 0)
.put("analysis.analyzer." + analyzerName + ".tokenizer", "standard") .put("analysis.analyzer." + analyzerName + ".tokenizer", "standard")
.putList("analysis.analyzer." + analyzerName + ".filter", "lowercase", "my_synonym_filter") .putList("analysis.analyzer." + analyzerName + ".filter", "lowercase", "synonym_filter")
.put("analysis.filter.my_synonym_filter.type", "synonym") .put("analysis.filter.synonym_filter.type", "synonym")
.put("analysis.filter.my_synonym_filter.updateable", "true") .put("analysis.filter.synonym_filter.updateable", "true")
.put("analysis.filter.my_synonym_filter.synonyms_path", synonymsFileName)) .put("analysis.filter.synonym_filter.synonyms_path", synonymsFileName))
.addMapping("_doc", "field", "type=text,analyzer=" + analyzerName).get()); .addMapping("_doc", "field", "type=text,analyzer=" + analyzerName).get());
assertEquals( assertEquals(
"Failed to parse mapping [_doc]: analyzer [my_synonym_analyzer] " "Failed to parse mapping [_doc]: analyzer [my_synonym_analyzer] "
+ "contains filters [my_synonym_filter] that are not allowed to run in all mode.", + "contains filters [synonym_filter] that are not allowed to run in all mode.",
ex.getMessage()); ex.getMessage());
} }
} }