Finish exposing FlattenGraphTokenFilter (#22667)

This commit is contained in:
Michael McCandless 2017-01-18 11:05:34 -05:00 committed by GitHub
parent e71b26f480
commit 1d1bdd476c
5 changed files with 31 additions and 8 deletions

View File

@@ -166,8 +166,8 @@ public final class AnalysisRegistry implements Closeable {
* instead of building the infrastructure for plugins we rather make it a real exception to not pollute the general interface and
* hide internal data-structures as much as possible.
*/
tokenFilters.put("synonym", requriesAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings)));
tokenFilters.put("synonym_graph", requriesAnalysisSettings((is, env, name, settings) -> new SynonymGraphTokenFilterFactory(is, env, this, name, settings)));
tokenFilters.put("synonym", requiresAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings)));
tokenFilters.put("synonym_graph", requiresAnalysisSettings((is, env, name, settings) -> new SynonymGraphTokenFilterFactory(is, env, this, name, settings)));
return buildMapping(Component.FILTER, indexSettings, tokenFiltersSettings, Collections.unmodifiableMap(tokenFilters), prebuiltAnalysis.tokenFilterFactories);
}
@@ -229,9 +229,9 @@ public final class AnalysisRegistry implements Closeable {
* hide internal data-structures as much as possible.
*/
if ("synonym".equals(typeName)) {
return requriesAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings));
return requiresAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings));
} else if ("synonym_graph".equals(typeName)) {
return requriesAnalysisSettings((is, env, name, settings) -> new SynonymGraphTokenFilterFactory(is, env, this, name, settings));
return requiresAnalysisSettings((is, env, name, settings) -> new SynonymGraphTokenFilterFactory(is, env, this, name, settings));
} else {
return getAnalysisProvider(Component.FILTER, tokenFilters, tokenFilter, typeName);
}
@@ -258,7 +258,7 @@ public final class AnalysisRegistry implements Closeable {
}
}
private static <T> AnalysisModule.AnalysisProvider<T> requriesAnalysisSettings(AnalysisModule.AnalysisProvider<T> provider) {
private static <T> AnalysisModule.AnalysisProvider<T> requiresAnalysisSettings(AnalysisModule.AnalysisProvider<T> provider) {
return new AnalysisModule.AnalysisProvider<T>() {
@Override
public T get(IndexSettings indexSettings, Environment environment, String name, Settings settings) throws IOException {

View File

@@ -60,6 +60,7 @@ import org.elasticsearch.index.analysis.EnglishAnalyzerProvider;
import org.elasticsearch.index.analysis.FingerprintAnalyzerProvider;
import org.elasticsearch.index.analysis.FingerprintTokenFilterFactory;
import org.elasticsearch.index.analysis.FinnishAnalyzerProvider;
import org.elasticsearch.index.analysis.FlattenGraphTokenFilterFactory;
import org.elasticsearch.index.analysis.FrenchAnalyzerProvider;
import org.elasticsearch.index.analysis.FrenchStemTokenFilterFactory;
import org.elasticsearch.index.analysis.GalicianAnalyzerProvider;
@@ -226,6 +227,7 @@ public final class AnalysisModule {
tokenFilters.register("word_delimiter", WordDelimiterTokenFilterFactory::new);
tokenFilters.register("delimited_payload_filter", DelimitedPayloadTokenFilterFactory::new);
tokenFilters.register("elision", ElisionTokenFilterFactory::new);
tokenFilters.register("flatten_graph", FlattenGraphTokenFilterFactory::new);
tokenFilters.register("keep", requriesAnalysisSettings(KeepWordFilterFactory::new));
tokenFilters.register("keep_types", requriesAnalysisSettings(KeepTypesFilterFactory::new));
tokenFilters.register("pattern_capture", requriesAnalysisSettings(PatternCaptureGroupTokenFilterFactory::new));

View File

@@ -13,6 +13,8 @@ include::tokenfilters/standard-tokenfilter.asciidoc[]
include::tokenfilters/asciifolding-tokenfilter.asciidoc[]
include::tokenfilters/flatten-graph-tokenfilter.asciidoc[]
include::tokenfilters/length-tokenfilter.asciidoc[]
include::tokenfilters/lowercase-tokenfilter.asciidoc[]

View File

@@ -0,0 +1,20 @@
[[analysis-flatten-graph-tokenfilter]]
=== Flatten Graph Token Filter
experimental[]
The `flatten_graph` token filter accepts an arbitrary graph token
stream, such as that produced by
<<analysis-synonym-graph-tokenfilter>>, and flattens it into a single
linear chain of tokens suitable for indexing.
This is a lossy process, as separate side paths are squashed on top of
one another, but it is necessary if you use a graph token stream
during indexing because a Lucene index cannot currently represent a
graph. For this reason, it's best to apply graph analyzers only at
search time because that preserves the full graph structure and gives
correct matches for proximity queries.
For more information on this topic and its various complexities,
please read the http://blog.mikemccandless.com/2012/04/lucenes-tokenstreams-are-actually.html[Lucene's
TokenStreams are actually graphs] blog post.

View File

@@ -8,9 +8,8 @@ including multi-word synonyms correctly during the analysis process.
In order to properly handle multi-word synonyms this token filter
creates a "graph token stream" during processing. For more information
on this topic and it's various complexities, please read
http://blog.mikemccandless.com/2012/04/lucenes-tokenstreams-are-actually.html[Lucene's TokenStreams are actually graphs!]
by Michael McCandless.
on this topic and its various complexities, please read the
http://blog.mikemccandless.com/2012/04/lucenes-tokenstreams-are-actually.html[Lucene's TokenStreams are actually graphs] blog post.
["NOTE",id="synonym-graph-index-note"]
===============================