Finish exposing FlattenGraphTokenFilter (#22667)

parent e71b26f480
commit 1d1bdd476c
@@ -166,8 +166,8 @@ public final class AnalysisRegistry implements Closeable {
          * instead of building the infrastructure for plugins we rather make it a real exception to not pollute the general interface and
          * hide internal data-structures as much as possible.
          */
-        tokenFilters.put("synonym", requriesAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings)));
-        tokenFilters.put("synonym_graph", requriesAnalysisSettings((is, env, name, settings) -> new SynonymGraphTokenFilterFactory(is, env, this, name, settings)));
+        tokenFilters.put("synonym", requiresAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings)));
+        tokenFilters.put("synonym_graph", requiresAnalysisSettings((is, env, name, settings) -> new SynonymGraphTokenFilterFactory(is, env, this, name, settings)));
         return buildMapping(Component.FILTER, indexSettings, tokenFiltersSettings, Collections.unmodifiableMap(tokenFilters), prebuiltAnalysis.tokenFilterFactories);
     }

@@ -229,9 +229,9 @@ public final class AnalysisRegistry implements Closeable {
              * hide internal data-structures as much as possible.
              */
            if ("synonym".equals(typeName)) {
-               return requriesAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings));
+               return requiresAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings));
            } else if ("synonym_graph".equals(typeName)) {
-               return requriesAnalysisSettings((is, env, name, settings) -> new SynonymGraphTokenFilterFactory(is, env, this, name, settings));
+               return requiresAnalysisSettings((is, env, name, settings) -> new SynonymGraphTokenFilterFactory(is, env, this, name, settings));
            } else {
                return getAnalysisProvider(Component.FILTER, tokenFilters, tokenFilter, typeName);
            }
@@ -258,7 +258,7 @@ public final class AnalysisRegistry implements Closeable {
            }
        }

-    private static <T> AnalysisModule.AnalysisProvider<T> requriesAnalysisSettings(AnalysisModule.AnalysisProvider<T> provider) {
+    private static <T> AnalysisModule.AnalysisProvider<T> requiresAnalysisSettings(AnalysisModule.AnalysisProvider<T> provider) {
        return new AnalysisModule.AnalysisProvider<T>() {
            @Override
            public T get(IndexSettings indexSettings, Environment environment, String name, Settings settings) throws IOException {
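As a reading aid for the hunk above: the renamed helper only decorates a provider so the registry knows the resulting factory needs index-scoped analysis settings. A minimal sketch of that wrapper pattern, with the method bodies assumed rather than copied from this commit:

    // Sketch only: delegate creation unchanged, but flag the provider as requiring analysis settings.
    private static <T> AnalysisModule.AnalysisProvider<T> requiresAnalysisSettings(AnalysisModule.AnalysisProvider<T> provider) {
        return new AnalysisModule.AnalysisProvider<T>() {
            @Override
            public T get(IndexSettings indexSettings, Environment environment, String name, Settings settings) throws IOException {
                return provider.get(indexSettings, environment, name, settings); // same factory as before
            }

            @Override
            public boolean requiresAnalysisSettings() {
                return true; // assumed: the whole point of the wrapper
            }
        };
    }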
@@ -60,6 +60,7 @@ import org.elasticsearch.index.analysis.EnglishAnalyzerProvider;
 import org.elasticsearch.index.analysis.FingerprintAnalyzerProvider;
 import org.elasticsearch.index.analysis.FingerprintTokenFilterFactory;
 import org.elasticsearch.index.analysis.FinnishAnalyzerProvider;
+import org.elasticsearch.index.analysis.FlattenGraphTokenFilterFactory;
 import org.elasticsearch.index.analysis.FrenchAnalyzerProvider;
 import org.elasticsearch.index.analysis.FrenchStemTokenFilterFactory;
 import org.elasticsearch.index.analysis.GalicianAnalyzerProvider;
@@ -226,6 +227,7 @@ public final class AnalysisModule {
        tokenFilters.register("word_delimiter", WordDelimiterTokenFilterFactory::new);
        tokenFilters.register("delimited_payload_filter", DelimitedPayloadTokenFilterFactory::new);
        tokenFilters.register("elision", ElisionTokenFilterFactory::new);
+       tokenFilters.register("flatten_graph", FlattenGraphTokenFilterFactory::new);
        tokenFilters.register("keep", requriesAnalysisSettings(KeepWordFilterFactory::new));
        tokenFilters.register("keep_types", requriesAnalysisSettings(KeepTypesFilterFactory::new));
        tokenFilters.register("pattern_capture", requriesAnalysisSettings(PatternCaptureGroupTokenFilterFactory::new));
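The factory being registered is not shown in this excerpt. For orientation, a token filter factory exposing Lucene's FlattenGraphFilter would plausibly look like the sketch below; the constructor shape matches the provider signature used by register(...), but the class body and the Lucene import location are assumptions, not code from this commit:

    package org.elasticsearch.index.analysis;

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.core.FlattenGraphFilter; // assumed location in lucene-analyzers-common
    import org.elasticsearch.common.settings.Settings;
    import org.elasticsearch.env.Environment;
    import org.elasticsearch.index.IndexSettings;

    // Sketch: the actual flattening is done entirely by Lucene's FlattenGraphFilter.
    public class FlattenGraphTokenFilterFactory extends AbstractTokenFilterFactory {

        public FlattenGraphTokenFilterFactory(IndexSettings indexSettings, Environment environment,
                                              String name, Settings settings) {
            super(indexSettings, name, settings);
        }

        @Override
        public TokenStream create(TokenStream tokenStream) {
            return new FlattenGraphFilter(tokenStream);
        }
    }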
@@ -13,6 +13,8 @@ include::tokenfilters/standard-tokenfilter.asciidoc[]

 include::tokenfilters/asciifolding-tokenfilter.asciidoc[]

+include::tokenfilters/flatten-graph-tokenfilter.asciidoc[]
+
 include::tokenfilters/length-tokenfilter.asciidoc[]

 include::tokenfilters/lowercase-tokenfilter.asciidoc[]
@@ -0,0 +1,20 @@
+[[analysis-flatten-graph-tokenfilter]]
+=== Flatten Graph Token Filter
+
+experimental[]
+
+The `flatten_graph` token filter accepts an arbitrary graph token
+stream, such as that produced by
+<<analysis-synonym-graph-tokenfilter>>, and flattens it into a single
+linear chain of tokens suitable for indexing.
+
+This is a lossy process, as separate side paths are squashed on top of
+one another, but it is necessary if you use a graph token stream
+during indexing because a Lucene index cannot currently represent a
+graph. For this reason, it's best to apply graph analyzers only at
+search time because that preserves the full graph structure and gives
+correct matches for proximity queries.
+
+For more information on this topic and its various complexities,
+please read the http://blog.mikemccandless.com/2012/04/lucenes-tokenstreams-are-actually.html[Lucene's
+TokenStreams are actually graphs] blog post.
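Not part of this commit, but to make the index-time pairing concrete, here is a small Lucene-level sketch that runs a multi-word synonym through SynonymGraphFilter and then flattens the result for indexing. The synonym entry, field name, and sample text are illustrative only:

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.core.FlattenGraphFilter;
    import org.apache.lucene.analysis.standard.StandardTokenizer;
    import org.apache.lucene.analysis.synonym.SynonymGraphFilter;
    import org.apache.lucene.analysis.synonym.SynonymMap;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.util.CharsRef;
    import org.apache.lucene.util.CharsRefBuilder;

    public class FlattenGraphExample {
        public static void main(String[] args) throws Exception {
            // Multi-word synonym: "ny" -> "new york" (this is what creates a graph during analysis).
            SynonymMap.Builder builder = new SynonymMap.Builder(true);
            CharsRef multiWord = SynonymMap.Builder.join(new String[] {"new", "york"}, new CharsRefBuilder());
            builder.add(new CharsRef("ny"), multiWord, true);
            SynonymMap synonyms = builder.build();

            Analyzer indexAnalyzer = new Analyzer() {
                @Override
                protected TokenStreamComponents createComponents(String fieldName) {
                    Tokenizer tokenizer = new StandardTokenizer();
                    TokenStream stream = new SynonymGraphFilter(tokenizer, synonyms, true);
                    // Flatten the graph so the result can be indexed (lossy, as the docs above note).
                    stream = new FlattenGraphFilter(stream);
                    return new TokenStreamComponents(tokenizer, stream);
                }
            };

            // Print the flattened tokens for a sample input.
            try (TokenStream ts = indexAnalyzer.tokenStream("body", "ny pizza")) {
                CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
                ts.reset();
                while (ts.incrementToken()) {
                    System.out.println(term);
                }
                ts.end();
            }
        }
    }

At search time you would leave out the FlattenGraphFilter step so the full graph is preserved for proximity matching.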
@@ -8,9 +8,8 @@ including multi-word synonyms correctly during the analysis process.

 In order to properly handle multi-word synonyms this token filter
 creates a "graph token stream" during processing. For more information
-on this topic and it's various complexities, please read
-http://blog.mikemccandless.com/2012/04/lucenes-tokenstreams-are-actually.html[Lucene's TokenStreams are actually graphs!]
-by Michael McCandless.
+on this topic and its various complexities, please read the
+http://blog.mikemccandless.com/2012/04/lucenes-tokenstreams-are-actually.html[Lucene's TokenStreams are actually graphs] blog post.

 ["NOTE",id="synonym-graph-index-note"]
 ===============================