From 1d1bdd476c855f51180093701e29ed53f355b2d9 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Wed, 18 Jan 2017 11:05:34 -0500 Subject: [PATCH] Finish exposing FlattenGraphTokenFilter (#22667) --- .../index/analysis/AnalysisRegistry.java | 10 +++++----- .../indices/analysis/AnalysisModule.java | 2 ++ docs/reference/analysis/tokenfilters.asciidoc | 2 ++ .../flatten-graph-tokenfilter.asciidoc | 20 +++++++++++++++++++ .../synonym-graph-tokenfilter.asciidoc | 5 ++--- 5 files changed, 31 insertions(+), 8 deletions(-) create mode 100644 docs/reference/analysis/tokenfilters/flatten-graph-tokenfilter.asciidoc diff --git a/core/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java b/core/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java index f2f13479c9d..36357afe678 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java @@ -166,8 +166,8 @@ public final class AnalysisRegistry implements Closeable { * instead of building the infrastructure for plugins we rather make it a real exception to not pollute the general interface and * hide internal data-structures as much as possible. 
*/ - tokenFilters.put("synonym", requriesAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings))); - tokenFilters.put("synonym_graph", requriesAnalysisSettings((is, env, name, settings) -> new SynonymGraphTokenFilterFactory(is, env, this, name, settings))); + tokenFilters.put("synonym", requiresAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings))); + tokenFilters.put("synonym_graph", requiresAnalysisSettings((is, env, name, settings) -> new SynonymGraphTokenFilterFactory(is, env, this, name, settings))); return buildMapping(Component.FILTER, indexSettings, tokenFiltersSettings, Collections.unmodifiableMap(tokenFilters), prebuiltAnalysis.tokenFilterFactories); } @@ -229,9 +229,9 @@ public final class AnalysisRegistry implements Closeable { * hide internal data-structures as much as possible. */ if ("synonym".equals(typeName)) { - return requriesAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings)); + return requiresAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings)); } else if ("synonym_graph".equals(typeName)) { - return requriesAnalysisSettings((is, env, name, settings) -> new SynonymGraphTokenFilterFactory(is, env, this, name, settings)); + return requiresAnalysisSettings((is, env, name, settings) -> new SynonymGraphTokenFilterFactory(is, env, this, name, settings)); } else { return getAnalysisProvider(Component.FILTER, tokenFilters, tokenFilter, typeName); } @@ -258,7 +258,7 @@ public final class AnalysisRegistry implements Closeable { } } - private static AnalysisModule.AnalysisProvider requriesAnalysisSettings(AnalysisModule.AnalysisProvider provider) { + private static AnalysisModule.AnalysisProvider requiresAnalysisSettings(AnalysisModule.AnalysisProvider provider) { return new AnalysisModule.AnalysisProvider() { @Override public T 
get(IndexSettings indexSettings, Environment environment, String name, Settings settings) throws IOException { diff --git a/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java b/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java index fae4c75b655..1aaf3077aea 100644 --- a/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java +++ b/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java @@ -60,6 +60,7 @@ import org.elasticsearch.index.analysis.EnglishAnalyzerProvider; import org.elasticsearch.index.analysis.FingerprintAnalyzerProvider; import org.elasticsearch.index.analysis.FingerprintTokenFilterFactory; import org.elasticsearch.index.analysis.FinnishAnalyzerProvider; +import org.elasticsearch.index.analysis.FlattenGraphTokenFilterFactory; import org.elasticsearch.index.analysis.FrenchAnalyzerProvider; import org.elasticsearch.index.analysis.FrenchStemTokenFilterFactory; import org.elasticsearch.index.analysis.GalicianAnalyzerProvider; @@ -226,6 +227,7 @@ public final class AnalysisModule { tokenFilters.register("word_delimiter", WordDelimiterTokenFilterFactory::new); tokenFilters.register("delimited_payload_filter", DelimitedPayloadTokenFilterFactory::new); tokenFilters.register("elision", ElisionTokenFilterFactory::new); + tokenFilters.register("flatten_graph", FlattenGraphTokenFilterFactory::new); tokenFilters.register("keep", requriesAnalysisSettings(KeepWordFilterFactory::new)); tokenFilters.register("keep_types", requriesAnalysisSettings(KeepTypesFilterFactory::new)); tokenFilters.register("pattern_capture", requriesAnalysisSettings(PatternCaptureGroupTokenFilterFactory::new)); diff --git a/docs/reference/analysis/tokenfilters.asciidoc b/docs/reference/analysis/tokenfilters.asciidoc index 227947fb45e..eae6ceabed7 100644 --- a/docs/reference/analysis/tokenfilters.asciidoc +++ b/docs/reference/analysis/tokenfilters.asciidoc @@ -13,6 +13,8 @@ 
include::tokenfilters/standard-tokenfilter.asciidoc[] include::tokenfilters/asciifolding-tokenfilter.asciidoc[] +include::tokenfilters/flatten-graph-tokenfilter.asciidoc[] + include::tokenfilters/length-tokenfilter.asciidoc[] include::tokenfilters/lowercase-tokenfilter.asciidoc[] diff --git a/docs/reference/analysis/tokenfilters/flatten-graph-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/flatten-graph-tokenfilter.asciidoc new file mode 100644 index 00000000000..90b61368983 --- /dev/null +++ b/docs/reference/analysis/tokenfilters/flatten-graph-tokenfilter.asciidoc @@ -0,0 +1,20 @@ +[[analysis-flatten-graph-tokenfilter]] +=== Flatten Graph Token Filter + +experimental[] + +The `flatten_graph` token filter accepts an arbitrary graph token +stream, such as that produced by +<<analysis-synonym-graph-tokenfilter>>, and flattens it into a single +linear chain of tokens suitable for indexing. + +This is a lossy process, as separate side paths are squashed on top of +one another, but it is necessary if you use a graph token stream +during indexing because a Lucene index cannot currently represent a +graph. For this reason, it's best to apply graph analyzers only at +search time because that preserves the full graph structure and gives +correct matches for proximity queries. + +For more information on this topic and its various complexities, +please read the http://blog.mikemccandless.com/2012/04/lucenes-tokenstreams-are-actually.html[Lucene's +TokenStreams are actually graphs] blog post. diff --git a/docs/reference/analysis/tokenfilters/synonym-graph-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/synonym-graph-tokenfilter.asciidoc index 6f15b27c379..5ab498802d9 100644 --- a/docs/reference/analysis/tokenfilters/synonym-graph-tokenfilter.asciidoc +++ b/docs/reference/analysis/tokenfilters/synonym-graph-tokenfilter.asciidoc @@ -8,9 +8,8 @@ including multi-word synonyms correctly during the analysis process. 
In order to properly handle multi-word synonyms this token filter creates a "graph token stream" during processing. For more information -on this topic and it's various complexities, please read -http://blog.mikemccandless.com/2012/04/lucenes-tokenstreams-are-actually.html[Lucene's TokenStreams are actually graphs!] -by Michael McCandless. +on this topic and its various complexities, please read the +http://blog.mikemccandless.com/2012/04/lucenes-tokenstreams-are-actually.html[Lucene's TokenStreams are actually graphs] blog post. ["NOTE",id="synonym-graph-index-note"] ===============================