Finish exposing FlattenGraphTokenFilter (#22667)

This commit is contained in:
Michael McCandless 2017-01-18 11:05:34 -05:00 committed by GitHub
parent e71b26f480
commit 1d1bdd476c
5 changed files with 31 additions and 8 deletions

View File

@@ -166,8 +166,8 @@ public final class AnalysisRegistry implements Closeable {
* instead of building the infrastructure for plugins we rather make it a real exception to not pollute the general interface and
* hide internal data-structures as much as possible.
*/
tokenFilters.put("synonym", requriesAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings)));
tokenFilters.put("synonym_graph", requriesAnalysisSettings((is, env, name, settings) -> new SynonymGraphTokenFilterFactory(is, env, this, name, settings)));
tokenFilters.put("synonym", requiresAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings)));
tokenFilters.put("synonym_graph", requiresAnalysisSettings((is, env, name, settings) -> new SynonymGraphTokenFilterFactory(is, env, this, name, settings)));
return buildMapping(Component.FILTER, indexSettings, tokenFiltersSettings, Collections.unmodifiableMap(tokenFilters), prebuiltAnalysis.tokenFilterFactories);
}
@@ -229,9 +229,9 @@ public final class AnalysisRegistry implements Closeable {
* hide internal data-structures as much as possible.
*/
if ("synonym".equals(typeName)) {
return requriesAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings));
return requiresAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings));
} else if ("synonym_graph".equals(typeName)) {
return requriesAnalysisSettings((is, env, name, settings) -> new SynonymGraphTokenFilterFactory(is, env, this, name, settings));
return requiresAnalysisSettings((is, env, name, settings) -> new SynonymGraphTokenFilterFactory(is, env, this, name, settings));
} else {
return getAnalysisProvider(Component.FILTER, tokenFilters, tokenFilter, typeName);
}
@@ -258,7 +258,7 @@ public final class AnalysisRegistry implements Closeable {
}
}
private static <T> AnalysisModule.AnalysisProvider<T> requriesAnalysisSettings(AnalysisModule.AnalysisProvider<T> provider) {
private static <T> AnalysisModule.AnalysisProvider<T> requiresAnalysisSettings(AnalysisModule.AnalysisProvider<T> provider) {
return new AnalysisModule.AnalysisProvider<T>() {
@Override
public T get(IndexSettings indexSettings, Environment environment, String name, Settings settings) throws IOException {

View File

@@ -60,6 +60,7 @@ import org.elasticsearch.index.analysis.EnglishAnalyzerProvider;
import org.elasticsearch.index.analysis.FingerprintAnalyzerProvider;
import org.elasticsearch.index.analysis.FingerprintTokenFilterFactory;
import org.elasticsearch.index.analysis.FinnishAnalyzerProvider;
import org.elasticsearch.index.analysis.FlattenGraphTokenFilterFactory;
import org.elasticsearch.index.analysis.FrenchAnalyzerProvider;
import org.elasticsearch.index.analysis.FrenchStemTokenFilterFactory;
import org.elasticsearch.index.analysis.GalicianAnalyzerProvider;
@@ -226,6 +227,7 @@ public final class AnalysisModule {
tokenFilters.register("word_delimiter", WordDelimiterTokenFilterFactory::new);
tokenFilters.register("delimited_payload_filter", DelimitedPayloadTokenFilterFactory::new);
tokenFilters.register("elision", ElisionTokenFilterFactory::new);
tokenFilters.register("flatten_graph", FlattenGraphTokenFilterFactory::new);
tokenFilters.register("keep", requriesAnalysisSettings(KeepWordFilterFactory::new));
tokenFilters.register("keep_types", requriesAnalysisSettings(KeepTypesFilterFactory::new));
tokenFilters.register("pattern_capture", requriesAnalysisSettings(PatternCaptureGroupTokenFilterFactory::new));

View File

@@ -13,6 +13,8 @@ include::tokenfilters/standard-tokenfilter.asciidoc[]
include::tokenfilters/asciifolding-tokenfilter.asciidoc[]
include::tokenfilters/flatten-graph-tokenfilter.asciidoc[]
include::tokenfilters/length-tokenfilter.asciidoc[]
include::tokenfilters/lowercase-tokenfilter.asciidoc[]

View File

@@ -0,0 +1,20 @@
[[analysis-flatten-graph-tokenfilter]]
=== Flatten Graph Token Filter
experimental[]
The `flatten_graph` token filter accepts an arbitrary graph token
stream, such as that produced by
<<analysis-synonym-graph-tokenfilter>>, and flattens it into a single
linear chain of tokens suitable for indexing.
This is a lossy process, as separate side paths are squashed on top of
one another, but it is necessary if you use a graph token stream
during indexing because a Lucene index cannot currently represent a
graph. For this reason, it's best to apply graph analyzers only at
search time because that preserves the full graph structure and gives
correct matches for proximity queries.
For more information on this topic and its various complexities,
please read the http://blog.mikemccandless.com/2012/04/lucenes-tokenstreams-are-actually.html[Lucene's
TokenStreams are actually graphs] blog post.

View File

@@ -8,9 +8,8 @@ including multi-word synonyms correctly during the analysis process.
In order to properly handle multi-word synonyms this token filter
creates a "graph token stream" during processing. For more information
on this topic and it's various complexities, please read
http://blog.mikemccandless.com/2012/04/lucenes-tokenstreams-are-actually.html[Lucene's TokenStreams are actually graphs!]
by Michael McCandless.
on this topic and its various complexities, please read the
http://blog.mikemccandless.com/2012/04/lucenes-tokenstreams-are-actually.html[Lucene's TokenStreams are actually graphs] blog post.
["NOTE",id="synonym-graph-index-note"]
===============================