diff --git a/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java b/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java
index 1aaf3077aea..61950942e60 100644
--- a/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java
+++ b/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java
@@ -140,6 +140,7 @@ import org.elasticsearch.index.analysis.UniqueTokenFilterFactory;
 import org.elasticsearch.index.analysis.UpperCaseTokenFilterFactory;
 import org.elasticsearch.index.analysis.WhitespaceAnalyzerProvider;
 import org.elasticsearch.index.analysis.WhitespaceTokenizerFactory;
+import org.elasticsearch.index.analysis.WordDelimiterGraphTokenFilterFactory;
 import org.elasticsearch.index.analysis.WordDelimiterTokenFilterFactory;
 import org.elasticsearch.index.analysis.compound.DictionaryCompoundWordTokenFilterFactory;
 import org.elasticsearch.index.analysis.compound.HyphenationCompoundWordTokenFilterFactory;
@@ -225,6 +226,7 @@ public final class AnalysisModule {
         tokenFilters.register("snowball", SnowballTokenFilterFactory::new);
         tokenFilters.register("stemmer", StemmerTokenFilterFactory::new);
         tokenFilters.register("word_delimiter", WordDelimiterTokenFilterFactory::new);
+        tokenFilters.register("word_delimiter_graph", WordDelimiterGraphTokenFilterFactory::new);
         tokenFilters.register("delimited_payload_filter", DelimitedPayloadTokenFilterFactory::new);
         tokenFilters.register("elision", ElisionTokenFilterFactory::new);
         tokenFilters.register("flatten_graph", FlattenGraphTokenFilterFactory::new);
diff --git a/core/src/main/java/org/elasticsearch/indices/analysis/PreBuiltTokenFilters.java b/core/src/main/java/org/elasticsearch/indices/analysis/PreBuiltTokenFilters.java
index 53e79cb9dfe..6c58ab884db 100644
--- a/core/src/main/java/org/elasticsearch/indices/analysis/PreBuiltTokenFilters.java
+++ b/core/src/main/java/org/elasticsearch/indices/analysis/PreBuiltTokenFilters.java
@@ -51,6 +51,7 @@ import org.apache.lucene.analysis.miscellaneous.TrimFilter;
 import org.apache.lucene.analysis.miscellaneous.TruncateTokenFilter;
 import org.apache.lucene.analysis.miscellaneous.UniqueTokenFilter;
 import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
+import org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter;
 import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
 import org.apache.lucene.analysis.ngram.NGramTokenFilter;
 import org.apache.lucene.analysis.payloads.DelimitedPayloadTokenFilter;
@@ -87,6 +88,18 @@ public enum PreBuiltTokenFilters {
         }
     },
+    WORD_DELIMITER_GRAPH(CachingStrategy.ONE) {
+        @Override
+        public TokenStream create(TokenStream tokenStream, Version version) {
+            return new WordDelimiterGraphFilter(tokenStream,
+                    WordDelimiterGraphFilter.GENERATE_WORD_PARTS |
+                    WordDelimiterGraphFilter.GENERATE_NUMBER_PARTS |
+                    WordDelimiterGraphFilter.SPLIT_ON_CASE_CHANGE |
+                    WordDelimiterGraphFilter.SPLIT_ON_NUMERICS |
+                    WordDelimiterGraphFilter.STEM_ENGLISH_POSSESSIVE, null);
+        }
+    },
+
     STOP(CachingStrategy.LUCENE) {
         @Override
         public TokenStream create(TokenStream tokenStream, Version version) {
diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AbstractHighlighterBuilder.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AbstractHighlighterBuilder.java
index e3a78227d9c..3a3c1cfd66d 100644
--- a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AbstractHighlighterBuilder.java
+++
b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AbstractHighlighterBuilder.java @@ -21,6 +21,7 @@ package org.elasticsearch.search.fetch.subphase.highlight; import org.apache.lucene.search.highlight.SimpleFragmenter; import org.apache.lucene.search.highlight.SimpleSpanFragmenter; +import org.elasticsearch.Version; import org.elasticsearch.action.support.ToXContentToBytes; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.ParsingException; @@ -32,10 +33,12 @@ import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryParseContext; +import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder.BoundaryScannerType; import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder.Order; import java.io.IOException; import java.util.Arrays; +import java.util.Locale; import java.util.Map; import java.util.Objects; import java.util.function.BiFunction; @@ -57,8 +60,10 @@ public abstract class AbstractHighlighterBuilderfvh this setting + * controls which scanner to use for fragment boundaries, and defaults to "simple". + */ + @SuppressWarnings("unchecked") + public HB boundaryScannerType(String boundaryScannerType) { + this.boundaryScannerType = BoundaryScannerType.fromString(boundaryScannerType); + return (HB) this; + } + + /** + * When using the highlighterType fvh this setting + * controls which scanner to use for fragment boundaries, and defaults to "simple". + */ + @SuppressWarnings("unchecked") + public HB boundaryScannerType(BoundaryScannerType boundaryScannerType) { + this.boundaryScannerType = boundaryScannerType; + return (HB) this; + } + + /** + * @return the value set by {@link #boundaryScannerType(String)} + */ + public BoundaryScannerType boundaryScannerType() { + return this.boundaryScannerType; + } + /** * When using the highlighterType fvh this setting * controls how far to look for boundary characters, and defaults to 20. @@ -366,6 +420,25 @@ public abstract class AbstractHighlighterBuilderfvh and boundaryScannerType break_iterator, this setting + * controls the locale to use by the BreakIterator, defaults to "root". + */ + @SuppressWarnings("unchecked") + public HB boundaryScannerLocale(String boundaryScannerLocale) { + if (boundaryScannerLocale != null) { + this.boundaryScannerLocale = Locale.forLanguageTag(boundaryScannerLocale); + } + return (HB) this; + } + + /** + * @return the value set by {@link #boundaryScannerLocale(String)} + */ + public Locale boundaryScannerLocale() { + return this.boundaryScannerLocale; + } + /** * Allows to set custom options for custom highlighters. 
*/ @@ -491,12 +564,18 @@ public abstract class AbstractHighlighterBuilder 0) { builder.field(OPTIONS_FIELD.getPreferredName(), options); } @@ -523,8 +602,10 @@ public abstract class AbstractHighlighterBuilder hb.boundaryChars(bc.toCharArray()) , BOUNDARY_CHARS_FIELD); + parser.declareString(HB::boundaryScannerLocale, BOUNDARY_SCANNER_LOCALE_FIELD); parser.declareString(HB::highlighterType, TYPE_FIELD); parser.declareString(HB::fragmenter, FRAGMENTER_FIELD); parser.declareInt(HB::noMatchSize, NO_MATCH_SIZE_FIELD); @@ -562,8 +643,8 @@ public abstract class AbstractHighlighterBuilder SETTING_TV_HIGHLIGHT_MULTI_VALUE = Setting.boolSetting("search.highlight.term_vector_multi_value", true, Setting.Property.NodeScope); @@ -105,12 +114,7 @@ public class FastVectorHighlighter implements Highlighter { FragListBuilder fragListBuilder; BaseFragmentsBuilder fragmentsBuilder; - BoundaryScanner boundaryScanner = DEFAULT_BOUNDARY_SCANNER; - if (field.fieldOptions().boundaryMaxScan() != SimpleBoundaryScanner.DEFAULT_MAX_SCAN - || field.fieldOptions().boundaryChars() != SimpleBoundaryScanner.DEFAULT_BOUNDARY_CHARS) { - boundaryScanner = new SimpleBoundaryScanner(field.fieldOptions().boundaryMaxScan(), - field.fieldOptions().boundaryChars()); - } + final BoundaryScanner boundaryScanner = getBoundaryScanner(field); boolean forceSource = context.highlight().forceSource(field); if (field.fieldOptions().numberOfFragments() == 0) { fragListBuilder = new SingleFragListBuilder(); @@ -206,6 +210,29 @@ public class FastVectorHighlighter implements Highlighter { && fieldMapper.fieldType().storeTermVectorPositions(); } + private static BoundaryScanner getBoundaryScanner(Field field) { + final FieldOptions fieldOptions = field.fieldOptions(); + final Locale boundaryScannerLocale = fieldOptions.boundaryScannerLocale(); + switch(fieldOptions.boundaryScannerType()) { + case SENTENCE: + if (boundaryScannerLocale != null) { + return new BreakIteratorBoundaryScanner(BreakIterator.getSentenceInstance(boundaryScannerLocale)); + } + return DEFAULT_SENTENCE_BOUNDARY_SCANNER; + case WORD: + if (boundaryScannerLocale != null) { + return new BreakIteratorBoundaryScanner(BreakIterator.getWordInstance(boundaryScannerLocale)); + } + return DEFAULT_WORD_BOUNDARY_SCANNER; + default: + if (fieldOptions.boundaryMaxScan() != SimpleBoundaryScanner.DEFAULT_MAX_SCAN + || fieldOptions.boundaryChars() != SimpleBoundaryScanner.DEFAULT_BOUNDARY_CHARS) { + return new SimpleBoundaryScanner(fieldOptions.boundaryMaxScan(), fieldOptions.boundaryChars()); + } + return DEFAULT_SIMPLE_BOUNDARY_SCANNER; + } + } + private class MapperHighlightEntry { public FragListBuilder fragListBuilder; public FragmentsBuilder fragmentsBuilder; diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightBuilder.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightBuilder.java index a063b2900d5..45b8c612a76 100644 --- a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightBuilder.java +++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightBuilder.java @@ -95,9 +95,9 @@ public class HighlightBuilder extends AbstractHighlighterBuilder fields = new ArrayList<>(); @@ -327,12 +327,18 @@ public class HighlightBuilder extends AbstractHighlighterBuilder= values().length) { + throw new IOException("Unknown BoundaryScannerType ordinal [" + ordinal + "]"); + } + return values()[ordinal]; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + 
out.writeVInt(this.ordinal()); + } + + public static BoundaryScannerType fromString(String boundaryScannerType) { + return valueOf(boundaryScannerType.toUpperCase(Locale.ROOT)); + } + + @Override + public String toString() { + return name().toLowerCase(Locale.ROOT); + } + } } diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchContextHighlight.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchContextHighlight.java index d4731718793..2baf73ab5fa 100644 --- a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchContextHighlight.java +++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchContextHighlight.java @@ -20,11 +20,13 @@ package org.elasticsearch.search.fetch.subphase.highlight; import org.apache.lucene.search.Query; +import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder.BoundaryScannerType; import java.util.Arrays; import java.util.Collection; import java.util.HashMap; import java.util.LinkedHashMap; +import java.util.Locale; import java.util.Map; import java.util.Set; @@ -110,10 +112,14 @@ public class SearchContextHighlight { private String fragmenter; + private BoundaryScannerType boundaryScannerType; + private int boundaryMaxScan = -1; private Character[] boundaryChars = null; + private Locale boundaryScannerLocale; + private Query highlightQuery; private int noMatchSize = -1; @@ -168,6 +174,10 @@ public class SearchContextHighlight { return fragmenter; } + public BoundaryScannerType boundaryScannerType() { + return boundaryScannerType; + } + public int boundaryMaxScan() { return boundaryMaxScan; } @@ -176,6 +186,10 @@ public class SearchContextHighlight { return boundaryChars; } + public Locale boundaryScannerLocale() { + return boundaryScannerLocale; + } + public Query highlightQuery() { return highlightQuery; } @@ -260,6 +274,11 @@ public class SearchContextHighlight { return this; } + Builder boundaryScannerType(BoundaryScannerType boundaryScanner) { + fieldOptions.boundaryScannerType = boundaryScanner; + return this; + } + Builder boundaryMaxScan(int boundaryMaxScan) { fieldOptions.boundaryMaxScan = boundaryMaxScan; return this; @@ -270,6 +289,11 @@ public class SearchContextHighlight { return this; } + Builder boundaryScannerLocale(Locale boundaryScannerLocale) { + fieldOptions.boundaryScannerLocale = boundaryScannerLocale; + return this; + } + Builder highlightQuery(Query highlightQuery) { fieldOptions.highlightQuery = highlightQuery; return this; @@ -324,12 +348,18 @@ public class SearchContextHighlight { if (fieldOptions.requireFieldMatch == null) { fieldOptions.requireFieldMatch = globalOptions.requireFieldMatch; } + if (fieldOptions.boundaryScannerType == null) { + fieldOptions.boundaryScannerType = globalOptions.boundaryScannerType; + } if (fieldOptions.boundaryMaxScan == -1) { fieldOptions.boundaryMaxScan = globalOptions.boundaryMaxScan; } if (fieldOptions.boundaryChars == null && globalOptions.boundaryChars != null) { fieldOptions.boundaryChars = Arrays.copyOf(globalOptions.boundaryChars, globalOptions.boundaryChars.length); } + if (fieldOptions.boundaryScannerLocale == null) { + fieldOptions.boundaryScannerLocale = globalOptions.boundaryScannerLocale; + } if (fieldOptions.highlighterType == null) { fieldOptions.highlighterType = globalOptions.highlighterType; } diff --git a/core/src/test/java/org/apache/lucene/analysis/synonym/SynonymGraphFilterTests.java 
b/core/src/test/java/org/apache/lucene/analysis/synonym/SynonymGraphFilterTests.java deleted file mode 100644 index fafe8a954c8..00000000000 --- a/core/src/test/java/org/apache/lucene/analysis/synonym/SynonymGraphFilterTests.java +++ /dev/null @@ -1,1074 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.lucene.analysis.synonym; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.BaseTokenStreamTestCase; -import org.apache.lucene.analysis.CannedTokenStream; -import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.analysis.MockGraphTokenFilter; -import org.apache.lucene.analysis.MockTokenizer; -import org.apache.lucene.analysis.Token; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.TokenStreamToAutomaton; -import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.RandomIndexWriter; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.TokenStreamToTermAutomatonQuery; -import org.apache.lucene.store.Directory; -import org.apache.lucene.util.BytesRefBuilder; -import org.apache.lucene.util.CharsRefBuilder; -import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.IntsRef; -import org.apache.lucene.util.TestUtil; -import org.apache.lucene.util.automaton.Automaton; -import org.apache.lucene.util.automaton.AutomatonTestUtil; -import org.apache.lucene.util.automaton.Operations; -import org.apache.lucene.util.automaton.TooComplexToDeterminizeException; -import org.apache.lucene.util.automaton.Transition; -import org.apache.lucene.util.fst.Util; - -import java.io.IOException; -import java.io.StringReader; -import java.text.ParseException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashSet; -import java.util.List; -import java.util.Set; - -public class SynonymGraphFilterTests extends BaseTokenStreamTestCase { - - /** - * Set a side effect by {@link #getAnalyzer}. 
- */ - private SynonymGraphFilter synFilter; - - // LUCENE-6664 - public static void assertAnalyzesToPositions(Analyzer a, String input, String[] output, String[] types, int[] posIncrements, int[] - posLengths) throws IOException { - assertAnalyzesTo(a, input, output, null, null, types, posIncrements, posLengths); - } - - public void testBasicKeepOrigOneOutput() throws Exception { - SynonymMap.Builder b = new SynonymMap.Builder(true); - add(b, "a b", "x", true); - - Analyzer a = getAnalyzer(b, true); - assertAnalyzesTo(a, "c a b", new String[]{"c", "x", "a", "b"}, new int[]{0, 2, 2, 4}, new int[]{1, 5, 3, 5}, new String[]{"word", - "SYNONYM", "word", "word"}, new int[]{1, 1, 0, 1}, new int[]{1, 2, 1, 1}); - a.close(); - } - - public void testMixedKeepOrig() throws Exception { - SynonymMap.Builder b = new SynonymMap.Builder(true); - add(b, "a b", "x", true); - add(b, "e f", "y", false); - - Analyzer a = getAnalyzer(b, true); - assertAnalyzesTo(a, "c a b c e f g", new String[]{"c", "x", "a", "b", "c", "y", "g"}, new int[]{0, 2, 2, 4, 6, 8, 12}, new - int[]{1, 5, 3, 5, 7, 11, 13}, new String[]{"word", "SYNONYM", "word", "word", "word", "SYNONYM", "word"}, new - int[]{1, 1, 0, - 1, 1, 1, 1}, new int[]{1, 2, 1, 1, 1, 1, 1}); - a.close(); - } - - public void testNoParseAfterBuffer() throws Exception { - SynonymMap.Builder b = new SynonymMap.Builder(true); - add(b, "b a", "x", true); - - Analyzer a = getAnalyzer(b, true); - assertAnalyzesTo(a, "b b b", new String[]{"b", "b", "b"}, new int[]{0, 2, 4}, new int[]{1, 3, 5}, new String[]{"word", "word", - "word"}, new int[]{1, 1, 1}, new int[]{1, 1, 1}); - a.close(); - } - - public void testOneInputMultipleOutputKeepOrig() throws Exception { - SynonymMap.Builder b = new SynonymMap.Builder(true); - add(b, "a b", "x", true); - add(b, "a b", "y", true); - - Analyzer a = getAnalyzer(b, true); - assertAnalyzesTo(a, "c a b c", new String[]{"c", "x", "y", "a", "b", "c"}, new int[]{0, 2, 2, 2, 4, 6}, new int[]{1, 5, 5, 3, 5, - 7}, new String[]{"word", "SYNONYM", "SYNONYM", "word", "word", "word"}, new int[]{1, 1, 0, 0, 1, 1, 1, 1}, new - int[]{1, 2, 2, - 1, 1, 1, 1, 1}); - a.close(); - } - - /** - * parse a syn file with bad syntax - */ - public void testInvalidAnalyzesToNothingOutput() throws Exception { - String testFile = "a => 1"; - Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, false); - SolrSynonymParser parser = new SolrSynonymParser(true, true, analyzer); - try { - parser.parse(new StringReader(testFile)); - fail("didn't get expected exception"); - } catch (ParseException expected) { - // expected exc - } - analyzer.close(); - } - - /** - * parse a syn file with bad syntax - */ - public void testInvalidDoubleMap() throws Exception { - String testFile = "a => b => c"; - Analyzer analyzer = new MockAnalyzer(random()); - SolrSynonymParser parser = new SolrSynonymParser(true, true, analyzer); - try { - parser.parse(new StringReader(testFile)); - fail("didn't get expected exception"); - } catch (ParseException expected) { - // expected exc - } - analyzer.close(); - } - - public void testMoreThanOneLookAhead() throws Exception { - SynonymMap.Builder b = new SynonymMap.Builder(true); - add(b, "a b c d", "x", true); - - Analyzer a = getAnalyzer(b, true); - assertAnalyzesTo(a, "a b c e", new String[]{"a", "b", "c", "e"}, new int[]{0, 2, 4, 6}, new int[]{1, 3, 5, 7}, new - String[]{"word", "word", "word", "word"}, new int[]{1, 1, 1, 1}, new int[]{1, 1, 1, 1}); - a.close(); - } - - public void testLookaheadAfterParse() throws Exception { - 
SynonymMap.Builder b = new SynonymMap.Builder(true); - add(b, "b b", "x", true); - add(b, "b", "y", true); - - Analyzer a = getAnalyzer(b, true); - - assertAnalyzesTo(a, "b a b b", new String[]{"y", "b", "a", "x", "b", "b"}, new int[]{0, 0, 2, 4, 4, 6}, new int[]{1, 1, 3, 7, 5, - 7}, null, new int[]{1, 0, 1, 1, 0, 1}, new int[]{1, 1, 1, 2, 1, 1}, true); - } - - public void testLookaheadSecondParse() throws Exception { - SynonymMap.Builder b = new SynonymMap.Builder(true); - add(b, "b b b", "x", true); - add(b, "b", "y", true); - - Analyzer a = getAnalyzer(b, true); - - assertAnalyzesTo(a, "b b", new String[]{"y", "b", "y", "b"}, new int[]{0, 0, 2, 2}, new int[]{1, 1, 3, 3}, null, new int[]{1, 0, - 1, 0}, new int[]{1, 1, 1, 1}, true); - } - - public void testOneInputMultipleOutputNoKeepOrig() throws Exception { - SynonymMap.Builder b = new SynonymMap.Builder(true); - add(b, "a b", "x", false); - add(b, "a b", "y", false); - - Analyzer a = getAnalyzer(b, true); - assertAnalyzesTo(a, "c a b c", new String[]{"c", "x", "y", "c"}, new int[]{0, 2, 2, 6}, new int[]{1, 5, 5, 7}, new - String[]{"word", "SYNONYM", "SYNONYM", "word"}, new int[]{1, 1, 0, 1}, new int[]{1, 1, 1, 1}); - a.close(); - } - - public void testOneInputMultipleOutputMixedKeepOrig() throws Exception { - SynonymMap.Builder b = new SynonymMap.Builder(true); - add(b, "a b", "x", true); - add(b, "a b", "y", false); - - Analyzer a = getAnalyzer(b, true); - assertAnalyzesTo(a, "c a b c", new String[]{"c", "x", "y", "a", "b", "c"}, new int[]{0, 2, 2, 2, 4, 6}, new int[]{1, 5, 5, 3, 5, - 7}, new String[]{"word", "SYNONYM", "SYNONYM", "word", "word", "word"}, new int[]{1, 1, 0, 0, 1, 1, 1, 1}, new - int[]{1, 2, 2, - 1, 1, 1, 1, 1}); - a.close(); - } - - public void testSynAtEnd() throws Exception { - SynonymMap.Builder b = new SynonymMap.Builder(true); - add(b, "a b", "x", true); - - Analyzer a = getAnalyzer(b, true); - assertAnalyzesTo(a, "c d e a b", new String[]{"c", "d", "e", "x", "a", "b"}, new int[]{0, 2, 4, 6, 6, 8}, new int[]{1, 3, 5, 9, - 7, 9}, new String[]{"word", "word", "word", "SYNONYM", "word", "word"}, new int[]{1, 1, 1, 1, 0, 1}, new int[]{1, 1, 1, - 2, 1, - 1}); - a.close(); - } - - public void testTwoSynsInARow() throws Exception { - SynonymMap.Builder b = new SynonymMap.Builder(true); - add(b, "a", "x", false); - - Analyzer a = getAnalyzer(b, true); - assertAnalyzesTo(a, "c a a b", new String[]{"c", "x", "x", "b"}, new int[]{0, 2, 4, 6}, new int[]{1, 3, 5, 7}, new - String[]{"word", "SYNONYM", "SYNONYM", "word"}, new int[]{1, 1, 1, 1}, new int[]{1, 1, 1, 1}); - a.close(); - } - - public void testBasicKeepOrigTwoOutputs() throws Exception { - SynonymMap.Builder b = new SynonymMap.Builder(true); - add(b, "a b", "x y", true); - add(b, "a b", "m n o", true); - - Analyzer a = getAnalyzer(b, true); - assertAnalyzesTo(a, "c a b d", new String[]{"c", "x", "m", "a", "y", "n", "o", "b", "d"}, new int[]{0, 2, 2, 2, 2, 2, 2, 4, 6}, - new int[]{1, 5, 5, 3, 5, 5, 5, 5, 7}, new String[]{"word", "SYNONYM", "SYNONYM", "word", "SYNONYM", - "SYNONYM", "SYNONYM", - "word", "word"}, new int[]{1, 1, 0, 0, 1, 1, 1, 1, 1}, new int[]{1, 1, 2, 4, 4, 1, 2, 1, 1}); - a.close(); - } - - public void testNoCaptureIfNoMatch() throws Exception { - SynonymMap.Builder b = new SynonymMap.Builder(true); - add(b, "a b", "x y", true); - - Analyzer a = getAnalyzer(b, true); - - assertAnalyzesTo(a, "c d d", new String[]{"c", "d", "d"}, new int[]{0, 2, 4}, new int[]{1, 3, 5}, new String[]{"word", "word", - "word"}, new int[]{1, 1, 1}, new int[]{1, 1, 1}); - 
assertEquals(0, synFilter.getCaptureCount()); - a.close(); - } - - public void testBasicNotKeepOrigOneOutput() throws Exception { - SynonymMap.Builder b = new SynonymMap.Builder(true); - add(b, "a b", "x", false); - - Analyzer a = getAnalyzer(b, true); - assertAnalyzesTo(a, "c a b", new String[]{"c", "x"}, new int[]{0, 2}, new int[]{1, 5}, new String[]{"word", "SYNONYM"}, new - int[]{1, 1}, new int[]{1, 1}); - a.close(); - } - - public void testBasicNoKeepOrigTwoOutputs() throws Exception { - SynonymMap.Builder b = new SynonymMap.Builder(true); - add(b, "a b", "x y", false); - add(b, "a b", "m n o", false); - - Analyzer a = getAnalyzer(b, true); - assertAnalyzesTo(a, "c a b d", new String[]{"c", "x", "m", "y", "n", "o", "d"}, new int[]{0, 2, 2, 2, 2, 2, 6}, new int[]{1, 5, - 5, 5, 5, 5, 7}, new String[]{"word", "SYNONYM", "SYNONYM", "SYNONYM", "SYNONYM", "SYNONYM", - "word"}, new int[]{1, 1, 0, 1, 1, - 1, 1}, new int[]{1, 1, 2, 3, 1, 1, 1}); - a.close(); - } - - public void testIgnoreCase() throws Exception { - SynonymMap.Builder b = new SynonymMap.Builder(true); - add(b, "a b", "x y", false); - add(b, "a b", "m n o", false); - - Analyzer a = getAnalyzer(b, true); - assertAnalyzesTo(a, "c A B D", new String[]{"c", "x", "m", "y", "n", "o", "D"}, new int[]{0, 2, 2, 2, 2, 2, 6}, new int[]{1, 5, - 5, 5, 5, 5, 7}, new String[]{"word", "SYNONYM", "SYNONYM", "SYNONYM", "SYNONYM", "SYNONYM", - "word"}, new int[]{1, 1, 0, 1, 1, - 1, 1}, new int[]{1, 1, 2, 3, 1, 1, 1}); - a.close(); - } - - public void testDoNotIgnoreCase() throws Exception { - SynonymMap.Builder b = new SynonymMap.Builder(true); - add(b, "a b", "x y", false); - add(b, "a b", "m n o", false); - - Analyzer a = getAnalyzer(b, false); - assertAnalyzesTo(a, "c A B D", new String[]{"c", "A", "B", "D"}, new int[]{0, 2, 4, 6}, new int[]{1, 3, 5, 7}, new - String[]{"word", "word", "word", "word"}, new int[]{1, 1, 1, 1}, new int[]{1, 1, 1, 1}); - a.close(); - } - - public void testBufferedFinish1() throws Exception { - SynonymMap.Builder b = new SynonymMap.Builder(true); - add(b, "a b c", "m n o", false); - - Analyzer a = getAnalyzer(b, true); - assertAnalyzesTo(a, "c a b", new String[]{"c", "a", "b"}, new int[]{0, 2, 4}, new int[]{1, 3, 5}, new String[]{"word", "word", - "word"}, new int[]{1, 1, 1}, new int[]{1, 1, 1}); - a.close(); - } - - public void testBufferedFinish2() throws Exception { - SynonymMap.Builder b = new SynonymMap.Builder(true); - add(b, "a b", "m n o", false); - add(b, "d e", "m n o", false); - - Analyzer a = getAnalyzer(b, true); - assertAnalyzesTo(a, "c a d", new String[]{"c", "a", "d"}, new int[]{0, 2, 4}, new int[]{1, 3, 5}, new String[]{"word", "word", - "word"}, new int[]{1, 1, 1}, new int[]{1, 1, 1}); - a.close(); - } - - public void testCanReuse() throws Exception { - SynonymMap.Builder b = new SynonymMap.Builder(true); - add(b, "a b", "x", true); - Analyzer a = getAnalyzer(b, true); - for (int i = 0; i < 10; i++) { - assertAnalyzesTo(a, "c a b", new String[]{"c", "x", "a", "b"}, new int[]{0, 2, 2, 4}, new int[]{1, 5, 3, 5}, new - String[]{"word", "SYNONYM", "word", "word"}, new int[]{1, 1, 0, 1}, new int[]{1, 2, 1, 1}); - } - a.close(); - } - - /** - * Multiple input tokens map to a single output token - */ - public void testManyToOne() throws Exception { - SynonymMap.Builder b = new SynonymMap.Builder(true); - add(b, "a b c", "z", true); - - Analyzer a = getAnalyzer(b, true); - assertAnalyzesTo(a, "a b c d", new String[]{"z", "a", "b", "c", "d"}, new int[]{0, 0, 2, 4, 6}, new int[]{5, 1, 3, 5, 7}, new - 
String[]{"SYNONYM", "word", "word", "word", "word"}, new int[]{1, 0, 1, 1, 1}, new int[]{3, 1, 1, 1, 1}); - a.close(); - } - - public void testBufferAfterMatch() throws Exception { - SynonymMap.Builder b = new SynonymMap.Builder(true); - add(b, "a b c d", "x", true); - add(b, "a b", "y", false); - - // The 'c' token has to be buffered because SynGraphFilter - // needs to know whether a b c d -> x matches: - Analyzer a = getAnalyzer(b, true); - assertAnalyzesTo(a, "f a b c e", new String[]{"f", "y", "c", "e"}, new int[]{0, 2, 6, 8}, new int[]{1, 5, 7, 9}, new - String[]{"word", "SYNONYM", "word", "word"}, new int[]{1, 1, 1, 1}, new int[]{1, 1, 1, 1}); - a.close(); - } - - public void testZeroSyns() throws Exception { - Tokenizer tokenizer = new MockTokenizer(); - tokenizer.setReader(new StringReader("aa bb")); - try { - new SynonymGraphFilter(tokenizer, new SynonymMap.Builder(true).build(), true); - fail("did not hit expected exception"); - } catch (IllegalArgumentException iae) { - // expected - assertEquals("fst must be non-null", iae.getMessage()); - } - } - - // Needs TermAutomatonQuery, which is in sandbox still: - public void testAccurateGraphQuery1() throws Exception { - Directory dir = newDirectory(); - RandomIndexWriter w = new RandomIndexWriter(random(), dir); - Document doc = new Document(); - doc.add(newTextField("field", "wtf happened", Field.Store.NO)); - w.addDocument(doc); - IndexReader r = w.getReader(); - w.close(); - - IndexSearcher s = newSearcher(r); - - SynonymMap.Builder b = new SynonymMap.Builder(true); - add(b, "what the fudge", "wtf", true); - - SynonymMap map = b.build(); - - TokenStreamToTermAutomatonQuery ts2q = new TokenStreamToTermAutomatonQuery(); - - - TokenStream in = new CannedTokenStream(0, 23, token("what", 1, 1, 0, 4), token("the", 1, 1, 5, 8), token("fudge", 1, 1, 9, 14), - token("happened", 1, 1, 15, 23)); - - assertEquals(1, s.count(ts2q.toQuery("field", new SynonymGraphFilter(in, map, true)))); - - in = new CannedTokenStream(0, 12, token("wtf", 1, 1, 0, 3), token("happened", 1, 1, 4, 12)); - - assertEquals(1, s.count(ts2q.toQuery("field", new SynonymGraphFilter(in, map, true)))); - - // "what happened" should NOT match: - in = new CannedTokenStream(0, 13, token("what", 1, 1, 0, 4), token("happened", 1, 1, 5, 13)); - assertEquals(0, s.count(ts2q.toQuery("field", new SynonymGraphFilter(in, map, true)))); - - IOUtils.close(r, dir); - } - - - /** - * If we expand synonyms at search time, the results are correct. 
- */ - // Needs TermAutomatonQuery, which is in sandbox still: - public void testAccurateGraphQuery2() throws Exception { - Directory dir = newDirectory(); - RandomIndexWriter w = new RandomIndexWriter(random(), dir); - Document doc = new Document(); - doc.add(newTextField("field", "say wtf happened", Field.Store.NO)); - w.addDocument(doc); - IndexReader r = w.getReader(); - w.close(); - - IndexSearcher s = newSearcher(r); - - SynonymMap.Builder b = new SynonymMap.Builder(true); - add(b, "fudge", "chocolate", true); - add(b, "what the fudge", "wtf", true); - add(b, "what the", "wut", true); - add(b, "say", "say what", true); - - SynonymMap map = b.build(); - - TokenStream in = new CannedTokenStream(0, 26, token("say", 1, 1, 0, 3), token("what", 1, 1, 3, 7), token("the", 1, 1, 8, 11), - token("fudge", 1, 1, 12, 17), token("happened", 1, 1, 18, 26)); - - TokenStreamToTermAutomatonQuery ts2q = new TokenStreamToTermAutomatonQuery(); - - assertEquals(1, s.count(ts2q.toQuery("field", new SynonymGraphFilter(in, map, true)))); - - // "what happened" should NOT match: - in = new CannedTokenStream(0, 13, token("what", 1, 1, 0, 4), token("happened", 1, 1, 5, 13)); - assertEquals(0, s.count(ts2q.toQuery("field", new SynonymGraphFilter(in, map, true)))); - - IOUtils.close(r, dir); - } - - - // Needs TermAutomatonQuery, which is in sandbox still: - public void testAccurateGraphQuery3() throws Exception { - Directory dir = newDirectory(); - RandomIndexWriter w = new RandomIndexWriter(random(), dir); - Document doc = new Document(); - doc.add(newTextField("field", "say what the fudge happened", Field.Store.NO)); - w.addDocument(doc); - IndexReader r = w.getReader(); - w.close(); - - IndexSearcher s = newSearcher(r); - - SynonymMap.Builder b = new SynonymMap.Builder(true); - add(b, "wtf", "what the fudge", true); - - SynonymMap map = b.build(); - - TokenStream in = new CannedTokenStream(0, 15, token("say", 1, 1, 0, 3), token("wtf", 1, 1, 3, 6), token("happened", 1, 1, 7, 15)); - - TokenStreamToTermAutomatonQuery ts2q = new TokenStreamToTermAutomatonQuery(); - - assertEquals(1, s.count(ts2q.toQuery("field", new SynonymGraphFilter(in, map, true)))); - - // "what happened" should NOT match: - in = new CannedTokenStream(0, 13, token("what", 1, 1, 0, 4), token("happened", 1, 1, 5, 13)); - assertEquals(0, s.count(ts2q.toQuery("field", new SynonymGraphFilter(in, map, true)))); - - IOUtils.close(r, dir); - } - - private static Token token(String term, int posInc, int posLength, int startOffset, int endOffset) { - final Token t = new Token(term, startOffset, endOffset); - t.setPositionIncrement(posInc); - t.setPositionLength(posLength); - return t; - } - - private String randomNonEmptyString() { - while (true) { - String s = TestUtil.randomUnicodeString(random()).trim(); - //String s = TestUtil.randomSimpleString(random()).trim(); - if (s.length() != 0 && s.indexOf('\u0000') == -1) { - return s; - } - } - } - - // Adds MockGraphTokenFilter after SynFilter: - public void testRandomGraphAfter() throws Exception { - final int numIters = atLeast(3); - for (int i = 0; i < numIters; i++) { - SynonymMap.Builder b = new SynonymMap.Builder(random().nextBoolean()); - final int numEntries = atLeast(10); - for (int j = 0; j < numEntries; j++) { - add(b, randomNonEmptyString(), randomNonEmptyString(), random().nextBoolean()); - } - final SynonymMap map = b.build(); - final boolean ignoreCase = random().nextBoolean(); - - final Analyzer analyzer = new Analyzer() { - @Override - protected TokenStreamComponents 
createComponents(String fieldName) { - Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true); - TokenStream syns = new SynonymGraphFilter(tokenizer, map, ignoreCase); - TokenStream graph = new MockGraphTokenFilter(random(), syns); - return new TokenStreamComponents(tokenizer, graph); - } - }; - - checkRandomData(random(), analyzer, 100); - analyzer.close(); - } - } - - public void testEmptyStringInput() throws IOException { - final int numIters = atLeast(10); - for (int i = 0; i < numIters; i++) { - SynonymMap.Builder b = new SynonymMap.Builder(random().nextBoolean()); - final int numEntries = atLeast(10); - for (int j = 0; j < numEntries; j++) { - add(b, randomNonEmptyString(), randomNonEmptyString(), random().nextBoolean()); - } - final boolean ignoreCase = random().nextBoolean(); - - Analyzer analyzer = getAnalyzer(b, ignoreCase); - - checkAnalysisConsistency(random(), analyzer, random().nextBoolean(), ""); - analyzer.close(); - } - } - - /** - * simple random test, doesn't verify correctness. - * does verify it doesnt throw exceptions, or that the stream doesn't misbehave - */ - public void testRandom2() throws Exception { - final int numIters = atLeast(3); - for (int i = 0; i < numIters; i++) { - SynonymMap.Builder b = new SynonymMap.Builder(random().nextBoolean()); - final int numEntries = atLeast(10); - for (int j = 0; j < numEntries; j++) { - add(b, randomNonEmptyString(), randomNonEmptyString(), random().nextBoolean()); - } - final boolean ignoreCase = random().nextBoolean(); - - Analyzer analyzer = getAnalyzer(b, ignoreCase); - checkRandomData(random(), analyzer, 100); - analyzer.close(); - } - } - - /** - * simple random test like testRandom2, but for larger docs - */ - public void testRandomHuge() throws Exception { - final int numIters = atLeast(3); - for (int i = 0; i < numIters; i++) { - SynonymMap.Builder b = new SynonymMap.Builder(random().nextBoolean()); - final int numEntries = atLeast(10); - //if (VERBOSE) { - //System.out.println("TEST: iter=" + i + " numEntries=" + numEntries); - //} - for (int j = 0; j < numEntries; j++) { - add(b, randomNonEmptyString(), randomNonEmptyString(), random().nextBoolean()); - } - final boolean ignoreCase = random().nextBoolean(); - - Analyzer analyzer = getAnalyzer(b, ignoreCase); - checkRandomData(random(), analyzer, 100, 1024); - analyzer.close(); - } - } - - public void testEmptyTerm() throws IOException { - final int numIters = atLeast(10); - for (int i = 0; i < numIters; i++) { - SynonymMap.Builder b = new SynonymMap.Builder(random().nextBoolean()); - final int numEntries = atLeast(10); - for (int j = 0; j < numEntries; j++) { - add(b, randomNonEmptyString(), randomNonEmptyString(), random().nextBoolean()); - } - final boolean ignoreCase = random().nextBoolean(); - - final Analyzer analyzer = getAnalyzer(b, ignoreCase); - - checkAnalysisConsistency(random(), analyzer, random().nextBoolean(), ""); - analyzer.close(); - } - } - - public void testBuilderDedup() throws Exception { - SynonymMap.Builder b = new SynonymMap.Builder(true); - final boolean keepOrig = false; - add(b, "a b", "ab", keepOrig); - add(b, "a b", "ab", keepOrig); - add(b, "a b", "ab", keepOrig); - Analyzer a = getAnalyzer(b, true); - - assertAnalyzesTo(a, "a b", new String[]{"ab"}, new int[]{1}); - a.close(); - } - - public void testBuilderNoDedup() throws Exception { - SynonymMap.Builder b = new SynonymMap.Builder(false); - final boolean keepOrig = false; - add(b, "a b", "ab", keepOrig); - add(b, "a b", "ab", keepOrig); - add(b, "a b", "ab", keepOrig); 
- Analyzer a = getAnalyzer(b, true); - - assertAnalyzesTo(a, "a b", new String[]{"ab", "ab", "ab"}, new int[]{1, 0, 0}); - a.close(); - } - - public void testRecursion1() throws Exception { - SynonymMap.Builder b = new SynonymMap.Builder(true); - final boolean keepOrig = false; - add(b, "zoo", "zoo", keepOrig); - Analyzer a = getAnalyzer(b, true); - - assertAnalyzesTo(a, "zoo zoo $ zoo", new String[]{"zoo", "zoo", "$", "zoo"}, new int[]{1, 1, 1, 1}); - a.close(); - } - - public void testRecursion2() throws Exception { - SynonymMap.Builder b = new SynonymMap.Builder(true); - final boolean keepOrig = false; - add(b, "zoo", "zoo", keepOrig); - add(b, "zoo", "zoo zoo", keepOrig); - Analyzer a = getAnalyzer(b, true); - - // verify("zoo zoo $ zoo", "zoo/zoo zoo/zoo/zoo $/zoo zoo/zoo zoo"); - assertAnalyzesTo(a, "zoo zoo $ zoo", new String[]{"zoo", "zoo", "zoo", "zoo", "zoo", "zoo", "$", "zoo", "zoo", "zoo"}, new - int[]{1, 0, 1, 1, 0, 1, 1, 1, 0, 1}); - a.close(); - } - - public void testKeepOrig() throws Exception { - SynonymMap.Builder b = new SynonymMap.Builder(true); - final boolean keepOrig = true; - add(b, "a b", "ab", keepOrig); - add(b, "a c", "ac", keepOrig); - add(b, "a", "aa", keepOrig); - add(b, "b", "bb", keepOrig); - add(b, "z x c v", "zxcv", keepOrig); - add(b, "x c", "xc", keepOrig); - Analyzer a = getAnalyzer(b, true); - - assertAnalyzesTo(a, "$", new String[]{"$"}, new int[]{1}); - assertAnalyzesTo(a, "a", new String[]{"aa", "a"}, new int[]{1, 0}); - assertAnalyzesTo(a, "a", new String[]{"aa", "a"}, new int[]{1, 0}); - assertAnalyzesTo(a, "$ a", new String[]{"$", "aa", "a"}, new int[]{1, 1, 0}); - assertAnalyzesTo(a, "a $", new String[]{"aa", "a", "$"}, new int[]{1, 0, 1}); - assertAnalyzesTo(a, "$ a !", new String[]{"$", "aa", "a", "!"}, new int[]{1, 1, 0, 1}); - assertAnalyzesTo(a, "a a", new String[]{"aa", "a", "aa", "a"}, new int[]{1, 0, 1, 0}); - assertAnalyzesTo(a, "b", new String[]{"bb", "b"}, new int[]{1, 0}); - assertAnalyzesTo(a, "z x c v", new String[]{"zxcv", "z", "x", "c", "v"}, new int[]{1, 0, 1, 1, 1}); - assertAnalyzesTo(a, "z x c $", new String[]{"z", "xc", "x", "c", "$"}, new int[]{1, 1, 0, 1, 1}); - a.close(); - } - - private Analyzer getAnalyzer(SynonymMap.Builder b, final boolean ignoreCase) throws IOException { - final SynonymMap map = b.build(); - return new Analyzer() { - @Override - protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false); - // Make a local variable so testRandomHuge doesn't share it across threads! 
- SynonymGraphFilter synFilter = new SynonymGraphFilter(tokenizer, map, ignoreCase); - SynonymGraphFilterTests.this.synFilter = synFilter; - return new TokenStreamComponents(tokenizer, synFilter); - } - }; - } - - private void add(SynonymMap.Builder b, String input, String output, boolean keepOrig) { - if (VERBOSE) { - //System.out.println(" add input=" + input + " output=" + output + " keepOrig=" + keepOrig); - } - CharsRefBuilder inputCharsRef = new CharsRefBuilder(); - SynonymMap.Builder.join(input.split(" +"), inputCharsRef); - - CharsRefBuilder outputCharsRef = new CharsRefBuilder(); - SynonymMap.Builder.join(output.split(" +"), outputCharsRef); - - b.add(inputCharsRef.get(), outputCharsRef.get(), keepOrig); - } - - private char[] randomBinaryChars(int minLen, int maxLen, double bias, char base) { - int len = TestUtil.nextInt(random(), minLen, maxLen); - char[] chars = new char[len]; - for (int i = 0; i < len; i++) { - char ch; - if (random().nextDouble() < bias) { - ch = base; - } else { - ch = (char) (base + 1); - } - chars[i] = ch; - } - - return chars; - } - - private static String toTokenString(char[] chars) { - StringBuilder b = new StringBuilder(); - for (char c : chars) { - if (b.length() > 0) { - b.append(' '); - } - b.append(c); - } - return b.toString(); - } - - private static class OneSyn { - char[] in; - char[] out; - boolean keepOrig; - - @Override - public String toString() { - return toTokenString(in) + " --> " + toTokenString(out) + " (keepOrig=" + keepOrig + ")"; - } - } - - public void testRandomSyns() throws Exception { - int synCount = atLeast(10); - double bias = random().nextDouble(); - boolean dedup = random().nextBoolean(); - - SynonymMap.Builder b = new SynonymMap.Builder(dedup); - List syns = new ArrayList<>(); - // Makes random syns from random a / b tokens, mapping to random x / y tokens - //if (VERBOSE) { - // System.out.println("TEST: make " + synCount + " syns"); - // System.out.println(" bias for a over b=" + bias); - // System.out.println(" dedup=" + dedup); - // System.out.println(" sausage=" + sausage); - //} - - int maxSynLength = 0; - - for (int i = 0; i < synCount; i++) { - OneSyn syn = new OneSyn(); - syn.in = randomBinaryChars(1, 5, bias, 'a'); - syn.out = randomBinaryChars(1, 5, 0.5, 'x'); - syn.keepOrig = random().nextBoolean(); - syns.add(syn); - - maxSynLength = Math.max(maxSynLength, syn.in.length); - - //if (VERBOSE) { - // System.out.println(" " + syn); - //} - add(b, toTokenString(syn.in), toTokenString(syn.out), syn.keepOrig); - } - - // Only used w/ VERBOSE: - Analyzer aNoSausageed; - if (VERBOSE) { - aNoSausageed = getAnalyzer(b, true); - } else { - aNoSausageed = null; - } - - Analyzer a = getAnalyzer(b, true); - int iters = atLeast(20); - for (int iter = 0; iter < iters; iter++) { - - String doc = toTokenString(randomBinaryChars(50, 100, bias, 'a')); - //String doc = toTokenString(randomBinaryChars(10, 50, bias, 'a')); - - //if (VERBOSE) { - // System.out.println("TEST: iter=" + iter + " doc=" + doc); - //} - Automaton expected = slowSynFilter(doc, syns); - if (VERBOSE) { - //System.out.println(" expected:\n" + expected.toDot()); - } - Automaton actual = toAutomaton(a.tokenStream("field", new StringReader(doc))); - //if (VERBOSE) { - // System.out.println(" actual:\n" + actual.toDot()); - //} - - assertTrue("maxLookaheadUsed=" + synFilter.getMaxLookaheadUsed() + " maxSynLength=" + maxSynLength, synFilter - .getMaxLookaheadUsed() <= maxSynLength); - - checkAnalysisConsistency(random(), a, random().nextBoolean(), doc); - // We can 
easily have a non-deterministic automaton at this point, e.g. if - // more than one syn matched at given point, or if the syn mapped to an - // output token that also happens to be in the input: - try { - actual = Operations.determinize(actual, 50000); - } catch (TooComplexToDeterminizeException tctde) { - // Unfortunately the syns can easily create difficult-to-determinize graphs: - assertTrue(approxEquals(actual, expected)); - continue; - } - - try { - expected = Operations.determinize(expected, 50000); - } catch (TooComplexToDeterminizeException tctde) { - // Unfortunately the syns can easily create difficult-to-determinize graphs: - assertTrue(approxEquals(actual, expected)); - continue; - } - - assertTrue(approxEquals(actual, expected)); - assertTrue(Operations.sameLanguage(actual, expected)); - } - - a.close(); - } - - /** - * Only used when true equality is too costly to check! - */ - private boolean approxEquals(Automaton actual, Automaton expected) { - // Don't collapse these into one line else the thread stack won't say which direction failed!: - boolean b1 = approxSubsetOf(actual, expected); - boolean b2 = approxSubsetOf(expected, actual); - return b1 && b2; - } - - private boolean approxSubsetOf(Automaton a1, Automaton a2) { - AutomatonTestUtil.RandomAcceptedStrings ras = new AutomatonTestUtil.RandomAcceptedStrings(a1); - for (int i = 0; i < 2000; i++) { - int[] ints = ras.getRandomAcceptedString(random()); - IntsRef path = new IntsRef(ints, 0, ints.length); - if (accepts(a2, path) == false) { - throw new RuntimeException("a2 does not accept " + path); - } - } - - // Presumed true - return true; - } - - /** - * Like {@link Operations#run} except the incoming automaton is allowed to be non-deterministic. - */ - private static boolean accepts(Automaton a, IntsRef path) { - Set states = new HashSet<>(); - states.add(0); - Transition t = new Transition(); - for (int i = 0; i < path.length; i++) { - int digit = path.ints[path.offset + i]; - Set nextStates = new HashSet<>(); - for (int state : states) { - int count = a.initTransition(state, t); - for (int j = 0; j < count; j++) { - a.getNextTransition(t); - if (digit >= t.min && digit <= t.max) { - nextStates.add(t.dest); - } - } - } - states = nextStates; - if (states.isEmpty()) { - return false; - } - } - - for (int state : states) { - if (a.isAccept(state)) { - return true; - } - } - - return false; - } - - /** - * Stupid, slow brute-force, yet hopefully bug-free, synonym filter. 
- */ - private Automaton slowSynFilter(String doc, List syns) { - String[] tokens = doc.split(" +"); - //if (VERBOSE) { - // System.out.println(" doc has " + tokens.length + " tokens"); - //} - int i = 0; - Automaton.Builder a = new Automaton.Builder(); - int lastState = a.createState(); - while (i < tokens.length) { - // Consider all possible syn matches starting at this point: - assert tokens[i].length() == 1; - //if (VERBOSE) { - // System.out.println(" i=" + i); - //} - - List matches = new ArrayList<>(); - for (OneSyn syn : syns) { - if (i + syn.in.length <= tokens.length) { - boolean match = true; - for (int j = 0; j < syn.in.length; j++) { - if (tokens[i + j].charAt(0) != syn.in[j]) { - match = false; - break; - } - } - - if (match) { - if (matches.isEmpty() == false) { - if (syn.in.length < matches.get(0).in.length) { - // Greedy matching: we already found longer syns matching here - continue; - } else if (syn.in.length > matches.get(0).in.length) { - // Greedy matching: all previous matches were shorter, so we drop them - matches.clear(); - } else { - // Keep the current matches: we allow multiple synonyms matching the same input string - } - } - - matches.add(syn); - } - } - } - - int nextState = a.createState(); - - if (matches.isEmpty() == false) { - // We have match(es) starting at this token - //if (VERBOSE) { - // System.out.println(" matches @ i=" + i + ": " + matches); - //} - // We keepOrig if any of the matches said to: - boolean keepOrig = false; - for (OneSyn syn : matches) { - keepOrig |= syn.keepOrig; - } - - if (keepOrig) { - // Add path for the original tokens - addSidePath(a, lastState, nextState, matches.get(0).in); - } - - for (OneSyn syn : matches) { - addSidePath(a, lastState, nextState, syn.out); - } - - i += matches.get(0).in.length; - } else { - a.addTransition(lastState, nextState, tokens[i].charAt(0)); - i++; - } - - lastState = nextState; - } - - a.setAccept(lastState, true); - - return topoSort(a.finish()); - } - - /** - * Just creates a side path from startState to endState with the provided tokens. 
- */ - private static void addSidePath(Automaton.Builder a, int startState, int endState, char[] tokens) { - int lastState = startState; - for (int i = 0; i < tokens.length; i++) { - int nextState; - if (i == tokens.length - 1) { - nextState = endState; - } else { - nextState = a.createState(); - } - - a.addTransition(lastState, nextState, tokens[i]); - - lastState = nextState; - } - } - - private Automaton toAutomaton(TokenStream ts) throws IOException { - PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class); - PositionLengthAttribute posLenAtt = ts.addAttribute(PositionLengthAttribute.class); - CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); - ts.reset(); - Automaton a = new Automaton(); - int srcNode = -1; - int destNode = -1; - int state = a.createState(); - while (ts.incrementToken()) { - assert termAtt.length() == 1; - char c = termAtt.charAt(0); - int posInc = posIncAtt.getPositionIncrement(); - if (posInc != 0) { - srcNode += posInc; - while (state < srcNode) { - state = a.createState(); - } - } - destNode = srcNode + posLenAtt.getPositionLength(); - while (state < destNode) { - state = a.createState(); - } - a.addTransition(srcNode, destNode, c); - } - ts.end(); - ts.close(); - a.finishState(); - a.setAccept(destNode, true); - return a; - } - - /** - * Renumbers nodes according to their topo sort - */ - private Automaton topoSort(Automaton in) { - int[] newToOld = Operations.topoSortStates(in); - int[] oldToNew = new int[newToOld.length]; - - Automaton.Builder a = new Automaton.Builder(); - //System.out.println("remap:"); - for (int i = 0; i < newToOld.length; i++) { - a.createState(); - oldToNew[newToOld[i]] = i; - //System.out.println(" " + newToOld[i] + " -> " + i); - if (in.isAccept(newToOld[i])) { - a.setAccept(i, true); - //System.out.println(" **"); - } - } - - Transition t = new Transition(); - for (int i = 0; i < newToOld.length; i++) { - int count = in.initTransition(newToOld[i], t); - for (int j = 0; j < count; j++) { - in.getNextTransition(t); - a.addTransition(i, oldToNew[t.dest], t.min, t.max); - } - } - - return a.finish(); - } - - /** - * Helper method to validate all strings that can be generated from a token stream. Uses {@link - * TokenStreamToAutomaton} to create an automaton. Asserts the finite strings of the automaton - * are all and only the given valid strings. - * - * @param analyzer analyzer containing the SynonymFilter under test. - * @param text text to be analyzed. - * @param expectedStrings all expected finite strings. - */ - public void assertAllStrings(Analyzer analyzer, String text, String[] expectedStrings) throws IOException { - TokenStream tokenStream = analyzer.tokenStream("dummy", text); - try { - Automaton automaton = new TokenStreamToAutomaton().toAutomaton(tokenStream); - Set finiteStrings = AutomatonTestUtil.getFiniteStringsRecursive(automaton, -1); - - assertEquals("Invalid resulting strings count. 
Expected " + expectedStrings.length + " was " + finiteStrings.size(), - expectedStrings.length, finiteStrings.size()); - - Set expectedStringsSet = new HashSet<>(Arrays.asList(expectedStrings)); - - BytesRefBuilder scratchBytesRefBuilder = new BytesRefBuilder(); - for (IntsRef ir : finiteStrings) { - String s = Util.toBytesRef(ir, scratchBytesRefBuilder).utf8ToString().replace((char) TokenStreamToAutomaton.POS_SEP, ' '); - assertTrue("Unexpected string found: " + s, expectedStringsSet.contains(s)); - } - } finally { - tokenStream.close(); - } - } -} diff --git a/core/src/test/java/org/elasticsearch/index/analysis/BaseWordDelimiterTokenFilterFactoryTestCase.java b/core/src/test/java/org/elasticsearch/index/analysis/BaseWordDelimiterTokenFilterFactoryTestCase.java new file mode 100644 index 00000000000..713e9424759 --- /dev/null +++ b/core/src/test/java/org/elasticsearch/index/analysis/BaseWordDelimiterTokenFilterFactoryTestCase.java @@ -0,0 +1,146 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.index.analysis; + + +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.core.WhitespaceTokenizer; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.env.Environment; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.ESTokenStreamTestCase; + +import java.io.IOException; +import java.io.StringReader; + +/** + * Base class to test {@link WordDelimiterTokenFilterFactory} and {@link WordDelimiterGraphTokenFilterFactory} + */ +public abstract class BaseWordDelimiterTokenFilterFactoryTestCase extends ESTokenStreamTestCase { + final String type; + + public BaseWordDelimiterTokenFilterFactoryTestCase(String type) { + this.type = type; + } + + public void testDefault() throws IOException { + ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(Settings.builder() + .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) + .put("index.analysis.filter.my_word_delimiter.type", type) + .build()); + TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter"); + String source = "PowerShot 500-42 wi-fi wi-fi-4000 j2se O'Neil's"; + String[] expected = new String[]{"Power", "Shot", "500", "42", "wi", "fi", "wi", + "fi", "4000", "j", "2", "se", "O", "Neil"}; + Tokenizer tokenizer = new WhitespaceTokenizer(); + tokenizer.setReader(new StringReader(source)); + assertTokenStreamContents(tokenFilter.create(tokenizer), expected); + } + + public void testCatenateWords() throws IOException { + ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(Settings.builder() + .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) + 
.put("index.analysis.filter.my_word_delimiter.type", type) + .put("index.analysis.filter.my_word_delimiter.catenate_words", "true") + .put("index.analysis.filter.my_word_delimiter.generate_word_parts", "false") + .build()); + TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter"); + String source = "PowerShot 500-42 wi-fi wi-fi-4000 j2se O'Neil's"; + String[] expected = new String[]{"PowerShot", "500", "42", "wifi", "wifi", "4000", "j", "2", "se", "ONeil"}; + Tokenizer tokenizer = new WhitespaceTokenizer(); + tokenizer.setReader(new StringReader(source)); + assertTokenStreamContents(tokenFilter.create(tokenizer), expected); + } + + public void testCatenateNumbers() throws IOException { + ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(Settings.builder() + .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) + .put("index.analysis.filter.my_word_delimiter.type", type) + .put("index.analysis.filter.my_word_delimiter.generate_number_parts", "false") + .put("index.analysis.filter.my_word_delimiter.catenate_numbers", "true") + .build()); + TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter"); + String source = "PowerShot 500-42 wi-fi wi-fi-4000 j2se O'Neil's"; + String[] expected = new String[]{"Power", "Shot", "50042", "wi", "fi", "wi", "fi", "4000", "j", "2", + "se", "O", "Neil"}; + Tokenizer tokenizer = new WhitespaceTokenizer(); + tokenizer.setReader(new StringReader(source)); + assertTokenStreamContents(tokenFilter.create(tokenizer), expected); + } + + public void testCatenateAll() throws IOException { + ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(Settings.builder() + .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) + .put("index.analysis.filter.my_word_delimiter.type", type) + .put("index.analysis.filter.my_word_delimiter.generate_word_parts", "false") + .put("index.analysis.filter.my_word_delimiter.generate_number_parts", "false") + .put("index.analysis.filter.my_word_delimiter.catenate_all", "true") + .build()); + TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter"); + String source = "PowerShot 500-42 wi-fi wi-fi-4000 j2se O'Neil's"; + String[] expected = new String[]{"PowerShot", "50042", "wifi", "wifi4000", "j2se", "ONeil"}; + Tokenizer tokenizer = new WhitespaceTokenizer(); + tokenizer.setReader(new StringReader(source)); + assertTokenStreamContents(tokenFilter.create(tokenizer), expected); + } + + public void testSplitOnCaseChange() throws IOException { + ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(Settings.builder() + .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) + .put("index.analysis.filter.my_word_delimiter.type", type) + .put("index.analysis.filter.my_word_delimiter.split_on_case_change", "false") + .build()); + TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter"); + String source = "PowerShot"; + String[] expected = new String[]{"PowerShot"}; + Tokenizer tokenizer = new WhitespaceTokenizer(); + tokenizer.setReader(new StringReader(source)); + assertTokenStreamContents(tokenFilter.create(tokenizer), expected); + } + + public void testPreserveOriginal() throws IOException { + ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(Settings.builder() + .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) + 
.put("index.analysis.filter.my_word_delimiter.type", type) + .put("index.analysis.filter.my_word_delimiter.preserve_original", "true") + .build()); + TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter"); + String source = "PowerShot 500-42 wi-fi wi-fi-4000 j2se O'Neil's"; + String[] expected = new String[]{"PowerShot", "Power", "Shot", "500-42", "500", "42", "wi-fi", "wi", "fi", + "wi-fi-4000", "wi", "fi", "4000", "j2se", "j", "2", "se", "O'Neil's", "O", "Neil"}; + Tokenizer tokenizer = new WhitespaceTokenizer(); + tokenizer.setReader(new StringReader(source)); + assertTokenStreamContents(tokenFilter.create(tokenizer), expected); + } + + public void testStemEnglishPossessive() throws IOException { + ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(Settings.builder() + .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) + .put("index.analysis.filter.my_word_delimiter.type", type) + .put("index.analysis.filter.my_word_delimiter.stem_english_possessive", "false") + .build()); + TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter"); + String source = "PowerShot 500-42 wi-fi wi-fi-4000 j2se O'Neil's"; + String[] expected = new String[]{"Power", "Shot", "500", "42", "wi", "fi", "wi", "fi", "4000", "j", "2", + "se", "O", "Neil", "s"}; + Tokenizer tokenizer = new WhitespaceTokenizer(); + tokenizer.setReader(new StringReader(source)); + assertTokenStreamContents(tokenFilter.create(tokenizer), expected); + } +} diff --git a/core/src/test/java/org/elasticsearch/index/analysis/WordDelimiterGraphTokenFilterFactoryTests.java b/core/src/test/java/org/elasticsearch/index/analysis/WordDelimiterGraphTokenFilterFactoryTests.java new file mode 100644 index 00000000000..2ae4267104a --- /dev/null +++ b/core/src/test/java/org/elasticsearch/index/analysis/WordDelimiterGraphTokenFilterFactoryTests.java @@ -0,0 +1,75 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.elasticsearch.index.analysis; + + +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.core.WhitespaceTokenizer; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.env.Environment; +import org.elasticsearch.test.ESTestCase; + +import java.io.IOException; +import java.io.StringReader; + +public class WordDelimiterGraphTokenFilterFactoryTests extends BaseWordDelimiterTokenFilterFactoryTestCase { + public WordDelimiterGraphTokenFilterFactoryTests() { + super("word_delimiter_graph"); + } + + public void testMultiTerms() throws IOException { + ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(Settings.builder() + .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) + .put("index.analysis.filter.my_word_delimiter.type", type) + .put("index.analysis.filter.my_word_delimiter.catenate_all", "true") + .put("index.analysis.filter.my_word_delimiter.preserve_original", "true") + .build()); + + TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter"); + String source = "PowerShot 500-42 wi-fi wi-fi-4000 j2se O'Neil's"; + String[] expected = new String[]{"PowerShot", "PowerShot", "Power", "Shot", "50042", "500-42", "500", "42", + "wifi", "wi-fi", "wi", "fi", "wifi4000", "wi-fi-4000", "wi", "fi", "4000", "j2se", "j2se", "j", "2", "se", + "ONeil", "O'Neil's", "O", "Neil" }; + Tokenizer tokenizer = new WhitespaceTokenizer(); + tokenizer.setReader(new StringReader(source)); + int[] expectedIncr = new int[]{1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1}; + int[] expectedPosLen = new int[]{2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 3, 3, 1, 1, 1, 3, 3, 1, 1, 1, 2, 2, 1, 1}; + assertTokenStreamContents(tokenFilter.create(tokenizer), expected, null, null, null, + expectedIncr, expectedPosLen, null); + } + + /** Correct offset order when doing both parts and concatenation: PowerShot is a synonym of Power */ + public void testPartsAndCatenate() throws IOException { + ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(Settings.builder() + .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) + .put("index.analysis.filter.my_word_delimiter.type", type) + .put("index.analysis.filter.my_word_delimiter.catenate_words", "true") + .put("index.analysis.filter.my_word_delimiter.generate_word_parts", "true") + .build()); + TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter"); + String source = "PowerShot"; + int[] expectedIncr = new int[]{1, 0, 1}; + int[] expectedPosLen = new int[]{2, 1, 1}; + String[] expected = new String[]{"PowerShot", "Power", "Shot" }; + Tokenizer tokenizer = new WhitespaceTokenizer(); + tokenizer.setReader(new StringReader(source)); + assertTokenStreamContents(tokenFilter.create(tokenizer), expected, null, null, null, + expectedIncr, expectedPosLen, null); + } +} diff --git a/core/src/test/java/org/elasticsearch/index/analysis/WordDelimiterTokenFilterFactoryTests.java b/core/src/test/java/org/elasticsearch/index/analysis/WordDelimiterTokenFilterFactoryTests.java index 1a7903bcfac..1e919e00bbb 100644 --- a/core/src/test/java/org/elasticsearch/index/analysis/WordDelimiterTokenFilterFactoryTests.java +++ b/core/src/test/java/org/elasticsearch/index/analysis/WordDelimiterTokenFilterFactoryTests.java @@ -24,121 +24,23 @@ import org.apache.lucene.analysis.core.WhitespaceTokenizer; import org.elasticsearch.common.settings.Settings; import 
org.elasticsearch.env.Environment; import org.elasticsearch.test.ESTestCase; -import org.elasticsearch.test.ESTokenStreamTestCase; import java.io.IOException; import java.io.StringReader; -public class WordDelimiterTokenFilterFactoryTests extends ESTokenStreamTestCase { - public void testDefault() throws IOException { - ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(Settings.builder() - .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) - .put("index.analysis.filter.my_word_delimiter.type", "word_delimiter") - .build()); - TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter"); - String source = "PowerShot 500-42 wi-fi wi-fi-4000 j2se O'Neil's"; - String[] expected = new String[]{"Power", "Shot", "500", "42", "wi", "fi", "wi", "fi", "4000", "j", "2", "se", "O", "Neil"}; - Tokenizer tokenizer = new WhitespaceTokenizer(); - tokenizer.setReader(new StringReader(source)); - assertTokenStreamContents(tokenFilter.create(tokenizer), expected); - } - - public void testCatenateWords() throws IOException { - ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(Settings.builder() - .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) - .put("index.analysis.filter.my_word_delimiter.type", "word_delimiter") - .put("index.analysis.filter.my_word_delimiter.catenate_words", "true") - .put("index.analysis.filter.my_word_delimiter.generate_word_parts", "false") - .build()); - TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter"); - String source = "PowerShot 500-42 wi-fi wi-fi-4000 j2se O'Neil's"; - String[] expected = new String[]{"PowerShot", "500", "42", "wifi", "wifi", "4000", "j", "2", "se", "ONeil"}; - Tokenizer tokenizer = new WhitespaceTokenizer(); - tokenizer.setReader(new StringReader(source)); - assertTokenStreamContents(tokenFilter.create(tokenizer), expected); - } - - public void testCatenateNumbers() throws IOException { - ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(Settings.builder() - .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) - .put("index.analysis.filter.my_word_delimiter.type", "word_delimiter") - .put("index.analysis.filter.my_word_delimiter.generate_number_parts", "false") - .put("index.analysis.filter.my_word_delimiter.catenate_numbers", "true") - .build()); - TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter"); - String source = "PowerShot 500-42 wi-fi wi-fi-4000 j2se O'Neil's"; - String[] expected = new String[]{"Power", "Shot", "50042", "wi", "fi", "wi", "fi", "4000", "j", "2", "se", "O", "Neil"}; - Tokenizer tokenizer = new WhitespaceTokenizer(); - tokenizer.setReader(new StringReader(source)); - assertTokenStreamContents(tokenFilter.create(tokenizer), expected); - } - - public void testCatenateAll() throws IOException { - ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(Settings.builder() - .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) - .put("index.analysis.filter.my_word_delimiter.type", "word_delimiter") - .put("index.analysis.filter.my_word_delimiter.generate_word_parts", "false") - .put("index.analysis.filter.my_word_delimiter.generate_number_parts", "false") - .put("index.analysis.filter.my_word_delimiter.catenate_all", "true") - .build()); - TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter"); - String source = "PowerShot 
500-42 wi-fi wi-fi-4000 j2se O'Neil's"; - String[] expected = new String[]{"PowerShot", "50042", "wifi", "wifi4000", "j2se", "ONeil"}; - Tokenizer tokenizer = new WhitespaceTokenizer(); - tokenizer.setReader(new StringReader(source)); - assertTokenStreamContents(tokenFilter.create(tokenizer), expected); - } - - public void testSplitOnCaseChange() throws IOException { - ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(Settings.builder() - .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) - .put("index.analysis.filter.my_word_delimiter.type", "word_delimiter") - .put("index.analysis.filter.my_word_delimiter.split_on_case_change", "false") - .build()); - TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter"); - String source = "PowerShot"; - String[] expected = new String[]{"PowerShot"}; - Tokenizer tokenizer = new WhitespaceTokenizer(); - tokenizer.setReader(new StringReader(source)); - assertTokenStreamContents(tokenFilter.create(tokenizer), expected); - } - - public void testPreserveOriginal() throws IOException { - ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(Settings.builder() - .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) - .put("index.analysis.filter.my_word_delimiter.type", "word_delimiter") - .put("index.analysis.filter.my_word_delimiter.preserve_original", "true") - .build()); - TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter"); - String source = "PowerShot 500-42 wi-fi wi-fi-4000 j2se O'Neil's"; - String[] expected = new String[]{"PowerShot", "Power", "Shot", "500-42", "500", "42", "wi-fi", "wi", "fi", "wi-fi-4000", "wi", "fi", "4000", "j2se", "j", "2", "se", "O'Neil's", "O", "Neil"}; - Tokenizer tokenizer = new WhitespaceTokenizer(); - tokenizer.setReader(new StringReader(source)); - assertTokenStreamContents(tokenFilter.create(tokenizer), expected); - } - - public void testStemEnglishPossessive() throws IOException { - ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(Settings.builder() - .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) - .put("index.analysis.filter.my_word_delimiter.type", "word_delimiter") - .put("index.analysis.filter.my_word_delimiter.stem_english_possessive", "false") - .build()); - TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter"); - String source = "PowerShot 500-42 wi-fi wi-fi-4000 j2se O'Neil's"; - String[] expected = new String[]{"Power", "Shot", "500", "42", "wi", "fi", "wi", "fi", "4000", "j", "2", "se", "O", "Neil", "s"}; - Tokenizer tokenizer = new WhitespaceTokenizer(); - tokenizer.setReader(new StringReader(source)); - assertTokenStreamContents(tokenFilter.create(tokenizer), expected); +public class WordDelimiterTokenFilterFactoryTests extends BaseWordDelimiterTokenFilterFactoryTestCase { + public WordDelimiterTokenFilterFactoryTests() { + super("word_delimiter"); } /** Correct offset order when doing both parts and concatenation: PowerShot is a synonym of Power */ public void testPartsAndCatenate() throws IOException { ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(Settings.builder() - .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) - .put("index.analysis.filter.my_word_delimiter.type", "word_delimiter") - .put("index.analysis.filter.my_word_delimiter.catenate_words", "true") - 
.put("index.analysis.filter.my_word_delimiter.generate_word_parts", "true") - .build()); + .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) + .put("index.analysis.filter.my_word_delimiter.type", type) + .put("index.analysis.filter.my_word_delimiter.catenate_words", "true") + .put("index.analysis.filter.my_word_delimiter.generate_word_parts", "true") + .build()); TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter"); String source = "PowerShot"; String[] expected = new String[]{"Power", "PowerShot", "Shot" }; diff --git a/core/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightBuilderTests.java b/core/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightBuilderTests.java index 944427b7e17..e33b201bf22 100644 --- a/core/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightBuilderTests.java +++ b/core/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightBuilderTests.java @@ -47,6 +47,7 @@ import org.elasticsearch.index.query.QueryParseContext; import org.elasticsearch.index.query.QueryShardContext; import org.elasticsearch.index.query.TermQueryBuilder; import org.elasticsearch.search.SearchModule; +import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder.BoundaryScannerType; import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder.Field; import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder.Order; import org.elasticsearch.search.fetch.subphase.highlight.SearchContextHighlight.FieldOptions; @@ -288,6 +289,7 @@ public class HighlightBuilderTests extends ESTestCase { mergeBeforeChek(highlightBuilder, fieldBuilder, fieldOptions); checkSame.accept(AbstractHighlighterBuilder::boundaryChars, FieldOptions::boundaryChars); + checkSame.accept(AbstractHighlighterBuilder::boundaryScannerType, FieldOptions::boundaryScannerType); checkSame.accept(AbstractHighlighterBuilder::boundaryMaxScan, FieldOptions::boundaryMaxScan); checkSame.accept(AbstractHighlighterBuilder::fragmentSize, FieldOptions::fragmentCharSize); checkSame.accept(AbstractHighlighterBuilder::fragmenter, FieldOptions::fragmenter); @@ -557,12 +559,23 @@ public class HighlightBuilderTests extends ESTestCase { if (randomBoolean()) { highlightBuilder.forceSource(randomBoolean()); } + if (randomBoolean()) { + if (randomBoolean()) { + highlightBuilder.boundaryScannerType(randomFrom(BoundaryScannerType.values())); + } else { + // also test the string setter + highlightBuilder.boundaryScannerType(randomFrom(BoundaryScannerType.values()).toString()); + } + } if (randomBoolean()) { highlightBuilder.boundaryMaxScan(randomIntBetween(0, 10)); } if (randomBoolean()) { highlightBuilder.boundaryChars(randomAsciiOfLengthBetween(1, 10).toCharArray()); } + if (randomBoolean()) { + highlightBuilder.boundaryScannerLocale(randomLocale(random()).toLanguageTag()); + } if (randomBoolean()) { highlightBuilder.noMatchSize(randomIntBetween(0, 10)); } diff --git a/core/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java b/core/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java index 815998ad093..7db99ff3232 100644 --- a/core/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java +++ b/core/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java @@ -44,6 +44,7 @@ import org.elasticsearch.plugins.Plugin; import org.elasticsearch.rest.RestStatus; import 
org.elasticsearch.search.SearchHit; import org.elasticsearch.search.builder.SearchSourceBuilder; +import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder.BoundaryScannerType; import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder.Field; import org.elasticsearch.search.sort.SortOrder; import org.elasticsearch.test.ESIntegTestCase; @@ -57,6 +58,7 @@ import java.io.IOException; import java.util.Collection; import java.util.Collections; import java.util.HashMap; +import java.util.Locale; import java.util.Map; import static org.elasticsearch.client.Requests.searchRequest; @@ -747,7 +749,94 @@ public class HighlighterSearchIT extends ESIntegTestCase { searchResponse = client().prepareSearch("test").setSource(source).get(); assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over")); + } + public void testFastVectorHighlighterWithSentenceBoundaryScanner() throws Exception { + assertAcked(prepareCreate("test").addMapping("type1", type1TermVectorMapping())); + ensureGreen(); + + indexRandom(true, client().prepareIndex("test", "type1") + .setSource("field1", "A sentence with few words. Another sentence with even more words.")); + + logger.info("--> highlighting and searching on 'field' with sentence boundary_scanner"); + SearchSourceBuilder source = searchSource() + .query(termQuery("field1", "sentence")) + .highlighter(highlight() + .field("field1", 20, 2) + .order("score") + .preTags("").postTags("") + .boundaryScannerType(BoundaryScannerType.SENTENCE)); + + SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); + + assertHighlight(searchResponse, 0, "field1", 0, 2, equalTo("A sentence with few words. ")); + assertHighlight(searchResponse, 0, "field1", 1, 2, equalTo("Another sentence with even more words. ")); + } + + public void testFastVectorHighlighterWithSentenceBoundaryScannerAndLocale() throws Exception { + assertAcked(prepareCreate("test").addMapping("type1", type1TermVectorMapping())); + ensureGreen(); + + indexRandom(true, client().prepareIndex("test", "type1") + .setSource("field1", "A sentence with few words. Another sentence with even more words.")); + + logger.info("--> highlighting and searching on 'field' with sentence boundary_scanner"); + SearchSourceBuilder source = searchSource() + .query(termQuery("field1", "sentence")) + .highlighter(highlight() + .field("field1", 20, 2) + .order("score") + .preTags("").postTags("") + .boundaryScannerType(BoundaryScannerType.SENTENCE) + .boundaryScannerLocale(Locale.ENGLISH.toLanguageTag())); + + SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); + + assertHighlight(searchResponse, 0, "field1", 0, 2, equalTo("A sentence with few words. ")); + assertHighlight(searchResponse, 0, "field1", 1, 2, equalTo("Another sentence with even more words. 
")); + } + + public void testFastVectorHighlighterWithWordBoundaryScanner() throws Exception { + assertAcked(prepareCreate("test").addMapping("type1", type1TermVectorMapping())); + ensureGreen(); + + indexRandom(true, client().prepareIndex("test", "type1") + .setSource("field1", "some quick and hairy brown:fox jumped over the lazy dog")); + + logger.info("--> highlighting and searching on 'field' with word boundary_scanner"); + SearchSourceBuilder source = searchSource() + .query(termQuery("field1", "some")) + .highlighter(highlight() + .field("field1", 23, 1) + .order("score") + .preTags("").postTags("") + .boundaryScannerType(BoundaryScannerType.WORD)); + + SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); + + assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("some quick and hairy brown")); + } + + public void testFastVectorHighlighterWithWordBoundaryScannerAndLocale() throws Exception { + assertAcked(prepareCreate("test").addMapping("type1", type1TermVectorMapping())); + ensureGreen(); + + indexRandom(true, client().prepareIndex("test", "type1") + .setSource("field1", "some quick and hairy brown:fox jumped over the lazy dog")); + + logger.info("--> highlighting and searching on 'field' with word boundary_scanner"); + SearchSourceBuilder source = searchSource() + .query(termQuery("field1", "some")) + .highlighter(highlight() + .field("field1", 23, 1) + .order("score") + .preTags("").postTags("") + .boundaryScannerType(BoundaryScannerType.WORD) + .boundaryScannerLocale(Locale.ENGLISH.toLanguageTag())); + + SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); + + assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("some quick and hairy brown")); } /** diff --git a/docs/build.gradle b/docs/build.gradle index 36727b12e50..9fd593e2fae 100644 --- a/docs/build.gradle +++ b/docs/build.gradle @@ -81,6 +81,7 @@ buildRestTests.expectedUnconvertedCandidates = [ 'reference/analysis/tokenfilters/synonym-tokenfilter.asciidoc', 'reference/analysis/tokenfilters/synonym-graph-tokenfilter.asciidoc', 'reference/analysis/tokenfilters/word-delimiter-tokenfilter.asciidoc', + 'reference/analysis/tokenfilters/word-delimiter-graph-tokenfilter.asciidoc', 'reference/cat/snapshots.asciidoc', 'reference/cat/templates.asciidoc', 'reference/cat/thread_pool.asciidoc', diff --git a/docs/reference/analysis/tokenfilters/word-delimiter-graph-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/word-delimiter-graph-tokenfilter.asciidoc new file mode 100644 index 00000000000..01176fa5636 --- /dev/null +++ b/docs/reference/analysis/tokenfilters/word-delimiter-graph-tokenfilter.asciidoc @@ -0,0 +1,97 @@ +[[analysis-word-delimiter-graph-tokenfilter]] +=== Word Delimiter Graph Token Filter + +experimental[] + +Named `word_delimiter_graph`, it splits words into subwords and performs +optional transformations on subword groups. Words are split into +subwords with the following rules: + +* split on intra-word delimiters (by default, all non alpha-numeric +characters). 
+* "Wi-Fi" -> "Wi", "Fi" +* split on case transitions: "PowerShot" -> "Power", "Shot" +* split on letter-number transitions: "SD500" -> "SD", "500" +* leading and trailing intra-word delimiters on each subword are +ignored: "//hello---there, 'dude'" -> "hello", "there", "dude" +* trailing "'s" are removed for each subword: "O'Neil's" -> "O", "Neil" + +Unlike the `word_delimiter`, this token filter correctly handles positions for +multi terms expansion at search-time when any of the following options +are set to true: + + * `preserve_original` + * `catenate_numbers` + * `catenate_words` + * `catenate_all` + +Parameters include: + +`generate_word_parts`:: + If `true` causes parts of words to be + generated: "PowerShot" => "Power" "Shot". Defaults to `true`. + +`generate_number_parts`:: + If `true` causes number subwords to be + generated: "500-42" => "500" "42". Defaults to `true`. + +`catenate_words`:: + If `true` causes maximum runs of word parts to be + catenated: "wi-fi" => "wifi". Defaults to `false`. + +`catenate_numbers`:: + If `true` causes maximum runs of number parts to + be catenated: "500-42" => "50042". Defaults to `false`. + +`catenate_all`:: + If `true` causes all subword parts to be catenated: + "wi-fi-4000" => "wifi4000". Defaults to `false`. + +`split_on_case_change`:: + If `true` causes "PowerShot" to be two tokens; + ("Power-Shot" remains two parts regards). Defaults to `true`. + +`preserve_original`:: + If `true` includes original words in subwords: + "500-42" => "500-42" "500" "42". Defaults to `false`. + +`split_on_numerics`:: + If `true` causes "j2se" to be three tokens; "j" + "2" "se". Defaults to `true`. + +`stem_english_possessive`:: + If `true` causes trailing "'s" to be + removed for each subword: "O'Neil's" => "O", "Neil". Defaults to `true`. + +Advance settings include: + +`protected_words`:: + A list of protected words from being delimiter. + Either an array, or also can set `protected_words_path` which resolved + to a file configured with protected words (one on each line). + Automatically resolves to `config/` based location if exists. + +`type_table`:: + A custom type mapping table, for example (when configured + using `type_table_path`): + +[source,js] +-------------------------------------------------- + # Map the $, %, '.', and ',' characters to DIGIT + # This might be useful for financial data. + $ => DIGIT + % => DIGIT + . => DIGIT + \\u002C => DIGIT + + # in some cases you might not want to split on ZWJ + # this also tests the case where we need a bigger byte[] + # see http://en.wikipedia.org/wiki/Zero-width_joiner + \\u200D => ALPHANUM +-------------------------------------------------- + +NOTE: Using a tokenizer like the `standard` tokenizer may interfere with +the `catenate_*` and `preserve_original` parameters, as the original +string may already have lost punctuation during tokenization. Instead, +you may want to use the `whitespace` tokenizer. + diff --git a/docs/reference/search/request/highlighting.asciidoc b/docs/reference/search/request/highlighting.asciidoc index 30c0e20d5bf..81f454bb158 100644 --- a/docs/reference/search/request/highlighting.asciidoc +++ b/docs/reference/search/request/highlighting.asciidoc @@ -103,8 +103,7 @@ If `term_vector` information is provided by setting `term_vector` to will be used instead of the plain highlighter. 
The fast vector highlighter: * Is faster especially for large fields (> `1MB`) -* Can be customized with `boundary_chars`, `boundary_max_scan`, and - `fragment_offset` (see <>) +* Can be customized with `boundary_scanner` (see <>) * Requires setting `term_vector` to `with_positions_offsets` which increases the size of the index * Can combine matches from multiple fields into one result. See @@ -502,17 +501,23 @@ GET /_search -------------------------------------------------- // CONSOLE -[[boundary-characters]] -==== Boundary Characters +[[boundary-scanners]] +==== Boundary Scanners -When highlighting a field using the fast vector highlighter, -`boundary_chars` can be configured to define what constitutes a boundary -for highlighting. It's a single string with each boundary character -defined in it. It defaults to `.,!? \t\n`. +When highlighting a field using the fast vector highlighter, you can specify +how to break the highlighted fragments using `boundary_scanner`, which accepts +the following values: -The `boundary_max_scan` allows to control how far to look for boundary -characters, and defaults to `20`. +* `chars` (default): allows to configure which characters (`boundary_chars`) +constitute a boundary for highlighting. It's a single string with each boundary +character defined in it (defaults to `.,!? \t\n`). It also allows configuring +the `boundary_max_scan` to control how far to look for boundary characters +(defaults to `20`). +* `word` and `sentence`: use Java's https://docs.oracle.com/javase/8/docs/api/java/text/BreakIterator.html[BreakIterator] +to break the highlighted fragments at the next _word_ or _sentence_ boundary. +You can further specify `boundary_scanner_locale` to control which Locale is used +to search the text for these boundaries. [[matched-fields]] ==== Matched Fields diff --git a/plugins/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/DatabaseReaderLazyLoader.java b/plugins/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/DatabaseReaderLazyLoader.java new file mode 100644 index 00000000000..f73d2ca13c1 --- /dev/null +++ b/plugins/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/DatabaseReaderLazyLoader.java @@ -0,0 +1,62 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.elasticsearch.ingest.geoip; + +import com.maxmind.geoip2.DatabaseReader; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.SetOnce; +import org.elasticsearch.common.CheckedSupplier; +import org.elasticsearch.common.logging.Loggers; + +import java.io.Closeable; +import java.io.IOException; + +/** + * Facilitates lazy loading of the database reader, so that when the geoip plugin is installed, but not used, + * no memory is being wasted on the database reader. + */ +final class DatabaseReaderLazyLoader implements Closeable { + + private static final Logger LOGGER = Loggers.getLogger(DatabaseReaderLazyLoader.class); + + private final String databaseFileName; + private final CheckedSupplier loader; + // package protected for testing only: + final SetOnce databaseReader; + + DatabaseReaderLazyLoader(String databaseFileName, CheckedSupplier loader) { + this.databaseFileName = databaseFileName; + this.loader = loader; + this.databaseReader = new SetOnce<>(); + } + + synchronized DatabaseReader get() throws IOException { + if (databaseReader.get() == null) { + databaseReader.set(loader.get()); + LOGGER.debug("Loaded [{}] geoip database", databaseFileName); + } + return databaseReader.get(); + } + + @Override + public synchronized void close() throws IOException { + IOUtils.close(databaseReader.get()); + } +} diff --git a/plugins/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpProcessor.java b/plugins/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpProcessor.java index 3d1418dc940..2cbaa7a3bb1 100644 --- a/plugins/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpProcessor.java +++ b/plugins/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpProcessor.java @@ -19,19 +19,6 @@ package org.elasticsearch.ingest.geoip; -import java.io.IOException; -import java.net.InetAddress; -import java.security.AccessController; -import java.security.PrivilegedAction; -import java.util.Arrays; -import java.util.Collections; -import java.util.EnumSet; -import java.util.HashMap; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.Set; - import com.maxmind.geoip2.DatabaseReader; import com.maxmind.geoip2.exception.AddressNotFoundException; import com.maxmind.geoip2.model.CityResponse; @@ -49,6 +36,19 @@ import org.elasticsearch.ingest.AbstractProcessor; import org.elasticsearch.ingest.IngestDocument; import org.elasticsearch.ingest.Processor; +import java.io.IOException; +import java.net.InetAddress; +import java.security.AccessController; +import java.security.PrivilegedAction; +import java.util.Arrays; +import java.util.Collections; +import java.util.EnumSet; +import java.util.HashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Set; + import static org.elasticsearch.ingest.ConfigurationUtils.newConfigurationException; import static org.elasticsearch.ingest.ConfigurationUtils.readBooleanProperty; import static org.elasticsearch.ingest.ConfigurationUtils.readOptionalList; @@ -264,9 +264,9 @@ public final class GeoIpProcessor extends AbstractProcessor { ); static final Set DEFAULT_COUNTRY_PROPERTIES = EnumSet.of(Property.CONTINENT_NAME, Property.COUNTRY_ISO_CODE); - private final Map databaseReaders; + private final Map databaseReaders; - public Factory(Map databaseReaders) { + public Factory(Map databaseReaders) { this.databaseReaders = databaseReaders; } @@ -279,12 +279,13 @@ public final class 
GeoIpProcessor extends AbstractProcessor { List propertyNames = readOptionalList(TYPE, processorTag, config, "properties"); boolean ignoreMissing = readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false); - DatabaseReader databaseReader = databaseReaders.get(databaseFile); - if (databaseReader == null) { + DatabaseReaderLazyLoader lazyLoader = databaseReaders.get(databaseFile); + if (lazyLoader == null) { throw newConfigurationException(TYPE, processorTag, "database_file", "database file [" + databaseFile + "] doesn't exist"); } + DatabaseReader databaseReader = lazyLoader.get(); String databaseType = databaseReader.getMetadata().getDatabaseType(); final Set properties; diff --git a/plugins/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IngestGeoIpPlugin.java b/plugins/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IngestGeoIpPlugin.java index 4e5cc5c0237..1571bc99ea4 100644 --- a/plugins/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IngestGeoIpPlugin.java +++ b/plugins/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IngestGeoIpPlugin.java @@ -19,6 +19,15 @@ package org.elasticsearch.ingest.geoip; +import com.maxmind.db.NoCache; +import com.maxmind.db.NodeCache; +import com.maxmind.geoip2.DatabaseReader; +import org.apache.lucene.util.IOUtils; +import org.elasticsearch.common.settings.Setting; +import org.elasticsearch.ingest.Processor; +import org.elasticsearch.plugins.IngestPlugin; +import org.elasticsearch.plugins.Plugin; + import java.io.Closeable; import java.io.IOException; import java.io.InputStream; @@ -35,20 +44,11 @@ import java.util.Map; import java.util.stream.Stream; import java.util.zip.GZIPInputStream; -import com.maxmind.db.NoCache; -import com.maxmind.db.NodeCache; -import com.maxmind.geoip2.DatabaseReader; -import org.apache.lucene.util.IOUtils; -import org.elasticsearch.common.settings.Setting; -import org.elasticsearch.ingest.Processor; -import org.elasticsearch.plugins.IngestPlugin; -import org.elasticsearch.plugins.Plugin; - public class IngestGeoIpPlugin extends Plugin implements IngestPlugin, Closeable { public static final Setting CACHE_SIZE = Setting.longSetting("ingest.geoip.cache_size", 1000, 0, Setting.Property.NodeScope); - private Map databaseReaders; + private Map databaseReaders; @Override public List> getSettings() { @@ -76,12 +76,12 @@ public class IngestGeoIpPlugin extends Plugin implements IngestPlugin, Closeable return Collections.singletonMap(GeoIpProcessor.TYPE, new GeoIpProcessor.Factory(databaseReaders)); } - static Map loadDatabaseReaders(Path geoIpConfigDirectory, NodeCache cache) throws IOException { + static Map loadDatabaseReaders(Path geoIpConfigDirectory, NodeCache cache) throws IOException { if (Files.exists(geoIpConfigDirectory) == false && Files.isDirectory(geoIpConfigDirectory)) { throw new IllegalStateException("the geoip directory [" + geoIpConfigDirectory + "] containing databases doesn't exist"); } - Map databaseReaders = new HashMap<>(); + Map databaseReaders = new HashMap<>(); try (Stream databaseFiles = Files.list(geoIpConfigDirectory)) { PathMatcher pathMatcher = geoIpConfigDirectory.getFileSystem().getPathMatcher("glob:**.mmdb.gz"); // Use iterator instead of forEach otherwise IOException needs to be caught twice... 
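The next hunk replaces the eager `DatabaseReader.Builder(...).build()` call with the lazy loader introduced above. For readers skimming the patch, here is a self-contained sketch of the same pattern using only the JDK; the `LazyLoader` and nested `CheckedSupplier` names are stand-ins invented for this illustration, not the plugin's actual types (the real class is `DatabaseReaderLazyLoader`, built on Elasticsearch's `CheckedSupplier`).

[source,java]
--------------------------------------------------
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

/**
 * Minimal stand-in for the idea behind DatabaseReaderLazyLoader: registration is cheap,
 * the expensive resource is built once on first get(), then reused.
 */
final class LazyLoader<T> {

    interface CheckedSupplier<V> {
        V get() throws IOException;
    }

    private final CheckedSupplier<T> loader;
    private T value; // created lazily on first get()

    LazyLoader(CheckedSupplier<T> loader) {
        this.loader = loader;
    }

    synchronized T get() throws IOException {
        if (value == null) {
            value = loader.get(); // the expensive load happens here, exactly once
        }
        return value;
    }

    public static void main(String[] args) throws IOException {
        // Registering loaders costs nothing; no database is parsed yet.
        Map<String, LazyLoader<String>> readers = new HashMap<>();
        readers.put("GeoLite2-City.mmdb.gz",
                new LazyLoader<>(() -> "pretend this opened the City database"));
        readers.put("GeoLite2-Country.mmdb.gz",
                new LazyLoader<>(() -> "pretend this opened the Country database"));

        // Only the database a processor actually asks for is loaded.
        System.out.println(readers.get("GeoLite2-City.mmdb.gz").get());
    }
}
--------------------------------------------------

The plugin variant additionally implements `Closeable`; because the underlying reader is only created on the first `get()`, closing a loader that was never used has effectively nothing to release.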
@@ -89,10 +89,13 @@ public class IngestGeoIpPlugin extends Plugin implements IngestPlugin, Closeable while (iterator.hasNext()) { Path databasePath = iterator.next(); if (Files.isRegularFile(databasePath) && pathMatcher.matches(databasePath)) { - try (InputStream inputStream = new GZIPInputStream(Files.newInputStream(databasePath, StandardOpenOption.READ))) { - databaseReaders.put(databasePath.getFileName().toString(), - new DatabaseReader.Builder(inputStream).withCache(cache).build()); - } + String databaseFileName = databasePath.getFileName().toString(); + DatabaseReaderLazyLoader holder = new DatabaseReaderLazyLoader(databaseFileName, () -> { + try (InputStream inputStream = new GZIPInputStream(Files.newInputStream(databasePath, StandardOpenOption.READ))) { + return new DatabaseReader.Builder(inputStream).withCache(cache).build(); + } + }); + databaseReaders.put(databaseFileName, holder); } } } diff --git a/plugins/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpProcessorFactoryTests.java b/plugins/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpProcessorFactoryTests.java index 0c80bcc71fd..8db0d15f796 100644 --- a/plugins/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpProcessorFactoryTests.java +++ b/plugins/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpProcessorFactoryTests.java @@ -22,7 +22,6 @@ package org.elasticsearch.ingest.geoip; import com.carrotsearch.randomizedtesting.generators.RandomPicks; import com.maxmind.db.NoCache; import com.maxmind.db.NodeCache; -import com.maxmind.geoip2.DatabaseReader; import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.common.Randomness; import org.elasticsearch.test.ESTestCase; @@ -48,7 +47,7 @@ import static org.hamcrest.Matchers.sameInstance; public class GeoIpProcessorFactoryTests extends ESTestCase { - private static Map databaseReaders; + private static Map databaseReaders; @BeforeClass public static void loadDatabaseReaders() throws IOException { @@ -66,7 +65,7 @@ public class GeoIpProcessorFactoryTests extends ESTestCase { @AfterClass public static void closeDatabaseReaders() throws IOException { - for (DatabaseReader reader : databaseReaders.values()) { + for (DatabaseReaderLazyLoader reader : databaseReaders.values()) { reader.close(); } databaseReaders = null; @@ -222,4 +221,37 @@ public class GeoIpProcessorFactoryTests extends ESTestCase { assertThat(e.getMessage(), equalTo("[properties] property isn't a list, but of type [java.lang.String]")); } } + + public void testLazyLoading() throws Exception { + Path configDir = createTempDir(); + Path geoIpConfigDir = configDir.resolve("ingest-geoip"); + Files.createDirectories(geoIpConfigDir); + Files.copy(new ByteArrayInputStream(StreamsUtils.copyToBytesFromClasspath("/GeoLite2-City.mmdb.gz")), + geoIpConfigDir.resolve("GeoLite2-City.mmdb.gz")); + Files.copy(new ByteArrayInputStream(StreamsUtils.copyToBytesFromClasspath("/GeoLite2-Country.mmdb.gz")), + geoIpConfigDir.resolve("GeoLite2-Country.mmdb.gz")); + + // Loading another database reader instances, because otherwise we can't test lazy loading as the the + // database readers used at class level are reused between tests. 
(we want to keep that otherwise running this + // test will take roughly 4 times more time) + Map databaseReaders = + IngestGeoIpPlugin.loadDatabaseReaders(geoIpConfigDir, NoCache.getInstance()); + GeoIpProcessor.Factory factory = new GeoIpProcessor.Factory(databaseReaders); + for (DatabaseReaderLazyLoader lazyLoader : databaseReaders.values()) { + assertNull(lazyLoader.databaseReader.get()); + } + + Map config = new HashMap<>(); + config.put("field", "_field"); + config.put("database_file", "GeoLite2-City.mmdb.gz"); + factory.create(null, "_tag", config); + config = new HashMap<>(); + config.put("field", "_field"); + config.put("database_file", "GeoLite2-Country.mmdb.gz"); + factory.create(null, "_tag", config); + + for (DatabaseReaderLazyLoader lazyLoader : databaseReaders.values()) { + assertNotNull(lazyLoader.databaseReader.get()); + } + } } diff --git a/plugins/repository-s3/src/main/java/org/elasticsearch/cloud/aws/InternalAwsS3Service.java b/plugins/repository-s3/src/main/java/org/elasticsearch/cloud/aws/InternalAwsS3Service.java index ce47bd44f0b..cc5d69d61c7 100644 --- a/plugins/repository-s3/src/main/java/org/elasticsearch/cloud/aws/InternalAwsS3Service.java +++ b/plugins/repository-s3/src/main/java/org/elasticsearch/cloud/aws/InternalAwsS3Service.java @@ -150,18 +150,7 @@ public class InternalAwsS3Service extends AbstractLifecycleComponent implements if (key.length() == 0 && secret.length() == 0) { logger.debug("Using instance profile credentials"); - AWSCredentialsProvider credentials = new InstanceProfileCredentialsProvider(); - return new AWSCredentialsProvider() { - @Override - public AWSCredentials getCredentials() { - return SocketAccess.doPrivileged(credentials::getCredentials); - } - - @Override - public void refresh() { - SocketAccess.doPrivilegedVoid(credentials::refresh); - } - }; + return new PrivilegedInstanceProfileCredentialsProvider(); } else { logger.debug("Using basic key/secret credentials"); return new StaticCredentialsProvider(new BasicAWSCredentials(key.toString(), secret.toString())); @@ -221,4 +210,22 @@ public class InternalAwsS3Service extends AbstractLifecycleComponent implements // Ensure that IdleConnectionReaper is shutdown IdleConnectionReaper.shutdown(); } + + static class PrivilegedInstanceProfileCredentialsProvider implements AWSCredentialsProvider { + private final InstanceProfileCredentialsProvider credentials; + + private PrivilegedInstanceProfileCredentialsProvider() { + this.credentials = new InstanceProfileCredentialsProvider(); + } + + @Override + public AWSCredentials getCredentials() { + return SocketAccess.doPrivileged(credentials::getCredentials); + } + + @Override + public void refresh() { + SocketAccess.doPrivilegedVoid(credentials::refresh); + } + } } diff --git a/plugins/repository-s3/src/test/java/org/elasticsearch/cloud/aws/AwsS3ServiceImplTests.java b/plugins/repository-s3/src/test/java/org/elasticsearch/cloud/aws/AwsS3ServiceImplTests.java index 73252102c2f..09a3222d63e 100644 --- a/plugins/repository-s3/src/test/java/org/elasticsearch/cloud/aws/AwsS3ServiceImplTests.java +++ b/plugins/repository-s3/src/test/java/org/elasticsearch/cloud/aws/AwsS3ServiceImplTests.java @@ -37,7 +37,7 @@ public class AwsS3ServiceImplTests extends ESTestCase { public void testAWSCredentialsWithSystemProviders() { AWSCredentialsProvider credentialsProvider = InternalAwsS3Service.buildCredentials(logger, deprecationLogger, Settings.EMPTY, Settings.EMPTY, "default"); - assertThat(credentialsProvider, instanceOf(AWSCredentialsProvider.class)); + 
assertThat(credentialsProvider, instanceOf(InternalAwsS3Service.PrivilegedInstanceProfileCredentialsProvider.class)); } public void testAwsCredsDefaultSettings() { diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.highlight/10_unified.yaml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.highlight/10_unified.yaml index 72f782e68d1..644e8c4ec5a 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.highlight/10_unified.yaml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.highlight/10_unified.yaml @@ -28,7 +28,7 @@ setup: --- "Basic": - skip: - version: " - 5.2.99" + version: " - 5.99.99" reason: this uses a new highlighter that has been added in 5.3 - do: search:
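Closing note: the sentence and word boundary scanner tests earlier in this patch ultimately rely on how Java's `BreakIterator` segments text for a given `Locale`. The following standalone sketch (an illustration only, not part of the change) prints the segments that the `sentence` and `word` scanners would roughly work from for the sample text used in `HighlighterSearchIT`.

[source,java]
--------------------------------------------------
import java.text.BreakIterator;
import java.util.Locale;

public class BreakIteratorDemo {

    static void printSegments(BreakIterator it, String text) {
        it.setText(text);
        int start = it.first();
        for (int end = it.next(); end != BreakIterator.DONE; start = end, end = it.next()) {
            System.out.println("[" + text.substring(start, end) + "]");
        }
    }

    public static void main(String[] args) {
        String text = "A sentence with few words. Another sentence with even more words.";
        System.out.println("sentence boundaries:");
        printSegments(BreakIterator.getSentenceInstance(Locale.ENGLISH), text);
        System.out.println("word boundaries:");
        printSegments(BreakIterator.getWordInstance(Locale.ENGLISH), text);
    }
}
--------------------------------------------------

Note how the sentence instance keeps the trailing space with each sentence, which is why the expected highlight fragments in those tests end with a space.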