Merge branch 'master' into hlclient/add-delete-method

David Pilato 2017-02-24 09:23:03 +01:00
commit 3e4b917066
22 changed files with 797 additions and 1251 deletions


@@ -140,6 +140,7 @@ import org.elasticsearch.index.analysis.UniqueTokenFilterFactory;
import org.elasticsearch.index.analysis.UpperCaseTokenFilterFactory;
import org.elasticsearch.index.analysis.WhitespaceAnalyzerProvider;
import org.elasticsearch.index.analysis.WhitespaceTokenizerFactory;
import org.elasticsearch.index.analysis.WordDelimiterGraphTokenFilterFactory;
import org.elasticsearch.index.analysis.WordDelimiterTokenFilterFactory;
import org.elasticsearch.index.analysis.compound.DictionaryCompoundWordTokenFilterFactory;
import org.elasticsearch.index.analysis.compound.HyphenationCompoundWordTokenFilterFactory;
@@ -225,6 +226,7 @@ public final class AnalysisModule {
tokenFilters.register("snowball", SnowballTokenFilterFactory::new);
tokenFilters.register("stemmer", StemmerTokenFilterFactory::new);
tokenFilters.register("word_delimiter", WordDelimiterTokenFilterFactory::new);
tokenFilters.register("word_delimiter_graph", WordDelimiterGraphTokenFilterFactory::new);
tokenFilters.register("delimited_payload_filter", DelimitedPayloadTokenFilterFactory::new);
tokenFilters.register("elision", ElisionTokenFilterFactory::new);
tokenFilters.register("flatten_graph", FlattenGraphTokenFilterFactory::new);


@@ -51,6 +51,7 @@ import org.apache.lucene.analysis.miscellaneous.TrimFilter;
import org.apache.lucene.analysis.miscellaneous.TruncateTokenFilter;
import org.apache.lucene.analysis.miscellaneous.UniqueTokenFilter;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
import org.apache.lucene.analysis.ngram.NGramTokenFilter;
import org.apache.lucene.analysis.payloads.DelimitedPayloadTokenFilter;
@@ -87,6 +88,18 @@ public enum PreBuiltTokenFilters {
}
},
WORD_DELIMITER_GRAPH(CachingStrategy.ONE) {
@Override
public TokenStream create(TokenStream tokenStream, Version version) {
return new WordDelimiterGraphFilter(tokenStream,
WordDelimiterGraphFilter.GENERATE_WORD_PARTS |
WordDelimiterGraphFilter.GENERATE_NUMBER_PARTS |
WordDelimiterGraphFilter.SPLIT_ON_CASE_CHANGE |
WordDelimiterGraphFilter.SPLIT_ON_NUMERICS |
WordDelimiterGraphFilter.STEM_ENGLISH_POSSESSIVE, null);
}
},
STOP(CachingStrategy.LUCENE) {
@Override
public TokenStream create(TokenStream tokenStream, Version version) {
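For context, here is a minimal standalone sketch of what the pre-built word_delimiter_graph filter above produces. The flag combination mirrors the enum constant; the class name and input string are made up for illustration, and everything else is plain Lucene API.

// A hypothetical standalone demo of the same flag combination as the pre-built filter above.
import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class WordDelimiterGraphDemo {
    public static void main(String[] args) throws Exception {
        Tokenizer tokenizer = new WhitespaceTokenizer();
        tokenizer.setReader(new StringReader("PowerShot 500-42 O'Neil's"));
        TokenStream stream = new WordDelimiterGraphFilter(tokenizer,
                WordDelimiterGraphFilter.GENERATE_WORD_PARTS
                        | WordDelimiterGraphFilter.GENERATE_NUMBER_PARTS
                        | WordDelimiterGraphFilter.SPLIT_ON_CASE_CHANGE
                        | WordDelimiterGraphFilter.SPLIT_ON_NUMERICS
                        | WordDelimiterGraphFilter.STEM_ENGLISH_POSSESSIVE,
                null); // no protected words
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            System.out.println(term); // Power, Shot, 500, 42, O, Neil
        }
        stream.end();
        stream.close();
    }
}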


@@ -21,6 +21,7 @@ package org.elasticsearch.search.fetch.subphase.highlight;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.elasticsearch.Version;
import org.elasticsearch.action.support.ToXContentToBytes;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.ParsingException;
@@ -32,10 +33,12 @@ import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryParseContext;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder.BoundaryScannerType;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder.Order;
import java.io.IOException;
import java.util.Arrays;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.function.BiFunction;
@@ -57,8 +60,10 @@ public abstract class AbstractHighlighterBuilder<HB extends AbstractHighlighterB
public static final ParseField NUMBER_OF_FRAGMENTS_FIELD = new ParseField("number_of_fragments");
public static final ParseField ENCODER_FIELD = new ParseField("encoder");
public static final ParseField REQUIRE_FIELD_MATCH_FIELD = new ParseField("require_field_match");
public static final ParseField BOUNDARY_SCANNER_FIELD = new ParseField("boundary_scanner");
public static final ParseField BOUNDARY_MAX_SCAN_FIELD = new ParseField("boundary_max_scan");
public static final ParseField BOUNDARY_CHARS_FIELD = new ParseField("boundary_chars");
public static final ParseField BOUNDARY_SCANNER_LOCALE_FIELD = new ParseField("boundary_scanner_locale");
public static final ParseField TYPE_FIELD = new ParseField("type");
public static final ParseField FRAGMENTER_FIELD = new ParseField("fragmenter");
public static final ParseField NO_MATCH_SIZE_FIELD = new ParseField("no_match_size");
@@ -88,10 +93,14 @@ public abstract class AbstractHighlighterBuilder<HB extends AbstractHighlighterB
protected Boolean forceSource;
protected BoundaryScannerType boundaryScannerType;
protected Integer boundaryMaxScan;
protected char[] boundaryChars;
protected Locale boundaryScannerLocale;
protected Integer noMatchSize;
protected Integer phraseLimit;
@@ -119,10 +128,18 @@ public abstract class AbstractHighlighterBuilder<HB extends AbstractHighlighterB
order(in.readOptionalWriteable(Order::readFromStream));
highlightFilter(in.readOptionalBoolean());
forceSource(in.readOptionalBoolean());
if (in.getVersion().onOrAfter(Version.V_5_4_0_UNRELEASED)) {
boundaryScannerType(in.readOptionalWriteable(BoundaryScannerType::readFromStream));
}
boundaryMaxScan(in.readOptionalVInt());
if (in.readBoolean()) {
boundaryChars(in.readString().toCharArray());
}
if (in.getVersion().onOrAfter(Version.V_5_4_0_UNRELEASED)) {
if (in.readBoolean()) {
boundaryScannerLocale(in.readString());
}
}
noMatchSize(in.readOptionalVInt());
phraseLimit(in.readOptionalVInt());
if (in.readBoolean()) {
@@ -150,12 +167,22 @@ public abstract class AbstractHighlighterBuilder<HB extends AbstractHighlighterB
out.writeOptionalWriteable(order);
out.writeOptionalBoolean(highlightFilter);
out.writeOptionalBoolean(forceSource);
if (out.getVersion().onOrAfter(Version.V_5_4_0_UNRELEASED)) {
out.writeOptionalWriteable(boundaryScannerType);
}
out.writeOptionalVInt(boundaryMaxScan);
boolean hasBounaryChars = boundaryChars != null;
out.writeBoolean(hasBounaryChars);
if (hasBounaryChars) {
out.writeString(String.valueOf(boundaryChars));
}
if (out.getVersion().onOrAfter(Version.V_5_4_0_UNRELEASED)) {
boolean hasBoundaryScannerLocale = boundaryScannerLocale != null;
out.writeBoolean(hasBoundaryScannerLocale);
if (hasBoundaryScannerLocale) {
out.writeString(boundaryScannerLocale.toLanguageTag());
}
}
out.writeOptionalVInt(noMatchSize);
out.writeOptionalVInt(phraseLimit);
boolean hasOptions = options != null;
@@ -331,6 +358,33 @@ public abstract class AbstractHighlighterBuilder<HB extends AbstractHighlighterB
return this.highlightFilter;
}
/**
* When using the highlighterType <tt>fvh</tt> this setting
* controls which scanner to use for fragment boundaries, and defaults to "simple".
*/
@SuppressWarnings("unchecked")
public HB boundaryScannerType(String boundaryScannerType) {
this.boundaryScannerType = BoundaryScannerType.fromString(boundaryScannerType);
return (HB) this;
}
/**
* When using the highlighterType <tt>fvh</tt> this setting
* controls which scanner to use for fragment boundaries, and defaults to "simple".
*/
@SuppressWarnings("unchecked")
public HB boundaryScannerType(BoundaryScannerType boundaryScannerType) {
this.boundaryScannerType = boundaryScannerType;
return (HB) this;
}
/**
* @return the value set by {@link #boundaryScannerType(String)}
*/
public BoundaryScannerType boundaryScannerType() {
return this.boundaryScannerType;
}
/**
* When using the highlighterType <tt>fvh</tt> this setting
* controls how far to look for boundary characters, and defaults to 20.
@@ -366,6 +420,25 @@ public abstract class AbstractHighlighterBuilder<HB extends AbstractHighlighterB
return this.boundaryChars;
}
/**
* When using the highlighterType <tt>fvh</tt> and boundaryScannerType <tt>break_iterator</tt>, this setting
* controls the locale to use by the BreakIterator, defaults to "root".
*/
@SuppressWarnings("unchecked")
public HB boundaryScannerLocale(String boundaryScannerLocale) {
if (boundaryScannerLocale != null) {
this.boundaryScannerLocale = Locale.forLanguageTag(boundaryScannerLocale);
}
return (HB) this;
}
/**
* @return the value set by {@link #boundaryScannerLocale(String)}
*/
public Locale boundaryScannerLocale() {
return this.boundaryScannerLocale;
}
/**
* Allows to set custom options for custom highlighters.
*/
@@ -491,12 +564,18 @@ public abstract class AbstractHighlighterBuilder<HB extends AbstractHighlighterB
if (highlightFilter != null) {
builder.field(HIGHLIGHT_FILTER_FIELD.getPreferredName(), highlightFilter);
}
if (boundaryScannerType != null) {
builder.field(BOUNDARY_SCANNER_FIELD.getPreferredName(), boundaryScannerType.name());
}
if (boundaryMaxScan != null) {
builder.field(BOUNDARY_MAX_SCAN_FIELD.getPreferredName(), boundaryMaxScan);
}
if (boundaryChars != null) {
builder.field(BOUNDARY_CHARS_FIELD.getPreferredName(), new String(boundaryChars));
}
if (boundaryScannerLocale != null) {
builder.field(BOUNDARY_SCANNER_LOCALE_FIELD.getPreferredName(), boundaryScannerLocale.toLanguageTag());
}
if (options != null && options.size() > 0) {
builder.field(OPTIONS_FIELD.getPreferredName(), options);
}
@@ -523,8 +602,10 @@ public abstract class AbstractHighlighterBuilder<HB extends AbstractHighlighterB
parser.declareInt(HB::fragmentSize, FRAGMENT_SIZE_FIELD);
parser.declareInt(HB::numOfFragments, NUMBER_OF_FRAGMENTS_FIELD);
parser.declareBoolean(HB::requireFieldMatch, REQUIRE_FIELD_MATCH_FIELD);
parser.declareString(HB::boundaryScannerType, BOUNDARY_SCANNER_FIELD);
parser.declareInt(HB::boundaryMaxScan, BOUNDARY_MAX_SCAN_FIELD);
parser.declareString((HB hb, String bc) -> hb.boundaryChars(bc.toCharArray()) , BOUNDARY_CHARS_FIELD);
parser.declareString(HB::boundaryScannerLocale, BOUNDARY_SCANNER_LOCALE_FIELD);
parser.declareString(HB::highlighterType, TYPE_FIELD);
parser.declareString(HB::fragmenter, FRAGMENTER_FIELD);
parser.declareInt(HB::noMatchSize, NO_MATCH_SIZE_FIELD);
@@ -562,8 +643,8 @@ public abstract class AbstractHighlighterBuilder<HB extends AbstractHighlighterB
public final int hashCode() {
return Objects.hash(getClass(), Arrays.hashCode(preTags), Arrays.hashCode(postTags), fragmentSize,
numOfFragments, highlighterType, fragmenter, highlightQuery, order, highlightFilter,
forceSource, boundaryMaxScan, Arrays.hashCode(boundaryChars), noMatchSize,
phraseLimit, options, requireFieldMatch, doHashCode());
forceSource, boundaryScannerType, boundaryMaxScan, Arrays.hashCode(boundaryChars), boundaryScannerLocale,
noMatchSize, phraseLimit, options, requireFieldMatch, doHashCode());
}
/**
@@ -591,8 +672,10 @@ public abstract class AbstractHighlighterBuilder<HB extends AbstractHighlighterB
Objects.equals(order, other.order) &&
Objects.equals(highlightFilter, other.highlightFilter) &&
Objects.equals(forceSource, other.forceSource) &&
Objects.equals(boundaryScannerType, other.boundaryScannerType) &&
Objects.equals(boundaryMaxScan, other.boundaryMaxScan) &&
Arrays.equals(boundaryChars, other.boundaryChars) &&
Objects.equals(boundaryScannerLocale, other.boundaryScannerLocale) &&
Objects.equals(noMatchSize, other.noMatchSize) &&
Objects.equals(phraseLimit, other.phraseLimit) &&
Objects.equals(options, other.options) &&
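To tie the new builder options together, here is a minimal usage sketch (not part of this commit) of how a search request might enable the FVH sentence boundary scanner through the methods added above; the class name and the field name "body" are illustrative assumptions.

// Hypothetical usage of the boundary scanner options added above (index/field names are examples).
import java.util.Locale;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder.BoundaryScannerType;

public class BoundaryScannerExample {
    public static SearchSourceBuilder source() {
        HighlightBuilder highlight = new HighlightBuilder()
                .field("body")                                         // highlight the "body" field
                .highlighterType("fvh")                                // boundary scanners only apply to the fvh
                .boundaryScannerType(BoundaryScannerType.SENTENCE)     // break fragments at sentence boundaries
                .boundaryScannerLocale(Locale.ENGLISH.toLanguageTag());
        return new SearchSourceBuilder().highlighter(highlight);
    }
}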


@@ -21,6 +21,7 @@ package org.elasticsearch.search.fetch.subphase.highlight;
import org.apache.lucene.search.highlight.Encoder;
import org.apache.lucene.search.vectorhighlight.BaseFragmentsBuilder;
import org.apache.lucene.search.vectorhighlight.BoundaryScanner;
import org.apache.lucene.search.vectorhighlight.BreakIteratorBoundaryScanner;
import org.apache.lucene.search.vectorhighlight.CustomFieldQuery;
import org.apache.lucene.search.vectorhighlight.FieldFragList;
import org.apache.lucene.search.vectorhighlight.FieldPhraseList.WeightedPhraseInfo;
@@ -38,15 +39,23 @@ import org.elasticsearch.common.text.Text;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.search.fetch.subphase.highlight.SearchContextHighlight.Field;
import org.elasticsearch.search.fetch.subphase.highlight.SearchContextHighlight.FieldOptions;
import org.elasticsearch.search.internal.SearchContext;
import java.text.BreakIterator;
import java.util.Collections;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
public class FastVectorHighlighter implements Highlighter {
private static final SimpleBoundaryScanner DEFAULT_BOUNDARY_SCANNER = new SimpleBoundaryScanner();
private static final BoundaryScanner DEFAULT_SIMPLE_BOUNDARY_SCANNER = new SimpleBoundaryScanner();
private static final BoundaryScanner DEFAULT_SENTENCE_BOUNDARY_SCANNER = new BreakIteratorBoundaryScanner(
BreakIterator.getSentenceInstance(Locale.ROOT));
private static final BoundaryScanner DEFAULT_WORD_BOUNDARY_SCANNER = new BreakIteratorBoundaryScanner(
BreakIterator.getWordInstance(Locale.ROOT));
public static final Setting<Boolean> SETTING_TV_HIGHLIGHT_MULTI_VALUE = Setting.boolSetting("search.highlight.term_vector_multi_value",
true, Setting.Property.NodeScope);
@@ -105,12 +114,7 @@ public class FastVectorHighlighter implements Highlighter {
FragListBuilder fragListBuilder;
BaseFragmentsBuilder fragmentsBuilder;
BoundaryScanner boundaryScanner = DEFAULT_BOUNDARY_SCANNER;
final BoundaryScanner boundaryScanner = getBoundaryScanner(field);
if (field.fieldOptions().boundaryMaxScan() != SimpleBoundaryScanner.DEFAULT_MAX_SCAN
|| field.fieldOptions().boundaryChars() != SimpleBoundaryScanner.DEFAULT_BOUNDARY_CHARS) {
boundaryScanner = new SimpleBoundaryScanner(field.fieldOptions().boundaryMaxScan(),
field.fieldOptions().boundaryChars());
}
boolean forceSource = context.highlight().forceSource(field);
if (field.fieldOptions().numberOfFragments() == 0) {
fragListBuilder = new SingleFragListBuilder();
@@ -206,6 +210,29 @@ public class FastVectorHighlighter implements Highlighter {
&& fieldMapper.fieldType().storeTermVectorPositions();
}
private static BoundaryScanner getBoundaryScanner(Field field) {
final FieldOptions fieldOptions = field.fieldOptions();
final Locale boundaryScannerLocale = fieldOptions.boundaryScannerLocale();
switch(fieldOptions.boundaryScannerType()) {
case SENTENCE:
if (boundaryScannerLocale != null) {
return new BreakIteratorBoundaryScanner(BreakIterator.getSentenceInstance(boundaryScannerLocale));
}
return DEFAULT_SENTENCE_BOUNDARY_SCANNER;
case WORD:
if (boundaryScannerLocale != null) {
return new BreakIteratorBoundaryScanner(BreakIterator.getWordInstance(boundaryScannerLocale));
}
return DEFAULT_WORD_BOUNDARY_SCANNER;
default:
if (fieldOptions.boundaryMaxScan() != SimpleBoundaryScanner.DEFAULT_MAX_SCAN
|| fieldOptions.boundaryChars() != SimpleBoundaryScanner.DEFAULT_BOUNDARY_CHARS) {
return new SimpleBoundaryScanner(fieldOptions.boundaryMaxScan(), fieldOptions.boundaryChars());
}
return DEFAULT_SIMPLE_BOUNDARY_SCANNER;
}
}
private class MapperHighlightEntry {
public FragListBuilder fragListBuilder;
public FragmentsBuilder fragmentsBuilder;


@@ -95,9 +95,9 @@ public class HighlightBuilder extends AbstractHighlighterBuilder<HighlightBuilde
.preTags(DEFAULT_PRE_TAGS).postTags(DEFAULT_POST_TAGS).scoreOrdered(DEFAULT_SCORE_ORDERED)
.highlightFilter(DEFAULT_HIGHLIGHT_FILTER).requireFieldMatch(DEFAULT_REQUIRE_FIELD_MATCH)
.forceSource(DEFAULT_FORCE_SOURCE).fragmentCharSize(DEFAULT_FRAGMENT_CHAR_SIZE)
.numberOfFragments(DEFAULT_NUMBER_OF_FRAGMENTS).encoder(DEFAULT_ENCODER)
.numberOfFragments(DEFAULT_NUMBER_OF_FRAGMENTS).encoder(DEFAULT_ENCODER).boundaryScannerType(BoundaryScannerType.CHARS)
.boundaryMaxScan(SimpleBoundaryScanner.DEFAULT_MAX_SCAN).boundaryChars(SimpleBoundaryScanner.DEFAULT_BOUNDARY_CHARS)
.noMatchSize(DEFAULT_NO_MATCH_SIZE).phraseLimit(DEFAULT_PHRASE_LIMIT).build();
.boundaryScannerLocale(Locale.ROOT).noMatchSize(DEFAULT_NO_MATCH_SIZE).phraseLimit(DEFAULT_PHRASE_LIMIT).build();
private final List<Field> fields = new ArrayList<>();
@@ -327,12 +327,18 @@ public class HighlightBuilder extends AbstractHighlighterBuilder<HighlightBuilde
if (highlighterBuilder.requireFieldMatch != null) {
targetOptionsBuilder.requireFieldMatch(highlighterBuilder.requireFieldMatch);
}
if (highlighterBuilder.boundaryScannerType != null) {
targetOptionsBuilder.boundaryScannerType(highlighterBuilder.boundaryScannerType);
}
if (highlighterBuilder.boundaryMaxScan != null) {
targetOptionsBuilder.boundaryMaxScan(highlighterBuilder.boundaryMaxScan);
}
if (highlighterBuilder.boundaryChars != null) {
targetOptionsBuilder.boundaryChars(convertCharArray(highlighterBuilder.boundaryChars));
}
if (highlighterBuilder.boundaryScannerLocale != null) {
targetOptionsBuilder.boundaryScannerLocale(highlighterBuilder.boundaryScannerLocale);
}
if (highlighterBuilder.highlighterType != null) {
targetOptionsBuilder.highlighterType(highlighterBuilder.highlighterType);
}
@@ -522,4 +528,30 @@ public class HighlightBuilder extends AbstractHighlighterBuilder<HighlightBuilde
return name().toLowerCase(Locale.ROOT);
}
}
public enum BoundaryScannerType implements Writeable {
CHARS, WORD, SENTENCE;
public static BoundaryScannerType readFromStream(StreamInput in) throws IOException {
int ordinal = in.readVInt();
if (ordinal < 0 || ordinal >= values().length) {
throw new IOException("Unknown BoundaryScannerType ordinal [" + ordinal + "]");
}
return values()[ordinal];
}
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeVInt(this.ordinal());
}
public static BoundaryScannerType fromString(String boundaryScannerType) {
return valueOf(boundaryScannerType.toUpperCase(Locale.ROOT));
}
@Override
public String toString() {
return name().toLowerCase(Locale.ROOT);
}
}
}


@@ -20,11 +20,13 @@
package org.elasticsearch.search.fetch.subphase.highlight;
import org.apache.lucene.search.Query;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder.BoundaryScannerType;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
@@ -110,10 +112,14 @@ public class SearchContextHighlight {
private String fragmenter;
private BoundaryScannerType boundaryScannerType;
private int boundaryMaxScan = -1;
private Character[] boundaryChars = null;
private Locale boundaryScannerLocale;
private Query highlightQuery;
private int noMatchSize = -1;
@@ -168,6 +174,10 @@ public class SearchContextHighlight {
return fragmenter;
}
public BoundaryScannerType boundaryScannerType() {
return boundaryScannerType;
}
public int boundaryMaxScan() {
return boundaryMaxScan;
}
@@ -176,6 +186,10 @@ public class SearchContextHighlight {
return boundaryChars;
}
public Locale boundaryScannerLocale() {
return boundaryScannerLocale;
}
public Query highlightQuery() {
return highlightQuery;
}
@@ -260,6 +274,11 @@ public class SearchContextHighlight {
return this;
}
Builder boundaryScannerType(BoundaryScannerType boundaryScanner) {
fieldOptions.boundaryScannerType = boundaryScanner;
return this;
}
Builder boundaryMaxScan(int boundaryMaxScan) {
fieldOptions.boundaryMaxScan = boundaryMaxScan;
return this;
@@ -270,6 +289,11 @@ public class SearchContextHighlight {
return this;
}
Builder boundaryScannerLocale(Locale boundaryScannerLocale) {
fieldOptions.boundaryScannerLocale = boundaryScannerLocale;
return this;
}
Builder highlightQuery(Query highlightQuery) {
fieldOptions.highlightQuery = highlightQuery;
return this;
@@ -324,12 +348,18 @@ public class SearchContextHighlight {
if (fieldOptions.requireFieldMatch == null) {
fieldOptions.requireFieldMatch = globalOptions.requireFieldMatch;
}
if (fieldOptions.boundaryScannerType == null) {
fieldOptions.boundaryScannerType = globalOptions.boundaryScannerType;
}
if (fieldOptions.boundaryMaxScan == -1) {
fieldOptions.boundaryMaxScan = globalOptions.boundaryMaxScan;
}
if (fieldOptions.boundaryChars == null && globalOptions.boundaryChars != null) {
fieldOptions.boundaryChars = Arrays.copyOf(globalOptions.boundaryChars, globalOptions.boundaryChars.length);
}
if (fieldOptions.boundaryScannerLocale == null) {
fieldOptions.boundaryScannerLocale = globalOptions.boundaryScannerLocale;
}
if (fieldOptions.highlighterType == null) {
fieldOptions.highlighterType = globalOptions.highlighterType;
}


@@ -0,0 +1,146 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.ESTokenStreamTestCase;
import java.io.IOException;
import java.io.StringReader;
/**
* Base class to test {@link WordDelimiterTokenFilterFactory} and {@link WordDelimiterGraphTokenFilterFactory}
*/
public abstract class BaseWordDelimiterTokenFilterFactoryTestCase extends ESTokenStreamTestCase {
final String type;
public BaseWordDelimiterTokenFilterFactoryTestCase(String type) {
this.type = type;
}
public void testDefault() throws IOException {
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put("index.analysis.filter.my_word_delimiter.type", type)
.build());
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter");
String source = "PowerShot 500-42 wi-fi wi-fi-4000 j2se O'Neil's";
String[] expected = new String[]{"Power", "Shot", "500", "42", "wi", "fi", "wi",
"fi", "4000", "j", "2", "se", "O", "Neil"};
Tokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader(source));
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
}
public void testCatenateWords() throws IOException {
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put("index.analysis.filter.my_word_delimiter.type", type)
.put("index.analysis.filter.my_word_delimiter.catenate_words", "true")
.put("index.analysis.filter.my_word_delimiter.generate_word_parts", "false")
.build());
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter");
String source = "PowerShot 500-42 wi-fi wi-fi-4000 j2se O'Neil's";
String[] expected = new String[]{"PowerShot", "500", "42", "wifi", "wifi", "4000", "j", "2", "se", "ONeil"};
Tokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader(source));
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
}
public void testCatenateNumbers() throws IOException {
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put("index.analysis.filter.my_word_delimiter.type", type)
.put("index.analysis.filter.my_word_delimiter.generate_number_parts", "false")
.put("index.analysis.filter.my_word_delimiter.catenate_numbers", "true")
.build());
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter");
String source = "PowerShot 500-42 wi-fi wi-fi-4000 j2se O'Neil's";
String[] expected = new String[]{"Power", "Shot", "50042", "wi", "fi", "wi", "fi", "4000", "j", "2",
"se", "O", "Neil"};
Tokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader(source));
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
}
public void testCatenateAll() throws IOException {
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put("index.analysis.filter.my_word_delimiter.type", type)
.put("index.analysis.filter.my_word_delimiter.generate_word_parts", "false")
.put("index.analysis.filter.my_word_delimiter.generate_number_parts", "false")
.put("index.analysis.filter.my_word_delimiter.catenate_all", "true")
.build());
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter");
String source = "PowerShot 500-42 wi-fi wi-fi-4000 j2se O'Neil's";
String[] expected = new String[]{"PowerShot", "50042", "wifi", "wifi4000", "j2se", "ONeil"};
Tokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader(source));
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
}
public void testSplitOnCaseChange() throws IOException {
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put("index.analysis.filter.my_word_delimiter.type", type)
.put("index.analysis.filter.my_word_delimiter.split_on_case_change", "false")
.build());
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter");
String source = "PowerShot";
String[] expected = new String[]{"PowerShot"};
Tokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader(source));
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
}
public void testPreserveOriginal() throws IOException {
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put("index.analysis.filter.my_word_delimiter.type", type)
.put("index.analysis.filter.my_word_delimiter.preserve_original", "true")
.build());
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter");
String source = "PowerShot 500-42 wi-fi wi-fi-4000 j2se O'Neil's";
String[] expected = new String[]{"PowerShot", "Power", "Shot", "500-42", "500", "42", "wi-fi", "wi", "fi",
"wi-fi-4000", "wi", "fi", "4000", "j2se", "j", "2", "se", "O'Neil's", "O", "Neil"};
Tokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader(source));
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
}
public void testStemEnglishPossessive() throws IOException {
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put("index.analysis.filter.my_word_delimiter.type", type)
.put("index.analysis.filter.my_word_delimiter.stem_english_possessive", "false")
.build());
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter");
String source = "PowerShot 500-42 wi-fi wi-fi-4000 j2se O'Neil's";
String[] expected = new String[]{"Power", "Shot", "500", "42", "wi", "fi", "wi", "fi", "4000", "j", "2",
"se", "O", "Neil", "s"};
Tokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader(source));
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
}
}


@@ -0,0 +1,75 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.test.ESTestCase;
import java.io.IOException;
import java.io.StringReader;
public class WordDelimiterGraphTokenFilterFactoryTests extends BaseWordDelimiterTokenFilterFactoryTestCase {
public WordDelimiterGraphTokenFilterFactoryTests() {
super("word_delimiter_graph");
}
public void testMultiTerms() throws IOException {
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put("index.analysis.filter.my_word_delimiter.type", type)
.put("index.analysis.filter.my_word_delimiter.catenate_all", "true")
.put("index.analysis.filter.my_word_delimiter.preserve_original", "true")
.build());
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter");
String source = "PowerShot 500-42 wi-fi wi-fi-4000 j2se O'Neil's";
String[] expected = new String[]{"PowerShot", "PowerShot", "Power", "Shot", "50042", "500-42", "500", "42",
"wifi", "wi-fi", "wi", "fi", "wifi4000", "wi-fi-4000", "wi", "fi", "4000", "j2se", "j2se", "j", "2", "se",
"ONeil", "O'Neil's", "O", "Neil" };
Tokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader(source));
int[] expectedIncr = new int[]{1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1};
int[] expectedPosLen = new int[]{2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 3, 3, 1, 1, 1, 3, 3, 1, 1, 1, 2, 2, 1, 1};
assertTokenStreamContents(tokenFilter.create(tokenizer), expected, null, null, null,
expectedIncr, expectedPosLen, null);
}
/** Correct offset order when doing both parts and concatenation: PowerShot is a synonym of Power */
public void testPartsAndCatenate() throws IOException {
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put("index.analysis.filter.my_word_delimiter.type", type)
.put("index.analysis.filter.my_word_delimiter.catenate_words", "true")
.put("index.analysis.filter.my_word_delimiter.generate_word_parts", "true")
.build());
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter");
String source = "PowerShot";
int[] expectedIncr = new int[]{1, 0, 1};
int[] expectedPosLen = new int[]{2, 1, 1};
String[] expected = new String[]{"PowerShot", "Power", "Shot" };
Tokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader(source));
assertTokenStreamContents(tokenFilter.create(tokenizer), expected, null, null, null,
expectedIncr, expectedPosLen, null);
}
}


@@ -24,118 +24,20 @@ import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.ESTokenStreamTestCase;
import java.io.IOException;
import java.io.StringReader;
public class WordDelimiterTokenFilterFactoryTests extends ESTokenStreamTestCase {
public void testDefault() throws IOException {
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(Settings.builder()
public class WordDelimiterTokenFilterFactoryTests extends BaseWordDelimiterTokenFilterFactoryTestCase {
public WordDelimiterTokenFilterFactoryTests() {
super("word_delimiter");
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put("index.analysis.filter.my_word_delimiter.type", "word_delimiter")
.build());
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter");
String source = "PowerShot 500-42 wi-fi wi-fi-4000 j2se O'Neil's";
String[] expected = new String[]{"Power", "Shot", "500", "42", "wi", "fi", "wi", "fi", "4000", "j", "2", "se", "O", "Neil"};
Tokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader(source));
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
}
public void testCatenateWords() throws IOException {
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put("index.analysis.filter.my_word_delimiter.type", "word_delimiter")
.put("index.analysis.filter.my_word_delimiter.catenate_words", "true")
.put("index.analysis.filter.my_word_delimiter.generate_word_parts", "false")
.build());
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter");
String source = "PowerShot 500-42 wi-fi wi-fi-4000 j2se O'Neil's";
String[] expected = new String[]{"PowerShot", "500", "42", "wifi", "wifi", "4000", "j", "2", "se", "ONeil"};
Tokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader(source));
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
}
public void testCatenateNumbers() throws IOException {
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put("index.analysis.filter.my_word_delimiter.type", "word_delimiter")
.put("index.analysis.filter.my_word_delimiter.generate_number_parts", "false")
.put("index.analysis.filter.my_word_delimiter.catenate_numbers", "true")
.build());
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter");
String source = "PowerShot 500-42 wi-fi wi-fi-4000 j2se O'Neil's";
String[] expected = new String[]{"Power", "Shot", "50042", "wi", "fi", "wi", "fi", "4000", "j", "2", "se", "O", "Neil"};
Tokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader(source));
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
}
public void testCatenateAll() throws IOException {
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put("index.analysis.filter.my_word_delimiter.type", "word_delimiter")
.put("index.analysis.filter.my_word_delimiter.generate_word_parts", "false")
.put("index.analysis.filter.my_word_delimiter.generate_number_parts", "false")
.put("index.analysis.filter.my_word_delimiter.catenate_all", "true")
.build());
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter");
String source = "PowerShot 500-42 wi-fi wi-fi-4000 j2se O'Neil's";
String[] expected = new String[]{"PowerShot", "50042", "wifi", "wifi4000", "j2se", "ONeil"};
Tokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader(source));
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
}
public void testSplitOnCaseChange() throws IOException {
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put("index.analysis.filter.my_word_delimiter.type", "word_delimiter")
.put("index.analysis.filter.my_word_delimiter.split_on_case_change", "false")
.build());
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter");
String source = "PowerShot";
String[] expected = new String[]{"PowerShot"};
Tokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader(source));
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
}
public void testPreserveOriginal() throws IOException {
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put("index.analysis.filter.my_word_delimiter.type", "word_delimiter")
.put("index.analysis.filter.my_word_delimiter.preserve_original", "true")
.build());
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter");
String source = "PowerShot 500-42 wi-fi wi-fi-4000 j2se O'Neil's";
String[] expected = new String[]{"PowerShot", "Power", "Shot", "500-42", "500", "42", "wi-fi", "wi", "fi", "wi-fi-4000", "wi", "fi", "4000", "j2se", "j", "2", "se", "O'Neil's", "O", "Neil"};
Tokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader(source));
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
}
public void testStemEnglishPossessive() throws IOException {
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put("index.analysis.filter.my_word_delimiter.type", "word_delimiter")
.put("index.analysis.filter.my_word_delimiter.stem_english_possessive", "false")
.build());
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter");
String source = "PowerShot 500-42 wi-fi wi-fi-4000 j2se O'Neil's";
String[] expected = new String[]{"Power", "Shot", "500", "42", "wi", "fi", "wi", "fi", "4000", "j", "2", "se", "O", "Neil", "s"};
Tokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader(source));
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
}
/** Correct offset order when doing both parts and concatenation: PowerShot is a synonym of Power */
public void testPartsAndCatenate() throws IOException {
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put("index.analysis.filter.my_word_delimiter.type", "word_delimiter")
.put("index.analysis.filter.my_word_delimiter.type", type)
.put("index.analysis.filter.my_word_delimiter.catenate_words", "true")
.put("index.analysis.filter.my_word_delimiter.generate_word_parts", "true")
.build());


@@ -47,6 +47,7 @@ import org.elasticsearch.index.query.QueryParseContext;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.search.SearchModule;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder.BoundaryScannerType;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder.Field;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder.Order;
import org.elasticsearch.search.fetch.subphase.highlight.SearchContextHighlight.FieldOptions;
@@ -288,6 +289,7 @@ public class HighlightBuilderTests extends ESTestCase {
mergeBeforeChek(highlightBuilder, fieldBuilder, fieldOptions);
checkSame.accept(AbstractHighlighterBuilder::boundaryChars, FieldOptions::boundaryChars);
checkSame.accept(AbstractHighlighterBuilder::boundaryScannerType, FieldOptions::boundaryScannerType);
checkSame.accept(AbstractHighlighterBuilder::boundaryMaxScan, FieldOptions::boundaryMaxScan);
checkSame.accept(AbstractHighlighterBuilder::fragmentSize, FieldOptions::fragmentCharSize);
checkSame.accept(AbstractHighlighterBuilder::fragmenter, FieldOptions::fragmenter);
@@ -557,12 +559,23 @@ public class HighlightBuilderTests extends ESTestCase {
if (randomBoolean()) {
highlightBuilder.forceSource(randomBoolean());
}
if (randomBoolean()) {
if (randomBoolean()) {
highlightBuilder.boundaryScannerType(randomFrom(BoundaryScannerType.values()));
} else {
// also test the string setter
highlightBuilder.boundaryScannerType(randomFrom(BoundaryScannerType.values()).toString());
}
}
if (randomBoolean()) {
highlightBuilder.boundaryMaxScan(randomIntBetween(0, 10));
}
if (randomBoolean()) {
highlightBuilder.boundaryChars(randomAsciiOfLengthBetween(1, 10).toCharArray());
}
if (randomBoolean()) {
highlightBuilder.boundaryScannerLocale(randomLocale(random()).toLanguageTag());
}
if (randomBoolean()) {
highlightBuilder.noMatchSize(randomIntBetween(0, 10));
}


@@ -44,6 +44,7 @@ import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder.BoundaryScannerType;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder.Field;
import org.elasticsearch.search.sort.SortOrder;
import org.elasticsearch.test.ESIntegTestCase;
@@ -57,6 +58,7 @@ import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import static org.elasticsearch.client.Requests.searchRequest;
@@ -747,7 +749,94 @@ public class HighlighterSearchIT extends ESIntegTestCase {
searchResponse = client().prepareSearch("test").setSource(source).get();
assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The <em>quick</em> brown fox jumps over"));
}
public void testFastVectorHighlighterWithSentenceBoundaryScanner() throws Exception {
assertAcked(prepareCreate("test").addMapping("type1", type1TermVectorMapping()));
ensureGreen();
indexRandom(true, client().prepareIndex("test", "type1")
.setSource("field1", "A sentence with few words. Another sentence with even more words."));
logger.info("--> highlighting and searching on 'field' with sentence boundary_scanner");
SearchSourceBuilder source = searchSource()
.query(termQuery("field1", "sentence"))
.highlighter(highlight()
.field("field1", 20, 2)
.order("score")
.preTags("<xxx>").postTags("</xxx>")
.boundaryScannerType(BoundaryScannerType.SENTENCE));
SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get();
assertHighlight(searchResponse, 0, "field1", 0, 2, equalTo("A <xxx>sentence</xxx> with few words. "));
assertHighlight(searchResponse, 0, "field1", 1, 2, equalTo("Another <xxx>sentence</xxx> with even more words. "));
}
public void testFastVectorHighlighterWithSentenceBoundaryScannerAndLocale() throws Exception {
assertAcked(prepareCreate("test").addMapping("type1", type1TermVectorMapping()));
ensureGreen();
indexRandom(true, client().prepareIndex("test", "type1")
.setSource("field1", "A sentence with few words. Another sentence with even more words."));
logger.info("--> highlighting and searching on 'field' with sentence boundary_scanner");
SearchSourceBuilder source = searchSource()
.query(termQuery("field1", "sentence"))
.highlighter(highlight()
.field("field1", 20, 2)
.order("score")
.preTags("<xxx>").postTags("</xxx>")
.boundaryScannerType(BoundaryScannerType.SENTENCE)
.boundaryScannerLocale(Locale.ENGLISH.toLanguageTag()));
SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get();
assertHighlight(searchResponse, 0, "field1", 0, 2, equalTo("A <xxx>sentence</xxx> with few words. "));
assertHighlight(searchResponse, 0, "field1", 1, 2, equalTo("Another <xxx>sentence</xxx> with even more words. "));
}
public void testFastVectorHighlighterWithWordBoundaryScanner() throws Exception {
assertAcked(prepareCreate("test").addMapping("type1", type1TermVectorMapping()));
ensureGreen();
indexRandom(true, client().prepareIndex("test", "type1")
.setSource("field1", "some quick and hairy brown:fox jumped over the lazy dog"));
logger.info("--> highlighting and searching on 'field' with word boundary_scanner");
SearchSourceBuilder source = searchSource()
.query(termQuery("field1", "some"))
.highlighter(highlight()
.field("field1", 23, 1)
.order("score")
.preTags("<xxx>").postTags("</xxx>")
.boundaryScannerType(BoundaryScannerType.WORD));
SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get();
assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("<xxx>some</xxx> quick and hairy brown"));
}
public void testFastVectorHighlighterWithWordBoundaryScannerAndLocale() throws Exception {
assertAcked(prepareCreate("test").addMapping("type1", type1TermVectorMapping()));
ensureGreen();
indexRandom(true, client().prepareIndex("test", "type1")
.setSource("field1", "some quick and hairy brown:fox jumped over the lazy dog"));
logger.info("--> highlighting and searching on 'field' with word boundary_scanner");
SearchSourceBuilder source = searchSource()
.query(termQuery("field1", "some"))
.highlighter(highlight()
.field("field1", 23, 1)
.order("score")
.preTags("<xxx>").postTags("</xxx>")
.boundaryScannerType(BoundaryScannerType.WORD)
.boundaryScannerLocale(Locale.ENGLISH.toLanguageTag()));
SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get();
assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("<xxx>some</xxx> quick and hairy brown"));
}
/**
View File
@ -81,6 +81,7 @@ buildRestTests.expectedUnconvertedCandidates = [
'reference/analysis/tokenfilters/synonym-tokenfilter.asciidoc', 'reference/analysis/tokenfilters/synonym-tokenfilter.asciidoc',
'reference/analysis/tokenfilters/synonym-graph-tokenfilter.asciidoc', 'reference/analysis/tokenfilters/synonym-graph-tokenfilter.asciidoc',
'reference/analysis/tokenfilters/word-delimiter-tokenfilter.asciidoc', 'reference/analysis/tokenfilters/word-delimiter-tokenfilter.asciidoc',
'reference/analysis/tokenfilters/word-delimiter-graph-tokenfilter.asciidoc',
'reference/cat/snapshots.asciidoc', 'reference/cat/snapshots.asciidoc',
'reference/cat/templates.asciidoc', 'reference/cat/templates.asciidoc',
'reference/cat/thread_pool.asciidoc', 'reference/cat/thread_pool.asciidoc',
View File
@ -0,0 +1,97 @@
[[analysis-word-delimiter-graph-tokenfilter]]
=== Word Delimiter Graph Token Filter
experimental[]
Named `word_delimiter_graph`, this token filter splits words into subwords and
performs optional transformations on subword groups. Words are split into
subwords according to the following rules:
* split on intra-word delimiters (by default, all non-alphanumeric
characters).
* "Wi-Fi" -> "Wi", "Fi"
* split on case transitions: "PowerShot" -> "Power", "Shot"
* split on letter-number transitions: "SD500" -> "SD", "500"
* leading and trailing intra-word delimiters on each subword are
ignored: "//hello---there, 'dude'" -> "hello", "there", "dude"
* trailing "'s" are removed for each subword: "O'Neil's" -> "O", "Neil"
Unlike the `word_delimiter` filter, this token filter correctly handles positions for
multi-term expansion at search time when any of the following options
is set to `true`:
* `preserve_original`
* `catenate_numbers`
* `catenate_words`
* `catenate_all`
Parameters include:
`generate_word_parts`::
If `true` causes parts of words to be
generated: "PowerShot" => "Power" "Shot". Defaults to `true`.
`generate_number_parts`::
If `true` causes number subwords to be
generated: "500-42" => "500" "42". Defaults to `true`.
`catenate_words`::
If `true` causes maximum runs of word parts to be
catenated: "wi-fi" => "wifi". Defaults to `false`.
`catenate_numbers`::
If `true` causes maximum runs of number parts to
be catenated: "500-42" => "50042". Defaults to `false`.
`catenate_all`::
If `true` causes all subword parts to be catenated:
"wi-fi-4000" => "wifi4000". Defaults to `false`.
`split_on_case_change`::
If `true` causes "PowerShot" to be two tokens;
("Power-Shot" remains two parts regards). Defaults to `true`.
`preserve_original`::
If `true` includes original words in subwords:
"500-42" => "500-42" "500" "42". Defaults to `false`.
`split_on_numerics`::
If `true` causes "j2se" to be three tokens; "j"
"2" "se". Defaults to `true`.
`stem_english_possessive`::
If `true` causes trailing "'s" to be
removed for each subword: "O'Neil's" => "O", "Neil". Defaults to `true`.
Advanced settings include:
`protected_words`::
A list of words protected from being split into subwords.
Either an array, or set `protected_words_path`, which resolves
to a file containing the protected words (one per line).
It automatically resolves to a `config/`-based location if such a file exists.
`type_table`::
A custom type mapping table, for example (when configured
using `type_table_path`):
[source,js]
--------------------------------------------------
# Map the $, %, '.', and ',' characters to DIGIT
# This might be useful for financial data.
$ => DIGIT
% => DIGIT
. => DIGIT
\\u002C => DIGIT
# in some cases you might not want to split on ZWJ
# this also tests the case where we need a bigger byte[]
# see http://en.wikipedia.org/wiki/Zero-width_joiner
\\u200D => ALPHANUM
--------------------------------------------------
NOTE: Using a tokenizer like the `standard` tokenizer may interfere with
the `catenate_*` and `preserve_original` parameters, as the original
string may already have lost punctuation during tokenization. Instead,
you may want to use the `whitespace` tokenizer.
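For illustration only, here is a minimal sketch of wiring the filter into a custom analyzer on top of the `whitespace` tokenizer, as the note above recommends. The index, analyzer, and filter names are hypothetical and not part of this change:
[source,js]
--------------------------------------------------
PUT my_index
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_analyzer": {
          "type": "custom",
          "tokenizer": "whitespace",
          "filter": [ "my_word_delimiter_graph" ]
        }
      },
      "filter": {
        "my_word_delimiter_graph": {
          "type": "word_delimiter_graph",
          "preserve_original": true,
          "catenate_words": true
        }
      }
    }
  }
}
--------------------------------------------------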
View File
@ -103,8 +103,7 @@ If `term_vector` information is provided by setting `term_vector` to
will be used instead of the plain highlighter. The fast vector highlighter: will be used instead of the plain highlighter. The fast vector highlighter:
* Is faster especially for large fields (> `1MB`) * Is faster especially for large fields (> `1MB`)
* Can be customized with `boundary_chars`, `boundary_max_scan`, and * Can be customized with `boundary_scanner` (see <<boundary-scanners,below>>)
`fragment_offset` (see <<boundary-characters,below>>)
* Requires setting `term_vector` to `with_positions_offsets` which * Requires setting `term_vector` to `with_positions_offsets` which
increases the size of the index increases the size of the index
* Can combine matches from multiple fields into one result. See * Can combine matches from multiple fields into one result. See
@ -502,17 +501,23 @@ GET /_search
-------------------------------------------------- --------------------------------------------------
// CONSOLE // CONSOLE
[[boundary-characters]] [[boundary-scanners]]
==== Boundary Characters ==== Boundary Scanners
When highlighting a field using the fast vector highlighter, When highlighting a field using the fast vector highlighter, you can specify
`boundary_chars` can be configured to define what constitutes a boundary how to break the highlighted fragments using `boundary_scanner`, which accepts
for highlighting. It's a single string with each boundary character the following values:
defined in it. It defaults to `.,!? \t\n`.
The `boundary_max_scan` allows to control how far to look for boundary * `chars` (default): allows to configure which characters (`boundary_chars`)
characters, and defaults to `20`. constitute a boundary for highlighting. It's a single string with each boundary
character defined in it (defaults to `.,!? \t\n`). It also allows configuring
the `boundary_max_scan` to control how far to look for boundary characters
(defaults to `20`).
* `word` and `sentence`: use Java's https://docs.oracle.com/javase/8/docs/api/java/text/BreakIterator.html[BreakIterator]
to break the highlighted fragments at the next _word_ or _sentence_ boundary.
You can further specify `boundary_scanner_locale` to control which Locale is used
to search the text for these boundaries.
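As an illustrative sketch only (the `content` field is an assumption, not part of this change, and would need `term_vector` set to `with_positions_offsets` for the fast vector highlighter to apply), a request for sentence-bounded fragments in English might look like:
[source,js]
--------------------------------------------------
GET /_search
{
  "query": {
    "match": { "content": "sentence" }
  },
  "highlight": {
    "fields": {
      "content": {
        "type": "fvh",
        "boundary_scanner": "sentence",
        "boundary_scanner_locale": "en-US"
      }
    }
  }
}
--------------------------------------------------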
[[matched-fields]] [[matched-fields]]
==== Matched Fields ==== Matched Fields
View File
@ -0,0 +1,62 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.ingest.geoip;
import com.maxmind.geoip2.DatabaseReader;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.SetOnce;
import org.elasticsearch.common.CheckedSupplier;
import org.elasticsearch.common.logging.Loggers;
import java.io.Closeable;
import java.io.IOException;
/**
* Facilitates lazy loading of the database reader, so that no memory is wasted on the database reader
* when the geoip plugin is installed but not used.
*/
final class DatabaseReaderLazyLoader implements Closeable {
private static final Logger LOGGER = Loggers.getLogger(DatabaseReaderLazyLoader.class);
private final String databaseFileName;
private final CheckedSupplier<DatabaseReader, IOException> loader;
// package protected for testing only:
final SetOnce<DatabaseReader> databaseReader;
DatabaseReaderLazyLoader(String databaseFileName, CheckedSupplier<DatabaseReader, IOException> loader) {
this.databaseFileName = databaseFileName;
this.loader = loader;
this.databaseReader = new SetOnce<>();
}
synchronized DatabaseReader get() throws IOException {
if (databaseReader.get() == null) {
databaseReader.set(loader.get());
LOGGER.debug("Loaded [{}] geoip database", databaseFileName);
}
return databaseReader.get();
}
@Override
public synchronized void close() throws IOException {
IOUtils.close(databaseReader.get());
}
}
View File
@ -19,19 +19,6 @@
package org.elasticsearch.ingest.geoip; package org.elasticsearch.ingest.geoip;
import java.io.IOException;
import java.net.InetAddress;
import java.security.AccessController;
import java.security.PrivilegedAction;
import java.util.Arrays;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import com.maxmind.geoip2.DatabaseReader; import com.maxmind.geoip2.DatabaseReader;
import com.maxmind.geoip2.exception.AddressNotFoundException; import com.maxmind.geoip2.exception.AddressNotFoundException;
import com.maxmind.geoip2.model.CityResponse; import com.maxmind.geoip2.model.CityResponse;
@ -49,6 +36,19 @@ import org.elasticsearch.ingest.AbstractProcessor;
import org.elasticsearch.ingest.IngestDocument; import org.elasticsearch.ingest.IngestDocument;
import org.elasticsearch.ingest.Processor; import org.elasticsearch.ingest.Processor;
import java.io.IOException;
import java.net.InetAddress;
import java.security.AccessController;
import java.security.PrivilegedAction;
import java.util.Arrays;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import static org.elasticsearch.ingest.ConfigurationUtils.newConfigurationException; import static org.elasticsearch.ingest.ConfigurationUtils.newConfigurationException;
import static org.elasticsearch.ingest.ConfigurationUtils.readBooleanProperty; import static org.elasticsearch.ingest.ConfigurationUtils.readBooleanProperty;
import static org.elasticsearch.ingest.ConfigurationUtils.readOptionalList; import static org.elasticsearch.ingest.ConfigurationUtils.readOptionalList;
@ -264,9 +264,9 @@ public final class GeoIpProcessor extends AbstractProcessor {
); );
static final Set<Property> DEFAULT_COUNTRY_PROPERTIES = EnumSet.of(Property.CONTINENT_NAME, Property.COUNTRY_ISO_CODE); static final Set<Property> DEFAULT_COUNTRY_PROPERTIES = EnumSet.of(Property.CONTINENT_NAME, Property.COUNTRY_ISO_CODE);
private final Map<String, DatabaseReader> databaseReaders; private final Map<String, DatabaseReaderLazyLoader> databaseReaders;
public Factory(Map<String, DatabaseReader> databaseReaders) { public Factory(Map<String, DatabaseReaderLazyLoader> databaseReaders) {
this.databaseReaders = databaseReaders; this.databaseReaders = databaseReaders;
} }
@ -279,12 +279,13 @@ public final class GeoIpProcessor extends AbstractProcessor {
List<String> propertyNames = readOptionalList(TYPE, processorTag, config, "properties"); List<String> propertyNames = readOptionalList(TYPE, processorTag, config, "properties");
boolean ignoreMissing = readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false); boolean ignoreMissing = readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false);
DatabaseReader databaseReader = databaseReaders.get(databaseFile); DatabaseReaderLazyLoader lazyLoader = databaseReaders.get(databaseFile);
if (databaseReader == null) { if (lazyLoader == null) {
throw newConfigurationException(TYPE, processorTag, throw newConfigurationException(TYPE, processorTag,
"database_file", "database file [" + databaseFile + "] doesn't exist"); "database_file", "database file [" + databaseFile + "] doesn't exist");
} }
DatabaseReader databaseReader = lazyLoader.get();
String databaseType = databaseReader.getMetadata().getDatabaseType(); String databaseType = databaseReader.getMetadata().getDatabaseType();
final Set<Property> properties; final Set<Property> properties;
View File
@ -19,6 +19,15 @@
package org.elasticsearch.ingest.geoip; package org.elasticsearch.ingest.geoip;
import com.maxmind.db.NoCache;
import com.maxmind.db.NodeCache;
import com.maxmind.geoip2.DatabaseReader;
import org.apache.lucene.util.IOUtils;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.ingest.Processor;
import org.elasticsearch.plugins.IngestPlugin;
import org.elasticsearch.plugins.Plugin;
import java.io.Closeable; import java.io.Closeable;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
@ -35,20 +44,11 @@ import java.util.Map;
import java.util.stream.Stream; import java.util.stream.Stream;
import java.util.zip.GZIPInputStream; import java.util.zip.GZIPInputStream;
import com.maxmind.db.NoCache;
import com.maxmind.db.NodeCache;
import com.maxmind.geoip2.DatabaseReader;
import org.apache.lucene.util.IOUtils;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.ingest.Processor;
import org.elasticsearch.plugins.IngestPlugin;
import org.elasticsearch.plugins.Plugin;
public class IngestGeoIpPlugin extends Plugin implements IngestPlugin, Closeable { public class IngestGeoIpPlugin extends Plugin implements IngestPlugin, Closeable {
public static final Setting<Long> CACHE_SIZE = public static final Setting<Long> CACHE_SIZE =
Setting.longSetting("ingest.geoip.cache_size", 1000, 0, Setting.Property.NodeScope); Setting.longSetting("ingest.geoip.cache_size", 1000, 0, Setting.Property.NodeScope);
private Map<String, DatabaseReader> databaseReaders; private Map<String, DatabaseReaderLazyLoader> databaseReaders;
@Override @Override
public List<Setting<?>> getSettings() { public List<Setting<?>> getSettings() {
@ -76,12 +76,12 @@ public class IngestGeoIpPlugin extends Plugin implements IngestPlugin, Closeable
return Collections.singletonMap(GeoIpProcessor.TYPE, new GeoIpProcessor.Factory(databaseReaders)); return Collections.singletonMap(GeoIpProcessor.TYPE, new GeoIpProcessor.Factory(databaseReaders));
} }
static Map<String, DatabaseReader> loadDatabaseReaders(Path geoIpConfigDirectory, NodeCache cache) throws IOException { static Map<String, DatabaseReaderLazyLoader> loadDatabaseReaders(Path geoIpConfigDirectory, NodeCache cache) throws IOException {
if (Files.exists(geoIpConfigDirectory) == false && Files.isDirectory(geoIpConfigDirectory)) { if (Files.exists(geoIpConfigDirectory) == false && Files.isDirectory(geoIpConfigDirectory)) {
throw new IllegalStateException("the geoip directory [" + geoIpConfigDirectory + "] containing databases doesn't exist"); throw new IllegalStateException("the geoip directory [" + geoIpConfigDirectory + "] containing databases doesn't exist");
} }
Map<String, DatabaseReader> databaseReaders = new HashMap<>(); Map<String, DatabaseReaderLazyLoader> databaseReaders = new HashMap<>();
try (Stream<Path> databaseFiles = Files.list(geoIpConfigDirectory)) { try (Stream<Path> databaseFiles = Files.list(geoIpConfigDirectory)) {
PathMatcher pathMatcher = geoIpConfigDirectory.getFileSystem().getPathMatcher("glob:**.mmdb.gz"); PathMatcher pathMatcher = geoIpConfigDirectory.getFileSystem().getPathMatcher("glob:**.mmdb.gz");
// Use iterator instead of forEach otherwise IOException needs to be caught twice... // Use iterator instead of forEach otherwise IOException needs to be caught twice...
@ -89,10 +89,13 @@ public class IngestGeoIpPlugin extends Plugin implements IngestPlugin, Closeable
while (iterator.hasNext()) { while (iterator.hasNext()) {
Path databasePath = iterator.next(); Path databasePath = iterator.next();
if (Files.isRegularFile(databasePath) && pathMatcher.matches(databasePath)) { if (Files.isRegularFile(databasePath) && pathMatcher.matches(databasePath)) {
String databaseFileName = databasePath.getFileName().toString();
DatabaseReaderLazyLoader holder = new DatabaseReaderLazyLoader(databaseFileName, () -> {
try (InputStream inputStream = new GZIPInputStream(Files.newInputStream(databasePath, StandardOpenOption.READ))) { try (InputStream inputStream = new GZIPInputStream(Files.newInputStream(databasePath, StandardOpenOption.READ))) {
databaseReaders.put(databasePath.getFileName().toString(), return new DatabaseReader.Builder(inputStream).withCache(cache).build();
new DatabaseReader.Builder(inputStream).withCache(cache).build());
} }
});
databaseReaders.put(databaseFileName, holder);
} }
} }
} }
View File
@ -22,7 +22,6 @@ package org.elasticsearch.ingest.geoip;
import com.carrotsearch.randomizedtesting.generators.RandomPicks; import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import com.maxmind.db.NoCache; import com.maxmind.db.NoCache;
import com.maxmind.db.NodeCache; import com.maxmind.db.NodeCache;
import com.maxmind.geoip2.DatabaseReader;
import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.common.Randomness; import org.elasticsearch.common.Randomness;
import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.ESTestCase;
@ -48,7 +47,7 @@ import static org.hamcrest.Matchers.sameInstance;
public class GeoIpProcessorFactoryTests extends ESTestCase { public class GeoIpProcessorFactoryTests extends ESTestCase {
private static Map<String, DatabaseReader> databaseReaders; private static Map<String, DatabaseReaderLazyLoader> databaseReaders;
@BeforeClass @BeforeClass
public static void loadDatabaseReaders() throws IOException { public static void loadDatabaseReaders() throws IOException {
@ -66,7 +65,7 @@ public class GeoIpProcessorFactoryTests extends ESTestCase {
@AfterClass @AfterClass
public static void closeDatabaseReaders() throws IOException { public static void closeDatabaseReaders() throws IOException {
for (DatabaseReader reader : databaseReaders.values()) { for (DatabaseReaderLazyLoader reader : databaseReaders.values()) {
reader.close(); reader.close();
} }
databaseReaders = null; databaseReaders = null;
@ -222,4 +221,37 @@ public class GeoIpProcessorFactoryTests extends ESTestCase {
assertThat(e.getMessage(), equalTo("[properties] property isn't a list, but of type [java.lang.String]")); assertThat(e.getMessage(), equalTo("[properties] property isn't a list, but of type [java.lang.String]"));
} }
} }
public void testLazyLoading() throws Exception {
Path configDir = createTempDir();
Path geoIpConfigDir = configDir.resolve("ingest-geoip");
Files.createDirectories(geoIpConfigDir);
Files.copy(new ByteArrayInputStream(StreamsUtils.copyToBytesFromClasspath("/GeoLite2-City.mmdb.gz")),
geoIpConfigDir.resolve("GeoLite2-City.mmdb.gz"));
Files.copy(new ByteArrayInputStream(StreamsUtils.copyToBytesFromClasspath("/GeoLite2-Country.mmdb.gz")),
geoIpConfigDir.resolve("GeoLite2-Country.mmdb.gz"));
// Load separate database reader instances, because otherwise we can't test lazy loading as the
// database readers used at class level are reused between tests. (We want to keep that, otherwise running this
// test would take roughly 4 times longer.)
Map<String, DatabaseReaderLazyLoader> databaseReaders =
IngestGeoIpPlugin.loadDatabaseReaders(geoIpConfigDir, NoCache.getInstance());
GeoIpProcessor.Factory factory = new GeoIpProcessor.Factory(databaseReaders);
for (DatabaseReaderLazyLoader lazyLoader : databaseReaders.values()) {
assertNull(lazyLoader.databaseReader.get());
}
Map<String, Object> config = new HashMap<>();
config.put("field", "_field");
config.put("database_file", "GeoLite2-City.mmdb.gz");
factory.create(null, "_tag", config);
config = new HashMap<>();
config.put("field", "_field");
config.put("database_file", "GeoLite2-Country.mmdb.gz");
factory.create(null, "_tag", config);
for (DatabaseReaderLazyLoader lazyLoader : databaseReaders.values()) {
assertNotNull(lazyLoader.databaseReader.get());
}
}
} }
View File
@ -150,18 +150,7 @@ public class InternalAwsS3Service extends AbstractLifecycleComponent implements
if (key.length() == 0 && secret.length() == 0) { if (key.length() == 0 && secret.length() == 0) {
logger.debug("Using instance profile credentials"); logger.debug("Using instance profile credentials");
AWSCredentialsProvider credentials = new InstanceProfileCredentialsProvider(); return new PrivilegedInstanceProfileCredentialsProvider();
return new AWSCredentialsProvider() {
@Override
public AWSCredentials getCredentials() {
return SocketAccess.doPrivileged(credentials::getCredentials);
}
@Override
public void refresh() {
SocketAccess.doPrivilegedVoid(credentials::refresh);
}
};
} else { } else {
logger.debug("Using basic key/secret credentials"); logger.debug("Using basic key/secret credentials");
return new StaticCredentialsProvider(new BasicAWSCredentials(key.toString(), secret.toString())); return new StaticCredentialsProvider(new BasicAWSCredentials(key.toString(), secret.toString()));
@ -221,4 +210,22 @@ public class InternalAwsS3Service extends AbstractLifecycleComponent implements
// Ensure that IdleConnectionReaper is shutdown // Ensure that IdleConnectionReaper is shutdown
IdleConnectionReaper.shutdown(); IdleConnectionReaper.shutdown();
} }
static class PrivilegedInstanceProfileCredentialsProvider implements AWSCredentialsProvider {
private final InstanceProfileCredentialsProvider credentials;
private PrivilegedInstanceProfileCredentialsProvider() {
this.credentials = new InstanceProfileCredentialsProvider();
}
@Override
public AWSCredentials getCredentials() {
return SocketAccess.doPrivileged(credentials::getCredentials);
}
@Override
public void refresh() {
SocketAccess.doPrivilegedVoid(credentials::refresh);
}
}
} }
View File
@ -37,7 +37,7 @@ public class AwsS3ServiceImplTests extends ESTestCase {
public void testAWSCredentialsWithSystemProviders() { public void testAWSCredentialsWithSystemProviders() {
AWSCredentialsProvider credentialsProvider = AWSCredentialsProvider credentialsProvider =
InternalAwsS3Service.buildCredentials(logger, deprecationLogger, Settings.EMPTY, Settings.EMPTY, "default"); InternalAwsS3Service.buildCredentials(logger, deprecationLogger, Settings.EMPTY, Settings.EMPTY, "default");
assertThat(credentialsProvider, instanceOf(AWSCredentialsProvider.class)); assertThat(credentialsProvider, instanceOf(InternalAwsS3Service.PrivilegedInstanceProfileCredentialsProvider.class));
} }
public void testAwsCredsDefaultSettings() { public void testAwsCredsDefaultSettings() {
View File
@ -28,7 +28,7 @@ setup:
--- ---
"Basic": "Basic":
- skip: - skip:
version: " - 5.2.99" version: " - 5.99.99"
reason: this uses a new highlighter that has been added in 5.3 reason: this uses a new highlighter that has been added in 5.3
- do: - do:
search: search: