From 61c435e6a9b3d2888a2c8e6d65e643bcde14d280 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christoph=20B=C3=BCscher?=
Date: Thu, 21 Jan 2016 19:35:46 +0100
Subject: [PATCH] PhraseSuggestionBuilder: Refactor DirectCandidateGenerator

As a prerequisite for refactoring the whole PhraseSuggestionBuilder so that it
can be parsed and streamed from the coordinating node, the
DirectCandidateGenerator must implement Writeable, be able to parse a new
instance (fromXContent()) and, once transported to the shard, be able to
generate a PhraseSuggestionContext.DirectCandidateGenerator.

Also adds equals/hashCode and tests, and moves DirectCandidateGenerator to its
own DirectCandidateGeneratorBuilder class.
---
 .../DirectCandidateGeneratorBuilder.java      | 493 ++++++++++++++++++
 .../suggest/phrase/PhraseSuggestParser.java   |  74 +--
 .../phrase/PhraseSuggestionBuilder.java       | 270 +---------
 .../phrase/DirectCandidateGeneratorTests.java | 323 ++++++++++++
 docs/reference/migration/migrate_3_0.asciidoc |   4 +
 .../messy/tests/SuggestSearchTests.java       |  76 +--
 6 files changed, 903 insertions(+), 337 deletions(-)
 create mode 100644 core/src/main/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGeneratorBuilder.java
 create mode 100644 core/src/test/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGeneratorTests.java

diff --git a/core/src/main/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGeneratorBuilder.java b/core/src/main/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGeneratorBuilder.java
new file mode 100644
index 00000000000..90ec2845b8a
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGeneratorBuilder.java
@@ -0,0 +1,493 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */ + +package org.elasticsearch.search.suggest.phrase; + +import org.apache.lucene.util.automaton.LevenshteinAutomata; +import org.elasticsearch.ExceptionsHelper; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.collect.Tuple; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.xcontent.ObjectParser; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentFactory; +import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.index.query.QueryParseContext; +import org.elasticsearch.index.query.QueryShardContext; +import org.elasticsearch.search.suggest.SuggestUtils; +import org.elasticsearch.search.suggest.phrase.PhraseSuggestionBuilder.CandidateGenerator; + +import java.io.IOException; +import java.util.HashSet; +import java.util.Objects; +import java.util.Set; +import java.util.function.Consumer; + +public final class DirectCandidateGeneratorBuilder + implements Writeable, CandidateGenerator { + + private static final String TYPE = "direct_generator"; + static final DirectCandidateGeneratorBuilder PROTOTYPE = new DirectCandidateGeneratorBuilder("_na_"); + + static final ParseField DIRECT_GENERATOR_FIELD = new ParseField(TYPE); + static final ParseField FIELDNAME_FIELD = new ParseField("field"); + static final ParseField PREFILTER_FIELD = new ParseField("pre_filter"); + static final ParseField POSTFILTER_FIELD = new ParseField("post_filter"); + static final ParseField SUGGESTMODE_FIELD = new ParseField("suggest_mode"); + static final ParseField MIN_DOC_FREQ_FIELD = new ParseField("min_doc_freq"); + static final ParseField ACCURACY_FIELD = new ParseField("accuracy"); + static final ParseField SIZE_FIELD = new ParseField("size"); + static final ParseField SORT_FIELD = new ParseField("sort"); + static final ParseField STRING_DISTANCE_FIELD = new ParseField("string_distance"); + static final ParseField MAX_EDITS_FIELD = new ParseField("max_edits"); + static final ParseField MAX_INSPECTIONS_FIELD = new ParseField("max_inspections"); + static final ParseField MAX_TERM_FREQ_FIELD = new ParseField("max_term_freq"); + static final ParseField PREFIX_LENGTH_FIELD = new ParseField("prefix_length"); + static final ParseField MIN_WORD_LENGTH_FIELD = new ParseField("min_word_length"); + + private final String field; + private String preFilter; + private String postFilter; + private String suggestMode; + private Float accuracy; + private Integer size; + private String sort; + private String stringDistance; + private Integer maxEdits; + private Integer maxInspections; + private Float maxTermFreq; + private Integer prefixLength; + private Integer minWordLength; + private Float minDocFreq; + + /** + * @param field Sets from what field to fetch the candidate suggestions from. + */ + public DirectCandidateGeneratorBuilder(String field) { + this.field = field; + } + + /** + * Quasi copy-constructor that takes all values from the generator + * passed in, but uses different field name. Needed by parser because we + * need to buffer the field name but read all other properties to a + * temporary object. 
+ */ + private static DirectCandidateGeneratorBuilder replaceField(String field, DirectCandidateGeneratorBuilder other) { + DirectCandidateGeneratorBuilder generator = new DirectCandidateGeneratorBuilder(field); + generator.preFilter = other.preFilter; + generator.postFilter = other.postFilter; + generator.suggestMode = other.suggestMode; + generator.accuracy = other.accuracy; + generator.size = other.size; + generator.sort = other.sort; + generator.stringDistance = other.stringDistance; + generator.maxEdits = other.maxEdits; + generator.maxInspections = other.maxInspections; + generator.maxTermFreq = other.maxTermFreq; + generator.prefixLength = other.prefixLength; + generator.minWordLength = other.minWordLength; + generator.minDocFreq = other.minDocFreq; + return generator; + } + + /** + * The global suggest mode controls what suggested terms are included or + * controls for what suggest text tokens, terms should be suggested for. + * Three possible values can be specified: + *
    + *
+ * 1. missing - Only suggest terms in the suggest text
+ * that aren't in the index. This is the default.
+ * 2. popular - Only suggest terms that occur in more docs
+ * than the original suggest text term.
+ * 3. always - Suggest any matching suggest terms based on
+ * tokens in the suggest text.
+ */ + public DirectCandidateGeneratorBuilder suggestMode(String suggestMode) { + this.suggestMode = suggestMode; + return this; + } + + /** + * Sets how similar the suggested terms at least need to be compared to + * the original suggest text tokens. A value between 0 and 1 can be + * specified. This value will be compared to the string distance result + * of each candidate spelling correction. + *

+ * Default is 0.5 + */ + public DirectCandidateGeneratorBuilder accuracy(float accuracy) { + this.accuracy = accuracy; + return this; + } + + /** + * Sets the maximum suggestions to be returned per suggest text term. + */ + public DirectCandidateGeneratorBuilder size(int size) { + if (size <= 0) { + throw new IllegalArgumentException("Size must be positive"); + } + this.size = size; + return this; + } + + /** + * Sets how to sort the suggest terms per suggest text token. Two + * possible values: + *

    + *
+ * 1. score - Sort should first be based on score, then
+ * document frequency and then the term itself.
+ * 2. frequency - Sort should first be based on document
+ * frequency, then score and then the term itself.
+ *
+ * What the score is depends on the suggester being used.
+ */
+ public DirectCandidateGeneratorBuilder sort(String sort) {
+     this.sort = sort;
+     return this;
+ }
+
+ /**
+ * Sets what string distance implementation to use for comparing how
+ * similar suggested terms are. Five possible values can be specified:
+ *
+ * 1. internal - This is the default and is based on
+ * damerau_levenshtein, but highly optimized for comparing
+ * string distance for terms inside the index.
+ * 2. damerau_levenshtein - String distance algorithm
+ * based on the Damerau-Levenshtein algorithm.
+ * 3. levenstein - String distance algorithm based on
+ * the Levenshtein edit distance algorithm.
+ * 4. jarowinkler - String distance algorithm based on
+ * the Jaro-Winkler algorithm.
+ * 5. ngram - String distance algorithm based on character
+ * n-grams.
+ */
+ public DirectCandidateGeneratorBuilder stringDistance(String stringDistance) {
+     this.stringDistance = stringDistance;
+     return this;
+ }
+
+ /**
+ * Sets the maximum edit distance candidate suggestions can have in
+ * order to be considered as a suggestion. Can only be a value between 1
+ * and 2. Any other value results in a bad request error being thrown.
+ * Defaults to 2.
+ */
+ public DirectCandidateGeneratorBuilder maxEdits(Integer maxEdits) {
+     if (maxEdits < 1 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
+         throw new IllegalArgumentException("Illegal max_edits value " + maxEdits);
+     }
+     this.maxEdits = maxEdits;
+     return this;
+ }
+
+ /**
+ * A factor that is used to multiply with the size in order to inspect
+ * more candidate suggestions. Can improve accuracy at the cost of
+ * performance. Defaults to 5.
+ */
+ public DirectCandidateGeneratorBuilder maxInspections(Integer maxInspections) {
+     this.maxInspections = maxInspections;
+     return this;
+ }
+
+ /**
+ * Sets a maximum threshold in number of documents in which a suggest text
+ * token can exist in order to be corrected. Can be a relative percentage
+ * number (e.g. 0.4) or an absolute number to represent document
+ * frequencies. If a value higher than 1 is specified then a fractional
+ * value cannot be specified. Defaults to 0.01.
+ *

+ * This can be used to exclude high frequency terms from being
+ * suggested. High frequency terms are usually spelled correctly; on top
+ * of this, it also improves the suggest performance.
+ */
+ public DirectCandidateGeneratorBuilder maxTermFreq(float maxTermFreq) {
+     this.maxTermFreq = maxTermFreq;
+     return this;
+ }
+
+ /**
+ * Sets the number of minimal prefix characters that must match in order
+ * to be a candidate suggestion. Defaults to 1. Increasing this number
+ * improves suggest performance. Usually misspellings don't occur in the
+ * beginning of terms.
+ */
+ public DirectCandidateGeneratorBuilder prefixLength(int prefixLength) {
+     this.prefixLength = prefixLength;
+     return this;
+ }
+
+ /**
+ * The minimum length a suggest text term must have in order to be
+ * corrected. Defaults to 4.
+ */
+ public DirectCandidateGeneratorBuilder minWordLength(int minWordLength) {
+     this.minWordLength = minWordLength;
+     return this;
+ }
+
+ /**
+ * Sets a minimal threshold in number of documents a suggested term
+ * should appear in. This can be specified as an absolute number or as a
+ * relative percentage of number of documents. This can improve quality
+ * by only suggesting high frequency terms. Defaults to 0f and is not
+ * enabled. If a value higher than 1 is specified then the number cannot
+ * be fractional.
+ */
+ public DirectCandidateGeneratorBuilder minDocFreq(float minDocFreq) {
+     this.minDocFreq = minDocFreq;
+     return this;
+ }
+
+ /**
+ * Sets a filter (analyzer) that is applied to each of the tokens passed to this candidate generator.
+ * This filter is applied to the original token before candidates are generated.
+ */
+ public DirectCandidateGeneratorBuilder preFilter(String preFilter) {
+     this.preFilter = preFilter;
+     return this;
+ }
+
+ /**
+ * Sets a filter (analyzer) that is applied to each of the generated tokens
+ * before they are passed to the actual phrase scorer.
+ */ + public DirectCandidateGeneratorBuilder postFilter(String postFilter) { + this.postFilter = postFilter; + return this; + } + + /** + * gets the type identifier of this {@link CandidateGenerator} + */ + @Override + public String getType() { + return TYPE; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + outputFieldIfNotNull(field, FIELDNAME_FIELD, builder); + outputFieldIfNotNull(accuracy, ACCURACY_FIELD, builder); + outputFieldIfNotNull(maxEdits, MAX_EDITS_FIELD, builder); + outputFieldIfNotNull(maxInspections, MAX_INSPECTIONS_FIELD, builder); + outputFieldIfNotNull(maxTermFreq, MAX_TERM_FREQ_FIELD, builder); + outputFieldIfNotNull(minWordLength, MIN_WORD_LENGTH_FIELD, builder); + outputFieldIfNotNull(minDocFreq, MIN_DOC_FREQ_FIELD, builder); + outputFieldIfNotNull(preFilter, PREFILTER_FIELD, builder); + outputFieldIfNotNull(prefixLength, PREFIX_LENGTH_FIELD, builder); + outputFieldIfNotNull(postFilter, POSTFILTER_FIELD, builder); + outputFieldIfNotNull(suggestMode, SUGGESTMODE_FIELD, builder); + outputFieldIfNotNull(size, SIZE_FIELD, builder); + outputFieldIfNotNull(sort, SORT_FIELD, builder); + outputFieldIfNotNull(stringDistance, STRING_DISTANCE_FIELD, builder); + builder.endObject(); + return builder; + } + + private static void outputFieldIfNotNull(T value, ParseField field, XContentBuilder builder) throws IOException { + if (value != null) { + builder.field(field.getPreferredName(), value); + } + } + + private static ObjectParser, DirectCandidateGeneratorBuilder>, QueryParseContext> PARSER = new ObjectParser<>(TYPE); + + static { + PARSER.declareString((tp, s) -> tp.v1().add(s), FIELDNAME_FIELD); + PARSER.declareString((tp, s) -> tp.v2().preFilter(s), PREFILTER_FIELD); + PARSER.declareString((tp, s) -> tp.v2().postFilter(s), POSTFILTER_FIELD); + PARSER.declareString((tp, s) -> tp.v2().suggestMode(s), SUGGESTMODE_FIELD); + PARSER.declareFloat((tp, f) -> tp.v2().minDocFreq(f), MIN_DOC_FREQ_FIELD); + PARSER.declareFloat((tp, f) -> tp.v2().accuracy(f), ACCURACY_FIELD); + PARSER.declareInt((tp, i) -> tp.v2().size(i), SIZE_FIELD); + PARSER.declareString((tp, s) -> tp.v2().sort(s), SORT_FIELD); + PARSER.declareString((tp, s) -> tp.v2().stringDistance(s), STRING_DISTANCE_FIELD); + PARSER.declareInt((tp, i) -> tp.v2().maxInspections(i), MAX_INSPECTIONS_FIELD); + PARSER.declareFloat((tp, f) -> tp.v2().maxTermFreq(f), MAX_TERM_FREQ_FIELD); + PARSER.declareInt((tp, i) -> tp.v2().maxEdits(i), MAX_EDITS_FIELD); + PARSER.declareInt((tp, i) -> tp.v2().minWordLength(i), MIN_WORD_LENGTH_FIELD); + PARSER.declareInt((tp, i) -> tp.v2().prefixLength(i), PREFIX_LENGTH_FIELD); + } + + @Override + public DirectCandidateGeneratorBuilder fromXContent(QueryParseContext parseContext) throws IOException { + DirectCandidateGeneratorBuilder tempGenerator = new DirectCandidateGeneratorBuilder("_na_"); + Set tmpFieldName = new HashSet<>(1); // bucket for the field + // name, needed as + // constructor arg + // later + PARSER.parse(parseContext.parser(), + new Tuple, DirectCandidateGeneratorBuilder>(tmpFieldName, tempGenerator)); + if (tmpFieldName.size() != 1) { + throw new IllegalArgumentException("[" + TYPE + "] expects exactly one field parameter, but found " + tmpFieldName); + } + return replaceField(tmpFieldName.iterator().next(), tempGenerator); + } + + public PhraseSuggestionContext.DirectCandidateGenerator build(QueryShardContext context) throws IOException { + MapperService mapperService = 
context.getMapperService(); + PhraseSuggestionContext.DirectCandidateGenerator generator = new PhraseSuggestionContext.DirectCandidateGenerator(); + generator.setField(this.field); + transferIfNotNull(this.size, generator::size); + if (this.preFilter != null) { + generator.preFilter(mapperService.analysisService().analyzer(this.preFilter)); + if (generator.preFilter() == null) { + throw new IllegalArgumentException("Analyzer [" + this.preFilter + "] doesn't exists"); + } + } + if (this.postFilter != null) { + generator.postFilter(mapperService.analysisService().analyzer(this.postFilter)); + if (generator.postFilter() == null) { + throw new IllegalArgumentException("Analyzer [" + this.postFilter + "] doesn't exists"); + } + } + transferIfNotNull(this.accuracy, generator::accuracy); + if (this.suggestMode != null) { + generator.suggestMode(SuggestUtils.resolveSuggestMode(this.suggestMode)); + } + if (this.sort != null) { + generator.sort(SuggestUtils.resolveSort(this.sort)); + } + if (this.stringDistance != null) { + generator.stringDistance(SuggestUtils.resolveDistance(this.stringDistance)); + } + transferIfNotNull(this.maxEdits, generator::maxEdits); + if (generator.maxEdits() < 1 || generator.maxEdits() > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) { + throw new IllegalArgumentException("Illegal max_edits value " + generator.maxEdits()); + } + transferIfNotNull(this.maxInspections, generator::maxInspections); + transferIfNotNull(this.maxTermFreq, generator::maxTermFreq); + transferIfNotNull(this.prefixLength, generator::prefixLength); + transferIfNotNull(this.minWordLength, generator::minQueryLength); + transferIfNotNull(this.minDocFreq, generator::minDocFreq); + return generator; + } + + private static void transferIfNotNull(T value, Consumer consumer) { + if (value != null) { + consumer.accept(value); + } + } + + @Override + public final String toString() { + try { + XContentBuilder builder = XContentFactory.jsonBuilder(); + builder.prettyPrint(); + toXContent(builder, EMPTY_PARAMS); + return builder.string(); + } catch (Exception e) { + return "{ \"error\" : \"" + ExceptionsHelper.detailedMessage(e) + "\"}"; + } + } + + @Override + public DirectCandidateGeneratorBuilder readFrom(StreamInput in) throws IOException { + DirectCandidateGeneratorBuilder cg = new DirectCandidateGeneratorBuilder(in.readString()); + cg.suggestMode = in.readOptionalString(); + if (in.readBoolean()) { + cg.accuracy = in.readFloat(); + } + cg.size = in.readOptionalVInt(); + cg.sort = in.readOptionalString(); + cg.stringDistance = in.readOptionalString(); + cg.maxEdits = in.readOptionalVInt(); + cg.maxInspections = in.readOptionalVInt(); + if (in.readBoolean()) { + cg.maxTermFreq = in.readFloat(); + } + cg.prefixLength = in.readOptionalVInt(); + cg.minWordLength = in.readOptionalVInt(); + if (in.readBoolean()) { + cg.minDocFreq = in.readFloat(); + } + cg.preFilter = in.readOptionalString(); + cg.postFilter = in.readOptionalString(); + return cg; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeString(field); + out.writeOptionalString(suggestMode); + out.writeBoolean(accuracy != null); + if (accuracy != null) { + out.writeFloat(accuracy); + } + out.writeOptionalVInt(size); + out.writeOptionalString(sort); + out.writeOptionalString(stringDistance); + out.writeOptionalVInt(maxEdits); + out.writeOptionalVInt(maxInspections); + out.writeBoolean(maxTermFreq != null); + if (maxTermFreq != null) { + out.writeFloat(maxTermFreq); + } + out.writeOptionalVInt(prefixLength); + 
out.writeOptionalVInt(minWordLength); + out.writeBoolean(minDocFreq != null); + if (minDocFreq != null) { + out.writeFloat(minDocFreq); + } + out.writeOptionalString(preFilter); + out.writeOptionalString(postFilter); + } + + @Override + public final int hashCode() { + return Objects.hash(field, preFilter, postFilter, suggestMode, accuracy, + size, sort, stringDistance, maxEdits, maxInspections, + maxTermFreq, prefixLength, minWordLength, minDocFreq); + } + + @Override + public final boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (obj == null || getClass() != obj.getClass()) { + return false; + } + DirectCandidateGeneratorBuilder other = (DirectCandidateGeneratorBuilder) obj; + return Objects.equals(field, other.field) && + Objects.equals(preFilter, other.preFilter) && + Objects.equals(postFilter, other.postFilter) && + Objects.equals(suggestMode, other.suggestMode) && + Objects.equals(accuracy, other.accuracy) && + Objects.equals(size, other.size) && + Objects.equals(sort, other.sort) && + Objects.equals(stringDistance, other.stringDistance) && + Objects.equals(maxEdits, other.maxEdits) && + Objects.equals(maxInspections, other.maxInspections) && + Objects.equals(maxTermFreq, other.maxTermFreq) && + Objects.equals(prefixLength, other.prefixLength) && + Objects.equals(minWordLength, other.minWordLength) && + Objects.equals(minDocFreq, other.minDocFreq); + } +} \ No newline at end of file diff --git a/core/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestParser.java b/core/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestParser.java index ee0dc1b0b9f..fc60fc6fc80 100644 --- a/core/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestParser.java +++ b/core/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestParser.java @@ -98,18 +98,10 @@ public final class PhraseSuggestParser implements SuggestContextParser { } } } else if (token == Token.START_ARRAY) { - if ("direct_generator".equals(fieldName) || "directGenerator".equals(fieldName)) { + if (parseFieldMatcher.match(fieldName, DirectCandidateGeneratorBuilder.DIRECT_GENERATOR_FIELD)) { // for now we only have a single type of generators while ((token = parser.nextToken()) == Token.START_OBJECT) { - PhraseSuggestionContext.DirectCandidateGenerator generator = new PhraseSuggestionContext.DirectCandidateGenerator(); - while ((token = parser.nextToken()) != Token.END_OBJECT) { - if (token == XContentParser.Token.FIELD_NAME) { - fieldName = parser.currentName(); - } - if (token.isValue()) { - parseCandidateGenerator(parser, mapperService, fieldName, generator, parseFieldMatcher); - } - } + PhraseSuggestionContext.DirectCandidateGenerator generator = parseCandidateGenerator(parser, mapperService, parseFieldMatcher); verifyGenerator(generator); suggestion.addGenerator(generator); } @@ -323,34 +315,44 @@ public final class PhraseSuggestParser implements SuggestContextParser { } } - private void parseCandidateGenerator(XContentParser parser, MapperService mapperService, String fieldName, - PhraseSuggestionContext.DirectCandidateGenerator generator, ParseFieldMatcher parseFieldMatcher) throws IOException { - if (!SuggestUtils.parseDirectSpellcheckerSettings(parser, fieldName, generator, parseFieldMatcher)) { - if ("field".equals(fieldName)) { - generator.setField(parser.text()); - if (mapperService.fullName(generator.field()) == null) { - throw new IllegalArgumentException("No mapping found for field [" + generator.field() + "]"); + static 
PhraseSuggestionContext.DirectCandidateGenerator parseCandidateGenerator(XContentParser parser, MapperService mapperService, + ParseFieldMatcher parseFieldMatcher) throws IOException { + XContentParser.Token token; + String fieldName = null; + PhraseSuggestionContext.DirectCandidateGenerator generator = new PhraseSuggestionContext.DirectCandidateGenerator(); + while ((token = parser.nextToken()) != Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + fieldName = parser.currentName(); + } + if (token.isValue()) { + if (!SuggestUtils.parseDirectSpellcheckerSettings(parser, fieldName, generator, parseFieldMatcher)) { + if ("field".equals(fieldName)) { + generator.setField(parser.text()); + if (mapperService.fullName(generator.field()) == null) { + throw new IllegalArgumentException("No mapping found for field [" + generator.field() + "]"); + } + } else if ("size".equals(fieldName)) { + generator.size(parser.intValue()); + } else if ("pre_filter".equals(fieldName) || "preFilter".equals(fieldName)) { + String analyzerName = parser.text(); + Analyzer analyzer = mapperService.analysisService().analyzer(analyzerName); + if (analyzer == null) { + throw new IllegalArgumentException("Analyzer [" + analyzerName + "] doesn't exists"); + } + generator.preFilter(analyzer); + } else if ("post_filter".equals(fieldName) || "postFilter".equals(fieldName)) { + String analyzerName = parser.text(); + Analyzer analyzer = mapperService.analysisService().analyzer(analyzerName); + if (analyzer == null) { + throw new IllegalArgumentException("Analyzer [" + analyzerName + "] doesn't exists"); + } + generator.postFilter(analyzer); + } else { + throw new IllegalArgumentException("CandidateGenerator doesn't support [" + fieldName + "]"); + } } - } else if ("size".equals(fieldName)) { - generator.size(parser.intValue()); - } else if ("pre_filter".equals(fieldName) || "preFilter".equals(fieldName)) { - String analyzerName = parser.text(); - Analyzer analyzer = mapperService.analysisService().analyzer(analyzerName); - if (analyzer == null) { - throw new IllegalArgumentException("Analyzer [" + analyzerName + "] doesn't exists"); - } - generator.preFilter(analyzer); - } else if ("post_filter".equals(fieldName) || "postFilter".equals(fieldName)) { - String analyzerName = parser.text(); - Analyzer analyzer = mapperService.analysisService().analyzer(analyzerName); - if (analyzer == null) { - throw new IllegalArgumentException("Analyzer [" + analyzerName + "] doesn't exists"); - } - generator.postFilter(analyzer); - } else { - throw new IllegalArgumentException("CandidateGenerator doesn't support [" + fieldName + "]"); } } + return generator; } - } diff --git a/core/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestionBuilder.java b/core/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestionBuilder.java index 0e1fec6c7b2..b72cd41ea73 100644 --- a/core/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestionBuilder.java +++ b/core/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestionBuilder.java @@ -278,13 +278,13 @@ public final class PhraseSuggestionBuilder extends SuggestionBuilder - *

- * • missing - Only suggest terms in the suggest text
- * that aren't in the index. This is the default.
- * • popular - Only suggest terms that occur in more docs
- * then the original suggest text term.
- * • always - Suggest any matching suggest terms based on
- * tokens in the suggest text.
- *
- */
- public DirectCandidateGenerator suggestMode(String suggestMode) {
-     this.suggestMode = suggestMode;
-     return this;
- }
-
- /**
- * Sets how similar the suggested terms at least need to be compared to
- * the original suggest text tokens. A value between 0 and 1 can be
- * specified. This value will be compared to the string distance result
- * of each candidate spelling correction.
- *

    - * Default is 0.5 - */ - public DirectCandidateGenerator accuracy(float accuracy) { - this.accuracy = accuracy; - return this; - } - - /** - * Sets the maximum suggestions to be returned per suggest text term. - */ - public DirectCandidateGenerator size(int size) { - if (size <= 0) { - throw new IllegalArgumentException("Size must be positive"); - } - this.size = size; - return this; - } - - /** - * Sets how to sort the suggest terms per suggest text token. Two - * possible values: - *

      - *
- * 1. score - Sort should first be based on score, then
- * document frequency and then the term itself.
- * 2. frequency - Sort should first be based on document
- * frequency, then scotr and then the term itself.
- *
    - * What the score is depends on the suggester being used. - */ - public DirectCandidateGenerator sort(String sort) { - this.sort = sort; - return this; - } - - /** - * Sets what string distance implementation to use for comparing how - * similar suggested terms are. Four possible values can be specified: - *

      - *
- * 1. internal - This is the default and is based on
- * damerau_levenshtein, but highly optimized for comparing
- * string distance for terms inside the index.
- * 2. damerau_levenshtein - String distance algorithm
- * based on Damerau-Levenshtein algorithm.
- * 3. levenstein - String distance algorithm based on
- * Levenstein edit distance algorithm.
- * 4. jarowinkler - String distance algorithm based on
- * Jaro-Winkler algorithm.
- * 5. ngram - String distance algorithm based on character
- * n-grams.
    - */ - public DirectCandidateGenerator stringDistance(String stringDistance) { - this.stringDistance = stringDistance; - return this; - } - - /** - * Sets the maximum edit distance candidate suggestions can have in - * order to be considered as a suggestion. Can only be a value between 1 - * and 2. Any other value result in an bad request error being thrown. - * Defaults to 2. - */ - public DirectCandidateGenerator maxEdits(Integer maxEdits) { - this.maxEdits = maxEdits; - return this; - } - - /** - * A factor that is used to multiply with the size in order to inspect - * more candidate suggestions. Can improve accuracy at the cost of - * performance. Defaults to 5. - */ - public DirectCandidateGenerator maxInspections(Integer maxInspections) { - this.maxInspections = maxInspections; - return this; - } - - /** - * Sets a maximum threshold in number of documents a suggest text token - * can exist in order to be corrected. Can be a relative percentage - * number (e.g 0.4) or an absolute number to represent document - * frequencies. If an value higher than 1 is specified then fractional - * can not be specified. Defaults to 0.01. - *

    - * This can be used to exclude high frequency terms from being - * suggested. High frequency terms are usually spelled correctly on top - * of this this also improves the suggest performance. - */ - public DirectCandidateGenerator maxTermFreq(float maxTermFreq) { - this.maxTermFreq = maxTermFreq; - return this; - } - - /** - * Sets the number of minimal prefix characters that must match in order - * be a candidate suggestion. Defaults to 1. Increasing this number - * improves suggest performance. Usually misspellings don't occur in the - * beginning of terms. - */ - public DirectCandidateGenerator prefixLength(int prefixLength) { - this.prefixLength = prefixLength; - return this; - } - - /** - * The minimum length a suggest text term must have in order to be - * corrected. Defaults to 4. - */ - public DirectCandidateGenerator minWordLength(int minWordLength) { - this.minWordLength = minWordLength; - return this; - } - - /** - * Sets a minimal threshold in number of documents a suggested term - * should appear in. This can be specified as an absolute number or as a - * relative percentage of number of documents. This can improve quality - * by only suggesting high frequency terms. Defaults to 0f and is not - * enabled. If a value higher than 1 is specified then the number cannot - * be fractional. - */ - public DirectCandidateGenerator minDocFreq(float minDocFreq) { - this.minDocFreq = minDocFreq; - return this; - } - - /** - * Sets a filter (analyzer) that is applied to each of the tokens passed to this candidate generator. - * This filter is applied to the original token before candidates are generated. - */ - public DirectCandidateGenerator preFilter(String preFilter) { - this.preFilter = preFilter; - return this; - } - - /** - * Sets a filter (analyzer) that is applied to each of the generated tokens - * before they are passed to the actual phrase scorer. 
- */ - public DirectCandidateGenerator postFilter(String postFilter) { - this.postFilter = postFilter; - return this; - } - - @Override - public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - builder.startObject(); - if (field != null) { - builder.field("field", field); - } - if (suggestMode != null) { - builder.field("suggest_mode", suggestMode); - } - if (accuracy != null) { - builder.field("accuracy", accuracy); - } - if (size != null) { - builder.field("size", size); - } - if (sort != null) { - builder.field("sort", sort); - } - if (stringDistance != null) { - builder.field("string_distance", stringDistance); - } - if (maxEdits != null) { - builder.field("max_edits", maxEdits); - } - if (maxInspections != null) { - builder.field("max_inspections", maxInspections); - } - if (maxTermFreq != null) { - builder.field("max_term_freq", maxTermFreq); - } - if (prefixLength != null) { - builder.field("prefix_length", prefixLength); - } - if (minWordLength != null) { - builder.field("min_word_length", minWordLength); - } - if (minDocFreq != null) { - builder.field("min_doc_freq", minDocFreq); - } - if (preFilter != null) { - builder.field("pre_filter", preFilter); - } - if (postFilter != null) { - builder.field("post_filter", postFilter); - } - builder.endObject(); - return builder; - } - - } - } diff --git a/core/src/test/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGeneratorTests.java b/core/src/test/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGeneratorTests.java new file mode 100644 index 00000000000..02826b9a7eb --- /dev/null +++ b/core/src/test/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGeneratorTests.java @@ -0,0 +1,323 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.search.suggest.phrase; + +import org.apache.lucene.analysis.core.WhitespaceAnalyzer; +import org.elasticsearch.common.ParseFieldMatcher; +import org.elasticsearch.common.ParsingException; +import org.elasticsearch.common.io.stream.BytesStreamOutput; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.ToXContent; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentFactory; +import org.elasticsearch.common.xcontent.XContentHelper; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.common.xcontent.XContentType; +import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AnalysisService; +import org.elasticsearch.index.analysis.NamedAnalyzer; +import org.elasticsearch.index.mapper.ContentPath; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.Mapper; +import org.elasticsearch.index.mapper.MapperBuilders; +import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.index.mapper.core.StringFieldMapper; +import org.elasticsearch.index.mapper.core.StringFieldMapper.StringFieldType; +import org.elasticsearch.index.query.QueryParseContext; +import org.elasticsearch.index.query.QueryShardContext; +import org.elasticsearch.indices.IndicesModule; +import org.elasticsearch.indices.query.IndicesQueriesRegistry; +import org.elasticsearch.search.suggest.phrase.PhraseSuggestionContext.DirectCandidateGenerator; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.IndexSettingsModule; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; +import java.util.function.Consumer; + +import static org.hamcrest.Matchers.equalTo; + +public class DirectCandidateGeneratorTests extends ESTestCase{ + + private static final int NUMBER_OF_RUNS = 20; + + + + /** + * Test serialization and deserialization of the generator + */ + public void testSerialization() throws IOException { + for (int runs = 0; runs < NUMBER_OF_RUNS; runs++) { + DirectCandidateGeneratorBuilder original = randomCandidateGenerator(); + DirectCandidateGeneratorBuilder deserialized = serializedCopy(original); + assertEquals(deserialized, original); + assertEquals(deserialized.hashCode(), original.hashCode()); + assertNotSame(deserialized, original); + } + } + + /** + * Test equality and hashCode properties + */ + public void testEqualsAndHashcode() throws IOException { + for (int runs = 0; runs < NUMBER_OF_RUNS; runs++) { + DirectCandidateGeneratorBuilder first = randomCandidateGenerator(); + assertFalse("generator is equal to null", first.equals(null)); + assertFalse("generator is equal to incompatible type", first.equals("")); + assertTrue("generator is not equal to self", first.equals(first)); + assertThat("same generator's hashcode returns different values if called multiple times", first.hashCode(), + equalTo(first.hashCode())); + + DirectCandidateGeneratorBuilder second = serializedCopy(first); + assertTrue("generator is not equal to self", second.equals(second)); + assertTrue("generator is not equal to its copy", first.equals(second)); + assertTrue("equals is not symmetric", second.equals(first)); + assertThat("generator copy's hashcode is different from original hashcode", second.hashCode(), equalTo(first.hashCode())); + + DirectCandidateGeneratorBuilder third = serializedCopy(second); 
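+ // the second round-trip gives a third, independent copy; the assertions below
+ // exercise reflexivity, symmetry and transitivity of equals() as well as hashCode consistency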
+ assertTrue("generator is not equal to self", third.equals(third)); + assertTrue("generator is not equal to its copy", second.equals(third)); + assertThat("generator copy's hashcode is different from original hashcode", second.hashCode(), equalTo(third.hashCode())); + assertTrue("equals is not transitive", first.equals(third)); + assertThat("generator copy's hashcode is different from original hashcode", first.hashCode(), equalTo(third.hashCode())); + assertTrue("equals is not symmetric", third.equals(second)); + assertTrue("equals is not symmetric", third.equals(first)); + + // test for non-equality, check that all fields are covered by changing one by one + first = new DirectCandidateGeneratorBuilder("aaa"); + assertEquals(first, serializedCopy(first)); + second = new DirectCandidateGeneratorBuilder("bbb"); + assertNotEquals(first, second); + assertNotEquals(first.accuracy(0.1f), serializedCopy(first).accuracy(0.2f)); + assertNotEquals(first.maxEdits(1), serializedCopy(first).maxEdits(2)); + assertNotEquals(first.maxInspections(1), serializedCopy(first).maxInspections(2)); + assertNotEquals(first.maxTermFreq(0.1f), serializedCopy(first).maxTermFreq(0.2f)); + assertNotEquals(first.minDocFreq(0.1f), serializedCopy(first).minDocFreq(0.2f)); + assertNotEquals(first.minWordLength(1), serializedCopy(first).minWordLength(2)); + assertNotEquals(first.postFilter("postFilter"), serializedCopy(first).postFilter("postFilter_other")); + assertNotEquals(first.preFilter("preFilter"), serializedCopy(first).preFilter("preFilter_other")); + assertNotEquals(first.prefixLength(1), serializedCopy(first).prefixLength(2)); + assertNotEquals(first.size(1), serializedCopy(first).size(2)); + assertNotEquals(first.sort("score"), serializedCopy(first).sort("frequency")); + assertNotEquals(first.stringDistance("levenstein"), serializedCopy(first).sort("ngram")); + assertNotEquals(first.suggestMode("missing"), serializedCopy(first).suggestMode("always")); + } + } + + /** + * creates random candidate generator, renders it to xContent and back to new instance that should be equal to original + */ + public void testFromXContent() throws IOException { + QueryParseContext context = new QueryParseContext(new IndicesQueriesRegistry(Settings.EMPTY, Collections.emptyMap())); + context.parseFieldMatcher(new ParseFieldMatcher(Settings.EMPTY)); + for (int runs = 0; runs < NUMBER_OF_RUNS; runs++) { + DirectCandidateGeneratorBuilder generator = randomCandidateGenerator(); + XContentBuilder builder = XContentFactory.contentBuilder(randomFrom(XContentType.values())); + if (randomBoolean()) { + builder.prettyPrint(); + } + generator.toXContent(builder, ToXContent.EMPTY_PARAMS); + + XContentParser parser = XContentHelper.createParser(builder.bytes()); + context.reset(parser); + parser.nextToken(); + DirectCandidateGeneratorBuilder secondGenerator = DirectCandidateGeneratorBuilder.PROTOTYPE + .fromXContent(context); + assertNotSame(generator, secondGenerator); + assertEquals(generator, secondGenerator); + assertEquals(generator.hashCode(), secondGenerator.hashCode()); + } + } + + /** + * test that build() outputs a {@link DirectCandidateGenerator} that is similar to the one + * we would get when parsing the xContent the test generator is rendering out + */ + public void testBuild() throws IOException { + + long start = System.currentTimeMillis(); + IndexSettings idxSettings = IndexSettingsModule.newIndexSettings(randomAsciiOfLengthBetween(1, 10), Settings.EMPTY); + + AnalysisService mockAnalysisService = new 
AnalysisService(idxSettings, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap()) { + @Override + public NamedAnalyzer analyzer(String name) { + return new NamedAnalyzer(name, new WhitespaceAnalyzer()); + } + }; + + MapperService mockMapperService = new MapperService(idxSettings, mockAnalysisService , null, new IndicesModule().getMapperRegistry(), null) { + @Override + public MappedFieldType fullName(String fullName) { + return new StringFieldType(); + } + }; + + QueryShardContext mockShardContext = new QueryShardContext(idxSettings, null, null, null, mockMapperService, null, null, null) { + @Override + public MappedFieldType fieldMapper(String name) { + StringFieldMapper.Builder builder = MapperBuilders.stringField(name); + return builder.build(new Mapper.BuilderContext(idxSettings.getSettings(), new ContentPath(1))).fieldType(); + } + }; + mockShardContext.setMapUnmappedFieldAsString(true); + + for (int runs = 0; runs < NUMBER_OF_RUNS; runs++) { + DirectCandidateGeneratorBuilder generator = randomCandidateGenerator(); + // first, build via DirectCandidateGenerator#build() + DirectCandidateGenerator contextGenerator = generator.build(mockShardContext); + + // second, render random test generator to xContent and parse using + // PhraseSuggestParser + XContentBuilder builder = XContentFactory.contentBuilder(randomFrom(XContentType.values())); + if (randomBoolean()) { + builder.prettyPrint(); + } + generator.toXContent(builder, ToXContent.EMPTY_PARAMS); + XContentParser parser = XContentHelper.createParser(builder.bytes()); + + DirectCandidateGenerator secondGenerator = PhraseSuggestParser.parseCandidateGenerator(parser, + mockShardContext.getMapperService(), mockShardContext.parseFieldMatcher()); + + // compare their properties + assertNotSame(contextGenerator, secondGenerator); + assertEquals(contextGenerator.field(), secondGenerator.field()); + assertEquals(contextGenerator.accuracy(), secondGenerator.accuracy(), Float.MIN_VALUE); + assertEquals(contextGenerator.maxTermFreq(), secondGenerator.maxTermFreq(), Float.MIN_VALUE); + assertEquals(contextGenerator.maxEdits(), secondGenerator.maxEdits()); + assertEquals(contextGenerator.maxInspections(), secondGenerator.maxInspections()); + assertEquals(contextGenerator.minDocFreq(), secondGenerator.minDocFreq(), Float.MIN_VALUE); + assertEquals(contextGenerator.minWordLength(), secondGenerator.minWordLength()); + assertEquals(contextGenerator.postFilter(), secondGenerator.postFilter()); + assertEquals(contextGenerator.prefixLength(), secondGenerator.prefixLength()); + assertEquals(contextGenerator.preFilter(), secondGenerator.preFilter()); + assertEquals(contextGenerator.sort(), secondGenerator.sort()); + assertEquals(contextGenerator.size(), secondGenerator.size()); + // some instances of StringDistance don't support equals, just checking the class here + assertEquals(contextGenerator.stringDistance().getClass(), secondGenerator.stringDistance().getClass()); + assertEquals(contextGenerator.suggestMode(), secondGenerator.suggestMode()); + } + } + + /** + * test that bad xContent throws exception + */ + public void testIllegalXContent() throws IOException { + QueryParseContext context = new QueryParseContext(new IndicesQueriesRegistry(Settings.EMPTY, Collections.emptyMap())); + context.parseFieldMatcher(new ParseFieldMatcher(Settings.EMPTY)); + + // test missing fieldname + String directGenerator = "{ }"; + XContentParser parser = XContentFactory.xContent(directGenerator).createParser(directGenerator); + 
+ context.reset(parser); + try { + DirectCandidateGeneratorBuilder.PROTOTYPE.fromXContent(context); + fail("expected an exception"); + } catch (IllegalArgumentException e) { + assertEquals("[direct_generator] expects exactly one field parameter, but found []", e.getMessage()); + } + + // test two fieldnames + directGenerator = "{ \"field\" : \"f1\", \"field\" : \"f2\" }"; + parser = XContentFactory.xContent(directGenerator).createParser(directGenerator); + + context.reset(parser); + try { + DirectCandidateGeneratorBuilder.PROTOTYPE.fromXContent(context); + fail("expected an exception"); + } catch (IllegalArgumentException e) { + assertEquals("[direct_generator] expects exactly one field parameter, but found [f2, f1]", e.getMessage()); + } + + // test unknown field + directGenerator = "{ \"unknown_param\" : \"f1\" }"; + parser = XContentFactory.xContent(directGenerator).createParser(directGenerator); + + context.reset(parser); + try { + DirectCandidateGeneratorBuilder.PROTOTYPE.fromXContent(context); + fail("expected an exception"); + } catch (IllegalArgumentException e) { + assertEquals("[direct_generator] unknown field [unknown_param], parser not found", e.getMessage()); + } + + // test bad value for field (e.g. size expects an int) + directGenerator = "{ \"size\" : \"xxl\" }"; + parser = XContentFactory.xContent(directGenerator).createParser(directGenerator); + + context.reset(parser); + try { + DirectCandidateGeneratorBuilder.PROTOTYPE.fromXContent(context); + fail("expected an exception"); + } catch (ParsingException e) { + assertEquals("[direct_generator] failed to parse field [size]", e.getMessage()); + } + + // test unexpected token + directGenerator = "{ \"size\" : [ \"xxl\" ] }"; + parser = XContentFactory.xContent(directGenerator).createParser(directGenerator); + + context.reset(parser); + try { + DirectCandidateGeneratorBuilder.PROTOTYPE.fromXContent(context); + fail("expected an exception"); + } catch (IllegalArgumentException e) { + assertEquals("[direct_generator] size doesn't support values of type: START_ARRAY", e.getMessage()); + } + } + + /** + * create random {@link DirectCandidateGeneratorBuilder} + */ + public static DirectCandidateGeneratorBuilder randomCandidateGenerator() { + DirectCandidateGeneratorBuilder generator = new DirectCandidateGeneratorBuilder(randomAsciiOfLength(10)); + maybeSet(generator::accuracy, randomFloat()); + maybeSet(generator::maxEdits, randomIntBetween(1, 2)); + maybeSet(generator::maxInspections, randomIntBetween(1, 20)); + maybeSet(generator::maxTermFreq, randomFloat()); + maybeSet(generator::minDocFreq, randomFloat()); + maybeSet(generator::minWordLength, randomIntBetween(1, 20)); + maybeSet(generator::prefixLength, randomIntBetween(1, 20)); + maybeSet(generator::preFilter, randomAsciiOfLengthBetween(1, 20)); + maybeSet(generator::postFilter, randomAsciiOfLengthBetween(1, 20)); + maybeSet(generator::size, randomIntBetween(1, 20)); + maybeSet(generator::sort, randomFrom(Arrays.asList(new String[]{ "score", "frequency" }))); + maybeSet(generator::stringDistance, randomFrom(Arrays.asList(new String[]{ "internal", "damerau_levenshtein", "levenstein", "jarowinkler", "ngram"}))); + maybeSet(generator::suggestMode, randomFrom(Arrays.asList(new String[]{ "missing", "popular", "always"}))); + return generator; + } + + private static void maybeSet(Consumer consumer, T value) { + if (randomBoolean()) { + consumer.accept(value); + } + } + + private static DirectCandidateGeneratorBuilder serializedCopy(DirectCandidateGeneratorBuilder original) throws 
IOException { + try (BytesStreamOutput output = new BytesStreamOutput()) { + original.writeTo(output); + try (StreamInput in = StreamInput.wrap(output.bytes())) { + return DirectCandidateGeneratorBuilder.PROTOTYPE.readFrom(in); + } + } + } +} diff --git a/docs/reference/migration/migrate_3_0.asciidoc b/docs/reference/migration/migrate_3_0.asciidoc index 78f8ff40307..c76dec77399 100644 --- a/docs/reference/migration/migrate_3_0.asciidoc +++ b/docs/reference/migration/migrate_3_0.asciidoc @@ -578,6 +578,10 @@ to index a document only if it doesn't already exist. `RecoreBuilder.Rescorer` was merged with `RescoreBuilder`, which now is an abstract superclass. QueryRescoreBuilder currently is its only implementation. +==== PhraseSuggestionBuilder + +The inner DirectCandidateGenerator class has been moved out to its own class called DirectCandidateGeneratorBuilder. + [[breaking_30_cache_concurrency]] === Cache concurrency level settings removed diff --git a/modules/lang-mustache/src/test/java/org/elasticsearch/messy/tests/SuggestSearchTests.java b/modules/lang-mustache/src/test/java/org/elasticsearch/messy/tests/SuggestSearchTests.java index 4fd83f9a850..9f00f58ae99 100644 --- a/modules/lang-mustache/src/test/java/org/elasticsearch/messy/tests/SuggestSearchTests.java +++ b/modules/lang-mustache/src/test/java/org/elasticsearch/messy/tests/SuggestSearchTests.java @@ -20,43 +20,6 @@ package org.elasticsearch.messy.tests; -import org.elasticsearch.ElasticsearchException; -import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder; -import org.elasticsearch.action.index.IndexRequestBuilder; -import org.elasticsearch.action.search.ReduceSearchPhaseException; -import org.elasticsearch.action.search.SearchPhaseExecutionException; -import org.elasticsearch.action.search.SearchRequestBuilder; -import org.elasticsearch.action.search.SearchResponse; -import org.elasticsearch.action.search.ShardSearchFailure; -import org.elasticsearch.action.suggest.SuggestRequestBuilder; -import org.elasticsearch.action.suggest.SuggestResponse; -import org.elasticsearch.common.io.PathUtils; -import org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.common.xcontent.XContentFactory; -import org.elasticsearch.plugins.Plugin; -import org.elasticsearch.script.mustache.MustachePlugin; -import org.elasticsearch.search.suggest.Suggest; -import org.elasticsearch.search.suggest.SuggestBuilder; -import org.elasticsearch.search.suggest.SuggestBuilder.SuggestionBuilder; -import org.elasticsearch.search.suggest.phrase.PhraseSuggestionBuilder; -import org.elasticsearch.search.suggest.phrase.PhraseSuggestionBuilder.DirectCandidateGenerator; -import org.elasticsearch.search.suggest.term.TermSuggestionBuilder; -import org.elasticsearch.test.ESIntegTestCase; -import org.elasticsearch.test.hamcrest.ElasticsearchAssertions; - -import java.io.IOException; -import java.net.URISyntaxException; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.concurrent.ExecutionException; - import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS; import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS; import static org.elasticsearch.common.settings.Settings.settingsBuilder; @@ -76,6 +39,43 @@ import static 
org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.nullValue; +import java.io.IOException; +import java.net.URISyntaxException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ExecutionException; + +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder; +import org.elasticsearch.action.index.IndexRequestBuilder; +import org.elasticsearch.action.search.ReduceSearchPhaseException; +import org.elasticsearch.action.search.SearchPhaseExecutionException; +import org.elasticsearch.action.search.SearchRequestBuilder; +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.action.search.ShardSearchFailure; +import org.elasticsearch.action.suggest.SuggestRequestBuilder; +import org.elasticsearch.action.suggest.SuggestResponse; +import org.elasticsearch.common.io.PathUtils; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentFactory; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.script.mustache.MustachePlugin; +import org.elasticsearch.search.suggest.Suggest; +import org.elasticsearch.search.suggest.SuggestBuilder; +import org.elasticsearch.search.suggest.SuggestBuilder.SuggestionBuilder; +import org.elasticsearch.search.suggest.phrase.DirectCandidateGeneratorBuilder; +import org.elasticsearch.search.suggest.phrase.PhraseSuggestionBuilder; +import org.elasticsearch.search.suggest.term.TermSuggestionBuilder; +import org.elasticsearch.test.ESIntegTestCase; +import org.elasticsearch.test.hamcrest.ElasticsearchAssertions; + /** * Integration tests for term and phrase suggestions. Many of these tests many requests that vary only slightly from one another. Where * possible these tests should declare for the first request, make the request, modify the configuration for the next request, make that @@ -213,7 +213,7 @@ public class SuggestSearchTests extends ESIntegTestCase { index("test", "type1", "3", "name", "I like ice cream."); refresh(); - DirectCandidateGenerator generator = candidateGenerator("name").prefixLength(0).minWordLength(0).suggestMode("always").maxEdits(2); + DirectCandidateGeneratorBuilder generator = candidateGenerator("name").prefixLength(0).minWordLength(0).suggestMode("always").maxEdits(2); PhraseSuggestionBuilder phraseSuggestion = phraseSuggestion("did_you_mean").field("name.shingled") .addCandidateGenerator(generator) .gramSize(3);
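Editorial note on usage (not part of the patch): the integration test above keeps the same fluent style as before, only the generator type changes to DirectCandidateGeneratorBuilder. The sketch below shows that wiring in isolation; the field names, the example class, and the single-argument PhraseSuggestionBuilder constructor are illustrative assumptions based on the test code in this diff.

import org.elasticsearch.search.suggest.phrase.DirectCandidateGeneratorBuilder;
import org.elasticsearch.search.suggest.phrase.PhraseSuggestionBuilder;

public class DidYouMeanExample {

    // Builds a "did_you_mean" phrase suggestion backed by the extracted candidate generator builder,
    // mirroring the SuggestSearchTests snippet above.
    public static PhraseSuggestionBuilder didYouMean() {
        DirectCandidateGeneratorBuilder generator = new DirectCandidateGeneratorBuilder("name")
                .prefixLength(0)       // allow corrections that differ in the first character
                .minWordLength(0)      // correct even very short terms
                .suggestMode("always") // suggest even for terms that already exist in the index
                .maxEdits(2);          // up to two edits per candidate

        return new PhraseSuggestionBuilder("did_you_mean")
                .field("name.shingled")
                .addCandidateGenerator(generator)
                .gramSize(3);
    }
}

On the wire the same builder round-trips through writeTo/readFrom, and on the shard build(QueryShardContext) turns it into a PhraseSuggestionContext.DirectCandidateGenerator, which is what the serialization and build tests above exercise.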