Merge pull request #16185 from cbuescher/ref-DirectCandidateGenerator
As a prerequisite for refactoring the whole PhraseSuggestionBuilder so that it can be parsed and streamed from the coordinating node, the DirectCandidateGenerator must implement Writeable, be able to parse a new instance (fromXContent()) and, once transported to the shard, generate a PhraseSuggestionContext.DirectCandidateGenerator. This change also adds equals/hashCode and tests, and moves DirectCandidateGenerator to its own DirectCandidateGeneratorBuilder class.
This commit is contained in:
commit
220bf7bd4b
|
@ -0,0 +1,493 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.search.suggest.phrase;
|
||||
|
||||
import org.apache.lucene.util.automaton.LevenshteinAutomata;
|
||||
import org.elasticsearch.ExceptionsHelper;
|
||||
import org.elasticsearch.common.ParseField;
|
||||
import org.elasticsearch.common.collect.Tuple;
|
||||
import org.elasticsearch.common.io.stream.StreamInput;
|
||||
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||
import org.elasticsearch.common.io.stream.Writeable;
|
||||
import org.elasticsearch.common.xcontent.ObjectParser;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.common.xcontent.XContentFactory;
|
||||
import org.elasticsearch.index.mapper.MapperService;
|
||||
import org.elasticsearch.index.query.QueryParseContext;
|
||||
import org.elasticsearch.index.query.QueryShardContext;
|
||||
import org.elasticsearch.search.suggest.SuggestUtils;
|
||||
import org.elasticsearch.search.suggest.phrase.PhraseSuggestionBuilder.CandidateGenerator;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashSet;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.function.Consumer;
|
||||
|
||||
public final class DirectCandidateGeneratorBuilder
|
||||
implements Writeable<DirectCandidateGeneratorBuilder>, CandidateGenerator {
|
||||
|
||||
private static final String TYPE = "direct_generator";
|
||||
static final DirectCandidateGeneratorBuilder PROTOTYPE = new DirectCandidateGeneratorBuilder("_na_");
|
||||
|
||||
static final ParseField DIRECT_GENERATOR_FIELD = new ParseField(TYPE);
|
||||
static final ParseField FIELDNAME_FIELD = new ParseField("field");
|
||||
static final ParseField PREFILTER_FIELD = new ParseField("pre_filter");
|
||||
static final ParseField POSTFILTER_FIELD = new ParseField("post_filter");
|
||||
static final ParseField SUGGESTMODE_FIELD = new ParseField("suggest_mode");
|
||||
static final ParseField MIN_DOC_FREQ_FIELD = new ParseField("min_doc_freq");
|
||||
static final ParseField ACCURACY_FIELD = new ParseField("accuracy");
|
||||
static final ParseField SIZE_FIELD = new ParseField("size");
|
||||
static final ParseField SORT_FIELD = new ParseField("sort");
|
||||
static final ParseField STRING_DISTANCE_FIELD = new ParseField("string_distance");
|
||||
static final ParseField MAX_EDITS_FIELD = new ParseField("max_edits");
|
||||
static final ParseField MAX_INSPECTIONS_FIELD = new ParseField("max_inspections");
|
||||
static final ParseField MAX_TERM_FREQ_FIELD = new ParseField("max_term_freq");
|
||||
static final ParseField PREFIX_LENGTH_FIELD = new ParseField("prefix_length");
|
||||
static final ParseField MIN_WORD_LENGTH_FIELD = new ParseField("min_word_length");
|
||||
|
||||
private final String field;
|
||||
private String preFilter;
|
||||
private String postFilter;
|
||||
private String suggestMode;
|
||||
private Float accuracy;
|
||||
private Integer size;
|
||||
private String sort;
|
||||
private String stringDistance;
|
||||
private Integer maxEdits;
|
||||
private Integer maxInspections;
|
||||
private Float maxTermFreq;
|
||||
private Integer prefixLength;
|
||||
private Integer minWordLength;
|
||||
private Float minDocFreq;
|
||||
|
||||
/**
|
||||
* @param field Sets from what field to fetch the candidate suggestions from.
|
||||
*/
|
||||
public DirectCandidateGeneratorBuilder(String field) {
|
||||
this.field = field;
|
||||
}
|
||||
|
||||
/**
|
||||
* Quasi copy-constructor that takes all values from the generator
|
||||
* passed in, but uses different field name. Needed by parser because we
|
||||
* need to buffer the field name but read all other properties to a
|
||||
* temporary object.
|
||||
*/
|
||||
private static DirectCandidateGeneratorBuilder replaceField(String field, DirectCandidateGeneratorBuilder other) {
|
||||
DirectCandidateGeneratorBuilder generator = new DirectCandidateGeneratorBuilder(field);
|
||||
generator.preFilter = other.preFilter;
|
||||
generator.postFilter = other.postFilter;
|
||||
generator.suggestMode = other.suggestMode;
|
||||
generator.accuracy = other.accuracy;
|
||||
generator.size = other.size;
|
||||
generator.sort = other.sort;
|
||||
generator.stringDistance = other.stringDistance;
|
||||
generator.maxEdits = other.maxEdits;
|
||||
generator.maxInspections = other.maxInspections;
|
||||
generator.maxTermFreq = other.maxTermFreq;
|
||||
generator.prefixLength = other.prefixLength;
|
||||
generator.minWordLength = other.minWordLength;
|
||||
generator.minDocFreq = other.minDocFreq;
|
||||
return generator;
|
||||
}
|
||||
|
||||
/**
|
||||
* The global suggest mode controls what suggested terms are included or
|
||||
* controls for what suggest text tokens, terms should be suggested for.
|
||||
* Three possible values can be specified:
|
||||
* <ol>
|
||||
* <li><code>missing</code> - Only suggest terms in the suggest text
|
||||
* that aren't in the index. This is the default.
|
||||
* <li><code>popular</code> - Only suggest terms that occur in more docs
|
||||
* then the original suggest text term.
|
||||
* <li><code>always</code> - Suggest any matching suggest terms based on
|
||||
* tokens in the suggest text.
|
||||
* </ol>
|
||||
*/
|
||||
public DirectCandidateGeneratorBuilder suggestMode(String suggestMode) {
|
||||
this.suggestMode = suggestMode;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets how similar the suggested terms at least need to be compared to
|
||||
* the original suggest text tokens. A value between 0 and 1 can be
|
||||
* specified. This value will be compared to the string distance result
|
||||
* of each candidate spelling correction.
|
||||
* <p>
|
||||
* Default is <tt>0.5</tt>
|
||||
*/
|
||||
public DirectCandidateGeneratorBuilder accuracy(float accuracy) {
|
||||
this.accuracy = accuracy;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the maximum suggestions to be returned per suggest text term.
|
||||
*/
|
||||
public DirectCandidateGeneratorBuilder size(int size) {
|
||||
if (size <= 0) {
|
||||
throw new IllegalArgumentException("Size must be positive");
|
||||
}
|
||||
this.size = size;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets how to sort the suggest terms per suggest text token. Two
|
||||
* possible values:
|
||||
* <ol>
|
||||
* <li><code>score</code> - Sort should first be based on score, then
|
||||
* document frequency and then the term itself.
|
||||
* <li><code>frequency</code> - Sort should first be based on document
|
||||
* frequency, then score and then the term itself.
|
||||
* </ol>
|
||||
* <p>
|
||||
* What the score is depends on the suggester being used.
|
||||
*/
|
||||
public DirectCandidateGeneratorBuilder sort(String sort) {
|
||||
this.sort = sort;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets what string distance implementation to use for comparing how
|
||||
* similar suggested terms are. Four possible values can be specified:
|
||||
* <ol>
|
||||
* <li><code>internal</code> - This is the default and is based on
|
||||
* <code>damerau_levenshtein</code>, but highly optimized for comparing
|
||||
* string distance for terms inside the index.
|
||||
* <li><code>damerau_levenshtein</code> - String distance algorithm
|
||||
* based on Damerau-Levenshtein algorithm.
|
||||
* <li><code>levenstein</code> - String distance algorithm based on
|
||||
* Levenstein edit distance algorithm.
|
||||
* <li><code>jarowinkler</code> - String distance algorithm based on
|
||||
* Jaro-Winkler algorithm.
|
||||
* <li><code>ngram</code> - String distance algorithm based on character
|
||||
* n-grams.
|
||||
* </ol>
|
||||
*/
|
||||
public DirectCandidateGeneratorBuilder stringDistance(String stringDistance) {
|
||||
this.stringDistance = stringDistance;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the maximum edit distance candidate suggestions can have in
|
||||
* order to be considered as a suggestion. Can only be a value between 1
|
||||
* and 2. Any other value result in an bad request error being thrown.
|
||||
* Defaults to <tt>2</tt>.
|
||||
*/
|
||||
public DirectCandidateGeneratorBuilder maxEdits(Integer maxEdits) {
|
||||
if (maxEdits < 1 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
|
||||
throw new IllegalArgumentException("Illegal max_edits value " + maxEdits);
|
||||
}
|
||||
this.maxEdits = maxEdits;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* A factor that is used to multiply with the size in order to inspect
|
||||
* more candidate suggestions. Can improve accuracy at the cost of
|
||||
* performance. Defaults to <tt>5</tt>.
|
||||
*/
|
||||
public DirectCandidateGeneratorBuilder maxInspections(Integer maxInspections) {
|
||||
this.maxInspections = maxInspections;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets a maximum threshold in number of documents a suggest text token
|
||||
* can exist in order to be corrected. Can be a relative percentage
|
||||
* number (e.g 0.4) or an absolute number to represent document
|
||||
* frequencies. If an value higher than 1 is specified then fractional
|
||||
* can not be specified. Defaults to <tt>0.01</tt>.
|
||||
* <p>
|
||||
* This can be used to exclude high frequency terms from being
|
||||
* suggested. High frequency terms are usually spelled correctly on top
|
||||
* of this this also improves the suggest performance.
|
||||
*/
|
||||
public DirectCandidateGeneratorBuilder maxTermFreq(float maxTermFreq) {
|
||||
this.maxTermFreq = maxTermFreq;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the number of minimal prefix characters that must match in order
|
||||
* be a candidate suggestion. Defaults to 1. Increasing this number
|
||||
* improves suggest performance. Usually misspellings don't occur in the
|
||||
* beginning of terms.
|
||||
*/
|
||||
public DirectCandidateGeneratorBuilder prefixLength(int prefixLength) {
|
||||
this.prefixLength = prefixLength;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* The minimum length a suggest text term must have in order to be
|
||||
* corrected. Defaults to <tt>4</tt>.
|
||||
*/
|
||||
public DirectCandidateGeneratorBuilder minWordLength(int minWordLength) {
|
||||
this.minWordLength = minWordLength;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets a minimal threshold in number of documents a suggested term
|
||||
* should appear in. This can be specified as an absolute number or as a
|
||||
* relative percentage of number of documents. This can improve quality
|
||||
* by only suggesting high frequency terms. Defaults to 0f and is not
|
||||
* enabled. If a value higher than 1 is specified then the number cannot
|
||||
* be fractional.
|
||||
*/
|
||||
public DirectCandidateGeneratorBuilder minDocFreq(float minDocFreq) {
|
||||
this.minDocFreq = minDocFreq;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets a filter (analyzer) that is applied to each of the tokens passed to this candidate generator.
|
||||
* This filter is applied to the original token before candidates are generated.
|
||||
*/
|
||||
public DirectCandidateGeneratorBuilder preFilter(String preFilter) {
|
||||
this.preFilter = preFilter;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets a filter (analyzer) that is applied to each of the generated tokens
|
||||
* before they are passed to the actual phrase scorer.
|
||||
*/
|
||||
public DirectCandidateGeneratorBuilder postFilter(String postFilter) {
|
||||
this.postFilter = postFilter;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the type identifier of this {@link CandidateGenerator}
|
||||
*/
|
||||
@Override
|
||||
public String getType() {
|
||||
return TYPE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
|
||||
builder.startObject();
|
||||
outputFieldIfNotNull(field, FIELDNAME_FIELD, builder);
|
||||
outputFieldIfNotNull(accuracy, ACCURACY_FIELD, builder);
|
||||
outputFieldIfNotNull(maxEdits, MAX_EDITS_FIELD, builder);
|
||||
outputFieldIfNotNull(maxInspections, MAX_INSPECTIONS_FIELD, builder);
|
||||
outputFieldIfNotNull(maxTermFreq, MAX_TERM_FREQ_FIELD, builder);
|
||||
outputFieldIfNotNull(minWordLength, MIN_WORD_LENGTH_FIELD, builder);
|
||||
outputFieldIfNotNull(minDocFreq, MIN_DOC_FREQ_FIELD, builder);
|
||||
outputFieldIfNotNull(preFilter, PREFILTER_FIELD, builder);
|
||||
outputFieldIfNotNull(prefixLength, PREFIX_LENGTH_FIELD, builder);
|
||||
outputFieldIfNotNull(postFilter, POSTFILTER_FIELD, builder);
|
||||
outputFieldIfNotNull(suggestMode, SUGGESTMODE_FIELD, builder);
|
||||
outputFieldIfNotNull(size, SIZE_FIELD, builder);
|
||||
outputFieldIfNotNull(sort, SORT_FIELD, builder);
|
||||
outputFieldIfNotNull(stringDistance, STRING_DISTANCE_FIELD, builder);
|
||||
builder.endObject();
|
||||
return builder;
|
||||
}
|
||||
|
||||
private static <T> void outputFieldIfNotNull(T value, ParseField field, XContentBuilder builder) throws IOException {
|
||||
if (value != null) {
|
||||
builder.field(field.getPreferredName(), value);
|
||||
}
|
||||
}
|
||||
|
||||
private static ObjectParser<Tuple<Set<String>, DirectCandidateGeneratorBuilder>, QueryParseContext> PARSER = new ObjectParser<>(TYPE);
|
||||
|
||||
static {
|
||||
PARSER.declareString((tp, s) -> tp.v1().add(s), FIELDNAME_FIELD);
|
||||
PARSER.declareString((tp, s) -> tp.v2().preFilter(s), PREFILTER_FIELD);
|
||||
PARSER.declareString((tp, s) -> tp.v2().postFilter(s), POSTFILTER_FIELD);
|
||||
PARSER.declareString((tp, s) -> tp.v2().suggestMode(s), SUGGESTMODE_FIELD);
|
||||
PARSER.declareFloat((tp, f) -> tp.v2().minDocFreq(f), MIN_DOC_FREQ_FIELD);
|
||||
PARSER.declareFloat((tp, f) -> tp.v2().accuracy(f), ACCURACY_FIELD);
|
||||
PARSER.declareInt((tp, i) -> tp.v2().size(i), SIZE_FIELD);
|
||||
PARSER.declareString((tp, s) -> tp.v2().sort(s), SORT_FIELD);
|
||||
PARSER.declareString((tp, s) -> tp.v2().stringDistance(s), STRING_DISTANCE_FIELD);
|
||||
PARSER.declareInt((tp, i) -> tp.v2().maxInspections(i), MAX_INSPECTIONS_FIELD);
|
||||
PARSER.declareFloat((tp, f) -> tp.v2().maxTermFreq(f), MAX_TERM_FREQ_FIELD);
|
||||
PARSER.declareInt((tp, i) -> tp.v2().maxEdits(i), MAX_EDITS_FIELD);
|
||||
PARSER.declareInt((tp, i) -> tp.v2().minWordLength(i), MIN_WORD_LENGTH_FIELD);
|
||||
PARSER.declareInt((tp, i) -> tp.v2().prefixLength(i), PREFIX_LENGTH_FIELD);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DirectCandidateGeneratorBuilder fromXContent(QueryParseContext parseContext) throws IOException {
|
||||
DirectCandidateGeneratorBuilder tempGenerator = new DirectCandidateGeneratorBuilder("_na_");
|
||||
Set<String> tmpFieldName = new HashSet<>(1); // bucket for the field
|
||||
// name, needed as
|
||||
// constructor arg
|
||||
// later
|
||||
PARSER.parse(parseContext.parser(),
|
||||
new Tuple<Set<String>, DirectCandidateGeneratorBuilder>(tmpFieldName, tempGenerator));
|
||||
if (tmpFieldName.size() != 1) {
|
||||
throw new IllegalArgumentException("[" + TYPE + "] expects exactly one field parameter, but found " + tmpFieldName);
|
||||
}
|
||||
return replaceField(tmpFieldName.iterator().next(), tempGenerator);
|
||||
}
|
||||
|
||||
public PhraseSuggestionContext.DirectCandidateGenerator build(QueryShardContext context) throws IOException {
|
||||
MapperService mapperService = context.getMapperService();
|
||||
PhraseSuggestionContext.DirectCandidateGenerator generator = new PhraseSuggestionContext.DirectCandidateGenerator();
|
||||
generator.setField(this.field);
|
||||
transferIfNotNull(this.size, generator::size);
|
||||
if (this.preFilter != null) {
|
||||
generator.preFilter(mapperService.analysisService().analyzer(this.preFilter));
|
||||
if (generator.preFilter() == null) {
|
||||
throw new IllegalArgumentException("Analyzer [" + this.preFilter + "] doesn't exists");
|
||||
}
|
||||
}
|
||||
if (this.postFilter != null) {
|
||||
generator.postFilter(mapperService.analysisService().analyzer(this.postFilter));
|
||||
if (generator.postFilter() == null) {
|
||||
throw new IllegalArgumentException("Analyzer [" + this.postFilter + "] doesn't exists");
|
||||
}
|
||||
}
|
||||
transferIfNotNull(this.accuracy, generator::accuracy);
|
||||
if (this.suggestMode != null) {
|
||||
generator.suggestMode(SuggestUtils.resolveSuggestMode(this.suggestMode));
|
||||
}
|
||||
if (this.sort != null) {
|
||||
generator.sort(SuggestUtils.resolveSort(this.sort));
|
||||
}
|
||||
if (this.stringDistance != null) {
|
||||
generator.stringDistance(SuggestUtils.resolveDistance(this.stringDistance));
|
||||
}
|
||||
transferIfNotNull(this.maxEdits, generator::maxEdits);
|
||||
if (generator.maxEdits() < 1 || generator.maxEdits() > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
|
||||
throw new IllegalArgumentException("Illegal max_edits value " + generator.maxEdits());
|
||||
}
|
||||
transferIfNotNull(this.maxInspections, generator::maxInspections);
|
||||
transferIfNotNull(this.maxTermFreq, generator::maxTermFreq);
|
||||
transferIfNotNull(this.prefixLength, generator::prefixLength);
|
||||
transferIfNotNull(this.minWordLength, generator::minQueryLength);
|
||||
transferIfNotNull(this.minDocFreq, generator::minDocFreq);
|
||||
return generator;
|
||||
}
|
||||
|
||||
private static <T> void transferIfNotNull(T value, Consumer<T> consumer) {
|
||||
if (value != null) {
|
||||
consumer.accept(value);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public final String toString() {
|
||||
try {
|
||||
XContentBuilder builder = XContentFactory.jsonBuilder();
|
||||
builder.prettyPrint();
|
||||
toXContent(builder, EMPTY_PARAMS);
|
||||
return builder.string();
|
||||
} catch (Exception e) {
|
||||
return "{ \"error\" : \"" + ExceptionsHelper.detailedMessage(e) + "\"}";
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public DirectCandidateGeneratorBuilder readFrom(StreamInput in) throws IOException {
|
||||
DirectCandidateGeneratorBuilder cg = new DirectCandidateGeneratorBuilder(in.readString());
|
||||
cg.suggestMode = in.readOptionalString();
|
||||
if (in.readBoolean()) {
|
||||
cg.accuracy = in.readFloat();
|
||||
}
|
||||
cg.size = in.readOptionalVInt();
|
||||
cg.sort = in.readOptionalString();
|
||||
cg.stringDistance = in.readOptionalString();
|
||||
cg.maxEdits = in.readOptionalVInt();
|
||||
cg.maxInspections = in.readOptionalVInt();
|
||||
if (in.readBoolean()) {
|
||||
cg.maxTermFreq = in.readFloat();
|
||||
}
|
||||
cg.prefixLength = in.readOptionalVInt();
|
||||
cg.minWordLength = in.readOptionalVInt();
|
||||
if (in.readBoolean()) {
|
||||
cg.minDocFreq = in.readFloat();
|
||||
}
|
||||
cg.preFilter = in.readOptionalString();
|
||||
cg.postFilter = in.readOptionalString();
|
||||
return cg;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeTo(StreamOutput out) throws IOException {
|
||||
out.writeString(field);
|
||||
out.writeOptionalString(suggestMode);
|
||||
out.writeBoolean(accuracy != null);
|
||||
if (accuracy != null) {
|
||||
out.writeFloat(accuracy);
|
||||
}
|
||||
out.writeOptionalVInt(size);
|
||||
out.writeOptionalString(sort);
|
||||
out.writeOptionalString(stringDistance);
|
||||
out.writeOptionalVInt(maxEdits);
|
||||
out.writeOptionalVInt(maxInspections);
|
||||
out.writeBoolean(maxTermFreq != null);
|
||||
if (maxTermFreq != null) {
|
||||
out.writeFloat(maxTermFreq);
|
||||
}
|
||||
out.writeOptionalVInt(prefixLength);
|
||||
out.writeOptionalVInt(minWordLength);
|
||||
out.writeBoolean(minDocFreq != null);
|
||||
if (minDocFreq != null) {
|
||||
out.writeFloat(minDocFreq);
|
||||
}
|
||||
out.writeOptionalString(preFilter);
|
||||
out.writeOptionalString(postFilter);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final int hashCode() {
|
||||
return Objects.hash(field, preFilter, postFilter, suggestMode, accuracy,
|
||||
size, sort, stringDistance, maxEdits, maxInspections,
|
||||
maxTermFreq, prefixLength, minWordLength, minDocFreq);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final boolean equals(Object obj) {
|
||||
if (this == obj) {
|
||||
return true;
|
||||
}
|
||||
if (obj == null || getClass() != obj.getClass()) {
|
||||
return false;
|
||||
}
|
||||
DirectCandidateGeneratorBuilder other = (DirectCandidateGeneratorBuilder) obj;
|
||||
return Objects.equals(field, other.field) &&
|
||||
Objects.equals(preFilter, other.preFilter) &&
|
||||
Objects.equals(postFilter, other.postFilter) &&
|
||||
Objects.equals(suggestMode, other.suggestMode) &&
|
||||
Objects.equals(accuracy, other.accuracy) &&
|
||||
Objects.equals(size, other.size) &&
|
||||
Objects.equals(sort, other.sort) &&
|
||||
Objects.equals(stringDistance, other.stringDistance) &&
|
||||
Objects.equals(maxEdits, other.maxEdits) &&
|
||||
Objects.equals(maxInspections, other.maxInspections) &&
|
||||
Objects.equals(maxTermFreq, other.maxTermFreq) &&
|
||||
Objects.equals(prefixLength, other.prefixLength) &&
|
||||
Objects.equals(minWordLength, other.minWordLength) &&
|
||||
Objects.equals(minDocFreq, other.minDocFreq);
|
||||
}
|
||||
}
|
|
@ -98,18 +98,10 @@ public final class PhraseSuggestParser implements SuggestContextParser {
|
|||
}
|
||||
}
|
||||
} else if (token == Token.START_ARRAY) {
|
||||
if ("direct_generator".equals(fieldName) || "directGenerator".equals(fieldName)) {
|
||||
if (parseFieldMatcher.match(fieldName, DirectCandidateGeneratorBuilder.DIRECT_GENERATOR_FIELD)) {
|
||||
// for now we only have a single type of generators
|
||||
while ((token = parser.nextToken()) == Token.START_OBJECT) {
|
||||
PhraseSuggestionContext.DirectCandidateGenerator generator = new PhraseSuggestionContext.DirectCandidateGenerator();
|
||||
while ((token = parser.nextToken()) != Token.END_OBJECT) {
|
||||
if (token == XContentParser.Token.FIELD_NAME) {
|
||||
fieldName = parser.currentName();
|
||||
}
|
||||
if (token.isValue()) {
|
||||
parseCandidateGenerator(parser, mapperService, fieldName, generator, parseFieldMatcher);
|
||||
}
|
||||
}
|
||||
PhraseSuggestionContext.DirectCandidateGenerator generator = parseCandidateGenerator(parser, mapperService, parseFieldMatcher);
|
||||
verifyGenerator(generator);
|
||||
suggestion.addGenerator(generator);
|
||||
}
|
||||
|
@ -323,34 +315,44 @@ public final class PhraseSuggestParser implements SuggestContextParser {
|
|||
}
|
||||
}
|
||||
|
||||
private void parseCandidateGenerator(XContentParser parser, MapperService mapperService, String fieldName,
|
||||
PhraseSuggestionContext.DirectCandidateGenerator generator, ParseFieldMatcher parseFieldMatcher) throws IOException {
|
||||
if (!SuggestUtils.parseDirectSpellcheckerSettings(parser, fieldName, generator, parseFieldMatcher)) {
|
||||
if ("field".equals(fieldName)) {
|
||||
generator.setField(parser.text());
|
||||
if (mapperService.fullName(generator.field()) == null) {
|
||||
throw new IllegalArgumentException("No mapping found for field [" + generator.field() + "]");
|
||||
static PhraseSuggestionContext.DirectCandidateGenerator parseCandidateGenerator(XContentParser parser, MapperService mapperService,
|
||||
ParseFieldMatcher parseFieldMatcher) throws IOException {
|
||||
XContentParser.Token token;
|
||||
String fieldName = null;
|
||||
PhraseSuggestionContext.DirectCandidateGenerator generator = new PhraseSuggestionContext.DirectCandidateGenerator();
|
||||
while ((token = parser.nextToken()) != Token.END_OBJECT) {
|
||||
if (token == XContentParser.Token.FIELD_NAME) {
|
||||
fieldName = parser.currentName();
|
||||
}
|
||||
if (token.isValue()) {
|
||||
if (!SuggestUtils.parseDirectSpellcheckerSettings(parser, fieldName, generator, parseFieldMatcher)) {
|
||||
if ("field".equals(fieldName)) {
|
||||
generator.setField(parser.text());
|
||||
if (mapperService.fullName(generator.field()) == null) {
|
||||
throw new IllegalArgumentException("No mapping found for field [" + generator.field() + "]");
|
||||
}
|
||||
} else if ("size".equals(fieldName)) {
|
||||
generator.size(parser.intValue());
|
||||
} else if ("pre_filter".equals(fieldName) || "preFilter".equals(fieldName)) {
|
||||
String analyzerName = parser.text();
|
||||
Analyzer analyzer = mapperService.analysisService().analyzer(analyzerName);
|
||||
if (analyzer == null) {
|
||||
throw new IllegalArgumentException("Analyzer [" + analyzerName + "] doesn't exists");
|
||||
}
|
||||
generator.preFilter(analyzer);
|
||||
} else if ("post_filter".equals(fieldName) || "postFilter".equals(fieldName)) {
|
||||
String analyzerName = parser.text();
|
||||
Analyzer analyzer = mapperService.analysisService().analyzer(analyzerName);
|
||||
if (analyzer == null) {
|
||||
throw new IllegalArgumentException("Analyzer [" + analyzerName + "] doesn't exists");
|
||||
}
|
||||
generator.postFilter(analyzer);
|
||||
} else {
|
||||
throw new IllegalArgumentException("CandidateGenerator doesn't support [" + fieldName + "]");
|
||||
}
|
||||
}
|
||||
} else if ("size".equals(fieldName)) {
|
||||
generator.size(parser.intValue());
|
||||
} else if ("pre_filter".equals(fieldName) || "preFilter".equals(fieldName)) {
|
||||
String analyzerName = parser.text();
|
||||
Analyzer analyzer = mapperService.analysisService().analyzer(analyzerName);
|
||||
if (analyzer == null) {
|
||||
throw new IllegalArgumentException("Analyzer [" + analyzerName + "] doesn't exists");
|
||||
}
|
||||
generator.preFilter(analyzer);
|
||||
} else if ("post_filter".equals(fieldName) || "postFilter".equals(fieldName)) {
|
||||
String analyzerName = parser.text();
|
||||
Analyzer analyzer = mapperService.analysisService().analyzer(analyzerName);
|
||||
if (analyzer == null) {
|
||||
throw new IllegalArgumentException("Analyzer [" + analyzerName + "] doesn't exists");
|
||||
}
|
||||
generator.postFilter(analyzer);
|
||||
} else {
|
||||
throw new IllegalArgumentException("CandidateGenerator doesn't support [" + fieldName + "]");
|
||||
}
|
||||
}
|
||||
return generator;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -278,13 +278,13 @@ public final class PhraseSuggestionBuilder extends SuggestionBuilder<PhraseSugge
|
|||
}
|
||||
|
||||
/**
|
||||
* Creates a new {@link DirectCandidateGenerator}
|
||||
* Creates a new {@link DirectCandidateGeneratorBuilder}
|
||||
*
|
||||
* @param field
|
||||
* the field this candidate generator operates on.
|
||||
*/
|
||||
public static DirectCandidateGenerator candidateGenerator(String field) {
|
||||
return new DirectCandidateGenerator(field);
|
||||
public static DirectCandidateGeneratorBuilder candidateGenerator(String field) {
|
||||
return new DirectCandidateGeneratorBuilder(field);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -644,267 +644,11 @@ public final class PhraseSuggestionBuilder extends SuggestionBuilder<PhraseSugge
|
|||
}
|
||||
|
||||
/**
|
||||
* {@link CandidateGenerator} base class.
|
||||
* {@link CandidateGenerator} interface.
|
||||
*/
|
||||
public static abstract class CandidateGenerator implements ToXContent {
|
||||
private final String type;
|
||||
|
||||
public CandidateGenerator(String type) {
|
||||
this.type = type;
|
||||
}
|
||||
|
||||
public String getType() {
|
||||
return type;
|
||||
}
|
||||
public interface CandidateGenerator extends ToXContent {
|
||||
String getType();
|
||||
|
||||
CandidateGenerator fromXContent(QueryParseContext parseContext) throws IOException;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
*/
|
||||
public static final class DirectCandidateGenerator extends CandidateGenerator {
|
||||
private final String field;
|
||||
private String preFilter;
|
||||
private String postFilter;
|
||||
private String suggestMode;
|
||||
private Float accuracy;
|
||||
private Integer size;
|
||||
private String sort;
|
||||
private String stringDistance;
|
||||
private Integer maxEdits;
|
||||
private Integer maxInspections;
|
||||
private Float maxTermFreq;
|
||||
private Integer prefixLength;
|
||||
private Integer minWordLength;
|
||||
private Float minDocFreq;
|
||||
|
||||
/**
|
||||
* @param field Sets from what field to fetch the candidate suggestions from.
|
||||
*/
|
||||
public DirectCandidateGenerator(String field) {
|
||||
super("direct_generator");
|
||||
this.field = field;
|
||||
}
|
||||
|
||||
/**
|
||||
* The global suggest mode controls what suggested terms are included or
|
||||
* controls for what suggest text tokens, terms should be suggested for.
|
||||
* Three possible values can be specified:
|
||||
* <ol>
|
||||
* <li><code>missing</code> - Only suggest terms in the suggest text
|
||||
* that aren't in the index. This is the default.
|
||||
* <li><code>popular</code> - Only suggest terms that occur in more docs
|
||||
* then the original suggest text term.
|
||||
* <li><code>always</code> - Suggest any matching suggest terms based on
|
||||
* tokens in the suggest text.
|
||||
* </ol>
|
||||
*/
|
||||
public DirectCandidateGenerator suggestMode(String suggestMode) {
|
||||
this.suggestMode = suggestMode;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets how similar the suggested terms at least need to be compared to
|
||||
* the original suggest text tokens. A value between 0 and 1 can be
|
||||
* specified. This value will be compared to the string distance result
|
||||
* of each candidate spelling correction.
|
||||
* <p>
|
||||
* Default is <tt>0.5</tt>
|
||||
*/
|
||||
public DirectCandidateGenerator accuracy(float accuracy) {
|
||||
this.accuracy = accuracy;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the maximum suggestions to be returned per suggest text term.
|
||||
*/
|
||||
public DirectCandidateGenerator size(int size) {
|
||||
if (size <= 0) {
|
||||
throw new IllegalArgumentException("Size must be positive");
|
||||
}
|
||||
this.size = size;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets how to sort the suggest terms per suggest text token. Two
|
||||
* possible values:
|
||||
* <ol>
|
||||
* <li><code>score</code> - Sort should first be based on score, then
|
||||
* document frequency and then the term itself.
|
||||
* <li><code>frequency</code> - Sort should first be based on document
|
||||
* frequency, then scotr and then the term itself.
|
||||
* </ol>
|
||||
* <p>
|
||||
* What the score is depends on the suggester being used.
|
||||
*/
|
||||
public DirectCandidateGenerator sort(String sort) {
|
||||
this.sort = sort; // kept as the raw string; not checked against "score"/"frequency" in this setter
|
||||
return this; // hand back this generator so setter calls can be chained
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets what string distance implementation to use for comparing how
|
||||
* similar suggested terms are. Five possible values can be specified:
|
||||
* <ol>
|
||||
* <li><code>internal</code> - This is the default and is based on
|
||||
* <code>damerau_levenshtein</code>, but highly optimized for comparing
|
||||
* string distance for terms inside the index.
|
||||
* <li><code>damerau_levenshtein</code> - String distance algorithm
|
||||
* based on Damerau-Levenshtein algorithm.
|
||||
* <li><code>levenstein</code> - String distance algorithm based on
|
||||
* Levenshtein edit distance algorithm.
|
||||
* <li><code>jarowinkler</code> - String distance algorithm based on
|
||||
* Jaro-Winkler algorithm.
|
||||
* <li><code>ngram</code> - String distance algorithm based on character
|
||||
* n-grams.
|
||||
* </ol>
|
||||
*/
|
||||
public DirectCandidateGenerator stringDistance(String stringDistance) {
|
||||
this.stringDistance = stringDistance; // kept as the raw string; the implementation name is not validated here
|
||||
return this; // hand back this generator so setter calls can be chained
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the maximum edit distance candidate suggestions can have in
|
||||
* order to be considered as a suggestion. Can only be a value between 1
|
||||
* and 2. Any other value results in a bad request error being thrown.
|
||||
* Defaults to <tt>2</tt>.
|
||||
*/
|
||||
public DirectCandidateGenerator maxEdits(Integer maxEdits) {
|
||||
this.maxEdits = maxEdits; // the documented 1..2 limit is not enforced by this setter; null is accepted as-is
|
||||
return this; // hand back this generator so setter calls can be chained
|
||||
}
|
||||
|
||||
/**
|
||||
* A factor that is used to multiply with the size in order to inspect
|
||||
* more candidate suggestions. Can improve accuracy at the cost of
|
||||
* performance. Defaults to <tt>5</tt>.
|
||||
*/
|
||||
public DirectCandidateGenerator maxInspections(Integer maxInspections) {
|
||||
this.maxInspections = maxInspections; // stored without validation; null is accepted as-is
|
||||
return this; // hand back this generator so setter calls can be chained
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets a maximum threshold in number of documents a suggest text token
|
||||
* can exist in order to be corrected. Can be a relative percentage
|
||||
* number (e.g 0.4) or an absolute number to represent document
|
||||
* frequencies. If a value higher than 1 is specified then it cannot
|
||||
* be fractional. Defaults to <tt>0.01</tt>.
|
||||
* <p>
|
||||
* This can be used to exclude high frequency terms from being
|
||||
* suggested. High frequency terms are usually spelled correctly. On top
|
||||
* of this, it also improves the suggest performance.
|
||||
*/
|
||||
public DirectCandidateGenerator maxTermFreq(float maxTermFreq) {
|
||||
this.maxTermFreq = maxTermFreq; // relative and absolute forms are both stored as given; no validation here
|
||||
return this; // hand back this generator so setter calls can be chained
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the number of minimal prefix characters that must match in order
|
||||
* to be a candidate suggestion. Defaults to 1. Increasing this number
|
||||
* improves suggest performance. Usually misspellings don't occur in the
|
||||
* beginning of terms.
|
||||
*/
|
||||
public DirectCandidateGenerator prefixLength(int prefixLength) {
|
||||
this.prefixLength = prefixLength; // no bounds check in this setter (negative values are not rejected here)
|
||||
return this; // hand back this generator so setter calls can be chained
|
||||
}
|
||||
|
||||
/**
|
||||
* The minimum length a suggest text term must have in order to be
|
||||
* corrected. Defaults to <tt>4</tt>.
|
||||
*/
|
||||
public DirectCandidateGenerator minWordLength(int minWordLength) {
|
||||
this.minWordLength = minWordLength; // no bounds check in this setter (negative values are not rejected here)
|
||||
return this; // hand back this generator so setter calls can be chained
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets a minimal threshold in number of documents a suggested term
|
||||
* should appear in. This can be specified as an absolute number or as a
|
||||
* relative percentage of number of documents. This can improve quality
|
||||
* by only suggesting high frequency terms. Defaults to 0f and is not
|
||||
* enabled. If a value higher than 1 is specified then the number cannot
|
||||
* be fractional.
|
||||
*/
|
||||
public DirectCandidateGenerator minDocFreq(float minDocFreq) {
|
||||
this.minDocFreq = minDocFreq; // relative and absolute forms are both stored as given; no validation here
|
||||
return this; // hand back this generator so setter calls can be chained
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets a filter (analyzer) that is applied to each of the tokens passed to this candidate generator.
|
||||
* This filter is applied to the original token before candidates are generated.
|
||||
*/
|
||||
public DirectCandidateGenerator preFilter(String preFilter) {
|
||||
this.preFilter = preFilter; // only the string is stored; nothing is looked up or resolved in this setter
|
||||
return this; // hand back this generator so setter calls can be chained
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets a filter (analyzer) that is applied to each of the generated tokens
|
||||
* before they are passed to the actual phrase scorer.
|
||||
*/
|
||||
public DirectCandidateGenerator postFilter(String postFilter) {
|
||||
this.postFilter = postFilter; // only the string is stored; nothing is looked up or resolved in this setter
|
||||
return this; // hand back this generator so setter calls can be chained
|
||||
}
|
||||
|
||||
@Override
|
||||
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { // params is not consulted in this method
|
||||
builder.startObject(); // the generator is rendered as one object; each option below is emitted only when it is non-null
|
||||
if (field != null) {
|
||||
builder.field("field", field);
|
||||
}
|
||||
if (suggestMode != null) {
|
||||
builder.field("suggest_mode", suggestMode);
|
||||
}
|
||||
if (accuracy != null) {
|
||||
builder.field("accuracy", accuracy);
|
||||
}
|
||||
if (size != null) {
|
||||
builder.field("size", size);
|
||||
}
|
||||
if (sort != null) {
|
||||
builder.field("sort", sort);
|
||||
}
|
||||
if (stringDistance != null) {
|
||||
builder.field("string_distance", stringDistance);
|
||||
}
|
||||
if (maxEdits != null) {
|
||||
builder.field("max_edits", maxEdits);
|
||||
}
|
||||
if (maxInspections != null) {
|
||||
builder.field("max_inspections", maxInspections);
|
||||
}
|
||||
if (maxTermFreq != null) {
|
||||
builder.field("max_term_freq", maxTermFreq);
|
||||
}
|
||||
if (prefixLength != null) {
|
||||
builder.field("prefix_length", prefixLength);
|
||||
}
|
||||
if (minWordLength != null) {
|
||||
builder.field("min_word_length", minWordLength);
|
||||
}
|
||||
if (minDocFreq != null) {
|
||||
builder.field("min_doc_freq", minDocFreq);
|
||||
}
|
||||
if (preFilter != null) {
|
||||
builder.field("pre_filter", preFilter);
|
||||
}
|
||||
if (postFilter != null) {
|
||||
builder.field("post_filter", postFilter);
|
||||
}
|
||||
builder.endObject(); // closes the object opened at the top of this method
|
||||
return builder; // the same builder instance that was passed in
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,323 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.search.suggest.phrase;
|
||||
|
||||
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
|
||||
import org.elasticsearch.common.ParseFieldMatcher;
|
||||
import org.elasticsearch.common.ParsingException;
|
||||
import org.elasticsearch.common.io.stream.BytesStreamOutput;
|
||||
import org.elasticsearch.common.io.stream.StreamInput;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.common.xcontent.ToXContent;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.common.xcontent.XContentFactory;
|
||||
import org.elasticsearch.common.xcontent.XContentHelper;
|
||||
import org.elasticsearch.common.xcontent.XContentParser;
|
||||
import org.elasticsearch.common.xcontent.XContentType;
|
||||
import org.elasticsearch.index.IndexSettings;
|
||||
import org.elasticsearch.index.analysis.AnalysisService;
|
||||
import org.elasticsearch.index.analysis.NamedAnalyzer;
|
||||
import org.elasticsearch.index.mapper.ContentPath;
|
||||
import org.elasticsearch.index.mapper.MappedFieldType;
|
||||
import org.elasticsearch.index.mapper.Mapper;
|
||||
import org.elasticsearch.index.mapper.MapperBuilders;
|
||||
import org.elasticsearch.index.mapper.MapperService;
|
||||
import org.elasticsearch.index.mapper.core.StringFieldMapper;
|
||||
import org.elasticsearch.index.mapper.core.StringFieldMapper.StringFieldType;
|
||||
import org.elasticsearch.index.query.QueryParseContext;
|
||||
import org.elasticsearch.index.query.QueryShardContext;
|
||||
import org.elasticsearch.indices.IndicesModule;
|
||||
import org.elasticsearch.indices.query.IndicesQueriesRegistry;
|
||||
import org.elasticsearch.search.suggest.phrase.PhraseSuggestionContext.DirectCandidateGenerator;
|
||||
import org.elasticsearch.test.ESTestCase;
|
||||
import org.elasticsearch.test.IndexSettingsModule;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.function.Consumer;
|
||||
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
|
||||
public class DirectCandidateGeneratorTests extends ESTestCase{
|
||||
|
||||
private static final int NUMBER_OF_RUNS = 20;
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Test serialization and deserialization of the generator
|
||||
*/
|
||||
public void testSerialization() throws IOException {
|
||||
for (int runs = 0; runs < NUMBER_OF_RUNS; runs++) {
|
||||
DirectCandidateGeneratorBuilder original = randomCandidateGenerator();
|
||||
DirectCandidateGeneratorBuilder deserialized = serializedCopy(original);
|
||||
assertEquals(deserialized, original);
|
||||
assertEquals(deserialized.hashCode(), original.hashCode());
|
||||
assertNotSame(deserialized, original);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test equality and hashCode properties
|
||||
*/
|
||||
public void testEqualsAndHashcode() throws IOException {
|
||||
for (int runs = 0; runs < NUMBER_OF_RUNS; runs++) {
|
||||
DirectCandidateGeneratorBuilder first = randomCandidateGenerator();
|
||||
assertFalse("generator is equal to null", first.equals(null));
|
||||
assertFalse("generator is equal to incompatible type", first.equals(""));
|
||||
assertTrue("generator is not equal to self", first.equals(first));
|
||||
assertThat("same generator's hashcode returns different values if called multiple times", first.hashCode(),
|
||||
equalTo(first.hashCode()));
|
||||
|
||||
DirectCandidateGeneratorBuilder second = serializedCopy(first);
|
||||
assertTrue("generator is not equal to self", second.equals(second));
|
||||
assertTrue("generator is not equal to its copy", first.equals(second));
|
||||
assertTrue("equals is not symmetric", second.equals(first));
|
||||
assertThat("generator copy's hashcode is different from original hashcode", second.hashCode(), equalTo(first.hashCode()));
|
||||
|
||||
DirectCandidateGeneratorBuilder third = serializedCopy(second);
|
||||
assertTrue("generator is not equal to self", third.equals(third));
|
||||
assertTrue("generator is not equal to its copy", second.equals(third));
|
||||
assertThat("generator copy's hashcode is different from original hashcode", second.hashCode(), equalTo(third.hashCode()));
|
||||
assertTrue("equals is not transitive", first.equals(third));
|
||||
assertThat("generator copy's hashcode is different from original hashcode", first.hashCode(), equalTo(third.hashCode()));
|
||||
assertTrue("equals is not symmetric", third.equals(second));
|
||||
assertTrue("equals is not symmetric", third.equals(first));
|
||||
|
||||
// test for non-equality, check that all fields are covered by changing one by one
|
||||
first = new DirectCandidateGeneratorBuilder("aaa");
|
||||
assertEquals(first, serializedCopy(first));
|
||||
second = new DirectCandidateGeneratorBuilder("bbb");
|
||||
assertNotEquals(first, second);
|
||||
assertNotEquals(first.accuracy(0.1f), serializedCopy(first).accuracy(0.2f));
|
||||
assertNotEquals(first.maxEdits(1), serializedCopy(first).maxEdits(2));
|
||||
assertNotEquals(first.maxInspections(1), serializedCopy(first).maxInspections(2));
|
||||
assertNotEquals(first.maxTermFreq(0.1f), serializedCopy(first).maxTermFreq(0.2f));
|
||||
assertNotEquals(first.minDocFreq(0.1f), serializedCopy(first).minDocFreq(0.2f));
|
||||
assertNotEquals(first.minWordLength(1), serializedCopy(first).minWordLength(2));
|
||||
assertNotEquals(first.postFilter("postFilter"), serializedCopy(first).postFilter("postFilter_other"));
|
||||
assertNotEquals(first.preFilter("preFilter"), serializedCopy(first).preFilter("preFilter_other"));
|
||||
assertNotEquals(first.prefixLength(1), serializedCopy(first).prefixLength(2));
|
||||
assertNotEquals(first.size(1), serializedCopy(first).size(2));
|
||||
assertNotEquals(first.sort("score"), serializedCopy(first).sort("frequency"));
|
||||
assertNotEquals(first.stringDistance("levenstein"), serializedCopy(first).sort("ngram"));
|
||||
assertNotEquals(first.suggestMode("missing"), serializedCopy(first).suggestMode("always"));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* creates random candidate generator, renders it to xContent and back to new instance that should be equal to original
|
||||
*/
|
||||
public void testFromXContent() throws IOException {
|
||||
QueryParseContext context = new QueryParseContext(new IndicesQueriesRegistry(Settings.EMPTY, Collections.emptyMap()));
|
||||
context.parseFieldMatcher(new ParseFieldMatcher(Settings.EMPTY));
|
||||
for (int runs = 0; runs < NUMBER_OF_RUNS; runs++) {
|
||||
DirectCandidateGeneratorBuilder generator = randomCandidateGenerator();
|
||||
XContentBuilder builder = XContentFactory.contentBuilder(randomFrom(XContentType.values()));
|
||||
if (randomBoolean()) {
|
||||
builder.prettyPrint();
|
||||
}
|
||||
generator.toXContent(builder, ToXContent.EMPTY_PARAMS);
|
||||
|
||||
XContentParser parser = XContentHelper.createParser(builder.bytes());
|
||||
context.reset(parser);
|
||||
parser.nextToken();
|
||||
DirectCandidateGeneratorBuilder secondGenerator = DirectCandidateGeneratorBuilder.PROTOTYPE
|
||||
.fromXContent(context);
|
||||
assertNotSame(generator, secondGenerator);
|
||||
assertEquals(generator, secondGenerator);
|
||||
assertEquals(generator.hashCode(), secondGenerator.hashCode());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* test that build() outputs a {@link DirectCandidateGenerator} that is similar to the one
|
||||
* we would get when parsing the xContent the test generator is rendering out
|
||||
*/
|
||||
public void testBuild() throws IOException {
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings(randomAsciiOfLengthBetween(1, 10), Settings.EMPTY);
|
||||
|
||||
AnalysisService mockAnalysisService = new AnalysisService(idxSettings, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap()) {
|
||||
@Override
|
||||
public NamedAnalyzer analyzer(String name) {
|
||||
return new NamedAnalyzer(name, new WhitespaceAnalyzer());
|
||||
}
|
||||
};
|
||||
|
||||
MapperService mockMapperService = new MapperService(idxSettings, mockAnalysisService , null, new IndicesModule().getMapperRegistry(), null) {
|
||||
@Override
|
||||
public MappedFieldType fullName(String fullName) {
|
||||
return new StringFieldType();
|
||||
}
|
||||
};
|
||||
|
||||
QueryShardContext mockShardContext = new QueryShardContext(idxSettings, null, null, null, mockMapperService, null, null, null) {
|
||||
@Override
|
||||
public MappedFieldType fieldMapper(String name) {
|
||||
StringFieldMapper.Builder builder = MapperBuilders.stringField(name);
|
||||
return builder.build(new Mapper.BuilderContext(idxSettings.getSettings(), new ContentPath(1))).fieldType();
|
||||
}
|
||||
};
|
||||
mockShardContext.setMapUnmappedFieldAsString(true);
|
||||
|
||||
for (int runs = 0; runs < NUMBER_OF_RUNS; runs++) {
|
||||
DirectCandidateGeneratorBuilder generator = randomCandidateGenerator();
|
||||
// first, build via DirectCandidateGenerator#build()
|
||||
DirectCandidateGenerator contextGenerator = generator.build(mockShardContext);
|
||||
|
||||
// second, render random test generator to xContent and parse using
|
||||
// PhraseSuggestParser
|
||||
XContentBuilder builder = XContentFactory.contentBuilder(randomFrom(XContentType.values()));
|
||||
if (randomBoolean()) {
|
||||
builder.prettyPrint();
|
||||
}
|
||||
generator.toXContent(builder, ToXContent.EMPTY_PARAMS);
|
||||
XContentParser parser = XContentHelper.createParser(builder.bytes());
|
||||
|
||||
DirectCandidateGenerator secondGenerator = PhraseSuggestParser.parseCandidateGenerator(parser,
|
||||
mockShardContext.getMapperService(), mockShardContext.parseFieldMatcher());
|
||||
|
||||
// compare their properties
|
||||
assertNotSame(contextGenerator, secondGenerator);
|
||||
assertEquals(contextGenerator.field(), secondGenerator.field());
|
||||
assertEquals(contextGenerator.accuracy(), secondGenerator.accuracy(), Float.MIN_VALUE);
|
||||
assertEquals(contextGenerator.maxTermFreq(), secondGenerator.maxTermFreq(), Float.MIN_VALUE);
|
||||
assertEquals(contextGenerator.maxEdits(), secondGenerator.maxEdits());
|
||||
assertEquals(contextGenerator.maxInspections(), secondGenerator.maxInspections());
|
||||
assertEquals(contextGenerator.minDocFreq(), secondGenerator.minDocFreq(), Float.MIN_VALUE);
|
||||
assertEquals(contextGenerator.minWordLength(), secondGenerator.minWordLength());
|
||||
assertEquals(contextGenerator.postFilter(), secondGenerator.postFilter());
|
||||
assertEquals(contextGenerator.prefixLength(), secondGenerator.prefixLength());
|
||||
assertEquals(contextGenerator.preFilter(), secondGenerator.preFilter());
|
||||
assertEquals(contextGenerator.sort(), secondGenerator.sort());
|
||||
assertEquals(contextGenerator.size(), secondGenerator.size());
|
||||
// some instances of StringDistance don't support equals, just checking the class here
|
||||
assertEquals(contextGenerator.stringDistance().getClass(), secondGenerator.stringDistance().getClass());
|
||||
assertEquals(contextGenerator.suggestMode(), secondGenerator.suggestMode());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* test that bad xContent throws exception
|
||||
*/
|
||||
public void testIllegalXContent() throws IOException {
|
||||
QueryParseContext context = new QueryParseContext(new IndicesQueriesRegistry(Settings.EMPTY, Collections.emptyMap()));
|
||||
context.parseFieldMatcher(new ParseFieldMatcher(Settings.EMPTY));
|
||||
|
||||
// test missing fieldname
|
||||
String directGenerator = "{ }";
|
||||
XContentParser parser = XContentFactory.xContent(directGenerator).createParser(directGenerator);
|
||||
|
||||
context.reset(parser);
|
||||
try {
|
||||
DirectCandidateGeneratorBuilder.PROTOTYPE.fromXContent(context);
|
||||
fail("expected an exception");
|
||||
} catch (IllegalArgumentException e) {
|
||||
assertEquals("[direct_generator] expects exactly one field parameter, but found []", e.getMessage());
|
||||
}
|
||||
|
||||
// test two fieldnames
|
||||
directGenerator = "{ \"field\" : \"f1\", \"field\" : \"f2\" }";
|
||||
parser = XContentFactory.xContent(directGenerator).createParser(directGenerator);
|
||||
|
||||
context.reset(parser);
|
||||
try {
|
||||
DirectCandidateGeneratorBuilder.PROTOTYPE.fromXContent(context);
|
||||
fail("expected an exception");
|
||||
} catch (IllegalArgumentException e) {
|
||||
assertEquals("[direct_generator] expects exactly one field parameter, but found [f2, f1]", e.getMessage());
|
||||
}
|
||||
|
||||
// test unknown field
|
||||
directGenerator = "{ \"unknown_param\" : \"f1\" }";
|
||||
parser = XContentFactory.xContent(directGenerator).createParser(directGenerator);
|
||||
|
||||
context.reset(parser);
|
||||
try {
|
||||
DirectCandidateGeneratorBuilder.PROTOTYPE.fromXContent(context);
|
||||
fail("expected an exception");
|
||||
} catch (IllegalArgumentException e) {
|
||||
assertEquals("[direct_generator] unknown field [unknown_param], parser not found", e.getMessage());
|
||||
}
|
||||
|
||||
// test bad value for field (e.g. size expects an int)
|
||||
directGenerator = "{ \"size\" : \"xxl\" }";
|
||||
parser = XContentFactory.xContent(directGenerator).createParser(directGenerator);
|
||||
|
||||
context.reset(parser);
|
||||
try {
|
||||
DirectCandidateGeneratorBuilder.PROTOTYPE.fromXContent(context);
|
||||
fail("expected an exception");
|
||||
} catch (ParsingException e) {
|
||||
assertEquals("[direct_generator] failed to parse field [size]", e.getMessage());
|
||||
}
|
||||
|
||||
// test unexpected token
|
||||
directGenerator = "{ \"size\" : [ \"xxl\" ] }";
|
||||
parser = XContentFactory.xContent(directGenerator).createParser(directGenerator);
|
||||
|
||||
context.reset(parser);
|
||||
try {
|
||||
DirectCandidateGeneratorBuilder.PROTOTYPE.fromXContent(context);
|
||||
fail("expected an exception");
|
||||
} catch (IllegalArgumentException e) {
|
||||
assertEquals("[direct_generator] size doesn't support values of type: START_ARRAY", e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* create random {@link DirectCandidateGeneratorBuilder}
|
||||
*/
|
||||
public static DirectCandidateGeneratorBuilder randomCandidateGenerator() {
|
||||
DirectCandidateGeneratorBuilder generator = new DirectCandidateGeneratorBuilder(randomAsciiOfLength(10));
|
||||
maybeSet(generator::accuracy, randomFloat());
|
||||
maybeSet(generator::maxEdits, randomIntBetween(1, 2));
|
||||
maybeSet(generator::maxInspections, randomIntBetween(1, 20));
|
||||
maybeSet(generator::maxTermFreq, randomFloat());
|
||||
maybeSet(generator::minDocFreq, randomFloat());
|
||||
maybeSet(generator::minWordLength, randomIntBetween(1, 20));
|
||||
maybeSet(generator::prefixLength, randomIntBetween(1, 20));
|
||||
maybeSet(generator::preFilter, randomAsciiOfLengthBetween(1, 20));
|
||||
maybeSet(generator::postFilter, randomAsciiOfLengthBetween(1, 20));
|
||||
maybeSet(generator::size, randomIntBetween(1, 20));
|
||||
maybeSet(generator::sort, randomFrom(Arrays.asList(new String[]{ "score", "frequency" })));
|
||||
maybeSet(generator::stringDistance, randomFrom(Arrays.asList(new String[]{ "internal", "damerau_levenshtein", "levenstein", "jarowinkler", "ngram"})));
|
||||
maybeSet(generator::suggestMode, randomFrom(Arrays.asList(new String[]{ "missing", "popular", "always"})));
|
||||
return generator;
|
||||
}
|
||||
|
||||
private static <T> void maybeSet(Consumer<T> consumer, T value) {
|
||||
if (randomBoolean()) {
|
||||
consumer.accept(value);
|
||||
}
|
||||
}
|
||||
|
||||
private static DirectCandidateGeneratorBuilder serializedCopy(DirectCandidateGeneratorBuilder original) throws IOException {
|
||||
try (BytesStreamOutput output = new BytesStreamOutput()) {
|
||||
original.writeTo(output);
|
||||
try (StreamInput in = StreamInput.wrap(output.bytes())) {
|
||||
return DirectCandidateGeneratorBuilder.PROTOTYPE.readFrom(in);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -578,6 +578,10 @@ to index a document only if it doesn't already exist.
|
|||
|
||||
`RescoreBuilder.Rescorer` was merged with `RescoreBuilder`, which now is an abstract superclass. QueryRescoreBuilder currently is its only implementation.
|
||||
|
||||
==== PhraseSuggestionBuilder
|
||||
|
||||
The inner DirectCandidateGenerator class has been moved out to its own class called DirectCandidateGeneratorBuilder.
|
||||
|
||||
[[breaking_30_cache_concurrency]]
|
||||
=== Cache concurrency level settings removed
|
||||
|
||||
|
|
|
@ -20,43 +20,6 @@
|
|||
package org.elasticsearch.messy.tests;
|
||||
|
||||
|
||||
import org.elasticsearch.ElasticsearchException;
|
||||
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
|
||||
import org.elasticsearch.action.index.IndexRequestBuilder;
|
||||
import org.elasticsearch.action.search.ReduceSearchPhaseException;
|
||||
import org.elasticsearch.action.search.SearchPhaseExecutionException;
|
||||
import org.elasticsearch.action.search.SearchRequestBuilder;
|
||||
import org.elasticsearch.action.search.SearchResponse;
|
||||
import org.elasticsearch.action.search.ShardSearchFailure;
|
||||
import org.elasticsearch.action.suggest.SuggestRequestBuilder;
|
||||
import org.elasticsearch.action.suggest.SuggestResponse;
|
||||
import org.elasticsearch.common.io.PathUtils;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.common.xcontent.XContentFactory;
|
||||
import org.elasticsearch.plugins.Plugin;
|
||||
import org.elasticsearch.script.mustache.MustachePlugin;
|
||||
import org.elasticsearch.search.suggest.Suggest;
|
||||
import org.elasticsearch.search.suggest.SuggestBuilder;
|
||||
import org.elasticsearch.search.suggest.SuggestBuilder.SuggestionBuilder;
|
||||
import org.elasticsearch.search.suggest.phrase.PhraseSuggestionBuilder;
|
||||
import org.elasticsearch.search.suggest.phrase.PhraseSuggestionBuilder.DirectCandidateGenerator;
|
||||
import org.elasticsearch.search.suggest.term.TermSuggestionBuilder;
|
||||
import org.elasticsearch.test.ESIntegTestCase;
|
||||
import org.elasticsearch.test.hamcrest.ElasticsearchAssertions;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URISyntaxException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
|
||||
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS;
|
||||
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS;
|
||||
import static org.elasticsearch.common.settings.Settings.settingsBuilder;
|
||||
|
@ -76,6 +39,43 @@ import static org.hamcrest.Matchers.equalTo;
|
|||
import static org.hamcrest.Matchers.instanceOf;
|
||||
import static org.hamcrest.Matchers.nullValue;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URISyntaxException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
|
||||
import org.elasticsearch.ElasticsearchException;
|
||||
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
|
||||
import org.elasticsearch.action.index.IndexRequestBuilder;
|
||||
import org.elasticsearch.action.search.ReduceSearchPhaseException;
|
||||
import org.elasticsearch.action.search.SearchPhaseExecutionException;
|
||||
import org.elasticsearch.action.search.SearchRequestBuilder;
|
||||
import org.elasticsearch.action.search.SearchResponse;
|
||||
import org.elasticsearch.action.search.ShardSearchFailure;
|
||||
import org.elasticsearch.action.suggest.SuggestRequestBuilder;
|
||||
import org.elasticsearch.action.suggest.SuggestResponse;
|
||||
import org.elasticsearch.common.io.PathUtils;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.common.xcontent.XContentFactory;
|
||||
import org.elasticsearch.plugins.Plugin;
|
||||
import org.elasticsearch.script.mustache.MustachePlugin;
|
||||
import org.elasticsearch.search.suggest.Suggest;
|
||||
import org.elasticsearch.search.suggest.SuggestBuilder;
|
||||
import org.elasticsearch.search.suggest.SuggestBuilder.SuggestionBuilder;
|
||||
import org.elasticsearch.search.suggest.phrase.DirectCandidateGeneratorBuilder;
|
||||
import org.elasticsearch.search.suggest.phrase.PhraseSuggestionBuilder;
|
||||
import org.elasticsearch.search.suggest.term.TermSuggestionBuilder;
|
||||
import org.elasticsearch.test.ESIntegTestCase;
|
||||
import org.elasticsearch.test.hamcrest.ElasticsearchAssertions;
|
||||
|
||||
/**
|
||||
* Integration tests for term and phrase suggestions. Many of these tests make many requests that vary only slightly from one another. Where
|
||||
* possible these tests should declare for the first request, make the request, modify the configuration for the next request, make that
|
||||
|
@ -213,7 +213,7 @@ public class SuggestSearchTests extends ESIntegTestCase {
|
|||
index("test", "type1", "3", "name", "I like ice cream.");
|
||||
refresh();
|
||||
|
||||
DirectCandidateGenerator generator = candidateGenerator("name").prefixLength(0).minWordLength(0).suggestMode("always").maxEdits(2);
|
||||
DirectCandidateGeneratorBuilder generator = candidateGenerator("name").prefixLength(0).minWordLength(0).suggestMode("always").maxEdits(2);
|
||||
PhraseSuggestionBuilder phraseSuggestion = phraseSuggestion("did_you_mean").field("name.shingled")
|
||||
.addCandidateGenerator(generator)
|
||||
.gramSize(3);
|
||||
|
|
Loading…
Reference in New Issue