Support fuzzy queries in CompletionSuggest
Added the FuzzySuggester in order to support fuzzy completion queries. The following options have been added for the fuzzy suggester: * edit_distance: Maximum edit distance * transpositions: Sets if transpositions should be counted as one or two changes * min_prefix_len: Minimum length of the input before fuzzy suggestions are returned * non_prefix_len: Length of the leading part of the input which is not checked for fuzzy alternatives. Closes #3465
This commit is contained in:
parent
a7b643305a
commit
f58f165522
|
@ -0,0 +1,226 @@
|
|||
/*
|
||||
* Licensed to ElasticSearch and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. ElasticSearch licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package org.apache.lucene.search.suggest.analyzing;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
import org.apache.lucene.util.automaton.*;
|
||||
import org.apache.lucene.util.fst.FST;
|
||||
import org.apache.lucene.util.fst.PairOutputs;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Implements a fuzzy {@link AnalyzingSuggester}. The similarity measurement is
|
||||
* based on the Damerau-Levenshtein (optimal string alignment) algorithm, though
|
||||
* you can explicitly choose classic Levenshtein by passing <code>false</code>
|
||||
* for the <code>transpositions</code> parameter.
|
||||
* <p>
|
||||
* At most, this query will match terms up to
|
||||
* {@value org.apache.lucene.util.automaton.LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}
|
||||
* edits. Higher distances are not supported. Note that the
|
||||
* fuzzy distance is measured in "byte space" on the bytes
|
||||
* returned by the {@link org.apache.lucene.analysis.TokenStream}'s {@link
|
||||
* org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute}, usually UTF8. By default
|
||||
* the analyzed bytes must be at least 3 {@link
|
||||
* #DEFAULT_MIN_FUZZY_LENGTH} bytes before any edits are
|
||||
* considered. Furthermore, the first 1 {@link
|
||||
* #DEFAULT_NON_FUZZY_PREFIX} byte is not allowed to be
|
||||
* edited. We allow up to 1 (@link
|
||||
* #DEFAULT_MAX_EDITS} edit.
|
||||
*
|
||||
* <p>
|
||||
* NOTE: This suggester does not boost suggestions that
|
||||
* required no edits over suggestions that did require
|
||||
* edits. This is a known limitation.
|
||||
*
|
||||
* <p>
|
||||
* Note: complex query analyzers can have a significant impact on the lookup
|
||||
* performance. It's recommended to not use analyzers that drop or inject terms
|
||||
* like synonyms to keep the complexity of the prefix intersection low for good
|
||||
* lookup performance. At index time, complex analyzers can safely be used.
|
||||
* </p>
|
||||
*/
|
||||
public final class XFuzzySuggester extends XAnalyzingSuggester {
|
||||
private final int maxEdits;
|
||||
private final boolean transpositions;
|
||||
private final int nonFuzzyPrefix;
|
||||
private final int minFuzzyLength;
|
||||
|
||||
/**
|
||||
* The default minimum length of the key passed to {@link
|
||||
* #lookup} before any edits are allowed.
|
||||
*/
|
||||
public static final int DEFAULT_MIN_FUZZY_LENGTH = 3;
|
||||
|
||||
/**
|
||||
* The default prefix length where edits are not allowed.
|
||||
*/
|
||||
public static final int DEFAULT_NON_FUZZY_PREFIX = 1;
|
||||
|
||||
/**
|
||||
* The default maximum number of edits for fuzzy
|
||||
* suggestions.
|
||||
*/
|
||||
public static final int DEFAULT_MAX_EDITS = 1;
|
||||
|
||||
/**
|
||||
* The default transposition value passed to {@link org.apache.lucene.util.automaton.LevenshteinAutomata}
|
||||
*/
|
||||
public static final boolean DEFAULT_TRANSPOSITIONS = true;
|
||||
|
||||
/**
|
||||
* Creates a {@link FuzzySuggester} instance initialized with default values.
|
||||
*
|
||||
* @param analyzer the analyzer used for this suggester
|
||||
*/
|
||||
public XFuzzySuggester(Analyzer analyzer) {
|
||||
this(analyzer, analyzer);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a {@link FuzzySuggester} instance with an index & a query analyzer initialized with default values.
|
||||
*
|
||||
* @param indexAnalyzer
|
||||
* Analyzer that will be used for analyzing suggestions while building the index.
|
||||
* @param queryAnalyzer
|
||||
* Analyzer that will be used for analyzing query text during lookup
|
||||
*/
|
||||
public XFuzzySuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer) {
|
||||
this(indexAnalyzer, queryAnalyzer, EXACT_FIRST | PRESERVE_SEP, 256, -1, DEFAULT_MAX_EDITS, DEFAULT_TRANSPOSITIONS,
|
||||
DEFAULT_NON_FUZZY_PREFIX, DEFAULT_MIN_FUZZY_LENGTH, null, false, 0);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a {@link FuzzySuggester} instance.
|
||||
*
|
||||
* @param indexAnalyzer Analyzer that will be used for
|
||||
* analyzing suggestions while building the index.
|
||||
* @param queryAnalyzer Analyzer that will be used for
|
||||
* analyzing query text during lookup
|
||||
* @param options see {@link #EXACT_FIRST}, {@link #PRESERVE_SEP}
|
||||
* @param maxSurfaceFormsPerAnalyzedForm Maximum number of
|
||||
* surface forms to keep for a single analyzed form.
|
||||
* When there are too many surface forms we discard the
|
||||
* lowest weighted ones.
|
||||
* @param maxGraphExpansions Maximum number of graph paths
|
||||
* to expand from the analyzed form. Set this to -1 for
|
||||
* no limit.
|
||||
* @param maxEdits must be >= 0 and <= {@link org.apache.lucene.util.automaton.LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE} .
|
||||
* @param transpositions <code>true</code> if transpositions should be treated as a primitive
|
||||
* edit operation. If this is false, comparisons will implement the classic
|
||||
* Levenshtein algorithm.
|
||||
* @param nonFuzzyPrefix length of common (non-fuzzy) prefix (see default {@link #DEFAULT_NON_FUZZY_PREFIX}
|
||||
* @param minFuzzyLength minimum length of lookup key before any edits are allowed (see default {@link #DEFAULT_MIN_FUZZY_LENGTH})
|
||||
*/
|
||||
public XFuzzySuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer, int options, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions,
|
||||
int maxEdits, boolean transpositions, int nonFuzzyPrefix, int minFuzzyLength,
|
||||
FST<PairOutputs.Pair<Long, BytesRef>> fst, boolean hasPayloads, int maxAnalyzedPathsForOneInput) {
|
||||
super(indexAnalyzer, queryAnalyzer, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, fst, hasPayloads, maxAnalyzedPathsForOneInput);
|
||||
if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
|
||||
throw new IllegalArgumentException("maxEdits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
|
||||
}
|
||||
if (nonFuzzyPrefix < 0) {
|
||||
throw new IllegalArgumentException("nonFuzzyPrefix must not be >= 0 (got " + nonFuzzyPrefix + ")");
|
||||
}
|
||||
if (minFuzzyLength < 0) {
|
||||
throw new IllegalArgumentException("minFuzzyLength must not be >= 0 (got " + minFuzzyLength + ")");
|
||||
}
|
||||
|
||||
this.maxEdits = maxEdits;
|
||||
this.transpositions = transpositions;
|
||||
this.nonFuzzyPrefix = nonFuzzyPrefix;
|
||||
this.minFuzzyLength = minFuzzyLength;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<FSTUtil.Path<PairOutputs.Pair<Long,BytesRef>>> getFullPrefixPaths(List<FSTUtil.Path<PairOutputs.Pair<Long,BytesRef>>> prefixPaths,
|
||||
Automaton lookupAutomaton,
|
||||
FST<PairOutputs.Pair<Long,BytesRef>> fst)
|
||||
throws IOException {
|
||||
|
||||
// TODO: right now there's no penalty for fuzzy/edits,
|
||||
// ie a completion whose prefix matched exactly what the
|
||||
// user typed gets no boost over completions that
|
||||
// required an edit, which get no boost over completions
|
||||
// requiring two edits. I suspect a multiplicative
|
||||
// factor is appropriate (eg, say a fuzzy match must be at
|
||||
// least 2X better weight than the non-fuzzy match to
|
||||
// "compete") ... in which case I think the wFST needs
|
||||
// to be log weights or something ...
|
||||
|
||||
Automaton levA = toLevenshteinAutomata(lookupAutomaton);
|
||||
/*
|
||||
Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8");
|
||||
w.write(levA.toDot());
|
||||
w.close();
|
||||
System.out.println("Wrote LevA to out.dot");
|
||||
*/
|
||||
return FSTUtil.intersectPrefixPaths(levA, fst);
|
||||
}
|
||||
|
||||
Automaton toLevenshteinAutomata(Automaton automaton) {
|
||||
final Set<IntsRef> ref = SpecialOperations.getFiniteStrings(automaton, -1);
|
||||
Automaton subs[] = new Automaton[ref.size()];
|
||||
int upto = 0;
|
||||
for (IntsRef path : ref) {
|
||||
if (path.length <= nonFuzzyPrefix || path.length < minFuzzyLength) {
|
||||
subs[upto] = BasicAutomata.makeString(path.ints, path.offset, path.length);
|
||||
upto++;
|
||||
} else {
|
||||
Automaton prefix = BasicAutomata.makeString(path.ints, path.offset, nonFuzzyPrefix);
|
||||
int ints[] = new int[path.length-nonFuzzyPrefix];
|
||||
System.arraycopy(path.ints, path.offset+nonFuzzyPrefix, ints, 0, ints.length);
|
||||
// TODO: maybe add alphaMin to LevenshteinAutomata,
|
||||
// and pass 1 instead of 0? We probably don't want
|
||||
// to allow the trailing dedup bytes to be
|
||||
// edited... but then 0 byte is "in general" allowed
|
||||
// on input (but not in UTF8).
|
||||
LevenshteinAutomata lev = new LevenshteinAutomata(ints, 255, transpositions);
|
||||
Automaton levAutomaton = lev.toAutomaton(maxEdits);
|
||||
Automaton combined = BasicOperations.concatenate(Arrays.asList(prefix, levAutomaton));
|
||||
combined.setDeterministic(true); // its like the special case in concatenate itself, except we cloneExpanded already
|
||||
subs[upto] = combined;
|
||||
upto++;
|
||||
}
|
||||
}
|
||||
|
||||
if (subs.length == 0) {
|
||||
// automaton is empty, there is no accepted paths through it
|
||||
return BasicAutomata.makeEmpty(); // matches nothing
|
||||
} else if (subs.length == 1) {
|
||||
// no synonyms or anything: just a single path through the tokenstream
|
||||
return subs[0];
|
||||
} else {
|
||||
// multiple paths: this is really scary! is it slow?
|
||||
// maybe we should not do this and throw UOE?
|
||||
Automaton a = BasicOperations.union(Arrays.asList(subs));
|
||||
// TODO: we could call toLevenshteinAutomata() before det?
|
||||
// this only happens if you have multiple paths anyway (e.g. synonyms)
|
||||
BasicOperations.determinize(a);
|
||||
|
||||
return a;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -24,6 +24,7 @@ import org.apache.lucene.codecs.*;
|
|||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.search.suggest.Lookup;
|
||||
import org.apache.lucene.search.suggest.analyzing.XAnalyzingSuggester;
|
||||
import org.apache.lucene.search.suggest.analyzing.XFuzzySuggester;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
@ -212,19 +213,28 @@ public class AnalyzingCompletionLookupProvider extends CompletionLookupProvider
|
|||
}
|
||||
return new LookupFactory() {
|
||||
@Override
|
||||
public Lookup getLookup(FieldMapper<?> mapper, boolean exactFirst) {
|
||||
public Lookup getLookup(FieldMapper<?> mapper, CompletionSuggestionContext suggestionContext) {
|
||||
AnalyzingSuggestHolder analyzingSuggestHolder = lookupMap.get(mapper.names().fullName());
|
||||
if (analyzingSuggestHolder == null) {
|
||||
return null;
|
||||
}
|
||||
int flags = exactFirst ? XAnalyzingSuggester.EXACT_FIRST : 0;
|
||||
if (analyzingSuggestHolder.preserveSep) {
|
||||
flags |= XAnalyzingSuggester.PRESERVE_SEP;
|
||||
}
|
||||
XAnalyzingSuggester suggester = new XAnalyzingSuggester(mapper.indexAnalyzer(), mapper.searchAnalyzer(), flags,
|
||||
int flags = analyzingSuggestHolder.preserveSep? XAnalyzingSuggester.PRESERVE_SEP : 0;
|
||||
|
||||
XAnalyzingSuggester suggester;
|
||||
if (suggestionContext.isFuzzy()) {
|
||||
suggester = new XFuzzySuggester(mapper.indexAnalyzer(), mapper.searchAnalyzer(), flags,
|
||||
analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions,
|
||||
suggestionContext.getFuzzyEditDistance(), suggestionContext.isFuzzyTranspositions(),
|
||||
suggestionContext.getFuzzyNonPrefixLength(), suggestionContext.getFuzzyMinPrefixLength(),
|
||||
analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads,
|
||||
analyzingSuggestHolder.maxAnalyzedPathsForOneInput);
|
||||
|
||||
} else {
|
||||
suggester = new XAnalyzingSuggester(mapper.indexAnalyzer(), mapper.searchAnalyzer(), flags,
|
||||
analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions,
|
||||
analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads,
|
||||
analyzingSuggestHolder.maxAnalyzedPathsForOneInput);
|
||||
}
|
||||
suggester.setPreservePositionIncrements(analyzingSuggestHolder.preservePositionIncrements);
|
||||
return suggester;
|
||||
}
|
||||
|
@ -240,8 +250,8 @@ public class AnalyzingCompletionLookupProvider extends CompletionLookupProvider
|
|||
final int maxAnalyzedPathsForOneInput;
|
||||
final FST<Pair<Long, BytesRef>> fst;
|
||||
|
||||
public AnalyzingSuggestHolder(boolean preserveSep, boolean preservePositionIncrements, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions, boolean hasPayloads,
|
||||
int maxAnalyzedPathsForOneInput, FST<Pair<Long, BytesRef>> fst) {
|
||||
public AnalyzingSuggestHolder(boolean preserveSep, boolean preservePositionIncrements, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions,
|
||||
boolean hasPayloads, int maxAnalyzedPathsForOneInput, FST<Pair<Long, BytesRef>> fst) {
|
||||
this.preserveSep = preserveSep;
|
||||
this.preservePositionIncrements = preservePositionIncrements;
|
||||
this.maxSurfaceFormsPerAnalyzedForm = maxSurfaceFormsPerAnalyzedForm;
|
||||
|
|
|
@ -271,8 +271,8 @@ public class Completion090PostingsFormat extends PostingsFormat {
|
|||
this.lookup = lookup;
|
||||
}
|
||||
|
||||
public Lookup getLookup(FieldMapper<?> mapper, boolean exactFirst) {
|
||||
return lookup.getLookup(mapper, exactFirst);
|
||||
public Lookup getLookup(FieldMapper<?> mapper, CompletionSuggestionContext suggestionContext) {
|
||||
return lookup.getLookup(mapper, suggestionContext);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -327,6 +327,6 @@ public class Completion090PostingsFormat extends PostingsFormat {
|
|||
}
|
||||
|
||||
public static abstract class LookupFactory {
|
||||
public abstract Lookup getLookup(FieldMapper<?> mapper, boolean exactFirst);
|
||||
public abstract Lookup getLookup(FieldMapper<?> mapper, CompletionSuggestionContext suggestionContext);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -48,12 +48,35 @@ public class CompletionSuggestParser implements SuggestContextParser {
|
|||
if (token == XContentParser.Token.FIELD_NAME) {
|
||||
fieldName = parser.currentName();
|
||||
} else if (token.isValue()) {
|
||||
parseSuggestContext(parser, mapperService, fieldName, suggestion);
|
||||
suggestion.mapper(mapperService.smartNameFieldMapper(suggestion.getField()));
|
||||
if (!parseSuggestContext(parser, mapperService, fieldName, suggestion)) {
|
||||
if (token == XContentParser.Token.VALUE_BOOLEAN && "fuzzy".equals(fieldName)) {
|
||||
suggestion.setFuzzy(parser.booleanValue());
|
||||
}
|
||||
}
|
||||
} else if (token == XContentParser.Token.START_OBJECT && "fuzzy".equals(fieldName)) {
|
||||
suggestion.setFuzzy(true);
|
||||
String fuzzyConfigName = null;
|
||||
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
|
||||
if (token == XContentParser.Token.FIELD_NAME) {
|
||||
fuzzyConfigName = parser.currentName();
|
||||
} else if (token.isValue()) {
|
||||
if ("edit_distance".equals(fuzzyConfigName) || "editDistance".equals(fuzzyConfigName)) {
|
||||
suggestion.setFuzzyEditDistance(parser.intValue());
|
||||
} else if ("transpositions".equals(fuzzyConfigName)) {
|
||||
suggestion.setFuzzyTranspositions(parser.booleanValue());
|
||||
} else if ("min_prefix_len".equals(fuzzyConfigName) || "minPrefixLen".equals(fuzzyConfigName)) {
|
||||
suggestion.setFuzzyMinPrefixLength(parser.intValue());
|
||||
} else if ("non_prefix_len".equals(fuzzyConfigName) || "nonPrefixLen".equals(fuzzyConfigName)) {
|
||||
suggestion.setFuzzyNonPrefixLength(parser.intValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
throw new ElasticSearchIllegalArgumentException("suggester[completion] doesn't support field [" + fieldName + "]");
|
||||
}
|
||||
}
|
||||
suggestion.mapper(mapperService.smartNameFieldMapper(suggestion.getField()));
|
||||
|
||||
return suggestion;
|
||||
}
|
||||
|
||||
|
|
|
@ -48,6 +48,7 @@ public class CompletionSuggester implements Suggester<CompletionSuggestionContex
|
|||
@Override
|
||||
public Suggest.Suggestion<? extends Suggest.Suggestion.Entry<? extends Suggest.Suggestion.Entry.Option>> execute(String name,
|
||||
CompletionSuggestionContext suggestionContext, IndexReader indexReader, CharsRef spare) throws IOException {
|
||||
|
||||
CompletionSuggestion completionSuggestionSuggestion = new CompletionSuggestion(name, suggestionContext.getSize());
|
||||
CompletionSuggestion.Entry completionSuggestEntry = new CompletionSuggestion.Entry(new StringText(suggestionContext.getText()
|
||||
.utf8ToString()), 0, suggestionContext.getText().toString().length());
|
||||
|
@ -65,7 +66,7 @@ public class CompletionSuggester implements Suggester<CompletionSuggestionContex
|
|||
Terms terms = atomicReader.fields().terms(fieldName);
|
||||
if (terms instanceof Completion090PostingsFormat.CompletionTerms) {
|
||||
Completion090PostingsFormat.CompletionTerms lookupTerms = (Completion090PostingsFormat.CompletionTerms) terms;
|
||||
Lookup lookup = lookupTerms.getLookup(suggestionContext.mapper(), false);
|
||||
Lookup lookup = lookupTerms.getLookup(suggestionContext.mapper(), suggestionContext);
|
||||
List<Lookup.LookupResult> lookupResults = lookup.lookup(prefix, false, suggestionContext.getSize());
|
||||
for (Lookup.LookupResult res : lookupResults) {
|
||||
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
*/
|
||||
package org.elasticsearch.search.suggest.completion;
|
||||
|
||||
import org.apache.lucene.search.suggest.analyzing.XFuzzySuggester;
|
||||
import org.elasticsearch.common.xcontent.ToXContent;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.search.suggest.SuggestBuilder;
|
||||
|
@ -34,8 +35,7 @@ public class CompletionSuggestionBuilder extends SuggestBuilder.SuggestionBuilde
|
|||
}
|
||||
|
||||
@Override
|
||||
protected XContentBuilder innerToXContent(XContentBuilder builder, ToXContent.Params params) throws IOException {
|
||||
protected XContentBuilder innerToXContent(XContentBuilder builder, Params params) throws IOException {
|
||||
return builder;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
*/
|
||||
package org.elasticsearch.search.suggest.completion;
|
||||
|
||||
import org.apache.lucene.search.suggest.analyzing.XFuzzySuggester;
|
||||
import org.elasticsearch.index.mapper.FieldMapper;
|
||||
import org.elasticsearch.search.suggest.Suggester;
|
||||
import org.elasticsearch.search.suggest.SuggestionSearchContext;
|
||||
|
@ -28,6 +29,11 @@ import org.elasticsearch.search.suggest.SuggestionSearchContext;
|
|||
public class CompletionSuggestionContext extends SuggestionSearchContext.SuggestionContext {
|
||||
|
||||
private FieldMapper<?> mapper;
|
||||
private int fuzzyEditDistance = XFuzzySuggester.DEFAULT_MAX_EDITS;
|
||||
private boolean fuzzyTranspositions = XFuzzySuggester.DEFAULT_TRANSPOSITIONS;
|
||||
private int fuzzyMinPrefixLength = XFuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH;
|
||||
private int fuzzyNonPrefixLength = XFuzzySuggester.DEFAULT_NON_FUZZY_PREFIX;
|
||||
private boolean fuzzy = false;
|
||||
|
||||
public CompletionSuggestionContext(Suggester suggester) {
|
||||
super(suggester);
|
||||
|
@ -40,4 +46,44 @@ public class CompletionSuggestionContext extends SuggestionSearchContext.Suggest
|
|||
/**
 * Stores the field mapper that this suggestion context will use.
 *
 * @param mapper the field mapper to remember; no validation is performed here
 */
public void mapper(FieldMapper<?> mapper) {
|
||||
this.mapper = mapper;
|
||||
}
|
||||
|
||||
/**
 * Sets the maximum number of edits allowed for fuzzy matches
 * (the {@code edit_distance} option). The value is not validated here.
 *
 * @param fuzzyEditDistance the maximum edit distance
 */
public void setFuzzyEditDistance(int fuzzyEditDistance) {
|
||||
this.fuzzyEditDistance = fuzzyEditDistance;
|
||||
}
|
||||
|
||||
/**
 * Returns the maximum number of edits allowed for fuzzy matches;
 * initialized to {@code XFuzzySuggester.DEFAULT_MAX_EDITS}.
 *
 * @return the configured maximum edit distance
 */
public int getFuzzyEditDistance() {
|
||||
return fuzzyEditDistance;
|
||||
}
|
||||
|
||||
/**
 * Sets whether a transposition of two adjacent characters counts as a
 * single edit (the {@code transpositions} option).
 *
 * @param fuzzyTranspositions {@code true} to treat a transposition as one edit
 */
public void setFuzzyTranspositions(boolean fuzzyTranspositions) {
|
||||
this.fuzzyTranspositions = fuzzyTranspositions;
|
||||
}
|
||||
|
||||
/**
 * Returns whether transpositions count as a single edit; initialized to
 * {@code XFuzzySuggester.DEFAULT_TRANSPOSITIONS}.
 *
 * @return the configured transpositions flag
 */
public boolean isFuzzyTranspositions() {
|
||||
return fuzzyTranspositions;
|
||||
}
|
||||
|
||||
/**
 * Sets the minimum length the input must have before fuzzy alternatives
 * are considered (the {@code min_prefix_len} option).
 *
 * @param fuzzyMinPrefixLength the minimum input length for fuzzy matching
 */
public void setFuzzyMinPrefixLength(int fuzzyMinPrefixLength) {
|
||||
this.fuzzyMinPrefixLength = fuzzyMinPrefixLength;
|
||||
}
|
||||
|
||||
/**
 * Returns the minimum input length before fuzzy alternatives are considered;
 * initialized to {@code XFuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH}.
 *
 * @return the configured minimum input length for fuzzy matching
 */
public int getFuzzyMinPrefixLength() {
|
||||
return fuzzyMinPrefixLength;
|
||||
}
|
||||
|
||||
/**
 * Sets the length of the leading part of the input that is not checked
 * for fuzzy alternatives (the {@code non_prefix_len} option).
 *
 * @param fuzzyNonPrefixLength the length of the non-fuzzy prefix
 */
public void setFuzzyNonPrefixLength(int fuzzyNonPrefixLength) {
|
||||
this.fuzzyNonPrefixLength = fuzzyNonPrefixLength;
|
||||
}
|
||||
|
||||
/**
 * Returns the length of the leading part of the input that is not checked
 * for fuzzy alternatives; initialized to
 * {@code XFuzzySuggester.DEFAULT_NON_FUZZY_PREFIX}.
 *
 * @return the configured non-fuzzy prefix length
 */
public int getFuzzyNonPrefixLength() {
|
||||
return fuzzyNonPrefixLength;
|
||||
}
|
||||
|
||||
/**
 * Enables or disables fuzzy matching for this completion suggestion.
 *
 * @param fuzzy {@code true} to enable fuzzy matching
 */
public void setFuzzy(boolean fuzzy) {
|
||||
this.fuzzy = fuzzy;
|
||||
}
|
||||
|
||||
/**
 * Returns whether fuzzy matching is enabled; the field is initialized to {@code false}.
 *
 * @return {@code true} if fuzzy matching has been enabled
 */
public boolean isFuzzy() {
|
||||
return fuzzy;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,98 @@
|
|||
/*
|
||||
* Licensed to ElasticSearch and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. ElasticSearch licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package org.elasticsearch.search.suggest.completion;
|
||||
|
||||
import org.apache.lucene.search.suggest.analyzing.XFuzzySuggester;
|
||||
import org.elasticsearch.common.xcontent.ToXContent;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.search.suggest.SuggestBuilder;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class CompletionSuggestionFuzzyBuilder extends SuggestBuilder.SuggestionBuilder<CompletionSuggestionFuzzyBuilder> {
|
||||
|
||||
public CompletionSuggestionFuzzyBuilder(String name) {
|
||||
super(name, "completion");
|
||||
}
|
||||
|
||||
private int fuzzyEditDistance = XFuzzySuggester.DEFAULT_MAX_EDITS;
|
||||
private boolean fuzzyTranspositions = XFuzzySuggester.DEFAULT_TRANSPOSITIONS;
|
||||
private int fuzzyMinPrefixLength = XFuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH;
|
||||
private int fuzzyNonPrefixLength = XFuzzySuggester.DEFAULT_NON_FUZZY_PREFIX;
|
||||
|
||||
public int getFuzzyEditDistance() {
|
||||
return fuzzyEditDistance;
|
||||
}
|
||||
|
||||
public CompletionSuggestionFuzzyBuilder setFuzzyEditDistance(int fuzzyEditDistance) {
|
||||
this.fuzzyEditDistance = fuzzyEditDistance;
|
||||
return this;
|
||||
}
|
||||
|
||||
public boolean isFuzzyTranspositions() {
|
||||
return fuzzyTranspositions;
|
||||
}
|
||||
|
||||
public CompletionSuggestionFuzzyBuilder setFuzzyTranspositions(boolean fuzzyTranspositions) {
|
||||
this.fuzzyTranspositions = fuzzyTranspositions;
|
||||
return this;
|
||||
}
|
||||
|
||||
public int getFuzzyMinPrefixLength() {
|
||||
return fuzzyMinPrefixLength;
|
||||
}
|
||||
|
||||
public CompletionSuggestionFuzzyBuilder setFuzzyMinPrefixLength(int fuzzyMinPrefixLength) {
|
||||
this.fuzzyMinPrefixLength = fuzzyMinPrefixLength;
|
||||
return this;
|
||||
}
|
||||
|
||||
public int getFuzzyNonPrefixLength() {
|
||||
return fuzzyNonPrefixLength;
|
||||
}
|
||||
|
||||
public CompletionSuggestionFuzzyBuilder setFuzzyNonPrefixLength(int fuzzyNonPrefixLength) {
|
||||
this.fuzzyNonPrefixLength = fuzzyNonPrefixLength;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected XContentBuilder innerToXContent(XContentBuilder builder, ToXContent.Params params) throws IOException {
|
||||
builder.startObject("fuzzy");
|
||||
|
||||
if (fuzzyEditDistance != XFuzzySuggester.DEFAULT_MAX_EDITS) {
|
||||
builder.field("edit_distance", fuzzyEditDistance);
|
||||
}
|
||||
if (fuzzyTranspositions != XFuzzySuggester.DEFAULT_TRANSPOSITIONS) {
|
||||
builder.field("transpositions", fuzzyTranspositions);
|
||||
}
|
||||
if (fuzzyMinPrefixLength != XFuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH) {
|
||||
builder.field("min_prefix_len", fuzzyMinPrefixLength);
|
||||
}
|
||||
if (fuzzyNonPrefixLength != XFuzzySuggester.DEFAULT_NON_FUZZY_PREFIX) {
|
||||
builder.field("non_prefix_len", fuzzyNonPrefixLength);
|
||||
}
|
||||
|
||||
builder.endObject();
|
||||
return builder;
|
||||
}
|
||||
}
|
|
@ -47,6 +47,7 @@ import org.elasticsearch.search.suggest.SuggestUtils;
|
|||
import org.elasticsearch.search.suggest.completion.AnalyzingCompletionLookupProvider;
|
||||
import org.elasticsearch.search.suggest.completion.Completion090PostingsFormat;
|
||||
import org.elasticsearch.search.suggest.completion.Completion090PostingsFormat.LookupFactory;
|
||||
import org.elasticsearch.search.suggest.completion.CompletionSuggestionContext;
|
||||
import org.elasticsearch.test.integration.ElasticsearchTestCase;
|
||||
import org.junit.Test;
|
||||
|
||||
|
@ -90,7 +91,7 @@ public class CompletionPostingsFormatTest extends ElasticsearchTestCase {
|
|||
LookupFactory load = provider.load(input);
|
||||
PostingsFormatProvider format = new PreBuiltPostingsFormatProvider(new ElasticSearch090PostingsFormat());
|
||||
NamedAnalyzer analyzer = new NamedAnalyzer("foo", new StandardAnalyzer(TEST_VERSION_CURRENT));
|
||||
Lookup lookup = load.getLookup(new CompletionFieldMapper(new Names("foo"), analyzer, analyzer, format, null, true, true, true), false);
|
||||
Lookup lookup = load.getLookup(new CompletionFieldMapper(new Names("foo"), analyzer, analyzer, format, null, true, true, true), new CompletionSuggestionContext(null));
|
||||
List<LookupResult> result = lookup.lookup("ge", false, 10);
|
||||
assertThat(result.get(0).key.toString(), equalTo("Generator - Foo Fighters"));
|
||||
assertThat(result.get(0).payload.utf8ToString(), equalTo("id:10"));
|
||||
|
@ -240,7 +241,7 @@ public class CompletionPostingsFormatTest extends ElasticsearchTestCase {
|
|||
assertThat(reader.leaves().get(0).reader().numDocs(), equalTo(weights.length));
|
||||
AtomicReaderContext atomicReaderContext = reader.leaves().get(0);
|
||||
Terms luceneTerms = atomicReaderContext.reader().terms(mapper.name());
|
||||
Lookup lookup = ((Completion090PostingsFormat.CompletionTerms) luceneTerms).getLookup(mapper, false);
|
||||
Lookup lookup = ((Completion090PostingsFormat.CompletionTerms) luceneTerms).getLookup(mapper, new CompletionSuggestionContext(null));
|
||||
reader.close();
|
||||
writer.close();
|
||||
dir.close();
|
||||
|
|
|
@ -32,6 +32,7 @@ import org.elasticsearch.index.mapper.MapperException;
|
|||
import org.elasticsearch.search.suggest.Suggest;
|
||||
import org.elasticsearch.search.suggest.completion.CompletionSuggestion;
|
||||
import org.elasticsearch.search.suggest.completion.CompletionSuggestionBuilder;
|
||||
import org.elasticsearch.search.suggest.completion.CompletionSuggestionFuzzyBuilder;
|
||||
import org.elasticsearch.test.integration.AbstractSharedClusterTest;
|
||||
import org.junit.Test;
|
||||
|
||||
|
@ -333,6 +334,124 @@ public class CompletionSuggestSearchTests extends AbstractSharedClusterTest {
|
|||
assertSuggestions(afterReindexingResponse, "suggs", "Foo Fighters");
|
||||
}
|
||||
|
||||
// Indexes the single input "Nirvana" and checks the fuzzy builder with all
// options left at their defaults: the exact prefix "Nirv" must suggest
// "Nirvana", and so must "Nirw", which differs from that prefix in one character.
@Test
|
||||
public void testThatFuzzySuggesterWorks() throws Exception {
|
||||
createIndexAndMapping("simple", "simple", true, true, true);
|
||||
|
||||
client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder()
|
||||
.startObject().startObject(FIELD)
|
||||
.startArray("input").value("Nirvana").endArray()
|
||||
.endObject().endObject()
|
||||
).get();
|
||||
|
||||
refresh();
|
||||
|
||||
SuggestResponse suggestResponse = client().prepareSuggest(INDEX).addSuggestion(
|
||||
new CompletionSuggestionFuzzyBuilder("foo").field(FIELD).text("Nirv").size(10)
|
||||
).execute().actionGet();
|
||||
assertSuggestions(suggestResponse, false, "foo", "Nirvana");
|
||||
|
||||
suggestResponse = client().prepareSuggest(INDEX).addSuggestion(
|
||||
new CompletionSuggestionFuzzyBuilder("foo").field(FIELD).text("Nirw").size(10)
|
||||
).execute().actionGet();
|
||||
assertSuggestions(suggestResponse, false, "foo", "Nirvana");
|
||||
}
|
||||
|
||||
// Checks the edit_distance option against the single input "Nirvana".
// "Norw" differs from the prefix "Nirv" in two characters, so the test expects
// no suggestion with the default edit distance and "Nirvana" once the
// distance is raised to 2.
@Test
|
||||
public void testThatFuzzySuggesterSupportsEditDistances() throws Exception {
|
||||
createIndexAndMapping("simple", "simple", true, true, true);
|
||||
|
||||
client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder()
|
||||
.startObject().startObject(FIELD)
|
||||
.startArray("input").value("Nirvana").endArray()
|
||||
.endObject().endObject()
|
||||
).get();
|
||||
|
||||
refresh();
|
||||
|
||||
// edit distance 1
|
||||
SuggestResponse suggestResponse = client().prepareSuggest(INDEX).addSuggestion(
|
||||
new CompletionSuggestionFuzzyBuilder("foo").field(FIELD).text("Norw").size(10)
|
||||
).execute().actionGet();
|
||||
assertSuggestions(suggestResponse, false, "foo");
|
||||
|
||||
// edit distance 2
|
||||
suggestResponse = client().prepareSuggest(INDEX).addSuggestion(
|
||||
new CompletionSuggestionFuzzyBuilder("foo").field(FIELD).text("Norw").size(10).setFuzzyEditDistance(2)
|
||||
).execute().actionGet();
|
||||
assertSuggestions(suggestResponse, false, "foo", "Nirvana");
|
||||
}
|
||||
|
||||
// Checks the transpositions option with edit distance 1. "Nriv" is "Nirv" with
// two adjacent characters swapped: the test expects no suggestion when
// transpositions are disabled and "Nirvana" when they are enabled.
@Test
|
||||
public void testThatFuzzySuggesterSupportsTranspositions() throws Exception {
|
||||
createIndexAndMapping("simple", "simple", true, true, true);
|
||||
|
||||
client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder()
|
||||
.startObject().startObject(FIELD)
|
||||
.startArray("input").value("Nirvana").endArray()
|
||||
.endObject().endObject()
|
||||
).get();
|
||||
|
||||
refresh();
|
||||
|
||||
SuggestResponse suggestResponse = client().prepareSuggest(INDEX).addSuggestion(
|
||||
new CompletionSuggestionFuzzyBuilder("foo").field(FIELD).text("Nriv").size(10).setFuzzyTranspositions(false).setFuzzyEditDistance(1)
|
||||
).execute().actionGet();
|
||||
assertSuggestions(suggestResponse, false, "foo");
|
||||
|
||||
suggestResponse = client().prepareSuggest(INDEX).addSuggestion(
|
||||
new CompletionSuggestionFuzzyBuilder("foo").field(FIELD).text("Nriv").size(10).setFuzzyTranspositions(true).setFuzzyEditDistance(1)
|
||||
).execute().actionGet();
|
||||
assertSuggestions(suggestResponse, false, "foo", "Nirvana");
|
||||
}
|
||||
|
||||
// Checks the min_prefix_len option set to 6. The five-character input "Nriva"
// is expected to yield no suggestion, while the six-character input "Nrivan"
// is expected to suggest "Nirvana".
@Test
|
||||
public void testThatFuzzySuggesterSupportsMinPrefixLength() throws Exception {
|
||||
createIndexAndMapping("simple", "simple", true, true, true);
|
||||
|
||||
client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder()
|
||||
.startObject().startObject(FIELD)
|
||||
.startArray("input").value("Nirvana").endArray()
|
||||
.endObject().endObject()
|
||||
).get();
|
||||
|
||||
refresh();
|
||||
|
||||
SuggestResponse suggestResponse = client().prepareSuggest(INDEX).addSuggestion(
|
||||
new CompletionSuggestionFuzzyBuilder("foo").field(FIELD).text("Nriva").size(10).setFuzzyMinPrefixLength(6)
|
||||
).execute().actionGet();
|
||||
assertSuggestions(suggestResponse, false, "foo");
|
||||
|
||||
suggestResponse = client().prepareSuggest(INDEX).addSuggestion(
|
||||
new CompletionSuggestionFuzzyBuilder("foo").field(FIELD).text("Nrivan").size(10).setFuzzyMinPrefixLength(6)
|
||||
).execute().actionGet();
|
||||
assertSuggestions(suggestResponse, false, "foo", "Nirvana");
|
||||
}
|
||||
|
||||
// Checks the non_prefix_len option set to 4. "Nirw" has its mismatch inside the
// first four characters and is expected to yield no suggestion; "Nirvo" has its
// mismatch after the first four characters and is expected to suggest "Nirvana".
@Test
|
||||
public void testThatFuzzySuggesterSupportsNonPrefixLength() throws Exception {
|
||||
createIndexAndMapping("simple", "simple", true, true, true);
|
||||
|
||||
client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder()
|
||||
.startObject().startObject(FIELD)
|
||||
.startArray("input").value("Nirvana").endArray()
|
||||
.endObject().endObject()
|
||||
).get();
|
||||
|
||||
refresh();
|
||||
|
||||
SuggestResponse suggestResponse = client().prepareSuggest(INDEX).addSuggestion(
|
||||
new CompletionSuggestionFuzzyBuilder("foo").field(FIELD).text("Nirw").size(10).setFuzzyNonPrefixLength(4)
|
||||
).execute().actionGet();
|
||||
assertSuggestions(suggestResponse, false, "foo");
|
||||
|
||||
suggestResponse = client().prepareSuggest(INDEX).addSuggestion(
|
||||
new CompletionSuggestionFuzzyBuilder("foo").field(FIELD).text("Nirvo").size(10).setFuzzyNonPrefixLength(4)
|
||||
).execute().actionGet();
|
||||
assertSuggestions(suggestResponse, false, "foo", "Nirvana");
|
||||
}
|
||||
|
||||
|
||||
public void assertSuggestions(String suggestion, String ... suggestions) {
|
||||
String suggestionName = RandomStrings.randomAsciiOfLength(new Random(), 10);
|
||||
SuggestResponse suggestResponse = client().prepareSuggest(INDEX).addSuggestion(
|
||||
|
|
Loading…
Reference in New Issue