Merge pull request #19914 from javanna/enhancement/suggestutils_cleanup
Move SuggestUtils methods to their respective caller classes
This commit is contained in:
commit
05d0402fae
|
@ -24,12 +24,6 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
|||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.search.spell.DirectSpellChecker;
|
||||
import org.apache.lucene.search.spell.JaroWinklerDistance;
|
||||
import org.apache.lucene.search.spell.LevensteinDistance;
|
||||
import org.apache.lucene.search.spell.LuceneLevenshteinDistance;
|
||||
import org.apache.lucene.search.spell.NGramDistance;
|
||||
import org.apache.lucene.search.spell.StringDistance;
|
||||
import org.apache.lucene.search.spell.SuggestMode;
|
||||
import org.apache.lucene.search.spell.SuggestWord;
|
||||
import org.apache.lucene.search.spell.SuggestWordFrequencyComparator;
|
||||
import org.apache.lucene.search.spell.SuggestWordQueue;
|
||||
|
@ -40,25 +34,19 @@ import org.apache.lucene.util.CharsRefBuilder;
|
|||
import org.apache.lucene.util.IOUtils;
|
||||
import org.elasticsearch.common.ParseField;
|
||||
import org.elasticsearch.common.io.FastCharArrayReader;
|
||||
import org.elasticsearch.index.analysis.CustomAnalyzer;
|
||||
import org.elasticsearch.index.analysis.NamedAnalyzer;
|
||||
import org.elasticsearch.index.analysis.ShingleTokenFilterFactory;
|
||||
import org.elasticsearch.index.analysis.TokenFilterFactory;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Comparator;
|
||||
import java.util.Locale;
|
||||
|
||||
public final class SuggestUtils {
|
||||
public static final Comparator<SuggestWord> LUCENE_FREQUENCY = new SuggestWordFrequencyComparator();
|
||||
public static final Comparator<SuggestWord> SCORE_COMPARATOR = SuggestWordQueue.DEFAULT_COMPARATOR;
|
||||
private static final Comparator<SuggestWord> LUCENE_FREQUENCY = new SuggestWordFrequencyComparator();
|
||||
private static final Comparator<SuggestWord> SCORE_COMPARATOR = SuggestWordQueue.DEFAULT_COMPARATOR;
|
||||
|
||||
/** Private constructor: prevents instantiation of this final class from outside. */
private SuggestUtils() {
|
||||
// utility class, not meant to be instantiated
|
||||
}
|
||||
|
||||
public static DirectSpellChecker getDirectSpellChecker(DirectSpellcheckerSettings suggestion) {
|
||||
|
||||
DirectSpellChecker directSpellChecker = new DirectSpellChecker();
|
||||
directSpellChecker.setAccuracy(suggestion.accuracy());
|
||||
Comparator<SuggestWord> comparator;
|
||||
|
@ -142,6 +130,7 @@ public final class SuggestUtils {
|
|||
numTokens++;
|
||||
}
|
||||
consumer.end();
|
||||
success = true;
|
||||
} finally {
|
||||
if (success) {
|
||||
stream.close();
|
||||
|
@ -152,37 +141,6 @@ public final class SuggestUtils {
|
|||
return numTokens;
|
||||
}
|
||||
|
||||
public static SuggestMode resolveSuggestMode(String suggestMode) {
|
||||
suggestMode = suggestMode.toLowerCase(Locale.US);
|
||||
if ("missing".equals(suggestMode)) {
|
||||
return SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX;
|
||||
} else if ("popular".equals(suggestMode)) {
|
||||
return SuggestMode.SUGGEST_MORE_POPULAR;
|
||||
} else if ("always".equals(suggestMode)) {
|
||||
return SuggestMode.SUGGEST_ALWAYS;
|
||||
} else {
|
||||
throw new IllegalArgumentException("Illegal suggest mode " + suggestMode);
|
||||
}
|
||||
}
|
||||
|
||||
public static StringDistance resolveDistance(String distanceVal) {
|
||||
distanceVal = distanceVal.toLowerCase(Locale.US);
|
||||
if ("internal".equals(distanceVal)) {
|
||||
return DirectSpellChecker.INTERNAL_LEVENSHTEIN;
|
||||
} else if ("damerau_levenshtein".equals(distanceVal) || "damerauLevenshtein".equals(distanceVal)) {
|
||||
return new LuceneLevenshteinDistance();
|
||||
} else if ("levenstein".equals(distanceVal)) {
|
||||
return new LevensteinDistance();
|
||||
//TODO Jaro and Winkler are 2 people - so apply same naming logic as damerau_levenshtein
|
||||
} else if ("jarowinkler".equals(distanceVal)) {
|
||||
return new JaroWinklerDistance();
|
||||
} else if ("ngram".equals(distanceVal)) {
|
||||
return new NGramDistance();
|
||||
} else {
|
||||
throw new IllegalArgumentException("Illegal distance option " + distanceVal);
|
||||
}
|
||||
}
|
||||
|
||||
public static class Fields {
|
||||
public static final ParseField STRING_DISTANCE = new ParseField("string_distance");
|
||||
public static final ParseField SUGGEST_MODE = new ParseField("suggest_mode");
|
||||
|
@ -201,22 +159,4 @@ public final class SuggestUtils {
|
|||
public static final ParseField SORT = new ParseField("sort");
|
||||
public static final ParseField ACCURACY = new ParseField("accuracy");
|
||||
}
|
||||
|
||||
public static ShingleTokenFilterFactory.Factory getShingleFilterFactory(Analyzer analyzer) {
|
||||
if (analyzer instanceof NamedAnalyzer) {
|
||||
analyzer = ((NamedAnalyzer)analyzer).analyzer();
|
||||
}
|
||||
if (analyzer instanceof CustomAnalyzer) {
|
||||
final CustomAnalyzer a = (CustomAnalyzer) analyzer;
|
||||
final TokenFilterFactory[] tokenFilters = a.tokenFilters();
|
||||
for (TokenFilterFactory tokenFilterFactory : tokenFilters) {
|
||||
if (tokenFilterFactory instanceof ShingleTokenFilterFactory) {
|
||||
return ((ShingleTokenFilterFactory)tokenFilterFactory).getInnerFactory();
|
||||
} else if (tokenFilterFactory instanceof ShingleTokenFilterFactory.Factory) {
|
||||
return (ShingleTokenFilterFactory.Factory) tokenFilterFactory;
|
||||
}
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,6 +19,13 @@
|
|||
|
||||
package org.elasticsearch.search.suggest.phrase;
|
||||
|
||||
import org.apache.lucene.search.spell.DirectSpellChecker;
|
||||
import org.apache.lucene.search.spell.JaroWinklerDistance;
|
||||
import org.apache.lucene.search.spell.LevensteinDistance;
|
||||
import org.apache.lucene.search.spell.LuceneLevenshteinDistance;
|
||||
import org.apache.lucene.search.spell.NGramDistance;
|
||||
import org.apache.lucene.search.spell.StringDistance;
|
||||
import org.apache.lucene.search.spell.SuggestMode;
|
||||
import org.apache.lucene.util.automaton.LevenshteinAutomata;
|
||||
import org.elasticsearch.ExceptionsHelper;
|
||||
import org.elasticsearch.common.ParseField;
|
||||
|
@ -31,11 +38,11 @@ import org.elasticsearch.common.xcontent.XContentFactory;
|
|||
import org.elasticsearch.index.mapper.MapperService;
|
||||
import org.elasticsearch.index.query.QueryParseContext;
|
||||
import org.elasticsearch.search.suggest.SortBy;
|
||||
import org.elasticsearch.search.suggest.SuggestUtils;
|
||||
import org.elasticsearch.search.suggest.phrase.PhraseSuggestionBuilder.CandidateGenerator;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashSet;
|
||||
import java.util.Locale;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.function.Consumer;
|
||||
|
@ -401,13 +408,13 @@ public final class DirectCandidateGeneratorBuilder implements CandidateGenerator
|
|||
}
|
||||
transferIfNotNull(this.accuracy, generator::accuracy);
|
||||
if (this.suggestMode != null) {
|
||||
generator.suggestMode(SuggestUtils.resolveSuggestMode(this.suggestMode));
|
||||
generator.suggestMode(resolveSuggestMode(this.suggestMode));
|
||||
}
|
||||
if (this.sort != null) {
|
||||
generator.sort(SortBy.resolve(this.sort));
|
||||
}
|
||||
if (this.stringDistance != null) {
|
||||
generator.stringDistance(SuggestUtils.resolveDistance(this.stringDistance));
|
||||
generator.stringDistance(resolveDistance(this.stringDistance));
|
||||
}
|
||||
transferIfNotNull(this.maxEdits, generator::maxEdits);
|
||||
if (generator.maxEdits() < 1 || generator.maxEdits() > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
|
||||
|
@ -421,11 +428,42 @@ public final class DirectCandidateGeneratorBuilder implements CandidateGenerator
|
|||
return generator;
|
||||
}
|
||||
|
||||
private static <T> void transferIfNotNull(T value, Consumer<T> consumer) {
|
||||
if (value != null) {
|
||||
consumer.accept(value);
|
||||
}
|
||||
}
|
||||
private static SuggestMode resolveSuggestMode(String suggestMode) {
|
||||
suggestMode = suggestMode.toLowerCase(Locale.US);
|
||||
if ("missing".equals(suggestMode)) {
|
||||
return SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX;
|
||||
} else if ("popular".equals(suggestMode)) {
|
||||
return SuggestMode.SUGGEST_MORE_POPULAR;
|
||||
} else if ("always".equals(suggestMode)) {
|
||||
return SuggestMode.SUGGEST_ALWAYS;
|
||||
} else {
|
||||
throw new IllegalArgumentException("Illegal suggest mode " + suggestMode);
|
||||
}
|
||||
}
|
||||
|
||||
private static StringDistance resolveDistance(String distanceVal) {
|
||||
distanceVal = distanceVal.toLowerCase(Locale.US);
|
||||
if ("internal".equals(distanceVal)) {
|
||||
return DirectSpellChecker.INTERNAL_LEVENSHTEIN;
|
||||
} else if ("damerau_levenshtein".equals(distanceVal) || "damerauLevenshtein".equals(distanceVal)) {
|
||||
return new LuceneLevenshteinDistance();
|
||||
} else if ("levenstein".equals(distanceVal)) {
|
||||
return new LevensteinDistance();
|
||||
//TODO Jaro and Winkler are 2 people - so apply same naming logic as damerau_levenshtein
|
||||
} else if ("jarowinkler".equals(distanceVal)) {
|
||||
return new JaroWinklerDistance();
|
||||
} else if ("ngram".equals(distanceVal)) {
|
||||
return new NGramDistance();
|
||||
} else {
|
||||
throw new IllegalArgumentException("Illegal distance option " + distanceVal);
|
||||
}
|
||||
}
|
||||
|
||||
private static <T> void transferIfNotNull(T value, Consumer<T> consumer) {
|
||||
if (value != null) {
|
||||
consumer.accept(value);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
package org.elasticsearch.search.suggest.phrase;
|
||||
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.elasticsearch.ElasticsearchParseException;
|
||||
import org.elasticsearch.common.ParseField;
|
||||
import org.elasticsearch.common.ParseFieldMatcher;
|
||||
|
@ -31,7 +32,10 @@ import org.elasticsearch.common.xcontent.ToXContent;
|
|||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.common.xcontent.XContentParser;
|
||||
import org.elasticsearch.common.xcontent.XContentParser.Token;
|
||||
import org.elasticsearch.index.analysis.CustomAnalyzer;
|
||||
import org.elasticsearch.index.analysis.NamedAnalyzer;
|
||||
import org.elasticsearch.index.analysis.ShingleTokenFilterFactory;
|
||||
import org.elasticsearch.index.analysis.TokenFilterFactory;
|
||||
import org.elasticsearch.index.mapper.MapperService;
|
||||
import org.elasticsearch.index.query.QueryParseContext;
|
||||
import org.elasticsearch.index.query.QueryShardContext;
|
||||
|
@ -65,7 +69,6 @@ public class PhraseSuggestionBuilder extends SuggestionBuilder<PhraseSuggestionB
|
|||
protected static final ParseField RWE_LIKELIHOOD_FIELD = new ParseField("real_word_error_likelihood");
|
||||
protected static final ParseField SEPARATOR_FIELD = new ParseField("separator");
|
||||
protected static final ParseField CONFIDENCE_FIELD = new ParseField("confidence");
|
||||
protected static final ParseField GENERATORS_FIELD = new ParseField("shard_size");
|
||||
protected static final ParseField GRAMSIZE_FIELD = new ParseField("gram_size");
|
||||
protected static final ParseField SMOOTHING_MODEL_FIELD = new ParseField("smoothing");
|
||||
protected static final ParseField FORCE_UNIGRAM_FIELD = new ParseField("force_unigrams");
|
||||
|
@ -641,8 +644,7 @@ public class PhraseSuggestionBuilder extends SuggestionBuilder<PhraseSuggestionB
|
|||
}
|
||||
|
||||
if (this.gramSize == null || suggestionContext.generators().isEmpty()) {
|
||||
final ShingleTokenFilterFactory.Factory shingleFilterFactory = SuggestUtils
|
||||
.getShingleFilterFactory(suggestionContext.getAnalyzer());
|
||||
final ShingleTokenFilterFactory.Factory shingleFilterFactory = getShingleFilterFactory(suggestionContext.getAnalyzer());
|
||||
if (this.gramSize == null) {
|
||||
// try to detect the shingle size
|
||||
if (shingleFilterFactory != null) {
|
||||
|
@ -670,6 +672,24 @@ public class PhraseSuggestionBuilder extends SuggestionBuilder<PhraseSuggestionB
|
|||
return suggestionContext;
|
||||
}
|
||||
|
||||
private static ShingleTokenFilterFactory.Factory getShingleFilterFactory(Analyzer analyzer) {
|
||||
if (analyzer instanceof NamedAnalyzer) {
|
||||
analyzer = ((NamedAnalyzer)analyzer).analyzer();
|
||||
}
|
||||
if (analyzer instanceof CustomAnalyzer) {
|
||||
final CustomAnalyzer a = (CustomAnalyzer) analyzer;
|
||||
final TokenFilterFactory[] tokenFilters = a.tokenFilters();
|
||||
for (TokenFilterFactory tokenFilterFactory : tokenFilters) {
|
||||
if (tokenFilterFactory instanceof ShingleTokenFilterFactory) {
|
||||
return ((ShingleTokenFilterFactory)tokenFilterFactory).getInnerFactory();
|
||||
} else if (tokenFilterFactory instanceof ShingleTokenFilterFactory.Factory) {
|
||||
return (ShingleTokenFilterFactory.Factory) tokenFilterFactory;
|
||||
}
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private static void ensureNoSmoothing(PhraseSuggestionBuilder suggestion) {
|
||||
if (suggestion.smoothingModel() != null) {
|
||||
throw new IllegalArgumentException("only one smoothing model supported");
|
||||
|
|
Loading…
Reference in New Issue