Merge pull request #19914 from javanna/enhancement/suggestutils_cleanup

Move SuggestUtils methods to their respective caller classes
This commit is contained in:
Luca Cavanna 2016-08-10 16:01:28 +02:00 committed by GitHub
commit 05d0402fae
3 changed files with 72 additions and 74 deletions

View File

@ -24,12 +24,6 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.search.spell.DirectSpellChecker;
import org.apache.lucene.search.spell.JaroWinklerDistance;
import org.apache.lucene.search.spell.LevensteinDistance;
import org.apache.lucene.search.spell.LuceneLevenshteinDistance;
import org.apache.lucene.search.spell.NGramDistance;
import org.apache.lucene.search.spell.StringDistance;
import org.apache.lucene.search.spell.SuggestMode;
import org.apache.lucene.search.spell.SuggestWord;
import org.apache.lucene.search.spell.SuggestWordFrequencyComparator;
import org.apache.lucene.search.spell.SuggestWordQueue;
@ -40,25 +34,19 @@ import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.IOUtils;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.io.FastCharArrayReader;
import org.elasticsearch.index.analysis.CustomAnalyzer;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.analysis.ShingleTokenFilterFactory;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import java.io.IOException;
import java.util.Comparator;
import java.util.Locale;
public final class SuggestUtils {
public static final Comparator<SuggestWord> LUCENE_FREQUENCY = new SuggestWordFrequencyComparator();
public static final Comparator<SuggestWord> SCORE_COMPARATOR = SuggestWordQueue.DEFAULT_COMPARATOR;
private static final Comparator<SuggestWord> LUCENE_FREQUENCY = new SuggestWordFrequencyComparator();
private static final Comparator<SuggestWord> SCORE_COMPARATOR = SuggestWordQueue.DEFAULT_COMPARATOR;
private SuggestUtils() {
// utils!!
}
public static DirectSpellChecker getDirectSpellChecker(DirectSpellcheckerSettings suggestion) {
DirectSpellChecker directSpellChecker = new DirectSpellChecker();
directSpellChecker.setAccuracy(suggestion.accuracy());
Comparator<SuggestWord> comparator;
@ -142,6 +130,7 @@ public final class SuggestUtils {
numTokens++;
}
consumer.end();
success = true;
} finally {
if (success) {
stream.close();
@ -152,37 +141,6 @@ public final class SuggestUtils {
return numTokens;
}
public static SuggestMode resolveSuggestMode(String suggestMode) {
suggestMode = suggestMode.toLowerCase(Locale.US);
if ("missing".equals(suggestMode)) {
return SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX;
} else if ("popular".equals(suggestMode)) {
return SuggestMode.SUGGEST_MORE_POPULAR;
} else if ("always".equals(suggestMode)) {
return SuggestMode.SUGGEST_ALWAYS;
} else {
throw new IllegalArgumentException("Illegal suggest mode " + suggestMode);
}
}
public static StringDistance resolveDistance(String distanceVal) {
distanceVal = distanceVal.toLowerCase(Locale.US);
if ("internal".equals(distanceVal)) {
return DirectSpellChecker.INTERNAL_LEVENSHTEIN;
} else if ("damerau_levenshtein".equals(distanceVal) || "damerauLevenshtein".equals(distanceVal)) {
return new LuceneLevenshteinDistance();
} else if ("levenstein".equals(distanceVal)) {
return new LevensteinDistance();
//TODO Jaro and Winkler are 2 people - so apply same naming logic as damerau_levenshtein
} else if ("jarowinkler".equals(distanceVal)) {
return new JaroWinklerDistance();
} else if ("ngram".equals(distanceVal)) {
return new NGramDistance();
} else {
throw new IllegalArgumentException("Illegal distance option " + distanceVal);
}
}
public static class Fields {
public static final ParseField STRING_DISTANCE = new ParseField("string_distance");
public static final ParseField SUGGEST_MODE = new ParseField("suggest_mode");
@ -201,22 +159,4 @@ public final class SuggestUtils {
public static final ParseField SORT = new ParseField("sort");
public static final ParseField ACCURACY = new ParseField("accuracy");
}
public static ShingleTokenFilterFactory.Factory getShingleFilterFactory(Analyzer analyzer) {
if (analyzer instanceof NamedAnalyzer) {
analyzer = ((NamedAnalyzer)analyzer).analyzer();
}
if (analyzer instanceof CustomAnalyzer) {
final CustomAnalyzer a = (CustomAnalyzer) analyzer;
final TokenFilterFactory[] tokenFilters = a.tokenFilters();
for (TokenFilterFactory tokenFilterFactory : tokenFilters) {
if (tokenFilterFactory instanceof ShingleTokenFilterFactory) {
return ((ShingleTokenFilterFactory)tokenFilterFactory).getInnerFactory();
} else if (tokenFilterFactory instanceof ShingleTokenFilterFactory.Factory) {
return (ShingleTokenFilterFactory.Factory) tokenFilterFactory;
}
}
}
return null;
}
}

View File

@ -19,6 +19,13 @@
package org.elasticsearch.search.suggest.phrase;
import org.apache.lucene.search.spell.DirectSpellChecker;
import org.apache.lucene.search.spell.JaroWinklerDistance;
import org.apache.lucene.search.spell.LevensteinDistance;
import org.apache.lucene.search.spell.LuceneLevenshteinDistance;
import org.apache.lucene.search.spell.NGramDistance;
import org.apache.lucene.search.spell.StringDistance;
import org.apache.lucene.search.spell.SuggestMode;
import org.apache.lucene.util.automaton.LevenshteinAutomata;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.common.ParseField;
@ -31,11 +38,11 @@ import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.query.QueryParseContext;
import org.elasticsearch.search.suggest.SortBy;
import org.elasticsearch.search.suggest.SuggestUtils;
import org.elasticsearch.search.suggest.phrase.PhraseSuggestionBuilder.CandidateGenerator;
import java.io.IOException;
import java.util.HashSet;
import java.util.Locale;
import java.util.Objects;
import java.util.Set;
import java.util.function.Consumer;
@ -401,13 +408,13 @@ public final class DirectCandidateGeneratorBuilder implements CandidateGenerator
}
transferIfNotNull(this.accuracy, generator::accuracy);
if (this.suggestMode != null) {
generator.suggestMode(SuggestUtils.resolveSuggestMode(this.suggestMode));
generator.suggestMode(resolveSuggestMode(this.suggestMode));
}
if (this.sort != null) {
generator.sort(SortBy.resolve(this.sort));
}
if (this.stringDistance != null) {
generator.stringDistance(SuggestUtils.resolveDistance(this.stringDistance));
generator.stringDistance(resolveDistance(this.stringDistance));
}
transferIfNotNull(this.maxEdits, generator::maxEdits);
if (generator.maxEdits() < 1 || generator.maxEdits() > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
@ -421,11 +428,42 @@ public final class DirectCandidateGeneratorBuilder implements CandidateGenerator
return generator;
}
private static <T> void transferIfNotNull(T value, Consumer<T> consumer) {
if (value != null) {
consumer.accept(value);
}
}
private static SuggestMode resolveSuggestMode(String suggestMode) {
suggestMode = suggestMode.toLowerCase(Locale.US);
if ("missing".equals(suggestMode)) {
return SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX;
} else if ("popular".equals(suggestMode)) {
return SuggestMode.SUGGEST_MORE_POPULAR;
} else if ("always".equals(suggestMode)) {
return SuggestMode.SUGGEST_ALWAYS;
} else {
throw new IllegalArgumentException("Illegal suggest mode " + suggestMode);
}
}
private static StringDistance resolveDistance(String distanceVal) {
distanceVal = distanceVal.toLowerCase(Locale.US);
if ("internal".equals(distanceVal)) {
return DirectSpellChecker.INTERNAL_LEVENSHTEIN;
} else if ("damerau_levenshtein".equals(distanceVal) || "damerauLevenshtein".equals(distanceVal)) {
return new LuceneLevenshteinDistance();
} else if ("levenstein".equals(distanceVal)) {
return new LevensteinDistance();
//TODO Jaro and Winkler are 2 people - so apply same naming logic as damerau_levenshtein
} else if ("jarowinkler".equals(distanceVal)) {
return new JaroWinklerDistance();
} else if ("ngram".equals(distanceVal)) {
return new NGramDistance();
} else {
throw new IllegalArgumentException("Illegal distance option " + distanceVal);
}
}
private static <T> void transferIfNotNull(T value, Consumer<T> consumer) {
if (value != null) {
consumer.accept(value);
}
}
@Override
public String toString() {

View File

@ -19,6 +19,7 @@
package org.elasticsearch.search.suggest.phrase;
import org.apache.lucene.analysis.Analyzer;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.ParseFieldMatcher;
@ -31,7 +32,10 @@ import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentParser.Token;
import org.elasticsearch.index.analysis.CustomAnalyzer;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.analysis.ShingleTokenFilterFactory;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.query.QueryParseContext;
import org.elasticsearch.index.query.QueryShardContext;
@ -65,7 +69,6 @@ public class PhraseSuggestionBuilder extends SuggestionBuilder<PhraseSuggestionB
protected static final ParseField RWE_LIKELIHOOD_FIELD = new ParseField("real_word_error_likelihood");
protected static final ParseField SEPARATOR_FIELD = new ParseField("separator");
protected static final ParseField CONFIDENCE_FIELD = new ParseField("confidence");
protected static final ParseField GENERATORS_FIELD = new ParseField("shard_size");
protected static final ParseField GRAMSIZE_FIELD = new ParseField("gram_size");
protected static final ParseField SMOOTHING_MODEL_FIELD = new ParseField("smoothing");
protected static final ParseField FORCE_UNIGRAM_FIELD = new ParseField("force_unigrams");
@ -641,8 +644,7 @@ public class PhraseSuggestionBuilder extends SuggestionBuilder<PhraseSuggestionB
}
if (this.gramSize == null || suggestionContext.generators().isEmpty()) {
final ShingleTokenFilterFactory.Factory shingleFilterFactory = SuggestUtils
.getShingleFilterFactory(suggestionContext.getAnalyzer());
final ShingleTokenFilterFactory.Factory shingleFilterFactory = getShingleFilterFactory(suggestionContext.getAnalyzer());
if (this.gramSize == null) {
// try to detect the shingle size
if (shingleFilterFactory != null) {
@ -670,6 +672,24 @@ public class PhraseSuggestionBuilder extends SuggestionBuilder<PhraseSuggestionB
return suggestionContext;
}
private static ShingleTokenFilterFactory.Factory getShingleFilterFactory(Analyzer analyzer) {
if (analyzer instanceof NamedAnalyzer) {
analyzer = ((NamedAnalyzer)analyzer).analyzer();
}
if (analyzer instanceof CustomAnalyzer) {
final CustomAnalyzer a = (CustomAnalyzer) analyzer;
final TokenFilterFactory[] tokenFilters = a.tokenFilters();
for (TokenFilterFactory tokenFilterFactory : tokenFilters) {
if (tokenFilterFactory instanceof ShingleTokenFilterFactory) {
return ((ShingleTokenFilterFactory)tokenFilterFactory).getInnerFactory();
} else if (tokenFilterFactory instanceof ShingleTokenFilterFactory.Factory) {
return (ShingleTokenFilterFactory.Factory) tokenFilterFactory;
}
}
}
return null;
}
private static void ensureNoSmoothing(PhraseSuggestionBuilder suggestion) {
if (suggestion.smoothingModel() != null) {
throw new IllegalArgumentException("only one smoothing model supported");