Merge pull request #19920 from cbuescher/remove-SuggestUtil
Remove SuggestUtils helper class
This commit is contained in:
commit
563bf0154c
|
@ -21,8 +21,13 @@ package org.elasticsearch.search.suggest;
|
|||
import org.apache.lucene.search.spell.DirectSpellChecker;
|
||||
import org.apache.lucene.search.spell.StringDistance;
|
||||
import org.apache.lucene.search.spell.SuggestMode;
|
||||
import org.apache.lucene.search.spell.SuggestWord;
|
||||
import org.apache.lucene.search.spell.SuggestWordFrequencyComparator;
|
||||
import org.apache.lucene.search.spell.SuggestWordQueue;
|
||||
import org.apache.lucene.util.automaton.LevenshteinAutomata;
|
||||
|
||||
import java.util.Comparator;
|
||||
|
||||
public class DirectSpellcheckerSettings {
|
||||
|
||||
// NB: If this changes, make sure to change the default in TermBuilderSuggester
|
||||
|
@ -49,6 +54,9 @@ public class DirectSpellcheckerSettings {
|
|||
private int minWordLength = DEFAULT_MIN_WORD_LENGTH;
|
||||
private float minDocFreq = DEFAULT_MIN_DOC_FREQ;
|
||||
|
||||
private static final Comparator<SuggestWord> LUCENE_FREQUENCY = new SuggestWordFrequencyComparator();
|
||||
private static final Comparator<SuggestWord> SCORE_COMPARATOR = SuggestWordQueue.DEFAULT_COMPARATOR;
|
||||
|
||||
public SuggestMode suggestMode() {
|
||||
return suggestMode;
|
||||
}
|
||||
|
@ -129,6 +137,33 @@ public class DirectSpellcheckerSettings {
|
|||
this.minDocFreq = minDocFreq;
|
||||
}
|
||||
|
||||
public DirectSpellChecker createDirectSpellChecker() {
|
||||
|
||||
DirectSpellChecker directSpellChecker = new DirectSpellChecker();
|
||||
directSpellChecker.setAccuracy(accuracy());
|
||||
Comparator<SuggestWord> comparator;
|
||||
switch (sort()) {
|
||||
case SCORE:
|
||||
comparator = SCORE_COMPARATOR;
|
||||
break;
|
||||
case FREQUENCY:
|
||||
comparator = LUCENE_FREQUENCY;
|
||||
break;
|
||||
default:
|
||||
throw new IllegalArgumentException("Illegal suggest sort: " + sort());
|
||||
}
|
||||
directSpellChecker.setComparator(comparator);
|
||||
directSpellChecker.setDistance(stringDistance());
|
||||
directSpellChecker.setMaxEdits(maxEdits());
|
||||
directSpellChecker.setMaxInspections(maxInspections());
|
||||
directSpellChecker.setMaxQueryFrequency(maxTermFreq());
|
||||
directSpellChecker.setMinPrefix(prefixLength());
|
||||
directSpellChecker.setMinQueryLength(minWordLength());
|
||||
directSpellChecker.setThresholdFrequency(minDocFreq());
|
||||
directSpellChecker.setLowerCaseTerms(false);
|
||||
return directSpellChecker;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "[" +
|
||||
|
|
|
@ -1,162 +0,0 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package org.elasticsearch.search.suggest;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.search.spell.DirectSpellChecker;
|
||||
import org.apache.lucene.search.spell.SuggestWord;
|
||||
import org.apache.lucene.search.spell.SuggestWordFrequencyComparator;
|
||||
import org.apache.lucene.search.spell.SuggestWordQueue;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.CharsRefBuilder;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.elasticsearch.common.ParseField;
|
||||
import org.elasticsearch.common.io.FastCharArrayReader;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Comparator;
|
||||
|
||||
public final class SuggestUtils {
|
||||
private static final Comparator<SuggestWord> LUCENE_FREQUENCY = new SuggestWordFrequencyComparator();
|
||||
private static final Comparator<SuggestWord> SCORE_COMPARATOR = SuggestWordQueue.DEFAULT_COMPARATOR;
|
||||
|
||||
private SuggestUtils() {
|
||||
// utils!!
|
||||
}
|
||||
|
||||
public static DirectSpellChecker getDirectSpellChecker(DirectSpellcheckerSettings suggestion) {
|
||||
DirectSpellChecker directSpellChecker = new DirectSpellChecker();
|
||||
directSpellChecker.setAccuracy(suggestion.accuracy());
|
||||
Comparator<SuggestWord> comparator;
|
||||
switch (suggestion.sort()) {
|
||||
case SCORE:
|
||||
comparator = SCORE_COMPARATOR;
|
||||
break;
|
||||
case FREQUENCY:
|
||||
comparator = LUCENE_FREQUENCY;
|
||||
break;
|
||||
default:
|
||||
throw new IllegalArgumentException("Illegal suggest sort: " + suggestion.sort());
|
||||
}
|
||||
directSpellChecker.setComparator(comparator);
|
||||
directSpellChecker.setDistance(suggestion.stringDistance());
|
||||
directSpellChecker.setMaxEdits(suggestion.maxEdits());
|
||||
directSpellChecker.setMaxInspections(suggestion.maxInspections());
|
||||
directSpellChecker.setMaxQueryFrequency(suggestion.maxTermFreq());
|
||||
directSpellChecker.setMinPrefix(suggestion.prefixLength());
|
||||
directSpellChecker.setMinQueryLength(suggestion.minWordLength());
|
||||
directSpellChecker.setThresholdFrequency(suggestion.minDocFreq());
|
||||
directSpellChecker.setLowerCaseTerms(false);
|
||||
return directSpellChecker;
|
||||
}
|
||||
|
||||
public static BytesRef join(BytesRef separator, BytesRefBuilder result, BytesRef... toJoin) {
|
||||
result.clear();
|
||||
for (int i = 0; i < toJoin.length - 1; i++) {
|
||||
result.append(toJoin[i]);
|
||||
result.append(separator);
|
||||
}
|
||||
result.append(toJoin[toJoin.length-1]);
|
||||
return result.get();
|
||||
}
|
||||
|
||||
public abstract static class TokenConsumer {
|
||||
protected CharTermAttribute charTermAttr;
|
||||
protected PositionIncrementAttribute posIncAttr;
|
||||
protected OffsetAttribute offsetAttr;
|
||||
|
||||
public void reset(TokenStream stream) {
|
||||
charTermAttr = stream.addAttribute(CharTermAttribute.class);
|
||||
posIncAttr = stream.addAttribute(PositionIncrementAttribute.class);
|
||||
offsetAttr = stream.addAttribute(OffsetAttribute.class);
|
||||
}
|
||||
|
||||
protected BytesRef fillBytesRef(BytesRefBuilder spare) {
|
||||
spare.copyChars(charTermAttr);
|
||||
return spare.get();
|
||||
}
|
||||
|
||||
public abstract void nextToken() throws IOException;
|
||||
|
||||
public void end() {}
|
||||
}
|
||||
|
||||
public static int analyze(Analyzer analyzer, BytesRef toAnalyze, String field, TokenConsumer consumer, CharsRefBuilder spare) throws IOException {
|
||||
spare.copyUTF8Bytes(toAnalyze);
|
||||
return analyze(analyzer, spare.get(), field, consumer);
|
||||
}
|
||||
|
||||
public static int analyze(Analyzer analyzer, CharsRef toAnalyze, String field, TokenConsumer consumer) throws IOException {
|
||||
try (TokenStream ts = analyzer.tokenStream(
|
||||
field, new FastCharArrayReader(toAnalyze.chars, toAnalyze.offset, toAnalyze.length))) {
|
||||
return analyze(ts, consumer);
|
||||
}
|
||||
}
|
||||
|
||||
/** NOTE: this method closes the TokenStream, even on exception, which is awkward
|
||||
* because really the caller who called {@link Analyzer#tokenStream} should close it,
|
||||
* but when trying that there are recursion issues when we try to use the same
|
||||
* TokenStream twice in the same recursion... */
|
||||
public static int analyze(TokenStream stream, TokenConsumer consumer) throws IOException {
|
||||
int numTokens = 0;
|
||||
boolean success = false;
|
||||
try {
|
||||
stream.reset();
|
||||
consumer.reset(stream);
|
||||
while (stream.incrementToken()) {
|
||||
consumer.nextToken();
|
||||
numTokens++;
|
||||
}
|
||||
consumer.end();
|
||||
success = true;
|
||||
} finally {
|
||||
if (success) {
|
||||
stream.close();
|
||||
} else {
|
||||
IOUtils.closeWhileHandlingException(stream);
|
||||
}
|
||||
}
|
||||
return numTokens;
|
||||
}
|
||||
|
||||
public static class Fields {
|
||||
public static final ParseField STRING_DISTANCE = new ParseField("string_distance");
|
||||
public static final ParseField SUGGEST_MODE = new ParseField("suggest_mode");
|
||||
public static final ParseField MAX_EDITS = new ParseField("max_edits");
|
||||
public static final ParseField MAX_INSPECTIONS = new ParseField("max_inspections");
|
||||
// TODO some of these constants are the same as MLT constants and
|
||||
// could be moved to a shared class for consistency
|
||||
public static final ParseField MAX_TERM_FREQ = new ParseField("max_term_freq");
|
||||
public static final ParseField PREFIX_LENGTH = new ParseField("prefix_length", "prefix_len");
|
||||
public static final ParseField MIN_WORD_LENGTH = new ParseField("min_word_length", "min_word_len");
|
||||
public static final ParseField MIN_DOC_FREQ = new ParseField("min_doc_freq");
|
||||
public static final ParseField SHARD_SIZE = new ParseField("shard_size");
|
||||
public static final ParseField ANALYZER = new ParseField("analyzer");
|
||||
public static final ParseField FIELD = new ParseField("field");
|
||||
public static final ParseField SIZE = new ParseField("size");
|
||||
public static final ParseField SORT = new ParseField("sort");
|
||||
public static final ParseField ACCURACY = new ParseField("accuracy");
|
||||
}
|
||||
}
|
|
@ -37,7 +37,6 @@ import org.elasticsearch.index.mapper.MappedFieldType;
|
|||
import org.elasticsearch.index.mapper.MapperService;
|
||||
import org.elasticsearch.index.query.QueryParseContext;
|
||||
import org.elasticsearch.index.query.QueryShardContext;
|
||||
import org.elasticsearch.search.suggest.SuggestUtils;
|
||||
import org.elasticsearch.search.suggest.SuggestionBuilder;
|
||||
import org.elasticsearch.search.suggest.SuggestionSearchContext.SuggestionContext;
|
||||
import org.elasticsearch.search.suggest.completion.context.ContextMapping;
|
||||
|
@ -48,7 +47,6 @@ import org.elasticsearch.search.suggest.completion2x.context.GeolocationContextM
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
@ -90,10 +88,10 @@ public class CompletionSuggestionBuilder extends SuggestionBuilder<CompletionSug
|
|||
TLP_PARSER.declareField((parser, completionSuggestionContext, context) ->
|
||||
completionSuggestionContext.regexOptions = RegexOptions.parse(parser, context),
|
||||
RegexOptions.REGEX_OPTIONS, ObjectParser.ValueType.OBJECT);
|
||||
TLP_PARSER.declareString(CompletionSuggestionBuilder.InnerBuilder::field, SuggestUtils.Fields.FIELD);
|
||||
TLP_PARSER.declareString(CompletionSuggestionBuilder.InnerBuilder::analyzer, SuggestUtils.Fields.ANALYZER);
|
||||
TLP_PARSER.declareInt(CompletionSuggestionBuilder.InnerBuilder::size, SuggestUtils.Fields.SIZE);
|
||||
TLP_PARSER.declareInt(CompletionSuggestionBuilder.InnerBuilder::shardSize, SuggestUtils.Fields.SHARD_SIZE);
|
||||
TLP_PARSER.declareString(CompletionSuggestionBuilder.InnerBuilder::field, FIELDNAME_FIELD);
|
||||
TLP_PARSER.declareString(CompletionSuggestionBuilder.InnerBuilder::analyzer, ANALYZER_FIELD);
|
||||
TLP_PARSER.declareInt(CompletionSuggestionBuilder.InnerBuilder::size, SIZE_FIELD);
|
||||
TLP_PARSER.declareInt(CompletionSuggestionBuilder.InnerBuilder::shardSize, SHARDSIZE_FIELD);
|
||||
TLP_PARSER.declareField((p, v, c) -> {
|
||||
// Copy the current structure. We will parse, once the mapping is provided
|
||||
XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON);
|
||||
|
@ -353,7 +351,7 @@ public class CompletionSuggestionBuilder extends SuggestionBuilder<CompletionSug
|
|||
// now we should have field name, check and copy fields over to the suggestion builder we return
|
||||
if (field == null) {
|
||||
throw new ElasticsearchParseException(
|
||||
"the required field option [" + SuggestUtils.Fields.FIELD.getPreferredName() + "] is missing");
|
||||
"the required field option [" + FIELDNAME_FIELD.getPreferredName() + "] is missing");
|
||||
}
|
||||
return new CompletionSuggestionBuilder(field, builder);
|
||||
}
|
||||
|
|
|
@ -20,7 +20,6 @@ package org.elasticsearch.search.suggest.phrase;
|
|||
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.elasticsearch.search.suggest.SuggestUtils;
|
||||
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
@ -73,7 +72,7 @@ public final class Correction implements Comparable<Correction> {
|
|||
len += toJoin[i].length;
|
||||
}
|
||||
result.grow(len);
|
||||
return SuggestUtils.join(separator, result, toJoin);
|
||||
return WordScorer.join(separator, result, toJoin);
|
||||
}
|
||||
|
||||
/** Lower scores sorts first; if scores are equal,
|
||||
|
|
|
@ -19,6 +19,10 @@
|
|||
package org.elasticsearch.search.suggest.phrase;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.index.Term;
|
||||
|
@ -29,8 +33,10 @@ import org.apache.lucene.search.spell.SuggestMode;
|
|||
import org.apache.lucene.search.spell.SuggestWord;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.CharsRefBuilder;
|
||||
import org.elasticsearch.search.suggest.SuggestUtils;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.elasticsearch.common.io.FastCharArrayReader;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
|
@ -44,7 +50,7 @@ import static java.lang.Math.log10;
|
|||
import static java.lang.Math.max;
|
||||
import static java.lang.Math.round;
|
||||
|
||||
final class DirectCandidateGenerator extends CandidateGenerator {
|
||||
public final class DirectCandidateGenerator extends CandidateGenerator {
|
||||
|
||||
private final DirectSpellChecker spellchecker;
|
||||
private final String field;
|
||||
|
@ -140,7 +146,7 @@ final class DirectCandidateGenerator extends CandidateGenerator {
|
|||
return term;
|
||||
}
|
||||
final BytesRefBuilder result = byteSpare;
|
||||
SuggestUtils.analyze(preFilter, term, field, new SuggestUtils.TokenConsumer() {
|
||||
analyze(preFilter, term, field, new TokenConsumer() {
|
||||
|
||||
@Override
|
||||
public void nextToken() throws IOException {
|
||||
|
@ -156,7 +162,7 @@ final class DirectCandidateGenerator extends CandidateGenerator {
|
|||
candidates.add(candidate);
|
||||
} else {
|
||||
final BytesRefBuilder result = byteSpare;
|
||||
SuggestUtils.analyze(postFilter, candidate.term, field, new SuggestUtils.TokenConsumer() {
|
||||
analyze(postFilter, candidate.term, field, new TokenConsumer() {
|
||||
@Override
|
||||
public void nextToken() throws IOException {
|
||||
this.fillBytesRef(result);
|
||||
|
@ -189,6 +195,27 @@ final class DirectCandidateGenerator extends CandidateGenerator {
|
|||
|
||||
}
|
||||
|
||||
public abstract static class TokenConsumer {
|
||||
protected CharTermAttribute charTermAttr;
|
||||
protected PositionIncrementAttribute posIncAttr;
|
||||
protected OffsetAttribute offsetAttr;
|
||||
|
||||
public void reset(TokenStream stream) {
|
||||
charTermAttr = stream.addAttribute(CharTermAttribute.class);
|
||||
posIncAttr = stream.addAttribute(PositionIncrementAttribute.class);
|
||||
offsetAttr = stream.addAttribute(OffsetAttribute.class);
|
||||
}
|
||||
|
||||
protected BytesRef fillBytesRef(BytesRefBuilder spare) {
|
||||
spare.copyChars(charTermAttr);
|
||||
return spare.get();
|
||||
}
|
||||
|
||||
public abstract void nextToken() throws IOException;
|
||||
|
||||
public void end() {}
|
||||
}
|
||||
|
||||
public static class CandidateSet {
|
||||
public Candidate[] candidates;
|
||||
public final Candidate originalTerm;
|
||||
|
@ -283,4 +310,40 @@ final class DirectCandidateGenerator extends CandidateGenerator {
|
|||
return new Candidate(term, frequency, channelScore, score(frequency, channelScore, dictSize), userInput);
|
||||
}
|
||||
|
||||
public static int analyze(Analyzer analyzer, BytesRef toAnalyze, String field, TokenConsumer consumer, CharsRefBuilder spare)
|
||||
throws IOException {
|
||||
spare.copyUTF8Bytes(toAnalyze);
|
||||
CharsRef charsRef = spare.get();
|
||||
try (TokenStream ts = analyzer.tokenStream(
|
||||
field, new FastCharArrayReader(charsRef.chars, charsRef.offset, charsRef.length))) {
|
||||
return analyze(ts, consumer);
|
||||
}
|
||||
}
|
||||
|
||||
/** NOTE: this method closes the TokenStream, even on exception, which is awkward
|
||||
* because really the caller who called {@link Analyzer#tokenStream} should close it,
|
||||
* but when trying that there are recursion issues when we try to use the same
|
||||
* TokenStream twice in the same recursion... */
|
||||
public static int analyze(TokenStream stream, TokenConsumer consumer) throws IOException {
|
||||
int numTokens = 0;
|
||||
boolean success = false;
|
||||
try {
|
||||
stream.reset();
|
||||
consumer.reset(stream);
|
||||
while (stream.incrementToken()) {
|
||||
consumer.nextToken();
|
||||
numTokens++;
|
||||
}
|
||||
consumer.end();
|
||||
success = true;
|
||||
} finally {
|
||||
if (success) {
|
||||
stream.close();
|
||||
} else {
|
||||
IOUtils.closeWhileHandlingException(stream);
|
||||
}
|
||||
}
|
||||
return numTokens;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -51,21 +51,21 @@ public final class DirectCandidateGeneratorBuilder implements CandidateGenerator
|
|||
|
||||
private static final String TYPE = "direct_generator";
|
||||
|
||||
static final ParseField DIRECT_GENERATOR_FIELD = new ParseField(TYPE);
|
||||
static final ParseField FIELDNAME_FIELD = new ParseField("field");
|
||||
static final ParseField PREFILTER_FIELD = new ParseField("pre_filter");
|
||||
static final ParseField POSTFILTER_FIELD = new ParseField("post_filter");
|
||||
static final ParseField SUGGESTMODE_FIELD = new ParseField("suggest_mode");
|
||||
static final ParseField MIN_DOC_FREQ_FIELD = new ParseField("min_doc_freq");
|
||||
static final ParseField ACCURACY_FIELD = new ParseField("accuracy");
|
||||
static final ParseField SIZE_FIELD = new ParseField("size");
|
||||
static final ParseField SORT_FIELD = new ParseField("sort");
|
||||
static final ParseField STRING_DISTANCE_FIELD = new ParseField("string_distance");
|
||||
static final ParseField MAX_EDITS_FIELD = new ParseField("max_edits");
|
||||
static final ParseField MAX_INSPECTIONS_FIELD = new ParseField("max_inspections");
|
||||
static final ParseField MAX_TERM_FREQ_FIELD = new ParseField("max_term_freq");
|
||||
static final ParseField PREFIX_LENGTH_FIELD = new ParseField("prefix_length");
|
||||
static final ParseField MIN_WORD_LENGTH_FIELD = new ParseField("min_word_length");
|
||||
public static final ParseField DIRECT_GENERATOR_FIELD = new ParseField(TYPE);
|
||||
public static final ParseField FIELDNAME_FIELD = new ParseField("field");
|
||||
public static final ParseField PREFILTER_FIELD = new ParseField("pre_filter");
|
||||
public static final ParseField POSTFILTER_FIELD = new ParseField("post_filter");
|
||||
public static final ParseField SUGGESTMODE_FIELD = new ParseField("suggest_mode");
|
||||
public static final ParseField MIN_DOC_FREQ_FIELD = new ParseField("min_doc_freq");
|
||||
public static final ParseField ACCURACY_FIELD = new ParseField("accuracy");
|
||||
public static final ParseField SIZE_FIELD = new ParseField("size");
|
||||
public static final ParseField SORT_FIELD = new ParseField("sort");
|
||||
public static final ParseField STRING_DISTANCE_FIELD = new ParseField("string_distance");
|
||||
public static final ParseField MAX_EDITS_FIELD = new ParseField("max_edits");
|
||||
public static final ParseField MAX_INSPECTIONS_FIELD = new ParseField("max_inspections");
|
||||
public static final ParseField MAX_TERM_FREQ_FIELD = new ParseField("max_term_freq");
|
||||
public static final ParseField PREFIX_LENGTH_FIELD = new ParseField("prefix_length");
|
||||
public static final ParseField MIN_WORD_LENGTH_FIELD = new ParseField("min_word_length");
|
||||
|
||||
private final String field;
|
||||
private String preFilter;
|
||||
|
@ -449,7 +449,8 @@ public final class DirectCandidateGeneratorBuilder implements CandidateGenerator
|
|||
return new LuceneLevenshteinDistance();
|
||||
} else if ("levenstein".equals(distanceVal)) {
|
||||
return new LevensteinDistance();
|
||||
//TODO Jaro and Winkler are 2 people - so apply same naming logic as damerau_levenshtein
|
||||
// TODO Jaro and Winkler are 2 people - so apply same naming logic
|
||||
// as damerau_levenshtein
|
||||
} else if ("jarowinkler".equals(distanceVal)) {
|
||||
return new JaroWinklerDistance();
|
||||
} else if ("ngram".equals(distanceVal)) {
|
||||
|
|
|
@ -21,7 +21,6 @@ package org.elasticsearch.search.suggest.phrase;
|
|||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.elasticsearch.search.suggest.SuggestUtils;
|
||||
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -41,15 +40,15 @@ final class LaplaceScorer extends WordScorer {
|
|||
|
||||
@Override
|
||||
protected double scoreBigram(Candidate word, Candidate w_1) throws IOException {
|
||||
SuggestUtils.join(separator, spare, w_1.term, word.term);
|
||||
join(separator, spare, w_1.term, word.term);
|
||||
return (alpha + frequency(spare.get())) / (alpha + w_1.frequency + vocabluarySize);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected double scoreTrigram(Candidate word, Candidate w_1, Candidate w_2) throws IOException {
|
||||
SuggestUtils.join(separator, spare, w_2.term, w_1.term, word.term);
|
||||
join(separator, spare, w_2.term, w_1.term, word.term);
|
||||
long trigramCount = frequency(spare.get());
|
||||
SuggestUtils.join(separator, spare, w_1.term, word.term);
|
||||
join(separator, spare, w_1.term, word.term);
|
||||
return (alpha + trigramCount) / (alpha + frequency(spare.get()) + vocabluarySize);
|
||||
}
|
||||
|
||||
|
|
|
@ -21,7 +21,6 @@ package org.elasticsearch.search.suggest.phrase;
|
|||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.elasticsearch.search.suggest.SuggestUtils;
|
||||
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -56,7 +55,7 @@ public final class LinearInterpolatingScorer extends WordScorer {
|
|||
|
||||
@Override
|
||||
protected double scoreBigram(Candidate word, Candidate w_1) throws IOException {
|
||||
SuggestUtils.join(separator, spare, w_1.term, word.term);
|
||||
join(separator, spare, w_1.term, word.term);
|
||||
final long count = frequency(spare.get());
|
||||
if (count < 1) {
|
||||
return unigramLambda * scoreUnigram(word);
|
||||
|
@ -66,12 +65,12 @@ public final class LinearInterpolatingScorer extends WordScorer {
|
|||
|
||||
@Override
|
||||
protected double scoreTrigram(Candidate w, Candidate w_1, Candidate w_2) throws IOException {
|
||||
SuggestUtils.join(separator, spare, w.term, w_1.term, w_2.term);
|
||||
join(separator, spare, w.term, w_1.term, w_2.term);
|
||||
final long count = frequency(spare.get());
|
||||
if (count < 1) {
|
||||
return scoreBigram(w, w_1);
|
||||
}
|
||||
SuggestUtils.join(separator, spare, w.term, w_1.term);
|
||||
join(separator, spare, w.term, w_1.term);
|
||||
return trigramLambda * (count / (1.d + frequency(spare.get()))) + scoreBigram(w, w_1);
|
||||
}
|
||||
|
||||
|
|
|
@ -28,7 +28,6 @@ import org.apache.lucene.util.BytesRef;
|
|||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.CharsRefBuilder;
|
||||
import org.elasticsearch.common.io.FastCharArrayReader;
|
||||
import org.elasticsearch.search.suggest.SuggestUtils;
|
||||
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;
|
||||
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.CandidateSet;
|
||||
|
||||
|
@ -63,7 +62,7 @@ public final class NoisyChannelSpellChecker {
|
|||
float maxErrors, int numCorrections, WordScorer wordScorer, float confidence, int gramSize) throws IOException {
|
||||
|
||||
final List<CandidateSet> candidateSetsList = new ArrayList<>();
|
||||
SuggestUtils.analyze(stream, new SuggestUtils.TokenConsumer() {
|
||||
DirectCandidateGenerator.analyze(stream, new DirectCandidateGenerator.TokenConsumer() {
|
||||
CandidateSet currentSet = null;
|
||||
private TypeAttribute typeAttribute;
|
||||
private final BytesRefBuilder termsRef = new BytesRefBuilder();
|
||||
|
|
|
@ -45,7 +45,6 @@ import org.elasticsearch.script.ScriptService;
|
|||
import org.elasticsearch.search.suggest.Suggest.Suggestion;
|
||||
import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry;
|
||||
import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry.Option;
|
||||
import org.elasticsearch.search.suggest.SuggestUtils;
|
||||
import org.elasticsearch.search.suggest.Suggester;
|
||||
import org.elasticsearch.search.suggest.SuggestionBuilder;
|
||||
import org.elasticsearch.search.suggest.SuggestionSearchContext.SuggestionContext;
|
||||
|
@ -84,7 +83,7 @@ public final class PhraseSuggester extends Suggester<PhraseSuggestionContext> {
|
|||
final List<CandidateGenerator> gens = new ArrayList<>(generators.size());
|
||||
for (int i = 0; i < numGenerators; i++) {
|
||||
PhraseSuggestionContext.DirectCandidateGenerator generator = generators.get(i);
|
||||
DirectSpellChecker directSpellChecker = SuggestUtils.getDirectSpellChecker(generator);
|
||||
DirectSpellChecker directSpellChecker = generator.createDirectSpellChecker();
|
||||
Terms terms = MultiFields.getTerms(indexReader, generator.field());
|
||||
if (terms != null) {
|
||||
gens.add(new DirectCandidateGenerator(directSpellChecker, generator.field(), generator.suggestMode(),
|
||||
|
|
|
@ -43,7 +43,6 @@ import org.elasticsearch.script.CompiledScript;
|
|||
import org.elasticsearch.script.Script;
|
||||
import org.elasticsearch.script.ScriptContext;
|
||||
import org.elasticsearch.script.ScriptService;
|
||||
import org.elasticsearch.search.suggest.SuggestUtils;
|
||||
import org.elasticsearch.search.suggest.SuggestionBuilder;
|
||||
import org.elasticsearch.search.suggest.SuggestionSearchContext.SuggestionContext;
|
||||
import org.elasticsearch.search.suggest.phrase.PhraseSuggestionContext.DirectCandidateGenerator;
|
||||
|
@ -596,7 +595,7 @@ public class PhraseSuggestionBuilder extends SuggestionBuilder<PhraseSuggestionB
|
|||
// now we should have field name, check and copy fields over to the suggestion builder we return
|
||||
if (fieldname == null) {
|
||||
throw new ElasticsearchParseException(
|
||||
"the required field option [" + SuggestUtils.Fields.FIELD.getPreferredName() + "] is missing");
|
||||
"the required field option [" + FIELDNAME_FIELD.getPreferredName() + "] is missing");
|
||||
}
|
||||
return new PhraseSuggestionBuilder(fieldname, tmpSuggestion);
|
||||
}
|
||||
|
|
|
@ -21,7 +21,6 @@ package org.elasticsearch.search.suggest.phrase;
|
|||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.elasticsearch.search.suggest.SuggestUtils;
|
||||
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -41,7 +40,7 @@ class StupidBackoffScorer extends WordScorer {
|
|||
|
||||
@Override
|
||||
protected double scoreBigram(Candidate word, Candidate w_1) throws IOException {
|
||||
SuggestUtils.join(separator, spare, w_1.term, word.term);
|
||||
join(separator, spare, w_1.term, word.term);
|
||||
final long count = frequency(spare.get());
|
||||
if (count < 1) {
|
||||
return discount * scoreUnigram(word);
|
||||
|
@ -53,12 +52,12 @@ class StupidBackoffScorer extends WordScorer {
|
|||
protected double scoreTrigram(Candidate w, Candidate w_1, Candidate w_2) throws IOException {
|
||||
// First see if there are bigrams. If there aren't then skip looking up the trigram. This saves lookups
|
||||
// when the bigrams and trigrams are rare and we need both anyway.
|
||||
SuggestUtils.join(separator, spare, w_1.term, w.term);
|
||||
join(separator, spare, w_1.term, w.term);
|
||||
long bigramCount = frequency(spare.get());
|
||||
if (bigramCount < 1) {
|
||||
return discount * scoreUnigram(w);
|
||||
}
|
||||
SuggestUtils.join(separator, spare, w_2.term, w_1.term, w.term);
|
||||
join(separator, spare, w_2.term, w_1.term, w.term);
|
||||
long trigramCount = frequency(spare.get());
|
||||
if (trigramCount < 1) {
|
||||
return discount * (bigramCount / (w_1.frequency + 0.00000000001d));
|
||||
|
|
|
@ -100,6 +100,16 @@ public abstract class WordScorer {
|
|||
return scoreBigram(word, w_1);
|
||||
}
|
||||
|
||||
public static BytesRef join(BytesRef separator, BytesRefBuilder result, BytesRef... toJoin) {
|
||||
result.clear();
|
||||
for (int i = 0; i < toJoin.length - 1; i++) {
|
||||
result.append(toJoin[i]);
|
||||
result.append(separator);
|
||||
}
|
||||
result.append(toJoin[toJoin.length-1]);
|
||||
return result.get();
|
||||
}
|
||||
|
||||
public interface WordScorerFactory {
|
||||
WordScorer newScorer(IndexReader reader, Terms terms,
|
||||
String field, double realWordLikelyhood, BytesRef separator) throws IOException;
|
||||
|
|
|
@ -30,10 +30,10 @@ import org.elasticsearch.common.bytes.BytesArray;
|
|||
import org.elasticsearch.common.io.stream.StreamInput;
|
||||
import org.elasticsearch.common.text.Text;
|
||||
import org.elasticsearch.index.query.QueryParseContext;
|
||||
import org.elasticsearch.search.suggest.SuggestUtils;
|
||||
import org.elasticsearch.search.suggest.Suggester;
|
||||
import org.elasticsearch.search.suggest.SuggestionBuilder;
|
||||
import org.elasticsearch.search.suggest.SuggestionSearchContext.SuggestionContext;
|
||||
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
|
@ -48,7 +48,7 @@ public final class TermSuggester extends Suggester<TermSuggestionContext> {
|
|||
@Override
|
||||
public TermSuggestion innerExecute(String name, TermSuggestionContext suggestion, IndexSearcher searcher, CharsRefBuilder spare)
|
||||
throws IOException {
|
||||
DirectSpellChecker directSpellChecker = SuggestUtils.getDirectSpellChecker(suggestion.getDirectSpellCheckerSettings());
|
||||
DirectSpellChecker directSpellChecker = suggestion.getDirectSpellCheckerSettings().createDirectSpellChecker();
|
||||
final IndexReader indexReader = searcher.getIndexReader();
|
||||
TermSuggestion response = new TermSuggestion(
|
||||
name, suggestion.getSize(), suggestion.getDirectSpellCheckerSettings().sort()
|
||||
|
@ -70,10 +70,11 @@ public final class TermSuggester extends Suggester<TermSuggestionContext> {
|
|||
return response;
|
||||
}
|
||||
|
||||
private List<Token> queryTerms(SuggestionContext suggestion, CharsRefBuilder spare) throws IOException {
|
||||
private static List<Token> queryTerms(SuggestionContext suggestion, CharsRefBuilder spare) throws IOException {
|
||||
final List<Token> result = new ArrayList<>();
|
||||
final String field = suggestion.getField();
|
||||
SuggestUtils.analyze(suggestion.getAnalyzer(), suggestion.getText(), field, new SuggestUtils.TokenConsumer() {
|
||||
DirectCandidateGenerator.analyze(suggestion.getAnalyzer(), suggestion.getText(), field,
|
||||
new DirectCandidateGenerator.TokenConsumer() {
|
||||
@Override
|
||||
public void nextToken() {
|
||||
Term term = new Term(field, BytesRef.deepCopyOf(fillBytesRef(new BytesRefBuilder())));
|
||||
|
|
|
@ -37,7 +37,6 @@ import org.elasticsearch.index.query.QueryParseContext;
|
|||
import org.elasticsearch.index.query.QueryShardContext;
|
||||
import org.elasticsearch.search.suggest.DirectSpellcheckerSettings;
|
||||
import org.elasticsearch.search.suggest.SortBy;
|
||||
import org.elasticsearch.search.suggest.SuggestUtils;
|
||||
import org.elasticsearch.search.suggest.SuggestionBuilder;
|
||||
import org.elasticsearch.search.suggest.SuggestionSearchContext.SuggestionContext;
|
||||
|
||||
|
@ -52,16 +51,16 @@ import static org.elasticsearch.search.suggest.DirectSpellcheckerSettings.DEFAUL
|
|||
import static org.elasticsearch.search.suggest.DirectSpellcheckerSettings.DEFAULT_MIN_DOC_FREQ;
|
||||
import static org.elasticsearch.search.suggest.DirectSpellcheckerSettings.DEFAULT_MIN_WORD_LENGTH;
|
||||
import static org.elasticsearch.search.suggest.DirectSpellcheckerSettings.DEFAULT_PREFIX_LENGTH;
|
||||
import static org.elasticsearch.search.suggest.SuggestUtils.Fields.ACCURACY;
|
||||
import static org.elasticsearch.search.suggest.SuggestUtils.Fields.MAX_EDITS;
|
||||
import static org.elasticsearch.search.suggest.SuggestUtils.Fields.MAX_INSPECTIONS;
|
||||
import static org.elasticsearch.search.suggest.SuggestUtils.Fields.MAX_TERM_FREQ;
|
||||
import static org.elasticsearch.search.suggest.SuggestUtils.Fields.MIN_DOC_FREQ;
|
||||
import static org.elasticsearch.search.suggest.SuggestUtils.Fields.MIN_WORD_LENGTH;
|
||||
import static org.elasticsearch.search.suggest.SuggestUtils.Fields.PREFIX_LENGTH;
|
||||
import static org.elasticsearch.search.suggest.SuggestUtils.Fields.SORT;
|
||||
import static org.elasticsearch.search.suggest.SuggestUtils.Fields.STRING_DISTANCE;
|
||||
import static org.elasticsearch.search.suggest.SuggestUtils.Fields.SUGGEST_MODE;
|
||||
import static org.elasticsearch.search.suggest.phrase.DirectCandidateGeneratorBuilder.ACCURACY_FIELD;
|
||||
import static org.elasticsearch.search.suggest.phrase.DirectCandidateGeneratorBuilder.MAX_EDITS_FIELD;
|
||||
import static org.elasticsearch.search.suggest.phrase.DirectCandidateGeneratorBuilder.MAX_INSPECTIONS_FIELD;
|
||||
import static org.elasticsearch.search.suggest.phrase.DirectCandidateGeneratorBuilder.MAX_TERM_FREQ_FIELD;
|
||||
import static org.elasticsearch.search.suggest.phrase.DirectCandidateGeneratorBuilder.MIN_DOC_FREQ_FIELD;
|
||||
import static org.elasticsearch.search.suggest.phrase.DirectCandidateGeneratorBuilder.MIN_WORD_LENGTH_FIELD;
|
||||
import static org.elasticsearch.search.suggest.phrase.DirectCandidateGeneratorBuilder.PREFIX_LENGTH_FIELD;
|
||||
import static org.elasticsearch.search.suggest.phrase.DirectCandidateGeneratorBuilder.SORT_FIELD;
|
||||
import static org.elasticsearch.search.suggest.phrase.DirectCandidateGeneratorBuilder.STRING_DISTANCE_FIELD;
|
||||
import static org.elasticsearch.search.suggest.phrase.DirectCandidateGeneratorBuilder.SUGGESTMODE_FIELD;
|
||||
|
||||
/**
|
||||
* Defines the actual suggest command. Each command uses the global options
|
||||
|
@ -376,16 +375,16 @@ public class TermSuggestionBuilder extends SuggestionBuilder<TermSuggestionBuild
|
|||
|
||||
@Override
|
||||
public XContentBuilder innerToXContent(XContentBuilder builder, Params params) throws IOException {
|
||||
builder.field(SUGGEST_MODE.getPreferredName(), suggestMode);
|
||||
builder.field(ACCURACY.getPreferredName(), accuracy);
|
||||
builder.field(SORT.getPreferredName(), sort);
|
||||
builder.field(STRING_DISTANCE.getPreferredName(), stringDistance);
|
||||
builder.field(MAX_EDITS.getPreferredName(), maxEdits);
|
||||
builder.field(MAX_INSPECTIONS.getPreferredName(), maxInspections);
|
||||
builder.field(MAX_TERM_FREQ.getPreferredName(), maxTermFreq);
|
||||
builder.field(PREFIX_LENGTH.getPreferredName(), prefixLength);
|
||||
builder.field(MIN_WORD_LENGTH.getPreferredName(), minWordLength);
|
||||
builder.field(MIN_DOC_FREQ.getPreferredName(), minDocFreq);
|
||||
builder.field(SUGGESTMODE_FIELD.getPreferredName(), suggestMode);
|
||||
builder.field(ACCURACY_FIELD.getPreferredName(), accuracy);
|
||||
builder.field(SORT_FIELD.getPreferredName(), sort);
|
||||
builder.field(STRING_DISTANCE_FIELD.getPreferredName(), stringDistance);
|
||||
builder.field(MAX_EDITS_FIELD.getPreferredName(), maxEdits);
|
||||
builder.field(MAX_INSPECTIONS_FIELD.getPreferredName(), maxInspections);
|
||||
builder.field(MAX_TERM_FREQ_FIELD.getPreferredName(), maxTermFreq);
|
||||
builder.field(PREFIX_LENGTH_FIELD.getPreferredName(), prefixLength);
|
||||
builder.field(MIN_WORD_LENGTH_FIELD.getPreferredName(), minWordLength);
|
||||
builder.field(MIN_DOC_FREQ_FIELD.getPreferredName(), minDocFreq);
|
||||
return builder;
|
||||
}
|
||||
|
||||
|
@ -408,25 +407,25 @@ public class TermSuggestionBuilder extends SuggestionBuilder<TermSuggestionBuild
|
|||
tmpSuggestion.size(parser.intValue());
|
||||
} else if (parseFieldMatcher.match(currentFieldName, SuggestionBuilder.SHARDSIZE_FIELD)) {
|
||||
tmpSuggestion.shardSize(parser.intValue());
|
||||
} else if (parseFieldMatcher.match(currentFieldName, SUGGEST_MODE)) {
|
||||
} else if (parseFieldMatcher.match(currentFieldName, SUGGESTMODE_FIELD)) {
|
||||
tmpSuggestion.suggestMode(SuggestMode.resolve(parser.text()));
|
||||
} else if (parseFieldMatcher.match(currentFieldName, ACCURACY)) {
|
||||
} else if (parseFieldMatcher.match(currentFieldName, ACCURACY_FIELD)) {
|
||||
tmpSuggestion.accuracy(parser.floatValue());
|
||||
} else if (parseFieldMatcher.match(currentFieldName, SORT)) {
|
||||
} else if (parseFieldMatcher.match(currentFieldName, SORT_FIELD)) {
|
||||
tmpSuggestion.sort(SortBy.resolve(parser.text()));
|
||||
} else if (parseFieldMatcher.match(currentFieldName, STRING_DISTANCE)) {
|
||||
} else if (parseFieldMatcher.match(currentFieldName, STRING_DISTANCE_FIELD)) {
|
||||
tmpSuggestion.stringDistance(StringDistanceImpl.resolve(parser.text()));
|
||||
} else if (parseFieldMatcher.match(currentFieldName, MAX_EDITS)) {
|
||||
} else if (parseFieldMatcher.match(currentFieldName, MAX_EDITS_FIELD)) {
|
||||
tmpSuggestion.maxEdits(parser.intValue());
|
||||
} else if (parseFieldMatcher.match(currentFieldName, MAX_INSPECTIONS)) {
|
||||
} else if (parseFieldMatcher.match(currentFieldName, MAX_INSPECTIONS_FIELD)) {
|
||||
tmpSuggestion.maxInspections(parser.intValue());
|
||||
} else if (parseFieldMatcher.match(currentFieldName, MAX_TERM_FREQ)) {
|
||||
} else if (parseFieldMatcher.match(currentFieldName, MAX_TERM_FREQ_FIELD)) {
|
||||
tmpSuggestion.maxTermFreq(parser.floatValue());
|
||||
} else if (parseFieldMatcher.match(currentFieldName, PREFIX_LENGTH)) {
|
||||
} else if (parseFieldMatcher.match(currentFieldName, PREFIX_LENGTH_FIELD)) {
|
||||
tmpSuggestion.prefixLength(parser.intValue());
|
||||
} else if (parseFieldMatcher.match(currentFieldName, MIN_WORD_LENGTH)) {
|
||||
} else if (parseFieldMatcher.match(currentFieldName, MIN_WORD_LENGTH_FIELD)) {
|
||||
tmpSuggestion.minWordLength(parser.intValue());
|
||||
} else if (parseFieldMatcher.match(currentFieldName, MIN_DOC_FREQ)) {
|
||||
} else if (parseFieldMatcher.match(currentFieldName, MIN_DOC_FREQ_FIELD)) {
|
||||
tmpSuggestion.minDocFreq(parser.floatValue());
|
||||
} else {
|
||||
throw new ParsingException(parser.getTokenLocation(),
|
||||
|
@ -440,7 +439,7 @@ public class TermSuggestionBuilder extends SuggestionBuilder<TermSuggestionBuild
|
|||
// now we should have field name, check and copy fields over to the suggestion builder we return
|
||||
if (fieldname == null) {
|
||||
throw new ElasticsearchParseException(
|
||||
"the required field option [" + SuggestUtils.Fields.FIELD.getPreferredName() + "] is missing");
|
||||
"the required field option [" + FIELDNAME_FIELD.getPreferredName() + "] is missing");
|
||||
}
|
||||
return new TermSuggestionBuilder(fieldname, tmpSuggestion);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue