Prevent Phrase Suggester from failing on missing fields.
Unless the field is not mapped, the phrase suggester should return empty results or skip candidate generation if a field is not in the index, rather than failing hard with an illegal argument exception. Some shards might not have a value in a certain field. Closes #3469
This commit is contained in:
parent
5d91bb04b6
commit
57c0d29114
|
@ -18,18 +18,8 @@
|
||||||
*/
|
*/
|
||||||
package org.elasticsearch.search.suggest.phrase;
|
package org.elasticsearch.search.suggest.phrase;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.*;
|
||||||
import org.apache.lucene.index.MultiFields;
|
|
||||||
import org.apache.lucene.index.Term;
|
|
||||||
import org.apache.lucene.index.Terms;
|
|
||||||
import org.apache.lucene.index.TermsEnum;
|
|
||||||
import org.apache.lucene.search.spell.DirectSpellChecker;
|
import org.apache.lucene.search.spell.DirectSpellChecker;
|
||||||
import org.apache.lucene.search.spell.SuggestMode;
|
import org.apache.lucene.search.spell.SuggestMode;
|
||||||
import org.apache.lucene.search.spell.SuggestWord;
|
import org.apache.lucene.search.spell.SuggestWord;
|
||||||
|
@ -38,6 +28,12 @@ import org.apache.lucene.util.CharsRef;
|
||||||
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
||||||
import org.elasticsearch.search.suggest.SuggestUtils;
|
import org.elasticsearch.search.suggest.SuggestUtils;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
//TODO public for tests
|
//TODO public for tests
|
||||||
public final class DirectCandidateGenerator extends CandidateGenerator {
|
public final class DirectCandidateGenerator extends CandidateGenerator {
|
||||||
|
|
||||||
|
@ -58,20 +54,19 @@ public final class DirectCandidateGenerator extends CandidateGenerator {
|
||||||
private final int numCandidates;
|
private final int numCandidates;
|
||||||
|
|
||||||
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates) throws IOException {
|
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates) throws IOException {
|
||||||
this(spellchecker, field, suggestMode, reader, nonErrorLikelihood, numCandidates, null, null);
|
this(spellchecker, field, suggestMode, reader, nonErrorLikelihood, numCandidates, null, null, MultiFields.getTerms(reader, field));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates, Analyzer preFilter, Analyzer postFilter) throws IOException {
|
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates, Analyzer preFilter, Analyzer postFilter, Terms terms) throws IOException {
|
||||||
|
if (terms == null) {
|
||||||
|
throw new ElasticSearchIllegalArgumentException("generator field [" + field + "] doesn't exist");
|
||||||
|
}
|
||||||
this.spellchecker = spellchecker;
|
this.spellchecker = spellchecker;
|
||||||
this.field = field;
|
this.field = field;
|
||||||
this.numCandidates = numCandidates;
|
this.numCandidates = numCandidates;
|
||||||
this.suggestMode = suggestMode;
|
this.suggestMode = suggestMode;
|
||||||
this.reader = reader;
|
this.reader = reader;
|
||||||
Terms terms = MultiFields.getTerms(reader, field);
|
|
||||||
if (terms == null) {
|
|
||||||
throw new ElasticSearchIllegalArgumentException("generator field [" + field + "] doesn't exist");
|
|
||||||
}
|
|
||||||
final long dictSize = terms.getSumTotalTermFreq();
|
final long dictSize = terms.getSumTotalTermFreq();
|
||||||
this.useTotalTermFrequency = dictSize != -1;
|
this.useTotalTermFrequency = dictSize != -1;
|
||||||
this.dictSize = dictSize == -1 ? reader.maxDoc() : dictSize;
|
this.dictSize = dictSize == -1 ? reader.maxDoc() : dictSize;
|
||||||
|
|
|
@ -18,27 +18,28 @@
|
||||||
*/
|
*/
|
||||||
package org.elasticsearch.search.suggest.phrase;
|
package org.elasticsearch.search.suggest.phrase;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.Terms;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.elasticsearch.search.suggest.SuggestUtils;
|
import org.elasticsearch.search.suggest.SuggestUtils;
|
||||||
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;
|
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
//TODO public for tests
|
//TODO public for tests
|
||||||
public final class LaplaceScorer extends WordScorer {
|
public final class LaplaceScorer extends WordScorer {
|
||||||
|
|
||||||
public static final WordScorerFactory FACTORY = new WordScorer.WordScorerFactory() {
|
public static final WordScorerFactory FACTORY = new WordScorer.WordScorerFactory() {
|
||||||
@Override
|
@Override
|
||||||
public WordScorer newScorer(IndexReader reader, String field, double realWordLikelyhood, BytesRef separator) throws IOException {
|
public WordScorer newScorer(IndexReader reader, Terms terms, String field, double realWordLikelyhood, BytesRef separator) throws IOException {
|
||||||
return new LaplaceScorer(reader, field, realWordLikelyhood, separator, 0.5);
|
return new LaplaceScorer(reader, terms, field, realWordLikelyhood, separator, 0.5);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
private double alpha;
|
private double alpha;
|
||||||
|
|
||||||
public LaplaceScorer(IndexReader reader, String field,
|
public LaplaceScorer(IndexReader reader, Terms terms, String field,
|
||||||
double realWordLikelyhood, BytesRef separator, double alpha) throws IOException {
|
double realWordLikelyhood, BytesRef separator, double alpha) throws IOException {
|
||||||
super(reader, field, realWordLikelyhood, separator);
|
super(reader, terms, field, realWordLikelyhood, separator);
|
||||||
this.alpha = alpha;
|
this.alpha = alpha;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -18,13 +18,14 @@
|
||||||
*/
|
*/
|
||||||
package org.elasticsearch.search.suggest.phrase;
|
package org.elasticsearch.search.suggest.phrase;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.Terms;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.elasticsearch.search.suggest.SuggestUtils;
|
import org.elasticsearch.search.suggest.SuggestUtils;
|
||||||
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;
|
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
//TODO public for tests
|
//TODO public for tests
|
||||||
public final class LinearInterpoatingScorer extends WordScorer {
|
public final class LinearInterpoatingScorer extends WordScorer {
|
||||||
|
|
||||||
|
@ -32,9 +33,9 @@ public final class LinearInterpoatingScorer extends WordScorer {
|
||||||
private final double bigramLambda;
|
private final double bigramLambda;
|
||||||
private final double trigramLambda;
|
private final double trigramLambda;
|
||||||
|
|
||||||
public LinearInterpoatingScorer(IndexReader reader, String field, double realWordLikelyhood, BytesRef separator, double trigramLambda, double bigramLambda, double unigramLambda)
|
public LinearInterpoatingScorer(IndexReader reader, Terms terms, String field, double realWordLikelyhood, BytesRef separator, double trigramLambda, double bigramLambda, double unigramLambda)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
super(reader, field, realWordLikelyhood, separator);
|
super(reader, terms, field, realWordLikelyhood, separator);
|
||||||
double sum = unigramLambda + bigramLambda + trigramLambda;
|
double sum = unigramLambda + bigramLambda + trigramLambda;
|
||||||
this.unigramLambda = unigramLambda / sum;
|
this.unigramLambda = unigramLambda / sum;
|
||||||
this.bigramLambda = bigramLambda / sum;
|
this.bigramLambda = bigramLambda / sum;
|
||||||
|
|
|
@ -18,10 +18,9 @@
|
||||||
*/
|
*/
|
||||||
package org.elasticsearch.search.suggest.phrase;
|
package org.elasticsearch.search.suggest.phrase;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.Terms;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
||||||
import org.elasticsearch.common.xcontent.XContentParser;
|
import org.elasticsearch.common.xcontent.XContentParser;
|
||||||
|
@ -33,6 +32,8 @@ import org.elasticsearch.search.suggest.SuggestUtils;
|
||||||
import org.elasticsearch.search.suggest.SuggestionSearchContext;
|
import org.elasticsearch.search.suggest.SuggestionSearchContext;
|
||||||
import org.elasticsearch.search.suggest.phrase.PhraseSuggestionContext.DirectCandidateGenerator;
|
import org.elasticsearch.search.suggest.phrase.PhraseSuggestionContext.DirectCandidateGenerator;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
public final class PhraseSuggestParser implements SuggestContextParser {
|
public final class PhraseSuggestParser implements SuggestContextParser {
|
||||||
|
|
||||||
private PhraseSuggester suggester;
|
private PhraseSuggester suggester;
|
||||||
|
@ -135,6 +136,10 @@ public final class PhraseSuggestParser implements SuggestContextParser {
|
||||||
throw new ElasticSearchIllegalArgumentException("The required field option is missing");
|
throw new ElasticSearchIllegalArgumentException("The required field option is missing");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (mapperService.smartNameFieldMapper(suggestion.getField()) == null) {
|
||||||
|
throw new ElasticSearchIllegalArgumentException("No mapping found for field [" + suggestion.getField() + "]");
|
||||||
|
}
|
||||||
|
|
||||||
if (suggestion.model() == null) {
|
if (suggestion.model() == null) {
|
||||||
suggestion.setModel(StupidBackoffScorer.FACTORY);
|
suggestion.setModel(StupidBackoffScorer.FACTORY);
|
||||||
}
|
}
|
||||||
|
@ -209,9 +214,9 @@ public final class PhraseSuggestParser implements SuggestContextParser {
|
||||||
}
|
}
|
||||||
suggestion.setModel(new WordScorer.WordScorerFactory() {
|
suggestion.setModel(new WordScorer.WordScorerFactory() {
|
||||||
@Override
|
@Override
|
||||||
public WordScorer newScorer(IndexReader reader, String field, double realWordLikelyhood, BytesRef separator)
|
public WordScorer newScorer(IndexReader reader, Terms terms, String field, double realWordLikelyhood, BytesRef separator)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
return new LinearInterpoatingScorer(reader, field, realWordLikelyhood, separator, lambdas[0], lambdas[1],
|
return new LinearInterpoatingScorer(reader, terms, field, realWordLikelyhood, separator, lambdas[0], lambdas[1],
|
||||||
lambdas[2]);
|
lambdas[2]);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
@ -230,9 +235,9 @@ public final class PhraseSuggestParser implements SuggestContextParser {
|
||||||
final double alpha = theAlpha;
|
final double alpha = theAlpha;
|
||||||
suggestion.setModel(new WordScorer.WordScorerFactory() {
|
suggestion.setModel(new WordScorer.WordScorerFactory() {
|
||||||
@Override
|
@Override
|
||||||
public WordScorer newScorer(IndexReader reader, String field, double realWordLikelyhood, BytesRef separator)
|
public WordScorer newScorer(IndexReader reader, Terms terms, String field, double realWordLikelyhood, BytesRef separator)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
return new LaplaceScorer(reader, field, realWordLikelyhood, separator, alpha);
|
return new LaplaceScorer(reader, terms, field, realWordLikelyhood, separator, alpha);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -250,9 +255,9 @@ public final class PhraseSuggestParser implements SuggestContextParser {
|
||||||
final double discount = theDiscount;
|
final double discount = theDiscount;
|
||||||
suggestion.setModel(new WordScorer.WordScorerFactory() {
|
suggestion.setModel(new WordScorer.WordScorerFactory() {
|
||||||
@Override
|
@Override
|
||||||
public WordScorer newScorer(IndexReader reader, String field, double realWordLikelyhood, BytesRef separator)
|
public WordScorer newScorer(IndexReader reader, Terms terms, String field, double realWordLikelyhood, BytesRef separator)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
return new StupidBackoffScorer(reader, field, realWordLikelyhood, separator, discount);
|
return new StupidBackoffScorer(reader, terms, field, realWordLikelyhood, separator, discount);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -281,6 +286,9 @@ public final class PhraseSuggestParser implements SuggestContextParser {
|
||||||
if (!SuggestUtils.parseDirectSpellcheckerSettings(parser, fieldName, generator)) {
|
if (!SuggestUtils.parseDirectSpellcheckerSettings(parser, fieldName, generator)) {
|
||||||
if ("field".equals(fieldName)) {
|
if ("field".equals(fieldName)) {
|
||||||
generator.setField(parser.text());
|
generator.setField(parser.text());
|
||||||
|
if (mapperService.smartNameFieldMapper(generator.field()) == null) {
|
||||||
|
throw new ElasticSearchIllegalArgumentException("No mapping found for field [" + generator.field() + "]");
|
||||||
|
}
|
||||||
} else if ("size".equals(fieldName)) {
|
} else if ("size".equals(fieldName)) {
|
||||||
generator.size(parser.intValue());
|
generator.size(parser.intValue());
|
||||||
} else if ("pre_filter".equals(fieldName) || "preFilter".equals(fieldName)) {
|
} else if ("pre_filter".equals(fieldName) || "preFilter".equals(fieldName)) {
|
||||||
|
|
|
@ -21,6 +21,8 @@ package org.elasticsearch.search.suggest.phrase;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.MultiFields;
|
||||||
|
import org.apache.lucene.index.Terms;
|
||||||
import org.apache.lucene.search.spell.DirectSpellChecker;
|
import org.apache.lucene.search.spell.DirectSpellChecker;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.CharsRef;
|
import org.apache.lucene.util.CharsRef;
|
||||||
|
@ -30,9 +32,11 @@ import org.elasticsearch.common.text.Text;
|
||||||
import org.elasticsearch.search.suggest.Suggest.Suggestion;
|
import org.elasticsearch.search.suggest.Suggest.Suggestion;
|
||||||
import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry;
|
import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry;
|
||||||
import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry.Option;
|
import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry.Option;
|
||||||
import org.elasticsearch.search.suggest.SuggestContextParser;
|
import org.elasticsearch.search.suggest.*;
|
||||||
import org.elasticsearch.search.suggest.SuggestUtils;
|
|
||||||
import org.elasticsearch.search.suggest.Suggester;
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
@ -52,38 +56,47 @@ public final class PhraseSuggester extends Suggester<PhraseSuggestionContext> {
|
||||||
public Suggestion<? extends Entry<? extends Option>> innerExecute(String name, PhraseSuggestionContext suggestion,
|
public Suggestion<? extends Entry<? extends Option>> innerExecute(String name, PhraseSuggestionContext suggestion,
|
||||||
IndexReader indexReader, CharsRef spare) throws IOException {
|
IndexReader indexReader, CharsRef spare) throws IOException {
|
||||||
double realWordErrorLikelihood = suggestion.realworldErrorLikelyhood();
|
double realWordErrorLikelihood = suggestion.realworldErrorLikelyhood();
|
||||||
List<PhraseSuggestionContext.DirectCandidateGenerator> generators = suggestion.generators();
|
|
||||||
CandidateGenerator[] gens = new CandidateGenerator[generators.size()];
|
|
||||||
for (int i = 0; i < gens.length; i++) {
|
|
||||||
PhraseSuggestionContext.DirectCandidateGenerator generator = generators.get(i);
|
|
||||||
DirectSpellChecker directSpellChecker = SuggestUtils.getDirectSpellChecker(generator);
|
|
||||||
gens[i] = new DirectCandidateGenerator(directSpellChecker, generator.field(), generator.suggestMode(), indexReader, realWordErrorLikelihood, generator.size(), generator.preFilter(), generator.postFilter());
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
final NoisyChannelSpellChecker checker = new NoisyChannelSpellChecker(realWordErrorLikelihood, suggestion.getRequireUnigram(), suggestion.getTokenLimit());
|
|
||||||
final BytesRef separator = suggestion.separator();
|
|
||||||
TokenStream stream = checker.tokenStream(suggestion.getAnalyzer(), suggestion.getText(), spare, suggestion.getField());
|
|
||||||
WordScorer wordScorer = suggestion.model().newScorer(indexReader, suggestion.getField(), realWordErrorLikelihood, separator);
|
|
||||||
Correction[] corrections = checker.getCorrections(stream, new MultiCandidateGeneratorWrapper(suggestion.getShardSize(), gens), suggestion.maxErrors(),
|
|
||||||
suggestion.getShardSize(), indexReader,wordScorer , separator, suggestion.confidence(), suggestion.gramSize());
|
|
||||||
|
|
||||||
UnicodeUtil.UTF8toUTF16(suggestion.getText(), spare);
|
UnicodeUtil.UTF8toUTF16(suggestion.getText(), spare);
|
||||||
|
|
||||||
Suggestion.Entry<Option> resultEntry = new Suggestion.Entry<Option>(new StringText(spare.toString()), 0, spare.length);
|
Suggestion.Entry<Option> resultEntry = new Suggestion.Entry<Option>(new StringText(spare.toString()), 0, spare.length);
|
||||||
BytesRef byteSpare = new BytesRef();
|
|
||||||
for (Correction correction : corrections) {
|
|
||||||
UnicodeUtil.UTF8toUTF16(correction.join(SEPARATOR, byteSpare, null, null), spare);
|
|
||||||
Text phrase = new StringText(spare.toString());
|
|
||||||
Text highlighted = null;
|
|
||||||
if (suggestion.getPreTag() != null) {
|
|
||||||
UnicodeUtil.UTF8toUTF16(correction.join(SEPARATOR, byteSpare, suggestion.getPreTag(), suggestion.getPostTag()), spare);
|
|
||||||
highlighted = new StringText(spare.toString());
|
|
||||||
}
|
|
||||||
resultEntry.addOption(new Suggestion.Entry.Option(phrase, highlighted, (float) (correction.score)));
|
|
||||||
}
|
|
||||||
final Suggestion<Entry<Option>> response = new Suggestion<Entry<Option>>(name, suggestion.getSize());
|
final Suggestion<Entry<Option>> response = new Suggestion<Entry<Option>>(name, suggestion.getSize());
|
||||||
response.addTerm(resultEntry);
|
response.addTerm(resultEntry);
|
||||||
|
|
||||||
|
List<PhraseSuggestionContext.DirectCandidateGenerator> generators = suggestion.generators();
|
||||||
|
final int numGenerators = generators.size();
|
||||||
|
final List<CandidateGenerator> gens = new ArrayList<CandidateGenerator>(generators.size());
|
||||||
|
for (int i = 0; i < numGenerators; i++) {
|
||||||
|
PhraseSuggestionContext.DirectCandidateGenerator generator = generators.get(i);
|
||||||
|
DirectSpellChecker directSpellChecker = SuggestUtils.getDirectSpellChecker(generator);
|
||||||
|
Terms terms = MultiFields.getTerms(indexReader, generator.field());
|
||||||
|
if (terms != null) {
|
||||||
|
gens.add(new DirectCandidateGenerator(directSpellChecker, generator.field(), generator.suggestMode(),
|
||||||
|
indexReader, realWordErrorLikelihood, generator.size(), generator.preFilter(), generator.postFilter(), terms));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
final String suggestField = suggestion.getField();
|
||||||
|
final Terms suggestTerms = MultiFields.getTerms(indexReader, suggestField);
|
||||||
|
if (gens.size() > 0 && suggestTerms != null) {
|
||||||
|
final NoisyChannelSpellChecker checker = new NoisyChannelSpellChecker(realWordErrorLikelihood, suggestion.getRequireUnigram(), suggestion.getTokenLimit());
|
||||||
|
final BytesRef separator = suggestion.separator();
|
||||||
|
TokenStream stream = checker.tokenStream(suggestion.getAnalyzer(), suggestion.getText(), spare, suggestion.getField());
|
||||||
|
|
||||||
|
WordScorer wordScorer = suggestion.model().newScorer(indexReader, suggestTerms, suggestField, realWordErrorLikelihood, separator);
|
||||||
|
Correction[] corrections = checker.getCorrections(stream, new MultiCandidateGeneratorWrapper(suggestion.getShardSize(),
|
||||||
|
gens.toArray(new CandidateGenerator[gens.size()])), suggestion.maxErrors(),
|
||||||
|
suggestion.getShardSize(), indexReader,wordScorer , separator, suggestion.confidence(), suggestion.gramSize());
|
||||||
|
|
||||||
|
BytesRef byteSpare = new BytesRef();
|
||||||
|
for (Correction correction : corrections) {
|
||||||
|
UnicodeUtil.UTF8toUTF16(correction.join(SEPARATOR, byteSpare, null, null), spare);
|
||||||
|
Text phrase = new StringText(spare.toString());
|
||||||
|
Text highlighted = null;
|
||||||
|
if (suggestion.getPreTag() != null) {
|
||||||
|
UnicodeUtil.UTF8toUTF16(correction.join(SEPARATOR, byteSpare, suggestion.getPreTag(), suggestion.getPostTag()), spare);
|
||||||
|
highlighted = new StringText(spare.toString());
|
||||||
|
}
|
||||||
|
resultEntry.addOption(new Suggestion.Entry.Option(phrase, highlighted, (float) (correction.score)));
|
||||||
|
}
|
||||||
|
}
|
||||||
return response;
|
return response;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -18,26 +18,27 @@
|
||||||
*/
|
*/
|
||||||
package org.elasticsearch.search.suggest.phrase;
|
package org.elasticsearch.search.suggest.phrase;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.Terms;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.elasticsearch.search.suggest.SuggestUtils;
|
import org.elasticsearch.search.suggest.SuggestUtils;
|
||||||
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;
|
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
public class StupidBackoffScorer extends WordScorer {
|
public class StupidBackoffScorer extends WordScorer {
|
||||||
public static final WordScorerFactory FACTORY = new WordScorer.WordScorerFactory() {
|
public static final WordScorerFactory FACTORY = new WordScorer.WordScorerFactory() {
|
||||||
@Override
|
@Override
|
||||||
public WordScorer newScorer(IndexReader reader, String field, double realWordLikelyhood, BytesRef separator) throws IOException {
|
public WordScorer newScorer(IndexReader reader, Terms terms, String field, double realWordLikelyhood, BytesRef separator) throws IOException {
|
||||||
return new StupidBackoffScorer(reader, field, realWordLikelyhood, separator, 0.4f);
|
return new StupidBackoffScorer(reader, terms, field, realWordLikelyhood, separator, 0.4f);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
private final double discount;
|
private final double discount;
|
||||||
|
|
||||||
public StupidBackoffScorer(IndexReader reader, String field, double realWordLikelyhood, BytesRef separator, double discount)
|
public StupidBackoffScorer(IndexReader reader, Terms terms,String field, double realWordLikelyhood, BytesRef separator, double discount)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
super(reader, field, realWordLikelyhood, separator);
|
super(reader, terms, field, realWordLikelyhood, separator);
|
||||||
this.discount = discount;
|
this.discount = discount;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -18,8 +18,6 @@
|
||||||
*/
|
*/
|
||||||
package org.elasticsearch.search.suggest.phrase;
|
package org.elasticsearch.search.suggest.phrase;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.MultiFields;
|
import org.apache.lucene.index.MultiFields;
|
||||||
import org.apache.lucene.index.Terms;
|
import org.apache.lucene.index.Terms;
|
||||||
|
@ -29,6 +27,8 @@ import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
||||||
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;
|
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;
|
||||||
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.CandidateSet;
|
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.CandidateSet;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
//TODO public for tests
|
//TODO public for tests
|
||||||
public abstract class WordScorer {
|
public abstract class WordScorer {
|
||||||
protected final IndexReader reader;
|
protected final IndexReader reader;
|
||||||
|
@ -43,11 +43,15 @@ public abstract class WordScorer {
|
||||||
private final boolean useTotalTermFreq;
|
private final boolean useTotalTermFreq;
|
||||||
|
|
||||||
public WordScorer(IndexReader reader, String field, double realWordLikelyHood, BytesRef separator) throws IOException {
|
public WordScorer(IndexReader reader, String field, double realWordLikelyHood, BytesRef separator) throws IOException {
|
||||||
|
this(reader, MultiFields.getTerms(reader, field), field, realWordLikelyHood, separator);
|
||||||
|
}
|
||||||
|
|
||||||
|
public WordScorer(IndexReader reader, Terms terms, String field, double realWordLikelyHood, BytesRef separator) throws IOException {
|
||||||
this.field = field;
|
this.field = field;
|
||||||
this.terms = MultiFields.getTerms(reader, field);
|
|
||||||
if (terms == null) {
|
if (terms == null) {
|
||||||
throw new ElasticSearchIllegalArgumentException("Field: [" + field + "] does not exist");
|
throw new ElasticSearchIllegalArgumentException("Field: [" + field + "] does not exist");
|
||||||
}
|
}
|
||||||
|
this.terms = terms;
|
||||||
final long vocSize = terms.getSumTotalTermFreq();
|
final long vocSize = terms.getSumTotalTermFreq();
|
||||||
this.vocabluarySize = vocSize == -1 ? reader.maxDoc() : vocSize;
|
this.vocabluarySize = vocSize == -1 ? reader.maxDoc() : vocSize;
|
||||||
this.useTotalTermFreq = vocSize != -1;
|
this.useTotalTermFreq = vocSize != -1;
|
||||||
|
@ -95,7 +99,7 @@ public abstract class WordScorer {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static interface WordScorerFactory {
|
public static interface WordScorerFactory {
|
||||||
public WordScorer newScorer(IndexReader reader, String field,
|
public WordScorer newScorer(IndexReader reader, Terms terms,
|
||||||
double realWordLikelyhood, BytesRef separator) throws IOException;
|
String field, double realWordLikelyhood, BytesRef separator) throws IOException;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -40,8 +40,10 @@ import org.junit.Test;
|
||||||
import java.io.BufferedReader;
|
import java.io.BufferedReader;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStreamReader;
|
import java.io.InputStreamReader;
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.concurrent.ExecutionException;
|
||||||
|
|
||||||
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS;
|
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS;
|
||||||
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS;
|
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS;
|
||||||
|
@ -50,6 +52,7 @@ import static org.elasticsearch.index.query.QueryBuilders.matchQuery;
|
||||||
import static org.elasticsearch.search.suggest.SuggestBuilder.phraseSuggestion;
|
import static org.elasticsearch.search.suggest.SuggestBuilder.phraseSuggestion;
|
||||||
import static org.elasticsearch.search.suggest.SuggestBuilder.termSuggestion;
|
import static org.elasticsearch.search.suggest.SuggestBuilder.termSuggestion;
|
||||||
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSuggestionSize;
|
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSuggestionSize;
|
||||||
|
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertThrows;
|
||||||
import static org.hamcrest.Matchers.*;
|
import static org.hamcrest.Matchers.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -112,7 +115,6 @@ public class SuggestSearchTests extends AbstractSharedClusterTest {
|
||||||
|
|
||||||
@Test // see #2729
|
@Test // see #2729
|
||||||
public void testSizeOneShard() throws Exception {
|
public void testSizeOneShard() throws Exception {
|
||||||
client().admin().indices().prepareDelete().execute().actionGet();
|
|
||||||
client().admin().indices().prepareCreate("test")
|
client().admin().indices().prepareCreate("test")
|
||||||
.setSettings(settingsBuilder()
|
.setSettings(settingsBuilder()
|
||||||
.put(SETTING_NUMBER_OF_SHARDS, 1)
|
.put(SETTING_NUMBER_OF_SHARDS, 1)
|
||||||
|
@ -160,8 +162,70 @@ public class SuggestSearchTests extends AbstractSharedClusterTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testSimple() throws Exception {
|
public void testUnmappedField() throws IOException, InterruptedException, ExecutionException {
|
||||||
|
int numShards = between(1,5);
|
||||||
|
Builder builder = ImmutableSettings.builder();
|
||||||
|
builder.put("index.number_of_shards", numShards).put("index.number_of_replicas", between(0, 2));
|
||||||
|
builder.put("index.analysis.analyzer.biword.tokenizer", "standard");
|
||||||
|
builder.putArray("index.analysis.analyzer.biword.filter", "shingler", "lowercase");
|
||||||
|
builder.put("index.analysis.filter.shingler.type", "shingle");
|
||||||
|
builder.put("index.analysis.filter.shingler.min_shingle_size", 2);
|
||||||
|
builder.put("index.analysis.filter.shingler.max_shingle_size", 3);
|
||||||
|
|
||||||
|
XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type1")
|
||||||
|
.startObject("properties")
|
||||||
|
.startObject("name")
|
||||||
|
.field("type", "multi_field")
|
||||||
|
.field("path", "just_name")
|
||||||
|
.startObject("fields")
|
||||||
|
.startObject("name")
|
||||||
|
.field("type", "string")
|
||||||
|
.endObject()
|
||||||
|
.startObject("name_shingled")
|
||||||
|
.field("type", "string")
|
||||||
|
.field("index_analyzer", "biword")
|
||||||
|
.field("search_analyzer", "standard")
|
||||||
|
.endObject()
|
||||||
|
.endObject()
|
||||||
|
.endObject()
|
||||||
|
.endObject()
|
||||||
|
.endObject().endObject();
|
||||||
client().admin().indices().prepareDelete().execute().actionGet();
|
client().admin().indices().prepareDelete().execute().actionGet();
|
||||||
|
client().admin().indices().prepareCreate("test").setSettings(builder.build()).addMapping("type1", mapping).execute().actionGet();
|
||||||
|
client().admin().cluster().prepareHealth("test").setWaitForGreenStatus().execute().actionGet();
|
||||||
|
indexRandom("test", true,
|
||||||
|
client().prepareIndex("test", "type1")
|
||||||
|
.setSource(XContentFactory.jsonBuilder().startObject().field("name", "I like iced tea").endObject()),
|
||||||
|
client().prepareIndex("test", "type1")
|
||||||
|
.setSource(XContentFactory.jsonBuilder().startObject().field("name", "I like tea.").endObject()),
|
||||||
|
client().prepareIndex("test", "type1")
|
||||||
|
.setSource(XContentFactory.jsonBuilder().startObject().field("name", "I like ice cream.").endObject()));
|
||||||
|
Suggest searchSuggest = searchSuggest(client(),
|
||||||
|
"ice tea",
|
||||||
|
phraseSuggestion("did_you_mean").field("name_shingled")
|
||||||
|
.addCandidateGenerator(PhraseSuggestionBuilder.candidateGenerator("name").prefixLength(0).minWordLength(0).suggestMode("always").maxEdits(2))
|
||||||
|
.gramSize(3));
|
||||||
|
ElasticsearchAssertions.assertSuggestion(searchSuggest, 0, 0, "did_you_mean", "iced tea");
|
||||||
|
{
|
||||||
|
SearchRequestBuilder suggestBuilder = client().prepareSearch().setSearchType(SearchType.COUNT);
|
||||||
|
suggestBuilder.setSuggestText("tetsting sugestion");
|
||||||
|
suggestBuilder.addSuggestion(phraseSuggestion("did_you_mean").field("nosuchField")
|
||||||
|
.addCandidateGenerator(PhraseSuggestionBuilder.candidateGenerator("name").prefixLength(0).minWordLength(0).suggestMode("always").maxEdits(2))
|
||||||
|
.gramSize(3));
|
||||||
|
assertThrows(suggestBuilder, SearchPhaseExecutionException.class);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
SearchRequestBuilder suggestBuilder = client().prepareSearch().setSearchType(SearchType.COUNT);
|
||||||
|
suggestBuilder.setSuggestText("tetsting sugestion");
|
||||||
|
suggestBuilder.addSuggestion(phraseSuggestion("did_you_mean").field("nosuchField")
|
||||||
|
.addCandidateGenerator(PhraseSuggestionBuilder.candidateGenerator("name").prefixLength(0).minWordLength(0).suggestMode("always").maxEdits(2))
|
||||||
|
.gramSize(3));
|
||||||
|
assertThrows(suggestBuilder, SearchPhaseExecutionException.class);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSimple() throws Exception {
|
||||||
client().admin().indices().prepareCreate("test")
|
client().admin().indices().prepareCreate("test")
|
||||||
.setSettings(settingsBuilder()
|
.setSettings(settingsBuilder()
|
||||||
.put(SETTING_NUMBER_OF_SHARDS, 5)
|
.put(SETTING_NUMBER_OF_SHARDS, 5)
|
||||||
|
@ -1033,6 +1097,7 @@ public class SuggestSearchTests extends AbstractSharedClusterTest {
|
||||||
builder.addSuggestion(suggestion);
|
builder.addSuggestion(suggestion);
|
||||||
}
|
}
|
||||||
SearchResponse actionGet = builder.execute().actionGet();
|
SearchResponse actionGet = builder.execute().actionGet();
|
||||||
|
assertThat(Arrays.toString(actionGet.getShardFailures()), actionGet.getFailedShards(), equalTo(expectShardsFailed));
|
||||||
return actionGet.getSuggest();
|
return actionGet.getSuggest();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1081,6 +1146,65 @@ public class SuggestSearchTests extends AbstractSharedClusterTest {
|
||||||
assertThat(suggest.getSuggestion("simple").getEntries().get(0).getOptions().size(), equalTo(3));
|
assertThat(suggest.getSuggestion("simple").getEntries().get(0).getOptions().size(), equalTo(3));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test // see #3469
|
||||||
|
public void testShardFailures() throws IOException, InterruptedException {
|
||||||
|
Builder builder = ImmutableSettings.builder();
|
||||||
|
builder.put("index.number_of_shards", between(1,5)).put("index.number_of_replicas", between(0,3));
|
||||||
|
builder.put("index.analysis.analyzer.suggest.tokenizer", "standard");
|
||||||
|
builder.putArray("index.analysis.analyzer.suggest.filter", "standard", "lowercase", "shingler");
|
||||||
|
builder.put("index.analysis.filter.shingler.type", "shingle");
|
||||||
|
builder.put("index.analysis.filter.shingler.min_shingle_size", 2);
|
||||||
|
builder.put("index.analysis.filter.shingler.max_shingle_size", 5);
|
||||||
|
builder.put("index.analysis.filter.shingler.output_unigrams", true);
|
||||||
|
|
||||||
|
XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type1")
|
||||||
|
.startObject("properties")
|
||||||
|
.startObject("name")
|
||||||
|
.field("type", "multi_field")
|
||||||
|
.field("path", "just_name")
|
||||||
|
.startObject("fields")
|
||||||
|
.startObject("name")
|
||||||
|
.field("type", "string")
|
||||||
|
.field("analyzer", "suggest")
|
||||||
|
.endObject()
|
||||||
|
.endObject()
|
||||||
|
.endObject()
|
||||||
|
.endObject()
|
||||||
|
.endObject().endObject();
|
||||||
|
client().admin().indices().prepareDelete().execute().actionGet();
|
||||||
|
client().admin().indices().prepareCreate("test").setSettings(builder.build()).addMapping("type1", mapping).execute().actionGet();
|
||||||
|
client().admin().cluster().prepareHealth("test").setWaitForGreenStatus().execute().actionGet();
|
||||||
|
client().prepareIndex("test", "type2", "1")
|
||||||
|
.setSource(XContentFactory.jsonBuilder().startObject().field("foo", "bar").endObject()).execute().actionGet();
|
||||||
|
client().prepareIndex("test", "type2", "2")
|
||||||
|
.setSource(XContentFactory.jsonBuilder().startObject().field("foo", "bar").endObject()).execute().actionGet();
|
||||||
|
client().prepareIndex("test", "type2", "3")
|
||||||
|
.setSource(XContentFactory.jsonBuilder().startObject().field("foo", "bar").endObject()).execute().actionGet();
|
||||||
|
client().prepareIndex("test", "type2", "4")
|
||||||
|
.setSource(XContentFactory.jsonBuilder().startObject().field("foo", "bar").endObject()).execute().actionGet();
|
||||||
|
client().prepareIndex("test", "type2", "5")
|
||||||
|
.setSource(XContentFactory.jsonBuilder().startObject().field("foo", "bar").endObject()).execute().actionGet();
|
||||||
|
client().prepareIndex("test", "type1", "1")
|
||||||
|
.setSource(XContentFactory.jsonBuilder().startObject().field("name", "Just testing the suggestions api").endObject()).execute().actionGet();
|
||||||
|
client().prepareIndex("test", "type1", "2")
|
||||||
|
.setSource(XContentFactory.jsonBuilder().startObject().field("name", "An other title").endObject()).execute().actionGet();
|
||||||
|
client().admin().indices().prepareRefresh().execute().actionGet();
|
||||||
|
|
||||||
|
// When searching on a shard with a non existing mapping, we should fail
|
||||||
|
SearchRequestBuilder suggestBuilder = client().prepareSearch().setSearchType(SearchType.COUNT);
|
||||||
|
suggestBuilder.setSuggestText("tetsting sugestion");
|
||||||
|
suggestBuilder.addSuggestion(phraseSuggestion("did_you_mean").field("fielddoesnotexist").maxErrors(5.0f));
|
||||||
|
assertThrows(suggestBuilder, SearchPhaseExecutionException.class);
|
||||||
|
// When searching on a shard which does not hold yet any document of an existing type, we should not fail
|
||||||
|
suggestBuilder = client().prepareSearch().setSearchType(SearchType.COUNT);
|
||||||
|
suggestBuilder.setSuggestText("tetsting sugestion");
|
||||||
|
suggestBuilder.addSuggestion(phraseSuggestion("did_you_mean").field("name").maxErrors(5.0f));
|
||||||
|
SearchResponse searchResponse = suggestBuilder.execute().actionGet();
|
||||||
|
ElasticsearchAssertions.assertNoFailures(searchResponse);
|
||||||
|
ElasticsearchAssertions.assertSuggestion(searchResponse.getSuggest(), 0, 0, "did_you_mean", "testing suggestions");
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
@Test // see #3469
|
@Test // see #3469
|
||||||
public void testEmptyShards() throws IOException, InterruptedException {
|
public void testEmptyShards() throws IOException, InterruptedException {
|
||||||
Builder builder = ImmutableSettings.builder();
|
Builder builder = ImmutableSettings.builder();
|
||||||
|
@ -1127,5 +1251,4 @@ public class SuggestSearchTests extends AbstractSharedClusterTest {
|
||||||
ElasticsearchAssertions.assertNoFailures(searchResponse);
|
ElasticsearchAssertions.assertNoFailures(searchResponse);
|
||||||
ElasticsearchAssertions.assertSuggestion(searchResponse.getSuggest(), 0, 0, "did_you_mean", "testing suggestions");
|
ElasticsearchAssertions.assertSuggestion(searchResponse.getSuggest(), 0, 0, "did_you_mean", "testing suggestions");
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,16 +17,7 @@ package org.elasticsearch.test.unit.search.suggest.phrase;
|
||||||
* specific language governing permissions and limitations
|
* specific language governing permissions and limitations
|
||||||
* under the License.
|
* under the License.
|
||||||
*/
|
*/
|
||||||
import static org.hamcrest.Matchers.equalTo;
|
import com.google.common.base.Charsets;
|
||||||
|
|
||||||
import java.io.BufferedReader;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.InputStreamReader;
|
|
||||||
import java.io.Reader;
|
|
||||||
import java.io.StringReader;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
|
@ -44,24 +35,21 @@ import org.apache.lucene.document.TextField;
|
||||||
import org.apache.lucene.index.DirectoryReader;
|
import org.apache.lucene.index.DirectoryReader;
|
||||||
import org.apache.lucene.index.IndexWriter;
|
import org.apache.lucene.index.IndexWriter;
|
||||||
import org.apache.lucene.index.IndexWriterConfig;
|
import org.apache.lucene.index.IndexWriterConfig;
|
||||||
|
import org.apache.lucene.index.MultiFields;
|
||||||
import org.apache.lucene.search.spell.DirectSpellChecker;
|
import org.apache.lucene.search.spell.DirectSpellChecker;
|
||||||
import org.apache.lucene.search.spell.SuggestMode;
|
import org.apache.lucene.search.spell.SuggestMode;
|
||||||
import org.apache.lucene.store.RAMDirectory;
|
import org.apache.lucene.store.RAMDirectory;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.Version;
|
import org.apache.lucene.util.Version;
|
||||||
import org.elasticsearch.search.suggest.phrase.CandidateGenerator;
|
import org.elasticsearch.search.suggest.phrase.*;
|
||||||
import org.elasticsearch.search.suggest.phrase.Correction;
|
|
||||||
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator;
|
|
||||||
import org.elasticsearch.search.suggest.phrase.LaplaceScorer;
|
|
||||||
import org.elasticsearch.search.suggest.phrase.LinearInterpoatingScorer;
|
|
||||||
import org.elasticsearch.search.suggest.phrase.MultiCandidateGeneratorWrapper;
|
|
||||||
import org.elasticsearch.search.suggest.phrase.NoisyChannelSpellChecker;
|
|
||||||
import org.elasticsearch.search.suggest.phrase.StupidBackoffScorer;
|
|
||||||
import org.elasticsearch.search.suggest.phrase.WordScorer;
|
|
||||||
import org.elasticsearch.test.integration.ElasticsearchTestCase;
|
import org.elasticsearch.test.integration.ElasticsearchTestCase;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
import com.google.common.base.Charsets;
|
import java.io.*;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import static org.hamcrest.Matchers.equalTo;
|
||||||
public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
|
public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
|
||||||
private final BytesRef space = new BytesRef(" ");
|
private final BytesRef space = new BytesRef(" ");
|
||||||
private final BytesRef preTag = new BytesRef("<em>");
|
private final BytesRef preTag = new BytesRef("<em>");
|
||||||
|
@ -106,7 +94,7 @@ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
|
||||||
}
|
}
|
||||||
|
|
||||||
DirectoryReader ir = DirectoryReader.open(writer, false);
|
DirectoryReader ir = DirectoryReader.open(writer, false);
|
||||||
WordScorer wordScorer = new LaplaceScorer(ir, "body_ngram", 0.95d, new BytesRef(" "), 0.5f);
|
WordScorer wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.95d, new BytesRef(" "), 0.5f);
|
||||||
|
|
||||||
NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker();
|
NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker();
|
||||||
DirectSpellChecker spellchecker = new DirectSpellChecker();
|
DirectSpellChecker spellchecker = new DirectSpellChecker();
|
||||||
|
@ -123,7 +111,7 @@ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
|
||||||
assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("american ame"));
|
assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("american ame"));
|
||||||
|
|
||||||
suggester = new NoisyChannelSpellChecker(0.85);
|
suggester = new NoisyChannelSpellChecker(0.85);
|
||||||
wordScorer = new LaplaceScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.5f);
|
wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d, new BytesRef(" "), 0.5f);
|
||||||
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 0, 2);
|
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 0, 2);
|
||||||
assertThat(corrections.length, equalTo(4));
|
assertThat(corrections.length, equalTo(4));
|
||||||
assertThat(corrections[0].join(space).utf8ToString(), equalTo("xorr the god jewel"));
|
assertThat(corrections[0].join(space).utf8ToString(), equalTo("xorr the god jewel"));
|
||||||
|
@ -144,7 +132,7 @@ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
|
||||||
|
|
||||||
// Test some of the highlighting corner cases
|
// Test some of the highlighting corner cases
|
||||||
suggester = new NoisyChannelSpellChecker(0.85);
|
suggester = new NoisyChannelSpellChecker(0.85);
|
||||||
wordScorer = new LaplaceScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.5f);
|
wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d, new BytesRef(" "), 0.5f);
|
||||||
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor teh Got-Jewel"), generator, 4f, 4, ir, "body", wordScorer, 1, 2);
|
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor teh Got-Jewel"), generator, 4f, 4, ir, "body", wordScorer, 1, 2);
|
||||||
assertThat(corrections.length, equalTo(4));
|
assertThat(corrections.length, equalTo(4));
|
||||||
assertThat(corrections[0].join(space).utf8ToString(), equalTo("xorr the god jewel"));
|
assertThat(corrections[0].join(space).utf8ToString(), equalTo("xorr the god jewel"));
|
||||||
|
@ -179,18 +167,18 @@ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
|
||||||
spellchecker.setMinPrefix(1);
|
spellchecker.setMinPrefix(1);
|
||||||
spellchecker.setMinQueryLength(1);
|
spellchecker.setMinQueryLength(1);
|
||||||
suggester = new NoisyChannelSpellChecker(0.85);
|
suggester = new NoisyChannelSpellChecker(0.85);
|
||||||
wordScorer = new LaplaceScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.5f);
|
wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d, new BytesRef(" "), 0.5f);
|
||||||
corrections = suggester.getCorrections(analyzer, new BytesRef("captian usa"), generator, 2, 4, ir, "body", wordScorer, 1, 2);
|
corrections = suggester.getCorrections(analyzer, new BytesRef("captian usa"), generator, 2, 4, ir, "body", wordScorer, 1, 2);
|
||||||
assertThat(corrections[0].join(space).utf8ToString(), equalTo("captain america"));
|
assertThat(corrections[0].join(space).utf8ToString(), equalTo("captain america"));
|
||||||
assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("<em>captain america</em>"));
|
assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("<em>captain america</em>"));
|
||||||
|
|
||||||
generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.85, 10, null, analyzer);
|
generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.85, 10, null, analyzer, MultiFields.getTerms(ir, "body"));
|
||||||
corrections = suggester.getCorrections(analyzer, new BytesRef("captian usw"), generator, 2, 4, ir, "body", wordScorer, 1, 2);
|
corrections = suggester.getCorrections(analyzer, new BytesRef("captian usw"), generator, 2, 4, ir, "body", wordScorer, 1, 2);
|
||||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
|
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
|
||||||
assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("<em>captain america</em>"));
|
assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("<em>captain america</em>"));
|
||||||
|
|
||||||
// Make sure that user supplied text is not marked as highlighted in the presence of a synonym filter
|
// Make sure that user supplied text is not marked as highlighted in the presence of a synonym filter
|
||||||
generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.85, 10, null, analyzer);
|
generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.85, 10, null, analyzer, MultiFields.getTerms(ir, "body"));
|
||||||
corrections = suggester.getCorrections(analyzer, new BytesRef("captain usw"), generator, 2, 4, ir, "body", wordScorer, 1, 2);
|
corrections = suggester.getCorrections(analyzer, new BytesRef("captain usw"), generator, 2, 4, ir, "body", wordScorer, 1, 2);
|
||||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
|
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
|
||||||
assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("captain <em>america</em>"));
|
assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("captain <em>america</em>"));
|
||||||
|
@ -245,12 +233,12 @@ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
|
||||||
}
|
}
|
||||||
|
|
||||||
DirectoryReader ir = DirectoryReader.open(writer, false);
|
DirectoryReader ir = DirectoryReader.open(writer, false);
|
||||||
LaplaceScorer wordScorer = new LaplaceScorer(ir, "body_ngram", 0.95d, new BytesRef(" "), 0.5f);
|
LaplaceScorer wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.95d, new BytesRef(" "), 0.5f);
|
||||||
NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker();
|
NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker();
|
||||||
DirectSpellChecker spellchecker = new DirectSpellChecker();
|
DirectSpellChecker spellchecker = new DirectSpellChecker();
|
||||||
spellchecker.setMinQueryLength(1);
|
spellchecker.setMinQueryLength(1);
|
||||||
DirectCandidateGenerator forward = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_ALWAYS, ir, 0.95, 10);
|
DirectCandidateGenerator forward = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_ALWAYS, ir, 0.95, 10);
|
||||||
DirectCandidateGenerator reverse = new DirectCandidateGenerator(spellchecker, "body_reverse", SuggestMode.SUGGEST_ALWAYS, ir, 0.95, 10, wrapper, wrapper);
|
DirectCandidateGenerator reverse = new DirectCandidateGenerator(spellchecker, "body_reverse", SuggestMode.SUGGEST_ALWAYS, ir, 0.95, 10, wrapper, wrapper, MultiFields.getTerms(ir, "body_reverse"));
|
||||||
CandidateGenerator generator = new MultiCandidateGeneratorWrapper(10, forward, reverse);
|
CandidateGenerator generator = new MultiCandidateGeneratorWrapper(10, forward, reverse);
|
||||||
|
|
||||||
Correction[] corrections = suggester.getCorrections(wrapper, new BytesRef("american cae"), generator, 1, 1, ir, "body", wordScorer, 1, 2);
|
Correction[] corrections = suggester.getCorrections(wrapper, new BytesRef("american cae"), generator, 1, 1, ir, "body", wordScorer, 1, 2);
|
||||||
|
@ -329,7 +317,7 @@ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
|
||||||
}
|
}
|
||||||
|
|
||||||
DirectoryReader ir = DirectoryReader.open(writer, false);
|
DirectoryReader ir = DirectoryReader.open(writer, false);
|
||||||
WordScorer wordScorer = new LinearInterpoatingScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.5, 0.4, 0.1);
|
WordScorer wordScorer = new LinearInterpoatingScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d, new BytesRef(" "), 0.5, 0.4, 0.1);
|
||||||
|
|
||||||
NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker();
|
NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker();
|
||||||
DirectSpellChecker spellchecker = new DirectSpellChecker();
|
DirectSpellChecker spellchecker = new DirectSpellChecker();
|
||||||
|
@ -343,7 +331,7 @@ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
|
||||||
assertThat(corrections.length, equalTo(0));
|
assertThat(corrections.length, equalTo(0));
|
||||||
// assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ape"));
|
// assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ape"));
|
||||||
|
|
||||||
wordScorer = new LinearInterpoatingScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.5, 0.4, 0.1);
|
wordScorer = new LinearInterpoatingScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d, new BytesRef(" "), 0.5, 0.4, 0.1);
|
||||||
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 0, 3);
|
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 0, 3);
|
||||||
assertThat(corrections.length, equalTo(4));
|
assertThat(corrections.length, equalTo(4));
|
||||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
|
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
|
||||||
|
@ -390,16 +378,16 @@ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
|
||||||
spellchecker.setMinPrefix(1);
|
spellchecker.setMinPrefix(1);
|
||||||
spellchecker.setMinQueryLength(1);
|
spellchecker.setMinQueryLength(1);
|
||||||
suggester = new NoisyChannelSpellChecker(0.95);
|
suggester = new NoisyChannelSpellChecker(0.95);
|
||||||
wordScorer = new LinearInterpoatingScorer(ir, "body_ngram", 0.95d, new BytesRef(" "), 0.5, 0.4, 0.1);
|
wordScorer = new LinearInterpoatingScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.95d, new BytesRef(" "), 0.5, 0.4, 0.1);
|
||||||
corrections = suggester.getCorrections(analyzer, new BytesRef("captian usa"), generator, 2, 4, ir, "body", wordScorer, 1, 3);
|
corrections = suggester.getCorrections(analyzer, new BytesRef("captian usa"), generator, 2, 4, ir, "body", wordScorer, 1, 3);
|
||||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
|
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
|
||||||
|
|
||||||
generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.95, 10, null, analyzer);
|
generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.95, 10, null, analyzer, MultiFields.getTerms(ir, "body"));
|
||||||
corrections = suggester.getCorrections(analyzer, new BytesRef("captian usw"), generator, 2, 4, ir, "body", wordScorer, 1, 3);
|
corrections = suggester.getCorrections(analyzer, new BytesRef("captian usw"), generator, 2, 4, ir, "body", wordScorer, 1, 3);
|
||||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
|
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
|
||||||
|
|
||||||
|
|
||||||
wordScorer = new StupidBackoffScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.4);
|
wordScorer = new StupidBackoffScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d, new BytesRef(" "), 0.4);
|
||||||
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 2, ir, "body", wordScorer, 0, 3);
|
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 2, ir, "body", wordScorer, 0, 3);
|
||||||
assertThat(corrections.length, equalTo(2));
|
assertThat(corrections.length, equalTo(2));
|
||||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
|
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
|
||||||
|
|
Loading…
Reference in New Issue