Prevent Phrase Suggester from failing on missing fields.

If a field is mapped but not present in the index, the phrase suggester
should return empty results or skip candidate generation rather than
failing hard with an illegal argument exception; an unmapped field is
still an error. Some shards might not have a value in a certain field.

Closes #3469
This commit is contained in:
Simon Willnauer 2013-08-16 13:12:24 +02:00
parent 5d91bb04b6
commit 57c0d29114
9 changed files with 248 additions and 114 deletions

View File

@ -18,18 +18,8 @@
*/ */
package org.elasticsearch.search.suggest.phrase; package org.elasticsearch.search.suggest.phrase;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.*;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.spell.DirectSpellChecker; import org.apache.lucene.search.spell.DirectSpellChecker;
import org.apache.lucene.search.spell.SuggestMode; import org.apache.lucene.search.spell.SuggestMode;
import org.apache.lucene.search.spell.SuggestWord; import org.apache.lucene.search.spell.SuggestWord;
@ -38,6 +28,12 @@ import org.apache.lucene.util.CharsRef;
import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.search.suggest.SuggestUtils; import org.elasticsearch.search.suggest.SuggestUtils;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
//TODO public for tests //TODO public for tests
public final class DirectCandidateGenerator extends CandidateGenerator { public final class DirectCandidateGenerator extends CandidateGenerator {
@ -58,20 +54,19 @@ public final class DirectCandidateGenerator extends CandidateGenerator {
private final int numCandidates; private final int numCandidates;
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates) throws IOException { public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates) throws IOException {
this(spellchecker, field, suggestMode, reader, nonErrorLikelihood, numCandidates, null, null); this(spellchecker, field, suggestMode, reader, nonErrorLikelihood, numCandidates, null, null, MultiFields.getTerms(reader, field));
} }
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates, Analyzer preFilter, Analyzer postFilter) throws IOException { public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates, Analyzer preFilter, Analyzer postFilter, Terms terms) throws IOException {
if (terms == null) {
throw new ElasticSearchIllegalArgumentException("generator field [" + field + "] doesn't exist");
}
this.spellchecker = spellchecker; this.spellchecker = spellchecker;
this.field = field; this.field = field;
this.numCandidates = numCandidates; this.numCandidates = numCandidates;
this.suggestMode = suggestMode; this.suggestMode = suggestMode;
this.reader = reader; this.reader = reader;
Terms terms = MultiFields.getTerms(reader, field);
if (terms == null) {
throw new ElasticSearchIllegalArgumentException("generator field [" + field + "] doesn't exist");
}
final long dictSize = terms.getSumTotalTermFreq(); final long dictSize = terms.getSumTotalTermFreq();
this.useTotalTermFrequency = dictSize != -1; this.useTotalTermFrequency = dictSize != -1;
this.dictSize = dictSize == -1 ? reader.maxDoc() : dictSize; this.dictSize = dictSize == -1 ? reader.maxDoc() : dictSize;

View File

@ -18,27 +18,28 @@
*/ */
package org.elasticsearch.search.suggest.phrase; package org.elasticsearch.search.suggest.phrase;
import java.io.IOException;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Terms;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.elasticsearch.search.suggest.SuggestUtils; import org.elasticsearch.search.suggest.SuggestUtils;
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate; import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;
import java.io.IOException;
//TODO public for tests //TODO public for tests
public final class LaplaceScorer extends WordScorer { public final class LaplaceScorer extends WordScorer {
public static final WordScorerFactory FACTORY = new WordScorer.WordScorerFactory() { public static final WordScorerFactory FACTORY = new WordScorer.WordScorerFactory() {
@Override @Override
public WordScorer newScorer(IndexReader reader, String field, double realWordLikelyhood, BytesRef separator) throws IOException { public WordScorer newScorer(IndexReader reader, Terms terms, String field, double realWordLikelyhood, BytesRef separator) throws IOException {
return new LaplaceScorer(reader, field, realWordLikelyhood, separator, 0.5); return new LaplaceScorer(reader, terms, field, realWordLikelyhood, separator, 0.5);
} }
}; };
private double alpha; private double alpha;
public LaplaceScorer(IndexReader reader, String field, public LaplaceScorer(IndexReader reader, Terms terms, String field,
double realWordLikelyhood, BytesRef separator, double alpha) throws IOException { double realWordLikelyhood, BytesRef separator, double alpha) throws IOException {
super(reader, field, realWordLikelyhood, separator); super(reader, terms, field, realWordLikelyhood, separator);
this.alpha = alpha; this.alpha = alpha;
} }

View File

@ -18,13 +18,14 @@
*/ */
package org.elasticsearch.search.suggest.phrase; package org.elasticsearch.search.suggest.phrase;
import java.io.IOException;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Terms;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.elasticsearch.search.suggest.SuggestUtils; import org.elasticsearch.search.suggest.SuggestUtils;
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate; import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;
import java.io.IOException;
//TODO public for tests //TODO public for tests
public final class LinearInterpoatingScorer extends WordScorer { public final class LinearInterpoatingScorer extends WordScorer {
@ -32,9 +33,9 @@ public final class LinearInterpoatingScorer extends WordScorer {
private final double bigramLambda; private final double bigramLambda;
private final double trigramLambda; private final double trigramLambda;
public LinearInterpoatingScorer(IndexReader reader, String field, double realWordLikelyhood, BytesRef separator, double trigramLambda, double bigramLambda, double unigramLambda) public LinearInterpoatingScorer(IndexReader reader, Terms terms, String field, double realWordLikelyhood, BytesRef separator, double trigramLambda, double bigramLambda, double unigramLambda)
throws IOException { throws IOException {
super(reader, field, realWordLikelyhood, separator); super(reader, terms, field, realWordLikelyhood, separator);
double sum = unigramLambda + bigramLambda + trigramLambda; double sum = unigramLambda + bigramLambda + trigramLambda;
this.unigramLambda = unigramLambda / sum; this.unigramLambda = unigramLambda / sum;
this.bigramLambda = bigramLambda / sum; this.bigramLambda = bigramLambda / sum;

View File

@ -18,10 +18,9 @@
*/ */
package org.elasticsearch.search.suggest.phrase; package org.elasticsearch.search.suggest.phrase;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Terms;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.XContentParser;
@ -33,6 +32,8 @@ import org.elasticsearch.search.suggest.SuggestUtils;
import org.elasticsearch.search.suggest.SuggestionSearchContext; import org.elasticsearch.search.suggest.SuggestionSearchContext;
import org.elasticsearch.search.suggest.phrase.PhraseSuggestionContext.DirectCandidateGenerator; import org.elasticsearch.search.suggest.phrase.PhraseSuggestionContext.DirectCandidateGenerator;
import java.io.IOException;
public final class PhraseSuggestParser implements SuggestContextParser { public final class PhraseSuggestParser implements SuggestContextParser {
private PhraseSuggester suggester; private PhraseSuggester suggester;
@ -135,6 +136,10 @@ public final class PhraseSuggestParser implements SuggestContextParser {
throw new ElasticSearchIllegalArgumentException("The required field option is missing"); throw new ElasticSearchIllegalArgumentException("The required field option is missing");
} }
if (mapperService.smartNameFieldMapper(suggestion.getField()) == null) {
throw new ElasticSearchIllegalArgumentException("No mapping found for field [" + suggestion.getField() + "]");
}
if (suggestion.model() == null) { if (suggestion.model() == null) {
suggestion.setModel(StupidBackoffScorer.FACTORY); suggestion.setModel(StupidBackoffScorer.FACTORY);
} }
@ -209,9 +214,9 @@ public final class PhraseSuggestParser implements SuggestContextParser {
} }
suggestion.setModel(new WordScorer.WordScorerFactory() { suggestion.setModel(new WordScorer.WordScorerFactory() {
@Override @Override
public WordScorer newScorer(IndexReader reader, String field, double realWordLikelyhood, BytesRef separator) public WordScorer newScorer(IndexReader reader, Terms terms, String field, double realWordLikelyhood, BytesRef separator)
throws IOException { throws IOException {
return new LinearInterpoatingScorer(reader, field, realWordLikelyhood, separator, lambdas[0], lambdas[1], return new LinearInterpoatingScorer(reader, terms, field, realWordLikelyhood, separator, lambdas[0], lambdas[1],
lambdas[2]); lambdas[2]);
} }
}); });
@ -230,9 +235,9 @@ public final class PhraseSuggestParser implements SuggestContextParser {
final double alpha = theAlpha; final double alpha = theAlpha;
suggestion.setModel(new WordScorer.WordScorerFactory() { suggestion.setModel(new WordScorer.WordScorerFactory() {
@Override @Override
public WordScorer newScorer(IndexReader reader, String field, double realWordLikelyhood, BytesRef separator) public WordScorer newScorer(IndexReader reader, Terms terms, String field, double realWordLikelyhood, BytesRef separator)
throws IOException { throws IOException {
return new LaplaceScorer(reader, field, realWordLikelyhood, separator, alpha); return new LaplaceScorer(reader, terms, field, realWordLikelyhood, separator, alpha);
} }
}); });
@ -250,9 +255,9 @@ public final class PhraseSuggestParser implements SuggestContextParser {
final double discount = theDiscount; final double discount = theDiscount;
suggestion.setModel(new WordScorer.WordScorerFactory() { suggestion.setModel(new WordScorer.WordScorerFactory() {
@Override @Override
public WordScorer newScorer(IndexReader reader, String field, double realWordLikelyhood, BytesRef separator) public WordScorer newScorer(IndexReader reader, Terms terms, String field, double realWordLikelyhood, BytesRef separator)
throws IOException { throws IOException {
return new StupidBackoffScorer(reader, field, realWordLikelyhood, separator, discount); return new StupidBackoffScorer(reader, terms, field, realWordLikelyhood, separator, discount);
} }
}); });
@ -281,6 +286,9 @@ public final class PhraseSuggestParser implements SuggestContextParser {
if (!SuggestUtils.parseDirectSpellcheckerSettings(parser, fieldName, generator)) { if (!SuggestUtils.parseDirectSpellcheckerSettings(parser, fieldName, generator)) {
if ("field".equals(fieldName)) { if ("field".equals(fieldName)) {
generator.setField(parser.text()); generator.setField(parser.text());
if (mapperService.smartNameFieldMapper(generator.field()) == null) {
throw new ElasticSearchIllegalArgumentException("No mapping found for field [" + generator.field() + "]");
}
} else if ("size".equals(fieldName)) { } else if ("size".equals(fieldName)) {
generator.size(parser.intValue()); generator.size(parser.intValue());
} else if ("pre_filter".equals(fieldName) || "preFilter".equals(fieldName)) { } else if ("pre_filter".equals(fieldName) || "preFilter".equals(fieldName)) {

View File

@ -21,6 +21,8 @@ package org.elasticsearch.search.suggest.phrase;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.search.spell.DirectSpellChecker; import org.apache.lucene.search.spell.DirectSpellChecker;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.CharsRef;
@ -30,9 +32,11 @@ import org.elasticsearch.common.text.Text;
import org.elasticsearch.search.suggest.Suggest.Suggestion; import org.elasticsearch.search.suggest.Suggest.Suggestion;
import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry; import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry;
import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry.Option; import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry.Option;
import org.elasticsearch.search.suggest.SuggestContextParser; import org.elasticsearch.search.suggest.*;
import org.elasticsearch.search.suggest.SuggestUtils;
import org.elasticsearch.search.suggest.Suggester; import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.io.IOException; import java.io.IOException;
import java.util.List; import java.util.List;
@ -52,38 +56,47 @@ public final class PhraseSuggester extends Suggester<PhraseSuggestionContext> {
public Suggestion<? extends Entry<? extends Option>> innerExecute(String name, PhraseSuggestionContext suggestion, public Suggestion<? extends Entry<? extends Option>> innerExecute(String name, PhraseSuggestionContext suggestion,
IndexReader indexReader, CharsRef spare) throws IOException { IndexReader indexReader, CharsRef spare) throws IOException {
double realWordErrorLikelihood = suggestion.realworldErrorLikelyhood(); double realWordErrorLikelihood = suggestion.realworldErrorLikelyhood();
List<PhraseSuggestionContext.DirectCandidateGenerator> generators = suggestion.generators();
CandidateGenerator[] gens = new CandidateGenerator[generators.size()];
for (int i = 0; i < gens.length; i++) {
PhraseSuggestionContext.DirectCandidateGenerator generator = generators.get(i);
DirectSpellChecker directSpellChecker = SuggestUtils.getDirectSpellChecker(generator);
gens[i] = new DirectCandidateGenerator(directSpellChecker, generator.field(), generator.suggestMode(), indexReader, realWordErrorLikelihood, generator.size(), generator.preFilter(), generator.postFilter());
}
final NoisyChannelSpellChecker checker = new NoisyChannelSpellChecker(realWordErrorLikelihood, suggestion.getRequireUnigram(), suggestion.getTokenLimit());
final BytesRef separator = suggestion.separator();
TokenStream stream = checker.tokenStream(suggestion.getAnalyzer(), suggestion.getText(), spare, suggestion.getField());
WordScorer wordScorer = suggestion.model().newScorer(indexReader, suggestion.getField(), realWordErrorLikelihood, separator);
Correction[] corrections = checker.getCorrections(stream, new MultiCandidateGeneratorWrapper(suggestion.getShardSize(), gens), suggestion.maxErrors(),
suggestion.getShardSize(), indexReader,wordScorer , separator, suggestion.confidence(), suggestion.gramSize());
UnicodeUtil.UTF8toUTF16(suggestion.getText(), spare); UnicodeUtil.UTF8toUTF16(suggestion.getText(), spare);
Suggestion.Entry<Option> resultEntry = new Suggestion.Entry<Option>(new StringText(spare.toString()), 0, spare.length); Suggestion.Entry<Option> resultEntry = new Suggestion.Entry<Option>(new StringText(spare.toString()), 0, spare.length);
BytesRef byteSpare = new BytesRef();
for (Correction correction : corrections) {
UnicodeUtil.UTF8toUTF16(correction.join(SEPARATOR, byteSpare, null, null), spare);
Text phrase = new StringText(spare.toString());
Text highlighted = null;
if (suggestion.getPreTag() != null) {
UnicodeUtil.UTF8toUTF16(correction.join(SEPARATOR, byteSpare, suggestion.getPreTag(), suggestion.getPostTag()), spare);
highlighted = new StringText(spare.toString());
}
resultEntry.addOption(new Suggestion.Entry.Option(phrase, highlighted, (float) (correction.score)));
}
final Suggestion<Entry<Option>> response = new Suggestion<Entry<Option>>(name, suggestion.getSize()); final Suggestion<Entry<Option>> response = new Suggestion<Entry<Option>>(name, suggestion.getSize());
response.addTerm(resultEntry); response.addTerm(resultEntry);
List<PhraseSuggestionContext.DirectCandidateGenerator> generators = suggestion.generators();
final int numGenerators = generators.size();
final List<CandidateGenerator> gens = new ArrayList<CandidateGenerator>(generators.size());
for (int i = 0; i < numGenerators; i++) {
PhraseSuggestionContext.DirectCandidateGenerator generator = generators.get(i);
DirectSpellChecker directSpellChecker = SuggestUtils.getDirectSpellChecker(generator);
Terms terms = MultiFields.getTerms(indexReader, generator.field());
if (terms != null) {
gens.add(new DirectCandidateGenerator(directSpellChecker, generator.field(), generator.suggestMode(),
indexReader, realWordErrorLikelihood, generator.size(), generator.preFilter(), generator.postFilter(), terms));
}
}
final String suggestField = suggestion.getField();
final Terms suggestTerms = MultiFields.getTerms(indexReader, suggestField);
if (gens.size() > 0 && suggestTerms != null) {
final NoisyChannelSpellChecker checker = new NoisyChannelSpellChecker(realWordErrorLikelihood, suggestion.getRequireUnigram(), suggestion.getTokenLimit());
final BytesRef separator = suggestion.separator();
TokenStream stream = checker.tokenStream(suggestion.getAnalyzer(), suggestion.getText(), spare, suggestion.getField());
WordScorer wordScorer = suggestion.model().newScorer(indexReader, suggestTerms, suggestField, realWordErrorLikelihood, separator);
Correction[] corrections = checker.getCorrections(stream, new MultiCandidateGeneratorWrapper(suggestion.getShardSize(),
gens.toArray(new CandidateGenerator[gens.size()])), suggestion.maxErrors(),
suggestion.getShardSize(), indexReader,wordScorer , separator, suggestion.confidence(), suggestion.gramSize());
BytesRef byteSpare = new BytesRef();
for (Correction correction : corrections) {
UnicodeUtil.UTF8toUTF16(correction.join(SEPARATOR, byteSpare, null, null), spare);
Text phrase = new StringText(spare.toString());
Text highlighted = null;
if (suggestion.getPreTag() != null) {
UnicodeUtil.UTF8toUTF16(correction.join(SEPARATOR, byteSpare, suggestion.getPreTag(), suggestion.getPostTag()), spare);
highlighted = new StringText(spare.toString());
}
resultEntry.addOption(new Suggestion.Entry.Option(phrase, highlighted, (float) (correction.score)));
}
}
return response; return response;
} }

View File

@ -18,26 +18,27 @@
*/ */
package org.elasticsearch.search.suggest.phrase; package org.elasticsearch.search.suggest.phrase;
import java.io.IOException;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Terms;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.elasticsearch.search.suggest.SuggestUtils; import org.elasticsearch.search.suggest.SuggestUtils;
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate; import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;
import java.io.IOException;
public class StupidBackoffScorer extends WordScorer { public class StupidBackoffScorer extends WordScorer {
public static final WordScorerFactory FACTORY = new WordScorer.WordScorerFactory() { public static final WordScorerFactory FACTORY = new WordScorer.WordScorerFactory() {
@Override @Override
public WordScorer newScorer(IndexReader reader, String field, double realWordLikelyhood, BytesRef separator) throws IOException { public WordScorer newScorer(IndexReader reader, Terms terms, String field, double realWordLikelyhood, BytesRef separator) throws IOException {
return new StupidBackoffScorer(reader, field, realWordLikelyhood, separator, 0.4f); return new StupidBackoffScorer(reader, terms, field, realWordLikelyhood, separator, 0.4f);
} }
}; };
private final double discount; private final double discount;
public StupidBackoffScorer(IndexReader reader, String field, double realWordLikelyhood, BytesRef separator, double discount) public StupidBackoffScorer(IndexReader reader, Terms terms,String field, double realWordLikelyhood, BytesRef separator, double discount)
throws IOException { throws IOException {
super(reader, field, realWordLikelyhood, separator); super(reader, terms, field, realWordLikelyhood, separator);
this.discount = discount; this.discount = discount;
} }

View File

@ -18,8 +18,6 @@
*/ */
package org.elasticsearch.search.suggest.phrase; package org.elasticsearch.search.suggest.phrase;
import java.io.IOException;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms; import org.apache.lucene.index.Terms;
@ -29,6 +27,8 @@ import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate; import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.CandidateSet; import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.CandidateSet;
import java.io.IOException;
//TODO public for tests //TODO public for tests
public abstract class WordScorer { public abstract class WordScorer {
protected final IndexReader reader; protected final IndexReader reader;
@ -43,11 +43,15 @@ public abstract class WordScorer {
private final boolean useTotalTermFreq; private final boolean useTotalTermFreq;
public WordScorer(IndexReader reader, String field, double realWordLikelyHood, BytesRef separator) throws IOException { public WordScorer(IndexReader reader, String field, double realWordLikelyHood, BytesRef separator) throws IOException {
this(reader, MultiFields.getTerms(reader, field), field, realWordLikelyHood, separator);
}
public WordScorer(IndexReader reader, Terms terms, String field, double realWordLikelyHood, BytesRef separator) throws IOException {
this.field = field; this.field = field;
this.terms = MultiFields.getTerms(reader, field);
if (terms == null) { if (terms == null) {
throw new ElasticSearchIllegalArgumentException("Field: [" + field + "] does not exist"); throw new ElasticSearchIllegalArgumentException("Field: [" + field + "] does not exist");
} }
this.terms = terms;
final long vocSize = terms.getSumTotalTermFreq(); final long vocSize = terms.getSumTotalTermFreq();
this.vocabluarySize = vocSize == -1 ? reader.maxDoc() : vocSize; this.vocabluarySize = vocSize == -1 ? reader.maxDoc() : vocSize;
this.useTotalTermFreq = vocSize != -1; this.useTotalTermFreq = vocSize != -1;
@ -95,7 +99,7 @@ public abstract class WordScorer {
} }
public static interface WordScorerFactory { public static interface WordScorerFactory {
public WordScorer newScorer(IndexReader reader, String field, public WordScorer newScorer(IndexReader reader, Terms terms,
double realWordLikelyhood, BytesRef separator) throws IOException; String field, double realWordLikelyhood, BytesRef separator) throws IOException;
} }
} }

View File

@ -40,8 +40,10 @@ import org.junit.Test;
import java.io.BufferedReader; import java.io.BufferedReader;
import java.io.IOException; import java.io.IOException;
import java.io.InputStreamReader; import java.io.InputStreamReader;
import java.util.Arrays;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
import java.util.concurrent.ExecutionException;
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS; import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS;
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS; import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS;
@ -50,6 +52,7 @@ import static org.elasticsearch.index.query.QueryBuilders.matchQuery;
import static org.elasticsearch.search.suggest.SuggestBuilder.phraseSuggestion; import static org.elasticsearch.search.suggest.SuggestBuilder.phraseSuggestion;
import static org.elasticsearch.search.suggest.SuggestBuilder.termSuggestion; import static org.elasticsearch.search.suggest.SuggestBuilder.termSuggestion;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSuggestionSize; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSuggestionSize;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertThrows;
import static org.hamcrest.Matchers.*; import static org.hamcrest.Matchers.*;
/** /**
@ -112,7 +115,6 @@ public class SuggestSearchTests extends AbstractSharedClusterTest {
@Test // see #2729 @Test // see #2729
public void testSizeOneShard() throws Exception { public void testSizeOneShard() throws Exception {
client().admin().indices().prepareDelete().execute().actionGet();
client().admin().indices().prepareCreate("test") client().admin().indices().prepareCreate("test")
.setSettings(settingsBuilder() .setSettings(settingsBuilder()
.put(SETTING_NUMBER_OF_SHARDS, 1) .put(SETTING_NUMBER_OF_SHARDS, 1)
@ -160,8 +162,70 @@ public class SuggestSearchTests extends AbstractSharedClusterTest {
} }
@Test @Test
public void testSimple() throws Exception { public void testUnmappedField() throws IOException, InterruptedException, ExecutionException {
int numShards = between(1,5);
Builder builder = ImmutableSettings.builder();
builder.put("index.number_of_shards", numShards).put("index.number_of_replicas", between(0, 2));
builder.put("index.analysis.analyzer.biword.tokenizer", "standard");
builder.putArray("index.analysis.analyzer.biword.filter", "shingler", "lowercase");
builder.put("index.analysis.filter.shingler.type", "shingle");
builder.put("index.analysis.filter.shingler.min_shingle_size", 2);
builder.put("index.analysis.filter.shingler.max_shingle_size", 3);
XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type1")
.startObject("properties")
.startObject("name")
.field("type", "multi_field")
.field("path", "just_name")
.startObject("fields")
.startObject("name")
.field("type", "string")
.endObject()
.startObject("name_shingled")
.field("type", "string")
.field("index_analyzer", "biword")
.field("search_analyzer", "standard")
.endObject()
.endObject()
.endObject()
.endObject()
.endObject().endObject();
client().admin().indices().prepareDelete().execute().actionGet(); client().admin().indices().prepareDelete().execute().actionGet();
client().admin().indices().prepareCreate("test").setSettings(builder.build()).addMapping("type1", mapping).execute().actionGet();
client().admin().cluster().prepareHealth("test").setWaitForGreenStatus().execute().actionGet();
indexRandom("test", true,
client().prepareIndex("test", "type1")
.setSource(XContentFactory.jsonBuilder().startObject().field("name", "I like iced tea").endObject()),
client().prepareIndex("test", "type1")
.setSource(XContentFactory.jsonBuilder().startObject().field("name", "I like tea.").endObject()),
client().prepareIndex("test", "type1")
.setSource(XContentFactory.jsonBuilder().startObject().field("name", "I like ice cream.").endObject()));
Suggest searchSuggest = searchSuggest(client(),
"ice tea",
phraseSuggestion("did_you_mean").field("name_shingled")
.addCandidateGenerator(PhraseSuggestionBuilder.candidateGenerator("name").prefixLength(0).minWordLength(0).suggestMode("always").maxEdits(2))
.gramSize(3));
ElasticsearchAssertions.assertSuggestion(searchSuggest, 0, 0, "did_you_mean", "iced tea");
{
SearchRequestBuilder suggestBuilder = client().prepareSearch().setSearchType(SearchType.COUNT);
suggestBuilder.setSuggestText("tetsting sugestion");
suggestBuilder.addSuggestion(phraseSuggestion("did_you_mean").field("nosuchField")
.addCandidateGenerator(PhraseSuggestionBuilder.candidateGenerator("name").prefixLength(0).minWordLength(0).suggestMode("always").maxEdits(2))
.gramSize(3));
assertThrows(suggestBuilder, SearchPhaseExecutionException.class);
}
{
SearchRequestBuilder suggestBuilder = client().prepareSearch().setSearchType(SearchType.COUNT);
suggestBuilder.setSuggestText("tetsting sugestion");
suggestBuilder.addSuggestion(phraseSuggestion("did_you_mean").field("nosuchField")
.addCandidateGenerator(PhraseSuggestionBuilder.candidateGenerator("name").prefixLength(0).minWordLength(0).suggestMode("always").maxEdits(2))
.gramSize(3));
assertThrows(suggestBuilder, SearchPhaseExecutionException.class);
}
}
@Test
public void testSimple() throws Exception {
client().admin().indices().prepareCreate("test") client().admin().indices().prepareCreate("test")
.setSettings(settingsBuilder() .setSettings(settingsBuilder()
.put(SETTING_NUMBER_OF_SHARDS, 5) .put(SETTING_NUMBER_OF_SHARDS, 5)
@ -1033,6 +1097,7 @@ public class SuggestSearchTests extends AbstractSharedClusterTest {
builder.addSuggestion(suggestion); builder.addSuggestion(suggestion);
} }
SearchResponse actionGet = builder.execute().actionGet(); SearchResponse actionGet = builder.execute().actionGet();
assertThat(Arrays.toString(actionGet.getShardFailures()), actionGet.getFailedShards(), equalTo(expectShardsFailed));
return actionGet.getSuggest(); return actionGet.getSuggest();
} }
@ -1081,6 +1146,65 @@ public class SuggestSearchTests extends AbstractSharedClusterTest {
assertThat(suggest.getSuggestion("simple").getEntries().get(0).getOptions().size(), equalTo(3)); assertThat(suggest.getSuggestion("simple").getEntries().get(0).getOptions().size(), equalTo(3));
} }
@Test // see #3469
public void testShardFailures() throws IOException, InterruptedException {
Builder builder = ImmutableSettings.builder();
builder.put("index.number_of_shards", between(1,5)).put("index.number_of_replicas", between(0,3));
builder.put("index.analysis.analyzer.suggest.tokenizer", "standard");
builder.putArray("index.analysis.analyzer.suggest.filter", "standard", "lowercase", "shingler");
builder.put("index.analysis.filter.shingler.type", "shingle");
builder.put("index.analysis.filter.shingler.min_shingle_size", 2);
builder.put("index.analysis.filter.shingler.max_shingle_size", 5);
builder.put("index.analysis.filter.shingler.output_unigrams", true);
XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type1")
.startObject("properties")
.startObject("name")
.field("type", "multi_field")
.field("path", "just_name")
.startObject("fields")
.startObject("name")
.field("type", "string")
.field("analyzer", "suggest")
.endObject()
.endObject()
.endObject()
.endObject()
.endObject().endObject();
client().admin().indices().prepareDelete().execute().actionGet();
client().admin().indices().prepareCreate("test").setSettings(builder.build()).addMapping("type1", mapping).execute().actionGet();
client().admin().cluster().prepareHealth("test").setWaitForGreenStatus().execute().actionGet();
client().prepareIndex("test", "type2", "1")
.setSource(XContentFactory.jsonBuilder().startObject().field("foo", "bar").endObject()).execute().actionGet();
client().prepareIndex("test", "type2", "2")
.setSource(XContentFactory.jsonBuilder().startObject().field("foo", "bar").endObject()).execute().actionGet();
client().prepareIndex("test", "type2", "3")
.setSource(XContentFactory.jsonBuilder().startObject().field("foo", "bar").endObject()).execute().actionGet();
client().prepareIndex("test", "type2", "4")
.setSource(XContentFactory.jsonBuilder().startObject().field("foo", "bar").endObject()).execute().actionGet();
client().prepareIndex("test", "type2", "5")
.setSource(XContentFactory.jsonBuilder().startObject().field("foo", "bar").endObject()).execute().actionGet();
client().prepareIndex("test", "type1", "1")
.setSource(XContentFactory.jsonBuilder().startObject().field("name", "Just testing the suggestions api").endObject()).execute().actionGet();
client().prepareIndex("test", "type1", "2")
.setSource(XContentFactory.jsonBuilder().startObject().field("name", "An other title").endObject()).execute().actionGet();
client().admin().indices().prepareRefresh().execute().actionGet();
// When searching on a shard with a non-existent mapping, we should fail
SearchRequestBuilder suggestBuilder = client().prepareSearch().setSearchType(SearchType.COUNT);
suggestBuilder.setSuggestText("tetsting sugestion");
suggestBuilder.addSuggestion(phraseSuggestion("did_you_mean").field("fielddoesnotexist").maxErrors(5.0f));
assertThrows(suggestBuilder, SearchPhaseExecutionException.class);
// When searching on a shard which does not yet hold any document of an existing type, we should not fail
suggestBuilder = client().prepareSearch().setSearchType(SearchType.COUNT);
suggestBuilder.setSuggestText("tetsting sugestion");
suggestBuilder.addSuggestion(phraseSuggestion("did_you_mean").field("name").maxErrors(5.0f));
SearchResponse searchResponse = suggestBuilder.execute().actionGet();
ElasticsearchAssertions.assertNoFailures(searchResponse);
ElasticsearchAssertions.assertSuggestion(searchResponse.getSuggest(), 0, 0, "did_you_mean", "testing suggestions");
}
@Test // see #3469 @Test // see #3469
public void testEmptyShards() throws IOException, InterruptedException { public void testEmptyShards() throws IOException, InterruptedException {
Builder builder = ImmutableSettings.builder(); Builder builder = ImmutableSettings.builder();
@ -1127,5 +1251,4 @@ public class SuggestSearchTests extends AbstractSharedClusterTest {
ElasticsearchAssertions.assertNoFailures(searchResponse); ElasticsearchAssertions.assertNoFailures(searchResponse);
ElasticsearchAssertions.assertSuggestion(searchResponse.getSuggest(), 0, 0, "did_you_mean", "testing suggestions"); ElasticsearchAssertions.assertSuggestion(searchResponse.getSuggest(), 0, 0, "did_you_mean", "testing suggestions");
} }
} }

View File

@ -17,16 +17,7 @@ package org.elasticsearch.test.unit.search.suggest.phrase;
* specific language governing permissions and limitations * specific language governing permissions and limitations
* under the License. * under the License.
*/ */
import static org.hamcrest.Matchers.equalTo; import com.google.common.base.Charsets;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.Tokenizer;
@ -44,24 +35,21 @@ import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.search.spell.DirectSpellChecker; import org.apache.lucene.search.spell.DirectSpellChecker;
import org.apache.lucene.search.spell.SuggestMode; import org.apache.lucene.search.spell.SuggestMode;
import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version; import org.apache.lucene.util.Version;
import org.elasticsearch.search.suggest.phrase.CandidateGenerator; import org.elasticsearch.search.suggest.phrase.*;
import org.elasticsearch.search.suggest.phrase.Correction;
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator;
import org.elasticsearch.search.suggest.phrase.LaplaceScorer;
import org.elasticsearch.search.suggest.phrase.LinearInterpoatingScorer;
import org.elasticsearch.search.suggest.phrase.MultiCandidateGeneratorWrapper;
import org.elasticsearch.search.suggest.phrase.NoisyChannelSpellChecker;
import org.elasticsearch.search.suggest.phrase.StupidBackoffScorer;
import org.elasticsearch.search.suggest.phrase.WordScorer;
import org.elasticsearch.test.integration.ElasticsearchTestCase; import org.elasticsearch.test.integration.ElasticsearchTestCase;
import org.junit.Test; import org.junit.Test;
import com.google.common.base.Charsets; import java.io.*;
import java.util.HashMap;
import java.util.Map;
import static org.hamcrest.Matchers.equalTo;
public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
private final BytesRef space = new BytesRef(" "); private final BytesRef space = new BytesRef(" ");
private final BytesRef preTag = new BytesRef("<em>"); private final BytesRef preTag = new BytesRef("<em>");
@ -106,7 +94,7 @@ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
} }
DirectoryReader ir = DirectoryReader.open(writer, false); DirectoryReader ir = DirectoryReader.open(writer, false);
WordScorer wordScorer = new LaplaceScorer(ir, "body_ngram", 0.95d, new BytesRef(" "), 0.5f); WordScorer wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.95d, new BytesRef(" "), 0.5f);
NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker(); NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker();
DirectSpellChecker spellchecker = new DirectSpellChecker(); DirectSpellChecker spellchecker = new DirectSpellChecker();
@ -123,7 +111,7 @@ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("american ame")); assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("american ame"));
suggester = new NoisyChannelSpellChecker(0.85); suggester = new NoisyChannelSpellChecker(0.85);
wordScorer = new LaplaceScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.5f); wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d, new BytesRef(" "), 0.5f);
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 0, 2); corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 0, 2);
assertThat(corrections.length, equalTo(4)); assertThat(corrections.length, equalTo(4));
assertThat(corrections[0].join(space).utf8ToString(), equalTo("xorr the god jewel")); assertThat(corrections[0].join(space).utf8ToString(), equalTo("xorr the god jewel"));
@ -144,7 +132,7 @@ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
// Test some of the highlighting corner cases // Test some of the highlighting corner cases
suggester = new NoisyChannelSpellChecker(0.85); suggester = new NoisyChannelSpellChecker(0.85);
wordScorer = new LaplaceScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.5f); wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d, new BytesRef(" "), 0.5f);
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor teh Got-Jewel"), generator, 4f, 4, ir, "body", wordScorer, 1, 2); corrections = suggester.getCorrections(wrapper, new BytesRef("Xor teh Got-Jewel"), generator, 4f, 4, ir, "body", wordScorer, 1, 2);
assertThat(corrections.length, equalTo(4)); assertThat(corrections.length, equalTo(4));
assertThat(corrections[0].join(space).utf8ToString(), equalTo("xorr the god jewel")); assertThat(corrections[0].join(space).utf8ToString(), equalTo("xorr the god jewel"));
@ -179,18 +167,18 @@ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
spellchecker.setMinPrefix(1); spellchecker.setMinPrefix(1);
spellchecker.setMinQueryLength(1); spellchecker.setMinQueryLength(1);
suggester = new NoisyChannelSpellChecker(0.85); suggester = new NoisyChannelSpellChecker(0.85);
wordScorer = new LaplaceScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.5f); wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d, new BytesRef(" "), 0.5f);
corrections = suggester.getCorrections(analyzer, new BytesRef("captian usa"), generator, 2, 4, ir, "body", wordScorer, 1, 2); corrections = suggester.getCorrections(analyzer, new BytesRef("captian usa"), generator, 2, 4, ir, "body", wordScorer, 1, 2);
assertThat(corrections[0].join(space).utf8ToString(), equalTo("captain america")); assertThat(corrections[0].join(space).utf8ToString(), equalTo("captain america"));
assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("<em>captain america</em>")); assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("<em>captain america</em>"));
generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.85, 10, null, analyzer); generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.85, 10, null, analyzer, MultiFields.getTerms(ir, "body"));
corrections = suggester.getCorrections(analyzer, new BytesRef("captian usw"), generator, 2, 4, ir, "body", wordScorer, 1, 2); corrections = suggester.getCorrections(analyzer, new BytesRef("captian usw"), generator, 2, 4, ir, "body", wordScorer, 1, 2);
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america")); assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("<em>captain america</em>")); assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("<em>captain america</em>"));
// Make sure that user supplied text is not marked as highlighted in the presence of a synonym filter // Make sure that user supplied text is not marked as highlighted in the presence of a synonym filter
generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.85, 10, null, analyzer); generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.85, 10, null, analyzer, MultiFields.getTerms(ir, "body"));
corrections = suggester.getCorrections(analyzer, new BytesRef("captain usw"), generator, 2, 4, ir, "body", wordScorer, 1, 2); corrections = suggester.getCorrections(analyzer, new BytesRef("captain usw"), generator, 2, 4, ir, "body", wordScorer, 1, 2);
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america")); assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("captain <em>america</em>")); assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("captain <em>america</em>"));
@ -245,12 +233,12 @@ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
} }
DirectoryReader ir = DirectoryReader.open(writer, false); DirectoryReader ir = DirectoryReader.open(writer, false);
LaplaceScorer wordScorer = new LaplaceScorer(ir, "body_ngram", 0.95d, new BytesRef(" "), 0.5f); LaplaceScorer wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.95d, new BytesRef(" "), 0.5f);
NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker(); NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker();
DirectSpellChecker spellchecker = new DirectSpellChecker(); DirectSpellChecker spellchecker = new DirectSpellChecker();
spellchecker.setMinQueryLength(1); spellchecker.setMinQueryLength(1);
DirectCandidateGenerator forward = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_ALWAYS, ir, 0.95, 10); DirectCandidateGenerator forward = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_ALWAYS, ir, 0.95, 10);
DirectCandidateGenerator reverse = new DirectCandidateGenerator(spellchecker, "body_reverse", SuggestMode.SUGGEST_ALWAYS, ir, 0.95, 10, wrapper, wrapper); DirectCandidateGenerator reverse = new DirectCandidateGenerator(spellchecker, "body_reverse", SuggestMode.SUGGEST_ALWAYS, ir, 0.95, 10, wrapper, wrapper, MultiFields.getTerms(ir, "body_reverse"));
CandidateGenerator generator = new MultiCandidateGeneratorWrapper(10, forward, reverse); CandidateGenerator generator = new MultiCandidateGeneratorWrapper(10, forward, reverse);
Correction[] corrections = suggester.getCorrections(wrapper, new BytesRef("american cae"), generator, 1, 1, ir, "body", wordScorer, 1, 2); Correction[] corrections = suggester.getCorrections(wrapper, new BytesRef("american cae"), generator, 1, 1, ir, "body", wordScorer, 1, 2);
@ -329,7 +317,7 @@ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
} }
DirectoryReader ir = DirectoryReader.open(writer, false); DirectoryReader ir = DirectoryReader.open(writer, false);
WordScorer wordScorer = new LinearInterpoatingScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.5, 0.4, 0.1); WordScorer wordScorer = new LinearInterpoatingScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d, new BytesRef(" "), 0.5, 0.4, 0.1);
NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker(); NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker();
DirectSpellChecker spellchecker = new DirectSpellChecker(); DirectSpellChecker spellchecker = new DirectSpellChecker();
@ -343,7 +331,7 @@ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
assertThat(corrections.length, equalTo(0)); assertThat(corrections.length, equalTo(0));
// assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ape")); // assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ape"));
wordScorer = new LinearInterpoatingScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.5, 0.4, 0.1); wordScorer = new LinearInterpoatingScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d, new BytesRef(" "), 0.5, 0.4, 0.1);
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 0, 3); corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 0, 3);
assertThat(corrections.length, equalTo(4)); assertThat(corrections.length, equalTo(4));
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel")); assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
@ -390,16 +378,16 @@ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
spellchecker.setMinPrefix(1); spellchecker.setMinPrefix(1);
spellchecker.setMinQueryLength(1); spellchecker.setMinQueryLength(1);
suggester = new NoisyChannelSpellChecker(0.95); suggester = new NoisyChannelSpellChecker(0.95);
wordScorer = new LinearInterpoatingScorer(ir, "body_ngram", 0.95d, new BytesRef(" "), 0.5, 0.4, 0.1); wordScorer = new LinearInterpoatingScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.95d, new BytesRef(" "), 0.5, 0.4, 0.1);
corrections = suggester.getCorrections(analyzer, new BytesRef("captian usa"), generator, 2, 4, ir, "body", wordScorer, 1, 3); corrections = suggester.getCorrections(analyzer, new BytesRef("captian usa"), generator, 2, 4, ir, "body", wordScorer, 1, 3);
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america")); assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.95, 10, null, analyzer); generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.95, 10, null, analyzer, MultiFields.getTerms(ir, "body"));
corrections = suggester.getCorrections(analyzer, new BytesRef("captian usw"), generator, 2, 4, ir, "body", wordScorer, 1, 3); corrections = suggester.getCorrections(analyzer, new BytesRef("captian usw"), generator, 2, 4, ir, "body", wordScorer, 1, 3);
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america")); assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
wordScorer = new StupidBackoffScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.4); wordScorer = new StupidBackoffScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d, new BytesRef(" "), 0.4);
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 2, ir, "body", wordScorer, 0, 3); corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 2, ir, "body", wordScorer, 0, 3);
assertThat(corrections.length, equalTo(2)); assertThat(corrections.length, equalTo(2));
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel")); assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));