Prevent Phrase Suggester from failing on missing fields.

Unless the field is not mapped at all, the phrase suggester should return
empty results or skip candidate generation when a field is not present in
the index, rather than failing hard with an illegal argument exception.
Some shards might not have a value in a certain field.

Closes #3469
This commit is contained in:
Simon Willnauer 2013-08-16 13:12:24 +02:00
parent 5d91bb04b6
commit 57c0d29114
9 changed files with 248 additions and 114 deletions

View File

@ -18,18 +18,8 @@
*/
package org.elasticsearch.search.suggest.phrase;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.*;
import org.apache.lucene.search.spell.DirectSpellChecker;
import org.apache.lucene.search.spell.SuggestMode;
import org.apache.lucene.search.spell.SuggestWord;
@ -38,6 +28,12 @@ import org.apache.lucene.util.CharsRef;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.search.suggest.SuggestUtils;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
//TODO public for tests
public final class DirectCandidateGenerator extends CandidateGenerator {
@ -58,20 +54,19 @@ public final class DirectCandidateGenerator extends CandidateGenerator {
private final int numCandidates;
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates) throws IOException {
this(spellchecker, field, suggestMode, reader, nonErrorLikelihood, numCandidates, null, null);
this(spellchecker, field, suggestMode, reader, nonErrorLikelihood, numCandidates, null, null, MultiFields.getTerms(reader, field));
}
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates, Analyzer preFilter, Analyzer postFilter) throws IOException {
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates, Analyzer preFilter, Analyzer postFilter, Terms terms) throws IOException {
if (terms == null) {
throw new ElasticSearchIllegalArgumentException("generator field [" + field + "] doesn't exist");
}
this.spellchecker = spellchecker;
this.field = field;
this.numCandidates = numCandidates;
this.suggestMode = suggestMode;
this.reader = reader;
Terms terms = MultiFields.getTerms(reader, field);
if (terms == null) {
throw new ElasticSearchIllegalArgumentException("generator field [" + field + "] doesn't exist");
}
final long dictSize = terms.getSumTotalTermFreq();
this.useTotalTermFrequency = dictSize != -1;
this.dictSize = dictSize == -1 ? reader.maxDoc() : dictSize;

View File

@ -18,27 +18,28 @@
*/
package org.elasticsearch.search.suggest.phrase;
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Terms;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.search.suggest.SuggestUtils;
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;
import java.io.IOException;
//TODO public for tests
public final class LaplaceScorer extends WordScorer {
public static final WordScorerFactory FACTORY = new WordScorer.WordScorerFactory() {
@Override
public WordScorer newScorer(IndexReader reader, String field, double realWordLikelyhood, BytesRef separator) throws IOException {
return new LaplaceScorer(reader, field, realWordLikelyhood, separator, 0.5);
public WordScorer newScorer(IndexReader reader, Terms terms, String field, double realWordLikelyhood, BytesRef separator) throws IOException {
return new LaplaceScorer(reader, terms, field, realWordLikelyhood, separator, 0.5);
}
};
private double alpha;
public LaplaceScorer(IndexReader reader, String field,
public LaplaceScorer(IndexReader reader, Terms terms, String field,
double realWordLikelyhood, BytesRef separator, double alpha) throws IOException {
super(reader, field, realWordLikelyhood, separator);
super(reader, terms, field, realWordLikelyhood, separator);
this.alpha = alpha;
}

View File

@ -18,13 +18,14 @@
*/
package org.elasticsearch.search.suggest.phrase;
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Terms;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.search.suggest.SuggestUtils;
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;
import java.io.IOException;
//TODO public for tests
public final class LinearInterpoatingScorer extends WordScorer {
@ -32,9 +33,9 @@ public final class LinearInterpoatingScorer extends WordScorer {
private final double bigramLambda;
private final double trigramLambda;
public LinearInterpoatingScorer(IndexReader reader, String field, double realWordLikelyhood, BytesRef separator, double trigramLambda, double bigramLambda, double unigramLambda)
public LinearInterpoatingScorer(IndexReader reader, Terms terms, String field, double realWordLikelyhood, BytesRef separator, double trigramLambda, double bigramLambda, double unigramLambda)
throws IOException {
super(reader, field, realWordLikelyhood, separator);
super(reader, terms, field, realWordLikelyhood, separator);
double sum = unigramLambda + bigramLambda + trigramLambda;
this.unigramLambda = unigramLambda / sum;
this.bigramLambda = bigramLambda / sum;

View File

@ -18,10 +18,9 @@
*/
package org.elasticsearch.search.suggest.phrase;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Terms;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.xcontent.XContentParser;
@ -33,6 +32,8 @@ import org.elasticsearch.search.suggest.SuggestUtils;
import org.elasticsearch.search.suggest.SuggestionSearchContext;
import org.elasticsearch.search.suggest.phrase.PhraseSuggestionContext.DirectCandidateGenerator;
import java.io.IOException;
public final class PhraseSuggestParser implements SuggestContextParser {
private PhraseSuggester suggester;
@ -135,6 +136,10 @@ public final class PhraseSuggestParser implements SuggestContextParser {
throw new ElasticSearchIllegalArgumentException("The required field option is missing");
}
if (mapperService.smartNameFieldMapper(suggestion.getField()) == null) {
throw new ElasticSearchIllegalArgumentException("No mapping found for field [" + suggestion.getField() + "]");
}
if (suggestion.model() == null) {
suggestion.setModel(StupidBackoffScorer.FACTORY);
}
@ -209,9 +214,9 @@ public final class PhraseSuggestParser implements SuggestContextParser {
}
suggestion.setModel(new WordScorer.WordScorerFactory() {
@Override
public WordScorer newScorer(IndexReader reader, String field, double realWordLikelyhood, BytesRef separator)
public WordScorer newScorer(IndexReader reader, Terms terms, String field, double realWordLikelyhood, BytesRef separator)
throws IOException {
return new LinearInterpoatingScorer(reader, field, realWordLikelyhood, separator, lambdas[0], lambdas[1],
return new LinearInterpoatingScorer(reader, terms, field, realWordLikelyhood, separator, lambdas[0], lambdas[1],
lambdas[2]);
}
});
@ -230,9 +235,9 @@ public final class PhraseSuggestParser implements SuggestContextParser {
final double alpha = theAlpha;
suggestion.setModel(new WordScorer.WordScorerFactory() {
@Override
public WordScorer newScorer(IndexReader reader, String field, double realWordLikelyhood, BytesRef separator)
public WordScorer newScorer(IndexReader reader, Terms terms, String field, double realWordLikelyhood, BytesRef separator)
throws IOException {
return new LaplaceScorer(reader, field, realWordLikelyhood, separator, alpha);
return new LaplaceScorer(reader, terms, field, realWordLikelyhood, separator, alpha);
}
});
@ -250,9 +255,9 @@ public final class PhraseSuggestParser implements SuggestContextParser {
final double discount = theDiscount;
suggestion.setModel(new WordScorer.WordScorerFactory() {
@Override
public WordScorer newScorer(IndexReader reader, String field, double realWordLikelyhood, BytesRef separator)
public WordScorer newScorer(IndexReader reader, Terms terms, String field, double realWordLikelyhood, BytesRef separator)
throws IOException {
return new StupidBackoffScorer(reader, field, realWordLikelyhood, separator, discount);
return new StupidBackoffScorer(reader, terms, field, realWordLikelyhood, separator, discount);
}
});
@ -281,6 +286,9 @@ public final class PhraseSuggestParser implements SuggestContextParser {
if (!SuggestUtils.parseDirectSpellcheckerSettings(parser, fieldName, generator)) {
if ("field".equals(fieldName)) {
generator.setField(parser.text());
if (mapperService.smartNameFieldMapper(generator.field()) == null) {
throw new ElasticSearchIllegalArgumentException("No mapping found for field [" + generator.field() + "]");
}
} else if ("size".equals(fieldName)) {
generator.size(parser.intValue());
} else if ("pre_filter".equals(fieldName) || "preFilter".equals(fieldName)) {

View File

@ -21,6 +21,8 @@ package org.elasticsearch.search.suggest.phrase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.search.spell.DirectSpellChecker;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
@ -30,9 +32,11 @@ import org.elasticsearch.common.text.Text;
import org.elasticsearch.search.suggest.Suggest.Suggestion;
import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry;
import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry.Option;
import org.elasticsearch.search.suggest.SuggestContextParser;
import org.elasticsearch.search.suggest.SuggestUtils;
import org.elasticsearch.search.suggest.Suggester;
import org.elasticsearch.search.suggest.*;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.io.IOException;
import java.util.List;
@ -52,41 +56,50 @@ public final class PhraseSuggester extends Suggester<PhraseSuggestionContext> {
public Suggestion<? extends Entry<? extends Option>> innerExecute(String name, PhraseSuggestionContext suggestion,
IndexReader indexReader, CharsRef spare) throws IOException {
double realWordErrorLikelihood = suggestion.realworldErrorLikelyhood();
List<PhraseSuggestionContext.DirectCandidateGenerator> generators = suggestion.generators();
CandidateGenerator[] gens = new CandidateGenerator[generators.size()];
for (int i = 0; i < gens.length; i++) {
PhraseSuggestionContext.DirectCandidateGenerator generator = generators.get(i);
DirectSpellChecker directSpellChecker = SuggestUtils.getDirectSpellChecker(generator);
gens[i] = new DirectCandidateGenerator(directSpellChecker, generator.field(), generator.suggestMode(), indexReader, realWordErrorLikelihood, generator.size(), generator.preFilter(), generator.postFilter());
}
final NoisyChannelSpellChecker checker = new NoisyChannelSpellChecker(realWordErrorLikelihood, suggestion.getRequireUnigram(), suggestion.getTokenLimit());
final BytesRef separator = suggestion.separator();
TokenStream stream = checker.tokenStream(suggestion.getAnalyzer(), suggestion.getText(), spare, suggestion.getField());
WordScorer wordScorer = suggestion.model().newScorer(indexReader, suggestion.getField(), realWordErrorLikelihood, separator);
Correction[] corrections = checker.getCorrections(stream, new MultiCandidateGeneratorWrapper(suggestion.getShardSize(), gens), suggestion.maxErrors(),
suggestion.getShardSize(), indexReader,wordScorer , separator, suggestion.confidence(), suggestion.gramSize());
UnicodeUtil.UTF8toUTF16(suggestion.getText(), spare);
Suggestion.Entry<Option> resultEntry = new Suggestion.Entry<Option>(new StringText(spare.toString()), 0, spare.length);
BytesRef byteSpare = new BytesRef();
for (Correction correction : corrections) {
UnicodeUtil.UTF8toUTF16(correction.join(SEPARATOR, byteSpare, null, null), spare);
Text phrase = new StringText(spare.toString());
Text highlighted = null;
if (suggestion.getPreTag() != null) {
UnicodeUtil.UTF8toUTF16(correction.join(SEPARATOR, byteSpare, suggestion.getPreTag(), suggestion.getPostTag()), spare);
highlighted = new StringText(spare.toString());
}
resultEntry.addOption(new Suggestion.Entry.Option(phrase, highlighted, (float) (correction.score)));
}
final Suggestion<Entry<Option>> response = new Suggestion<Entry<Option>>(name, suggestion.getSize());
response.addTerm(resultEntry);
List<PhraseSuggestionContext.DirectCandidateGenerator> generators = suggestion.generators();
final int numGenerators = generators.size();
final List<CandidateGenerator> gens = new ArrayList<CandidateGenerator>(generators.size());
for (int i = 0; i < numGenerators; i++) {
PhraseSuggestionContext.DirectCandidateGenerator generator = generators.get(i);
DirectSpellChecker directSpellChecker = SuggestUtils.getDirectSpellChecker(generator);
Terms terms = MultiFields.getTerms(indexReader, generator.field());
if (terms != null) {
gens.add(new DirectCandidateGenerator(directSpellChecker, generator.field(), generator.suggestMode(),
indexReader, realWordErrorLikelihood, generator.size(), generator.preFilter(), generator.postFilter(), terms));
}
}
final String suggestField = suggestion.getField();
final Terms suggestTerms = MultiFields.getTerms(indexReader, suggestField);
if (gens.size() > 0 && suggestTerms != null) {
final NoisyChannelSpellChecker checker = new NoisyChannelSpellChecker(realWordErrorLikelihood, suggestion.getRequireUnigram(), suggestion.getTokenLimit());
final BytesRef separator = suggestion.separator();
TokenStream stream = checker.tokenStream(suggestion.getAnalyzer(), suggestion.getText(), spare, suggestion.getField());
WordScorer wordScorer = suggestion.model().newScorer(indexReader, suggestTerms, suggestField, realWordErrorLikelihood, separator);
Correction[] corrections = checker.getCorrections(stream, new MultiCandidateGeneratorWrapper(suggestion.getShardSize(),
gens.toArray(new CandidateGenerator[gens.size()])), suggestion.maxErrors(),
suggestion.getShardSize(), indexReader,wordScorer , separator, suggestion.confidence(), suggestion.gramSize());
BytesRef byteSpare = new BytesRef();
for (Correction correction : corrections) {
UnicodeUtil.UTF8toUTF16(correction.join(SEPARATOR, byteSpare, null, null), spare);
Text phrase = new StringText(spare.toString());
Text highlighted = null;
if (suggestion.getPreTag() != null) {
UnicodeUtil.UTF8toUTF16(correction.join(SEPARATOR, byteSpare, suggestion.getPreTag(), suggestion.getPostTag()), spare);
highlighted = new StringText(spare.toString());
}
resultEntry.addOption(new Suggestion.Entry.Option(phrase, highlighted, (float) (correction.score)));
}
}
return response;
}
@Override
public String[] names() {
return new String[] {"phrase"};

View File

@ -18,26 +18,27 @@
*/
package org.elasticsearch.search.suggest.phrase;
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Terms;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.search.suggest.SuggestUtils;
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;
import java.io.IOException;
public class StupidBackoffScorer extends WordScorer {
public static final WordScorerFactory FACTORY = new WordScorer.WordScorerFactory() {
@Override
public WordScorer newScorer(IndexReader reader, String field, double realWordLikelyhood, BytesRef separator) throws IOException {
return new StupidBackoffScorer(reader, field, realWordLikelyhood, separator, 0.4f);
public WordScorer newScorer(IndexReader reader, Terms terms, String field, double realWordLikelyhood, BytesRef separator) throws IOException {
return new StupidBackoffScorer(reader, terms, field, realWordLikelyhood, separator, 0.4f);
}
};
private final double discount;
public StupidBackoffScorer(IndexReader reader, String field, double realWordLikelyhood, BytesRef separator, double discount)
public StupidBackoffScorer(IndexReader reader, Terms terms,String field, double realWordLikelyhood, BytesRef separator, double discount)
throws IOException {
super(reader, field, realWordLikelyhood, separator);
super(reader, terms, field, realWordLikelyhood, separator);
this.discount = discount;
}

View File

@ -18,8 +18,6 @@
*/
package org.elasticsearch.search.suggest.phrase;
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
@ -29,6 +27,8 @@ import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.CandidateSet;
import java.io.IOException;
//TODO public for tests
public abstract class WordScorer {
protected final IndexReader reader;
@ -43,11 +43,15 @@ public abstract class WordScorer {
private final boolean useTotalTermFreq;
public WordScorer(IndexReader reader, String field, double realWordLikelyHood, BytesRef separator) throws IOException {
this(reader, MultiFields.getTerms(reader, field), field, realWordLikelyHood, separator);
}
public WordScorer(IndexReader reader, Terms terms, String field, double realWordLikelyHood, BytesRef separator) throws IOException {
this.field = field;
this.terms = MultiFields.getTerms(reader, field);
if (terms == null) {
throw new ElasticSearchIllegalArgumentException("Field: [" + field + "] does not exist");
}
this.terms = terms;
final long vocSize = terms.getSumTotalTermFreq();
this.vocabluarySize = vocSize == -1 ? reader.maxDoc() : vocSize;
this.useTotalTermFreq = vocSize != -1;
@ -95,7 +99,7 @@ public abstract class WordScorer {
}
public static interface WordScorerFactory {
public WordScorer newScorer(IndexReader reader, String field,
double realWordLikelyhood, BytesRef separator) throws IOException;
public WordScorer newScorer(IndexReader reader, Terms terms,
String field, double realWordLikelyhood, BytesRef separator) throws IOException;
}
}

View File

@ -40,8 +40,10 @@ import org.junit.Test;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS;
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS;
@ -50,6 +52,7 @@ import static org.elasticsearch.index.query.QueryBuilders.matchQuery;
import static org.elasticsearch.search.suggest.SuggestBuilder.phraseSuggestion;
import static org.elasticsearch.search.suggest.SuggestBuilder.termSuggestion;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSuggestionSize;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertThrows;
import static org.hamcrest.Matchers.*;
/**
@ -112,7 +115,6 @@ public class SuggestSearchTests extends AbstractSharedClusterTest {
@Test // see #2729
public void testSizeOneShard() throws Exception {
client().admin().indices().prepareDelete().execute().actionGet();
client().admin().indices().prepareCreate("test")
.setSettings(settingsBuilder()
.put(SETTING_NUMBER_OF_SHARDS, 1)
@ -158,10 +160,72 @@ public class SuggestSearchTests extends AbstractSharedClusterTest {
assertThat(suggest.getSuggestion("test").getEntries().get(0).getText().string(), equalTo("abcd"));
assertThat(suggest.getSuggestion("test").getEntries().get(0).getOptions().size(), equalTo(5));
}
// Regression test for #3469: a phrase suggestion against a mapped (shingled) field must
// succeed, while targeting a field with no mapping at all ("nosuchField") must fail the
// whole request with a SearchPhaseExecutionException instead of an obscure shard error.
@Test
public void testUnmappedField() throws IOException, InterruptedException, ExecutionException {
// Randomized shard/replica layout so the behavior is exercised across distributions.
int numShards = between(1,5);
Builder builder = ImmutableSettings.builder();
builder.put("index.number_of_shards", numShards).put("index.number_of_replicas", between(0, 2));
// "biword" analyzer: standard tokenizer, then 2-3 word shingles, lowercased.
builder.put("index.analysis.analyzer.biword.tokenizer", "standard");
builder.putArray("index.analysis.analyzer.biword.filter", "shingler", "lowercase");
builder.put("index.analysis.filter.shingler.type", "shingle");
builder.put("index.analysis.filter.shingler.min_shingle_size", 2);
builder.put("index.analysis.filter.shingler.max_shingle_size", 3);
// multi_field: "name" is indexed both verbatim and as "name_shingled" using the
// biword analyzer at index time (standard analyzer at search time).
XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type1")
.startObject("properties")
.startObject("name")
.field("type", "multi_field")
.field("path", "just_name")
.startObject("fields")
.startObject("name")
.field("type", "string")
.endObject()
.startObject("name_shingled")
.field("type", "string")
.field("index_analyzer", "biword")
.field("search_analyzer", "standard")
.endObject()
.endObject()
.endObject()
.endObject()
.endObject().endObject();
// Fresh index with the mapping above; wait for green before indexing.
client().admin().indices().prepareDelete().execute().actionGet();
client().admin().indices().prepareCreate("test").setSettings(builder.build()).addMapping("type1", mapping).execute().actionGet();
client().admin().cluster().prepareHealth("test").setWaitForGreenStatus().execute().actionGet();
indexRandom("test", true,
client().prepareIndex("test", "type1")
.setSource(XContentFactory.jsonBuilder().startObject().field("name", "I like iced tea").endObject()),
client().prepareIndex("test", "type1")
.setSource(XContentFactory.jsonBuilder().startObject().field("name", "I like tea.").endObject()),
client().prepareIndex("test", "type1")
.setSource(XContentFactory.jsonBuilder().startObject().field("name", "I like ice cream.").endObject()));
// Happy path: suggesting on the mapped shingled field corrects "ice tea" -> "iced tea".
Suggest searchSuggest = searchSuggest(client(),
"ice tea",
phraseSuggestion("did_you_mean").field("name_shingled")
.addCandidateGenerator(PhraseSuggestionBuilder.candidateGenerator("name").prefixLength(0).minWordLength(0).suggestMode("always").maxEdits(2))
.gramSize(3));
ElasticsearchAssertions.assertSuggestion(searchSuggest, 0, 0, "did_you_mean", "iced tea");
// Failure path: an entirely unmapped suggest field must be rejected up front.
{
SearchRequestBuilder suggestBuilder = client().prepareSearch().setSearchType(SearchType.COUNT);
suggestBuilder.setSuggestText("tetsting sugestion");
suggestBuilder.addSuggestion(phraseSuggestion("did_you_mean").field("nosuchField")
.addCandidateGenerator(PhraseSuggestionBuilder.candidateGenerator("name").prefixLength(0).minWordLength(0).suggestMode("always").maxEdits(2))
.gramSize(3));
assertThrows(suggestBuilder, SearchPhaseExecutionException.class);
}
// NOTE(review): this block is byte-identical to the one above — presumably it
// deliberately re-runs the failing request to check the error is stable; confirm.
{
SearchRequestBuilder suggestBuilder = client().prepareSearch().setSearchType(SearchType.COUNT);
suggestBuilder.setSuggestText("tetsting sugestion");
suggestBuilder.addSuggestion(phraseSuggestion("did_you_mean").field("nosuchField")
.addCandidateGenerator(PhraseSuggestionBuilder.candidateGenerator("name").prefixLength(0).minWordLength(0).suggestMode("always").maxEdits(2))
.gramSize(3));
assertThrows(suggestBuilder, SearchPhaseExecutionException.class);
}
}
@Test
public void testSimple() throws Exception {
client().admin().indices().prepareDelete().execute().actionGet();
client().admin().indices().prepareCreate("test")
.setSettings(settingsBuilder()
.put(SETTING_NUMBER_OF_SHARDS, 5)
@ -1033,6 +1097,7 @@ public class SuggestSearchTests extends AbstractSharedClusterTest {
builder.addSuggestion(suggestion);
}
SearchResponse actionGet = builder.execute().actionGet();
assertThat(Arrays.toString(actionGet.getShardFailures()), actionGet.getFailedShards(), equalTo(expectShardsFailed));
return actionGet.getSuggest();
}
@ -1080,7 +1145,66 @@ public class SuggestSearchTests extends AbstractSharedClusterTest {
assertThat(suggest.getSuggestion("simple").getEntries().size(), equalTo(1));
assertThat(suggest.getSuggestion("simple").getEntries().get(0).getOptions().size(), equalTo(3));
}
// Regression test for #3469: distinguishes "field has no mapping" (must fail the
// request) from "field is mapped but some shards hold no document with a value in
// it" (must NOT fail — those shards should simply contribute no candidates).
@Test // see #3469
public void testShardFailures() throws IOException, InterruptedException {
Builder builder = ImmutableSettings.builder();
builder.put("index.number_of_shards", between(1,5)).put("index.number_of_replicas", between(0,3));
// "suggest" analyzer: standard tokenizer + 2-5 word shingles, unigrams kept.
builder.put("index.analysis.analyzer.suggest.tokenizer", "standard");
builder.putArray("index.analysis.analyzer.suggest.filter", "standard", "lowercase", "shingler");
builder.put("index.analysis.filter.shingler.type", "shingle");
builder.put("index.analysis.filter.shingler.min_shingle_size", 2);
builder.put("index.analysis.filter.shingler.max_shingle_size", 5);
builder.put("index.analysis.filter.shingler.output_unigrams", true);
// type1 maps "name" (multi_field) with the suggest analyzer; type2 is left unmapped
// for "name" so that some shards end up without terms for the suggest field.
XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type1")
.startObject("properties")
.startObject("name")
.field("type", "multi_field")
.field("path", "just_name")
.startObject("fields")
.startObject("name")
.field("type", "string")
.field("analyzer", "suggest")
.endObject()
.endObject()
.endObject()
.endObject()
.endObject().endObject();
client().admin().indices().prepareDelete().execute().actionGet();
client().admin().indices().prepareCreate("test").setSettings(builder.build()).addMapping("type1", mapping).execute().actionGet();
client().admin().cluster().prepareHealth("test").setWaitForGreenStatus().execute().actionGet();
// type2 docs only carry "foo", so shards routed these docs have no "name" terms.
client().prepareIndex("test", "type2", "1")
.setSource(XContentFactory.jsonBuilder().startObject().field("foo", "bar").endObject()).execute().actionGet();
client().prepareIndex("test", "type2", "2")
.setSource(XContentFactory.jsonBuilder().startObject().field("foo", "bar").endObject()).execute().actionGet();
client().prepareIndex("test", "type2", "3")
.setSource(XContentFactory.jsonBuilder().startObject().field("foo", "bar").endObject()).execute().actionGet();
client().prepareIndex("test", "type2", "4")
.setSource(XContentFactory.jsonBuilder().startObject().field("foo", "bar").endObject()).execute().actionGet();
client().prepareIndex("test", "type2", "5")
.setSource(XContentFactory.jsonBuilder().startObject().field("foo", "bar").endObject()).execute().actionGet();
client().prepareIndex("test", "type1", "1")
.setSource(XContentFactory.jsonBuilder().startObject().field("name", "Just testing the suggestions api").endObject()).execute().actionGet();
client().prepareIndex("test", "type1", "2")
.setSource(XContentFactory.jsonBuilder().startObject().field("name", "An other title").endObject()).execute().actionGet();
client().admin().indices().prepareRefresh().execute().actionGet();
// When searching on a shard with a non existing mapping, we should fail
SearchRequestBuilder suggestBuilder = client().prepareSearch().setSearchType(SearchType.COUNT);
suggestBuilder.setSuggestText("tetsting sugestion");
suggestBuilder.addSuggestion(phraseSuggestion("did_you_mean").field("fielddoesnotexist").maxErrors(5.0f));
assertThrows(suggestBuilder, SearchPhaseExecutionException.class);
// When searching on a shard which does not hold yet any document of an existing type, we should not fail
suggestBuilder = client().prepareSearch().setSearchType(SearchType.COUNT);
suggestBuilder.setSuggestText("tetsting sugestion");
suggestBuilder.addSuggestion(phraseSuggestion("did_you_mean").field("name").maxErrors(5.0f));
SearchResponse searchResponse = suggestBuilder.execute().actionGet();
ElasticsearchAssertions.assertNoFailures(searchResponse);
ElasticsearchAssertions.assertSuggestion(searchResponse.getSuggest(), 0, 0, "did_you_mean", "testing suggestions");
}
@Test // see #3469
public void testEmptyShards() throws IOException, InterruptedException {
Builder builder = ImmutableSettings.builder();
@ -1127,5 +1251,4 @@ public class SuggestSearchTests extends AbstractSharedClusterTest {
ElasticsearchAssertions.assertNoFailures(searchResponse);
ElasticsearchAssertions.assertSuggestion(searchResponse.getSuggest(), 0, 0, "did_you_mean", "testing suggestions");
}
}

View File

@ -17,16 +17,7 @@ package org.elasticsearch.test.unit.search.suggest.phrase;
* specific language governing permissions and limitations
* under the License.
*/
import static org.hamcrest.Matchers.equalTo;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;
import com.google.common.base.Charsets;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer;
@ -44,24 +35,21 @@ import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.search.spell.DirectSpellChecker;
import org.apache.lucene.search.spell.SuggestMode;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;
import org.elasticsearch.search.suggest.phrase.CandidateGenerator;
import org.elasticsearch.search.suggest.phrase.Correction;
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator;
import org.elasticsearch.search.suggest.phrase.LaplaceScorer;
import org.elasticsearch.search.suggest.phrase.LinearInterpoatingScorer;
import org.elasticsearch.search.suggest.phrase.MultiCandidateGeneratorWrapper;
import org.elasticsearch.search.suggest.phrase.NoisyChannelSpellChecker;
import org.elasticsearch.search.suggest.phrase.StupidBackoffScorer;
import org.elasticsearch.search.suggest.phrase.WordScorer;
import org.elasticsearch.search.suggest.phrase.*;
import org.elasticsearch.test.integration.ElasticsearchTestCase;
import org.junit.Test;
import com.google.common.base.Charsets;
import java.io.*;
import java.util.HashMap;
import java.util.Map;
import static org.hamcrest.Matchers.equalTo;
public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
private final BytesRef space = new BytesRef(" ");
private final BytesRef preTag = new BytesRef("<em>");
@ -106,7 +94,7 @@ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
}
DirectoryReader ir = DirectoryReader.open(writer, false);
WordScorer wordScorer = new LaplaceScorer(ir, "body_ngram", 0.95d, new BytesRef(" "), 0.5f);
WordScorer wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.95d, new BytesRef(" "), 0.5f);
NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker();
DirectSpellChecker spellchecker = new DirectSpellChecker();
@ -123,7 +111,7 @@ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("american ame"));
suggester = new NoisyChannelSpellChecker(0.85);
wordScorer = new LaplaceScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.5f);
wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d, new BytesRef(" "), 0.5f);
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 0, 2);
assertThat(corrections.length, equalTo(4));
assertThat(corrections[0].join(space).utf8ToString(), equalTo("xorr the god jewel"));
@ -144,7 +132,7 @@ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
// Test some of the highlighting corner cases
suggester = new NoisyChannelSpellChecker(0.85);
wordScorer = new LaplaceScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.5f);
wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d, new BytesRef(" "), 0.5f);
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor teh Got-Jewel"), generator, 4f, 4, ir, "body", wordScorer, 1, 2);
assertThat(corrections.length, equalTo(4));
assertThat(corrections[0].join(space).utf8ToString(), equalTo("xorr the god jewel"));
@ -179,18 +167,18 @@ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
spellchecker.setMinPrefix(1);
spellchecker.setMinQueryLength(1);
suggester = new NoisyChannelSpellChecker(0.85);
wordScorer = new LaplaceScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.5f);
wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d, new BytesRef(" "), 0.5f);
corrections = suggester.getCorrections(analyzer, new BytesRef("captian usa"), generator, 2, 4, ir, "body", wordScorer, 1, 2);
assertThat(corrections[0].join(space).utf8ToString(), equalTo("captain america"));
assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("<em>captain america</em>"));
generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.85, 10, null, analyzer);
generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.85, 10, null, analyzer, MultiFields.getTerms(ir, "body"));
corrections = suggester.getCorrections(analyzer, new BytesRef("captian usw"), generator, 2, 4, ir, "body", wordScorer, 1, 2);
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("<em>captain america</em>"));
// Make sure that user supplied text is not marked as highlighted in the presence of a synonym filter
generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.85, 10, null, analyzer);
generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.85, 10, null, analyzer, MultiFields.getTerms(ir, "body"));
corrections = suggester.getCorrections(analyzer, new BytesRef("captain usw"), generator, 2, 4, ir, "body", wordScorer, 1, 2);
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("captain <em>america</em>"));
@ -245,12 +233,12 @@ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
}
DirectoryReader ir = DirectoryReader.open(writer, false);
LaplaceScorer wordScorer = new LaplaceScorer(ir, "body_ngram", 0.95d, new BytesRef(" "), 0.5f);
LaplaceScorer wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.95d, new BytesRef(" "), 0.5f);
NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker();
DirectSpellChecker spellchecker = new DirectSpellChecker();
spellchecker.setMinQueryLength(1);
DirectCandidateGenerator forward = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_ALWAYS, ir, 0.95, 10);
DirectCandidateGenerator reverse = new DirectCandidateGenerator(spellchecker, "body_reverse", SuggestMode.SUGGEST_ALWAYS, ir, 0.95, 10, wrapper, wrapper);
DirectCandidateGenerator reverse = new DirectCandidateGenerator(spellchecker, "body_reverse", SuggestMode.SUGGEST_ALWAYS, ir, 0.95, 10, wrapper, wrapper, MultiFields.getTerms(ir, "body_reverse"));
CandidateGenerator generator = new MultiCandidateGeneratorWrapper(10, forward, reverse);
Correction[] corrections = suggester.getCorrections(wrapper, new BytesRef("american cae"), generator, 1, 1, ir, "body", wordScorer, 1, 2);
@ -329,7 +317,7 @@ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
}
DirectoryReader ir = DirectoryReader.open(writer, false);
WordScorer wordScorer = new LinearInterpoatingScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.5, 0.4, 0.1);
WordScorer wordScorer = new LinearInterpoatingScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d, new BytesRef(" "), 0.5, 0.4, 0.1);
NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker();
DirectSpellChecker spellchecker = new DirectSpellChecker();
@ -343,7 +331,7 @@ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
assertThat(corrections.length, equalTo(0));
// assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ape"));
wordScorer = new LinearInterpoatingScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.5, 0.4, 0.1);
wordScorer = new LinearInterpoatingScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d, new BytesRef(" "), 0.5, 0.4, 0.1);
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 0, 3);
assertThat(corrections.length, equalTo(4));
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
@ -390,16 +378,16 @@ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
spellchecker.setMinPrefix(1);
spellchecker.setMinQueryLength(1);
suggester = new NoisyChannelSpellChecker(0.95);
wordScorer = new LinearInterpoatingScorer(ir, "body_ngram", 0.95d, new BytesRef(" "), 0.5, 0.4, 0.1);
wordScorer = new LinearInterpoatingScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.95d, new BytesRef(" "), 0.5, 0.4, 0.1);
corrections = suggester.getCorrections(analyzer, new BytesRef("captian usa"), generator, 2, 4, ir, "body", wordScorer, 1, 3);
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.95, 10, null, analyzer);
generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.95, 10, null, analyzer, MultiFields.getTerms(ir, "body"));
corrections = suggester.getCorrections(analyzer, new BytesRef("captian usw"), generator, 2, 4, ir, "body", wordScorer, 1, 3);
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
wordScorer = new StupidBackoffScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.4);
wordScorer = new StupidBackoffScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d, new BytesRef(" "), 0.4);
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 2, ir, "body", wordScorer, 0, 3);
assertThat(corrections.length, equalTo(2));
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));