Prevent Phrase Suggester from failing on missing fields.
If the field is mapped, the phrase suggester should return empty results or skip candidate generation when the field is not in the index, rather than failing hard with an illegal argument exception. Some shards might not have a value in a certain field. Closes #3469
This commit is contained in:
parent
5d91bb04b6
commit
57c0d29114
|
@ -18,18 +18,8 @@
|
|||
*/
|
||||
package org.elasticsearch.search.suggest.phrase;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.index.*;
|
||||
import org.apache.lucene.search.spell.DirectSpellChecker;
|
||||
import org.apache.lucene.search.spell.SuggestMode;
|
||||
import org.apache.lucene.search.spell.SuggestWord;
|
||||
|
@ -38,6 +28,12 @@ import org.apache.lucene.util.CharsRef;
|
|||
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
||||
import org.elasticsearch.search.suggest.SuggestUtils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
//TODO public for tests
|
||||
public final class DirectCandidateGenerator extends CandidateGenerator {
|
||||
|
||||
|
@ -58,20 +54,19 @@ public final class DirectCandidateGenerator extends CandidateGenerator {
|
|||
private final int numCandidates;
|
||||
|
||||
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates) throws IOException {
|
||||
this(spellchecker, field, suggestMode, reader, nonErrorLikelihood, numCandidates, null, null);
|
||||
this(spellchecker, field, suggestMode, reader, nonErrorLikelihood, numCandidates, null, null, MultiFields.getTerms(reader, field));
|
||||
}
|
||||
|
||||
|
||||
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates, Analyzer preFilter, Analyzer postFilter) throws IOException {
|
||||
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates, Analyzer preFilter, Analyzer postFilter, Terms terms) throws IOException {
|
||||
if (terms == null) {
|
||||
throw new ElasticSearchIllegalArgumentException("generator field [" + field + "] doesn't exist");
|
||||
}
|
||||
this.spellchecker = spellchecker;
|
||||
this.field = field;
|
||||
this.numCandidates = numCandidates;
|
||||
this.suggestMode = suggestMode;
|
||||
this.reader = reader;
|
||||
Terms terms = MultiFields.getTerms(reader, field);
|
||||
if (terms == null) {
|
||||
throw new ElasticSearchIllegalArgumentException("generator field [" + field + "] doesn't exist");
|
||||
}
|
||||
final long dictSize = terms.getSumTotalTermFreq();
|
||||
this.useTotalTermFrequency = dictSize != -1;
|
||||
this.dictSize = dictSize == -1 ? reader.maxDoc() : dictSize;
|
||||
|
|
|
@ -18,27 +18,28 @@
|
|||
*/
|
||||
package org.elasticsearch.search.suggest.phrase;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.elasticsearch.search.suggest.SuggestUtils;
|
||||
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;
|
||||
|
||||
import java.io.IOException;
|
||||
//TODO public for tests
|
||||
public final class LaplaceScorer extends WordScorer {
|
||||
|
||||
public static final WordScorerFactory FACTORY = new WordScorer.WordScorerFactory() {
|
||||
@Override
|
||||
public WordScorer newScorer(IndexReader reader, String field, double realWordLikelyhood, BytesRef separator) throws IOException {
|
||||
return new LaplaceScorer(reader, field, realWordLikelyhood, separator, 0.5);
|
||||
public WordScorer newScorer(IndexReader reader, Terms terms, String field, double realWordLikelyhood, BytesRef separator) throws IOException {
|
||||
return new LaplaceScorer(reader, terms, field, realWordLikelyhood, separator, 0.5);
|
||||
}
|
||||
};
|
||||
|
||||
private double alpha;
|
||||
|
||||
public LaplaceScorer(IndexReader reader, String field,
|
||||
public LaplaceScorer(IndexReader reader, Terms terms, String field,
|
||||
double realWordLikelyhood, BytesRef separator, double alpha) throws IOException {
|
||||
super(reader, field, realWordLikelyhood, separator);
|
||||
super(reader, terms, field, realWordLikelyhood, separator);
|
||||
this.alpha = alpha;
|
||||
}
|
||||
|
||||
|
|
|
@ -18,13 +18,14 @@
|
|||
*/
|
||||
package org.elasticsearch.search.suggest.phrase;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.elasticsearch.search.suggest.SuggestUtils;
|
||||
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
//TODO public for tests
|
||||
public final class LinearInterpoatingScorer extends WordScorer {
|
||||
|
||||
|
@ -32,9 +33,9 @@ public final class LinearInterpoatingScorer extends WordScorer {
|
|||
private final double bigramLambda;
|
||||
private final double trigramLambda;
|
||||
|
||||
public LinearInterpoatingScorer(IndexReader reader, String field, double realWordLikelyhood, BytesRef separator, double trigramLambda, double bigramLambda, double unigramLambda)
|
||||
public LinearInterpoatingScorer(IndexReader reader, Terms terms, String field, double realWordLikelyhood, BytesRef separator, double trigramLambda, double bigramLambda, double unigramLambda)
|
||||
throws IOException {
|
||||
super(reader, field, realWordLikelyhood, separator);
|
||||
super(reader, terms, field, realWordLikelyhood, separator);
|
||||
double sum = unigramLambda + bigramLambda + trigramLambda;
|
||||
this.unigramLambda = unigramLambda / sum;
|
||||
this.bigramLambda = bigramLambda / sum;
|
||||
|
|
|
@ -18,10 +18,9 @@
|
|||
*/
|
||||
package org.elasticsearch.search.suggest.phrase;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
||||
import org.elasticsearch.common.xcontent.XContentParser;
|
||||
|
@ -33,6 +32,8 @@ import org.elasticsearch.search.suggest.SuggestUtils;
|
|||
import org.elasticsearch.search.suggest.SuggestionSearchContext;
|
||||
import org.elasticsearch.search.suggest.phrase.PhraseSuggestionContext.DirectCandidateGenerator;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public final class PhraseSuggestParser implements SuggestContextParser {
|
||||
|
||||
private PhraseSuggester suggester;
|
||||
|
@ -135,6 +136,10 @@ public final class PhraseSuggestParser implements SuggestContextParser {
|
|||
throw new ElasticSearchIllegalArgumentException("The required field option is missing");
|
||||
}
|
||||
|
||||
if (mapperService.smartNameFieldMapper(suggestion.getField()) == null) {
|
||||
throw new ElasticSearchIllegalArgumentException("No mapping found for field [" + suggestion.getField() + "]");
|
||||
}
|
||||
|
||||
if (suggestion.model() == null) {
|
||||
suggestion.setModel(StupidBackoffScorer.FACTORY);
|
||||
}
|
||||
|
@ -209,9 +214,9 @@ public final class PhraseSuggestParser implements SuggestContextParser {
|
|||
}
|
||||
suggestion.setModel(new WordScorer.WordScorerFactory() {
|
||||
@Override
|
||||
public WordScorer newScorer(IndexReader reader, String field, double realWordLikelyhood, BytesRef separator)
|
||||
public WordScorer newScorer(IndexReader reader, Terms terms, String field, double realWordLikelyhood, BytesRef separator)
|
||||
throws IOException {
|
||||
return new LinearInterpoatingScorer(reader, field, realWordLikelyhood, separator, lambdas[0], lambdas[1],
|
||||
return new LinearInterpoatingScorer(reader, terms, field, realWordLikelyhood, separator, lambdas[0], lambdas[1],
|
||||
lambdas[2]);
|
||||
}
|
||||
});
|
||||
|
@ -230,9 +235,9 @@ public final class PhraseSuggestParser implements SuggestContextParser {
|
|||
final double alpha = theAlpha;
|
||||
suggestion.setModel(new WordScorer.WordScorerFactory() {
|
||||
@Override
|
||||
public WordScorer newScorer(IndexReader reader, String field, double realWordLikelyhood, BytesRef separator)
|
||||
public WordScorer newScorer(IndexReader reader, Terms terms, String field, double realWordLikelyhood, BytesRef separator)
|
||||
throws IOException {
|
||||
return new LaplaceScorer(reader, field, realWordLikelyhood, separator, alpha);
|
||||
return new LaplaceScorer(reader, terms, field, realWordLikelyhood, separator, alpha);
|
||||
}
|
||||
});
|
||||
|
||||
|
@ -250,9 +255,9 @@ public final class PhraseSuggestParser implements SuggestContextParser {
|
|||
final double discount = theDiscount;
|
||||
suggestion.setModel(new WordScorer.WordScorerFactory() {
|
||||
@Override
|
||||
public WordScorer newScorer(IndexReader reader, String field, double realWordLikelyhood, BytesRef separator)
|
||||
public WordScorer newScorer(IndexReader reader, Terms terms, String field, double realWordLikelyhood, BytesRef separator)
|
||||
throws IOException {
|
||||
return new StupidBackoffScorer(reader, field, realWordLikelyhood, separator, discount);
|
||||
return new StupidBackoffScorer(reader, terms, field, realWordLikelyhood, separator, discount);
|
||||
}
|
||||
});
|
||||
|
||||
|
@ -281,6 +286,9 @@ public final class PhraseSuggestParser implements SuggestContextParser {
|
|||
if (!SuggestUtils.parseDirectSpellcheckerSettings(parser, fieldName, generator)) {
|
||||
if ("field".equals(fieldName)) {
|
||||
generator.setField(parser.text());
|
||||
if (mapperService.smartNameFieldMapper(generator.field()) == null) {
|
||||
throw new ElasticSearchIllegalArgumentException("No mapping found for field [" + generator.field() + "]");
|
||||
}
|
||||
} else if ("size".equals(fieldName)) {
|
||||
generator.size(parser.intValue());
|
||||
} else if ("pre_filter".equals(fieldName) || "preFilter".equals(fieldName)) {
|
||||
|
|
|
@ -21,6 +21,8 @@ package org.elasticsearch.search.suggest.phrase;
|
|||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.search.spell.DirectSpellChecker;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
|
@ -30,9 +32,11 @@ import org.elasticsearch.common.text.Text;
|
|||
import org.elasticsearch.search.suggest.Suggest.Suggestion;
|
||||
import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry;
|
||||
import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry.Option;
|
||||
import org.elasticsearch.search.suggest.SuggestContextParser;
|
||||
import org.elasticsearch.search.suggest.SuggestUtils;
|
||||
import org.elasticsearch.search.suggest.Suggester;
|
||||
import org.elasticsearch.search.suggest.*;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
@ -52,25 +56,35 @@ public final class PhraseSuggester extends Suggester<PhraseSuggestionContext> {
|
|||
public Suggestion<? extends Entry<? extends Option>> innerExecute(String name, PhraseSuggestionContext suggestion,
|
||||
IndexReader indexReader, CharsRef spare) throws IOException {
|
||||
double realWordErrorLikelihood = suggestion.realworldErrorLikelyhood();
|
||||
UnicodeUtil.UTF8toUTF16(suggestion.getText(), spare);
|
||||
Suggestion.Entry<Option> resultEntry = new Suggestion.Entry<Option>(new StringText(spare.toString()), 0, spare.length);
|
||||
final Suggestion<Entry<Option>> response = new Suggestion<Entry<Option>>(name, suggestion.getSize());
|
||||
response.addTerm(resultEntry);
|
||||
|
||||
List<PhraseSuggestionContext.DirectCandidateGenerator> generators = suggestion.generators();
|
||||
CandidateGenerator[] gens = new CandidateGenerator[generators.size()];
|
||||
for (int i = 0; i < gens.length; i++) {
|
||||
final int numGenerators = generators.size();
|
||||
final List<CandidateGenerator> gens = new ArrayList<CandidateGenerator>(generators.size());
|
||||
for (int i = 0; i < numGenerators; i++) {
|
||||
PhraseSuggestionContext.DirectCandidateGenerator generator = generators.get(i);
|
||||
DirectSpellChecker directSpellChecker = SuggestUtils.getDirectSpellChecker(generator);
|
||||
gens[i] = new DirectCandidateGenerator(directSpellChecker, generator.field(), generator.suggestMode(), indexReader, realWordErrorLikelihood, generator.size(), generator.preFilter(), generator.postFilter());
|
||||
Terms terms = MultiFields.getTerms(indexReader, generator.field());
|
||||
if (terms != null) {
|
||||
gens.add(new DirectCandidateGenerator(directSpellChecker, generator.field(), generator.suggestMode(),
|
||||
indexReader, realWordErrorLikelihood, generator.size(), generator.preFilter(), generator.postFilter(), terms));
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
final String suggestField = suggestion.getField();
|
||||
final Terms suggestTerms = MultiFields.getTerms(indexReader, suggestField);
|
||||
if (gens.size() > 0 && suggestTerms != null) {
|
||||
final NoisyChannelSpellChecker checker = new NoisyChannelSpellChecker(realWordErrorLikelihood, suggestion.getRequireUnigram(), suggestion.getTokenLimit());
|
||||
final BytesRef separator = suggestion.separator();
|
||||
TokenStream stream = checker.tokenStream(suggestion.getAnalyzer(), suggestion.getText(), spare, suggestion.getField());
|
||||
WordScorer wordScorer = suggestion.model().newScorer(indexReader, suggestion.getField(), realWordErrorLikelihood, separator);
|
||||
Correction[] corrections = checker.getCorrections(stream, new MultiCandidateGeneratorWrapper(suggestion.getShardSize(), gens), suggestion.maxErrors(),
|
||||
|
||||
WordScorer wordScorer = suggestion.model().newScorer(indexReader, suggestTerms, suggestField, realWordErrorLikelihood, separator);
|
||||
Correction[] corrections = checker.getCorrections(stream, new MultiCandidateGeneratorWrapper(suggestion.getShardSize(),
|
||||
gens.toArray(new CandidateGenerator[gens.size()])), suggestion.maxErrors(),
|
||||
suggestion.getShardSize(), indexReader,wordScorer , separator, suggestion.confidence(), suggestion.gramSize());
|
||||
|
||||
UnicodeUtil.UTF8toUTF16(suggestion.getText(), spare);
|
||||
|
||||
Suggestion.Entry<Option> resultEntry = new Suggestion.Entry<Option>(new StringText(spare.toString()), 0, spare.length);
|
||||
BytesRef byteSpare = new BytesRef();
|
||||
for (Correction correction : corrections) {
|
||||
UnicodeUtil.UTF8toUTF16(correction.join(SEPARATOR, byteSpare, null, null), spare);
|
||||
|
@ -82,8 +96,7 @@ public final class PhraseSuggester extends Suggester<PhraseSuggestionContext> {
|
|||
}
|
||||
resultEntry.addOption(new Suggestion.Entry.Option(phrase, highlighted, (float) (correction.score)));
|
||||
}
|
||||
final Suggestion<Entry<Option>> response = new Suggestion<Entry<Option>>(name, suggestion.getSize());
|
||||
response.addTerm(resultEntry);
|
||||
}
|
||||
return response;
|
||||
}
|
||||
|
||||
|
|
|
@ -18,26 +18,27 @@
|
|||
*/
|
||||
package org.elasticsearch.search.suggest.phrase;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.elasticsearch.search.suggest.SuggestUtils;
|
||||
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public class StupidBackoffScorer extends WordScorer {
|
||||
public static final WordScorerFactory FACTORY = new WordScorer.WordScorerFactory() {
|
||||
@Override
|
||||
public WordScorer newScorer(IndexReader reader, String field, double realWordLikelyhood, BytesRef separator) throws IOException {
|
||||
return new StupidBackoffScorer(reader, field, realWordLikelyhood, separator, 0.4f);
|
||||
public WordScorer newScorer(IndexReader reader, Terms terms, String field, double realWordLikelyhood, BytesRef separator) throws IOException {
|
||||
return new StupidBackoffScorer(reader, terms, field, realWordLikelyhood, separator, 0.4f);
|
||||
}
|
||||
};
|
||||
|
||||
private final double discount;
|
||||
|
||||
public StupidBackoffScorer(IndexReader reader, String field, double realWordLikelyhood, BytesRef separator, double discount)
|
||||
public StupidBackoffScorer(IndexReader reader, Terms terms,String field, double realWordLikelyhood, BytesRef separator, double discount)
|
||||
throws IOException {
|
||||
super(reader, field, realWordLikelyhood, separator);
|
||||
super(reader, terms, field, realWordLikelyhood, separator);
|
||||
this.discount = discount;
|
||||
}
|
||||
|
||||
|
|
|
@ -18,8 +18,6 @@
|
|||
*/
|
||||
package org.elasticsearch.search.suggest.phrase;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.index.Terms;
|
||||
|
@ -29,6 +27,8 @@ import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
|||
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;
|
||||
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.CandidateSet;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
//TODO public for tests
|
||||
public abstract class WordScorer {
|
||||
protected final IndexReader reader;
|
||||
|
@ -43,11 +43,15 @@ public abstract class WordScorer {
|
|||
private final boolean useTotalTermFreq;
|
||||
|
||||
public WordScorer(IndexReader reader, String field, double realWordLikelyHood, BytesRef separator) throws IOException {
|
||||
this(reader, MultiFields.getTerms(reader, field), field, realWordLikelyHood, separator);
|
||||
}
|
||||
|
||||
public WordScorer(IndexReader reader, Terms terms, String field, double realWordLikelyHood, BytesRef separator) throws IOException {
|
||||
this.field = field;
|
||||
this.terms = MultiFields.getTerms(reader, field);
|
||||
if (terms == null) {
|
||||
throw new ElasticSearchIllegalArgumentException("Field: [" + field + "] does not exist");
|
||||
}
|
||||
this.terms = terms;
|
||||
final long vocSize = terms.getSumTotalTermFreq();
|
||||
this.vocabluarySize = vocSize == -1 ? reader.maxDoc() : vocSize;
|
||||
this.useTotalTermFreq = vocSize != -1;
|
||||
|
@ -95,7 +99,7 @@ public abstract class WordScorer {
|
|||
}
|
||||
|
||||
public static interface WordScorerFactory {
|
||||
public WordScorer newScorer(IndexReader reader, String field,
|
||||
double realWordLikelyhood, BytesRef separator) throws IOException;
|
||||
public WordScorer newScorer(IndexReader reader, Terms terms,
|
||||
String field, double realWordLikelyhood, BytesRef separator) throws IOException;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -40,8 +40,10 @@ import org.junit.Test;
|
|||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
|
||||
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS;
|
||||
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS;
|
||||
|
@ -50,6 +52,7 @@ import static org.elasticsearch.index.query.QueryBuilders.matchQuery;
|
|||
import static org.elasticsearch.search.suggest.SuggestBuilder.phraseSuggestion;
|
||||
import static org.elasticsearch.search.suggest.SuggestBuilder.termSuggestion;
|
||||
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSuggestionSize;
|
||||
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertThrows;
|
||||
import static org.hamcrest.Matchers.*;
|
||||
|
||||
/**
|
||||
|
@ -112,7 +115,6 @@ public class SuggestSearchTests extends AbstractSharedClusterTest {
|
|||
|
||||
@Test // see #2729
|
||||
public void testSizeOneShard() throws Exception {
|
||||
client().admin().indices().prepareDelete().execute().actionGet();
|
||||
client().admin().indices().prepareCreate("test")
|
||||
.setSettings(settingsBuilder()
|
||||
.put(SETTING_NUMBER_OF_SHARDS, 1)
|
||||
|
@ -160,8 +162,70 @@ public class SuggestSearchTests extends AbstractSharedClusterTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testSimple() throws Exception {
|
||||
public void testUnmappedField() throws IOException, InterruptedException, ExecutionException {
|
||||
int numShards = between(1,5);
|
||||
Builder builder = ImmutableSettings.builder();
|
||||
builder.put("index.number_of_shards", numShards).put("index.number_of_replicas", between(0, 2));
|
||||
builder.put("index.analysis.analyzer.biword.tokenizer", "standard");
|
||||
builder.putArray("index.analysis.analyzer.biword.filter", "shingler", "lowercase");
|
||||
builder.put("index.analysis.filter.shingler.type", "shingle");
|
||||
builder.put("index.analysis.filter.shingler.min_shingle_size", 2);
|
||||
builder.put("index.analysis.filter.shingler.max_shingle_size", 3);
|
||||
|
||||
XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type1")
|
||||
.startObject("properties")
|
||||
.startObject("name")
|
||||
.field("type", "multi_field")
|
||||
.field("path", "just_name")
|
||||
.startObject("fields")
|
||||
.startObject("name")
|
||||
.field("type", "string")
|
||||
.endObject()
|
||||
.startObject("name_shingled")
|
||||
.field("type", "string")
|
||||
.field("index_analyzer", "biword")
|
||||
.field("search_analyzer", "standard")
|
||||
.endObject()
|
||||
.endObject()
|
||||
.endObject()
|
||||
.endObject()
|
||||
.endObject().endObject();
|
||||
client().admin().indices().prepareDelete().execute().actionGet();
|
||||
client().admin().indices().prepareCreate("test").setSettings(builder.build()).addMapping("type1", mapping).execute().actionGet();
|
||||
client().admin().cluster().prepareHealth("test").setWaitForGreenStatus().execute().actionGet();
|
||||
indexRandom("test", true,
|
||||
client().prepareIndex("test", "type1")
|
||||
.setSource(XContentFactory.jsonBuilder().startObject().field("name", "I like iced tea").endObject()),
|
||||
client().prepareIndex("test", "type1")
|
||||
.setSource(XContentFactory.jsonBuilder().startObject().field("name", "I like tea.").endObject()),
|
||||
client().prepareIndex("test", "type1")
|
||||
.setSource(XContentFactory.jsonBuilder().startObject().field("name", "I like ice cream.").endObject()));
|
||||
Suggest searchSuggest = searchSuggest(client(),
|
||||
"ice tea",
|
||||
phraseSuggestion("did_you_mean").field("name_shingled")
|
||||
.addCandidateGenerator(PhraseSuggestionBuilder.candidateGenerator("name").prefixLength(0).minWordLength(0).suggestMode("always").maxEdits(2))
|
||||
.gramSize(3));
|
||||
ElasticsearchAssertions.assertSuggestion(searchSuggest, 0, 0, "did_you_mean", "iced tea");
|
||||
{
|
||||
SearchRequestBuilder suggestBuilder = client().prepareSearch().setSearchType(SearchType.COUNT);
|
||||
suggestBuilder.setSuggestText("tetsting sugestion");
|
||||
suggestBuilder.addSuggestion(phraseSuggestion("did_you_mean").field("nosuchField")
|
||||
.addCandidateGenerator(PhraseSuggestionBuilder.candidateGenerator("name").prefixLength(0).minWordLength(0).suggestMode("always").maxEdits(2))
|
||||
.gramSize(3));
|
||||
assertThrows(suggestBuilder, SearchPhaseExecutionException.class);
|
||||
}
|
||||
{
|
||||
SearchRequestBuilder suggestBuilder = client().prepareSearch().setSearchType(SearchType.COUNT);
|
||||
suggestBuilder.setSuggestText("tetsting sugestion");
|
||||
suggestBuilder.addSuggestion(phraseSuggestion("did_you_mean").field("nosuchField")
|
||||
.addCandidateGenerator(PhraseSuggestionBuilder.candidateGenerator("name").prefixLength(0).minWordLength(0).suggestMode("always").maxEdits(2))
|
||||
.gramSize(3));
|
||||
assertThrows(suggestBuilder, SearchPhaseExecutionException.class);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSimple() throws Exception {
|
||||
client().admin().indices().prepareCreate("test")
|
||||
.setSettings(settingsBuilder()
|
||||
.put(SETTING_NUMBER_OF_SHARDS, 5)
|
||||
|
@ -1033,6 +1097,7 @@ public class SuggestSearchTests extends AbstractSharedClusterTest {
|
|||
builder.addSuggestion(suggestion);
|
||||
}
|
||||
SearchResponse actionGet = builder.execute().actionGet();
|
||||
assertThat(Arrays.toString(actionGet.getShardFailures()), actionGet.getFailedShards(), equalTo(expectShardsFailed));
|
||||
return actionGet.getSuggest();
|
||||
}
|
||||
|
||||
|
@ -1081,6 +1146,65 @@ public class SuggestSearchTests extends AbstractSharedClusterTest {
|
|||
assertThat(suggest.getSuggestion("simple").getEntries().get(0).getOptions().size(), equalTo(3));
|
||||
}
|
||||
|
||||
@Test // see #3469
|
||||
public void testShardFailures() throws IOException, InterruptedException {
|
||||
Builder builder = ImmutableSettings.builder();
|
||||
builder.put("index.number_of_shards", between(1,5)).put("index.number_of_replicas", between(0,3));
|
||||
builder.put("index.analysis.analyzer.suggest.tokenizer", "standard");
|
||||
builder.putArray("index.analysis.analyzer.suggest.filter", "standard", "lowercase", "shingler");
|
||||
builder.put("index.analysis.filter.shingler.type", "shingle");
|
||||
builder.put("index.analysis.filter.shingler.min_shingle_size", 2);
|
||||
builder.put("index.analysis.filter.shingler.max_shingle_size", 5);
|
||||
builder.put("index.analysis.filter.shingler.output_unigrams", true);
|
||||
|
||||
XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type1")
|
||||
.startObject("properties")
|
||||
.startObject("name")
|
||||
.field("type", "multi_field")
|
||||
.field("path", "just_name")
|
||||
.startObject("fields")
|
||||
.startObject("name")
|
||||
.field("type", "string")
|
||||
.field("analyzer", "suggest")
|
||||
.endObject()
|
||||
.endObject()
|
||||
.endObject()
|
||||
.endObject()
|
||||
.endObject().endObject();
|
||||
client().admin().indices().prepareDelete().execute().actionGet();
|
||||
client().admin().indices().prepareCreate("test").setSettings(builder.build()).addMapping("type1", mapping).execute().actionGet();
|
||||
client().admin().cluster().prepareHealth("test").setWaitForGreenStatus().execute().actionGet();
|
||||
client().prepareIndex("test", "type2", "1")
|
||||
.setSource(XContentFactory.jsonBuilder().startObject().field("foo", "bar").endObject()).execute().actionGet();
|
||||
client().prepareIndex("test", "type2", "2")
|
||||
.setSource(XContentFactory.jsonBuilder().startObject().field("foo", "bar").endObject()).execute().actionGet();
|
||||
client().prepareIndex("test", "type2", "3")
|
||||
.setSource(XContentFactory.jsonBuilder().startObject().field("foo", "bar").endObject()).execute().actionGet();
|
||||
client().prepareIndex("test", "type2", "4")
|
||||
.setSource(XContentFactory.jsonBuilder().startObject().field("foo", "bar").endObject()).execute().actionGet();
|
||||
client().prepareIndex("test", "type2", "5")
|
||||
.setSource(XContentFactory.jsonBuilder().startObject().field("foo", "bar").endObject()).execute().actionGet();
|
||||
client().prepareIndex("test", "type1", "1")
|
||||
.setSource(XContentFactory.jsonBuilder().startObject().field("name", "Just testing the suggestions api").endObject()).execute().actionGet();
|
||||
client().prepareIndex("test", "type1", "2")
|
||||
.setSource(XContentFactory.jsonBuilder().startObject().field("name", "An other title").endObject()).execute().actionGet();
|
||||
client().admin().indices().prepareRefresh().execute().actionGet();
|
||||
|
||||
// When searching on a shard with a non existing mapping, we should fail
|
||||
SearchRequestBuilder suggestBuilder = client().prepareSearch().setSearchType(SearchType.COUNT);
|
||||
suggestBuilder.setSuggestText("tetsting sugestion");
|
||||
suggestBuilder.addSuggestion(phraseSuggestion("did_you_mean").field("fielddoesnotexist").maxErrors(5.0f));
|
||||
assertThrows(suggestBuilder, SearchPhaseExecutionException.class);
|
||||
// When searching on a shard which does not hold yet any document of an existing type, we should not fail
|
||||
suggestBuilder = client().prepareSearch().setSearchType(SearchType.COUNT);
|
||||
suggestBuilder.setSuggestText("tetsting sugestion");
|
||||
suggestBuilder.addSuggestion(phraseSuggestion("did_you_mean").field("name").maxErrors(5.0f));
|
||||
SearchResponse searchResponse = suggestBuilder.execute().actionGet();
|
||||
ElasticsearchAssertions.assertNoFailures(searchResponse);
|
||||
ElasticsearchAssertions.assertSuggestion(searchResponse.getSuggest(), 0, 0, "did_you_mean", "testing suggestions");
|
||||
|
||||
}
|
||||
|
||||
@Test // see #3469
|
||||
public void testEmptyShards() throws IOException, InterruptedException {
|
||||
Builder builder = ImmutableSettings.builder();
|
||||
|
@ -1127,5 +1251,4 @@ public class SuggestSearchTests extends AbstractSharedClusterTest {
|
|||
ElasticsearchAssertions.assertNoFailures(searchResponse);
|
||||
ElasticsearchAssertions.assertSuggestion(searchResponse.getSuggest(), 0, 0, "did_you_mean", "testing suggestions");
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -17,16 +17,7 @@ package org.elasticsearch.test.unit.search.suggest.phrase;
|
|||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.Reader;
|
||||
import java.io.StringReader;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import com.google.common.base.Charsets;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
|
@ -44,24 +35,21 @@ import org.apache.lucene.document.TextField;
|
|||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.search.spell.DirectSpellChecker;
|
||||
import org.apache.lucene.search.spell.SuggestMode;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.elasticsearch.search.suggest.phrase.CandidateGenerator;
|
||||
import org.elasticsearch.search.suggest.phrase.Correction;
|
||||
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator;
|
||||
import org.elasticsearch.search.suggest.phrase.LaplaceScorer;
|
||||
import org.elasticsearch.search.suggest.phrase.LinearInterpoatingScorer;
|
||||
import org.elasticsearch.search.suggest.phrase.MultiCandidateGeneratorWrapper;
|
||||
import org.elasticsearch.search.suggest.phrase.NoisyChannelSpellChecker;
|
||||
import org.elasticsearch.search.suggest.phrase.StupidBackoffScorer;
|
||||
import org.elasticsearch.search.suggest.phrase.WordScorer;
|
||||
import org.elasticsearch.search.suggest.phrase.*;
|
||||
import org.elasticsearch.test.integration.ElasticsearchTestCase;
|
||||
import org.junit.Test;
|
||||
|
||||
import com.google.common.base.Charsets;
|
||||
import java.io.*;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
|
||||
private final BytesRef space = new BytesRef(" ");
|
||||
private final BytesRef preTag = new BytesRef("<em>");
|
||||
|
@ -106,7 +94,7 @@ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
|
|||
}
|
||||
|
||||
DirectoryReader ir = DirectoryReader.open(writer, false);
|
||||
WordScorer wordScorer = new LaplaceScorer(ir, "body_ngram", 0.95d, new BytesRef(" "), 0.5f);
|
||||
WordScorer wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.95d, new BytesRef(" "), 0.5f);
|
||||
|
||||
NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker();
|
||||
DirectSpellChecker spellchecker = new DirectSpellChecker();
|
||||
|
@ -123,7 +111,7 @@ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
|
|||
assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("american ame"));
|
||||
|
||||
suggester = new NoisyChannelSpellChecker(0.85);
|
||||
wordScorer = new LaplaceScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.5f);
|
||||
wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d, new BytesRef(" "), 0.5f);
|
||||
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 0, 2);
|
||||
assertThat(corrections.length, equalTo(4));
|
||||
assertThat(corrections[0].join(space).utf8ToString(), equalTo("xorr the god jewel"));
|
||||
|
@ -144,7 +132,7 @@ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
|
|||
|
||||
// Test some of the highlighting corner cases
|
||||
suggester = new NoisyChannelSpellChecker(0.85);
|
||||
wordScorer = new LaplaceScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.5f);
|
||||
wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d, new BytesRef(" "), 0.5f);
|
||||
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor teh Got-Jewel"), generator, 4f, 4, ir, "body", wordScorer, 1, 2);
|
||||
assertThat(corrections.length, equalTo(4));
|
||||
assertThat(corrections[0].join(space).utf8ToString(), equalTo("xorr the god jewel"));
|
||||
|
@ -179,18 +167,18 @@ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
|
|||
spellchecker.setMinPrefix(1);
|
||||
spellchecker.setMinQueryLength(1);
|
||||
suggester = new NoisyChannelSpellChecker(0.85);
|
||||
wordScorer = new LaplaceScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.5f);
|
||||
wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d, new BytesRef(" "), 0.5f);
|
||||
corrections = suggester.getCorrections(analyzer, new BytesRef("captian usa"), generator, 2, 4, ir, "body", wordScorer, 1, 2);
|
||||
assertThat(corrections[0].join(space).utf8ToString(), equalTo("captain america"));
|
||||
assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("<em>captain america</em>"));
|
||||
|
||||
generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.85, 10, null, analyzer);
|
||||
generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.85, 10, null, analyzer, MultiFields.getTerms(ir, "body"));
|
||||
corrections = suggester.getCorrections(analyzer, new BytesRef("captian usw"), generator, 2, 4, ir, "body", wordScorer, 1, 2);
|
||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
|
||||
assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("<em>captain america</em>"));
|
||||
|
||||
// Make sure that user supplied text is not marked as highlighted in the presence of a synonym filter
|
||||
generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.85, 10, null, analyzer);
|
||||
generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.85, 10, null, analyzer, MultiFields.getTerms(ir, "body"));
|
||||
corrections = suggester.getCorrections(analyzer, new BytesRef("captain usw"), generator, 2, 4, ir, "body", wordScorer, 1, 2);
|
||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
|
||||
assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("captain <em>america</em>"));
|
||||
|
@ -245,12 +233,12 @@ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
|
|||
}
|
||||
|
||||
DirectoryReader ir = DirectoryReader.open(writer, false);
|
||||
LaplaceScorer wordScorer = new LaplaceScorer(ir, "body_ngram", 0.95d, new BytesRef(" "), 0.5f);
|
||||
LaplaceScorer wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.95d, new BytesRef(" "), 0.5f);
|
||||
NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker();
|
||||
DirectSpellChecker spellchecker = new DirectSpellChecker();
|
||||
spellchecker.setMinQueryLength(1);
|
||||
DirectCandidateGenerator forward = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_ALWAYS, ir, 0.95, 10);
|
||||
DirectCandidateGenerator reverse = new DirectCandidateGenerator(spellchecker, "body_reverse", SuggestMode.SUGGEST_ALWAYS, ir, 0.95, 10, wrapper, wrapper);
|
||||
DirectCandidateGenerator reverse = new DirectCandidateGenerator(spellchecker, "body_reverse", SuggestMode.SUGGEST_ALWAYS, ir, 0.95, 10, wrapper, wrapper, MultiFields.getTerms(ir, "body_reverse"));
|
||||
CandidateGenerator generator = new MultiCandidateGeneratorWrapper(10, forward, reverse);
|
||||
|
||||
Correction[] corrections = suggester.getCorrections(wrapper, new BytesRef("american cae"), generator, 1, 1, ir, "body", wordScorer, 1, 2);
|
||||
|
@ -329,7 +317,7 @@ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
|
|||
}
|
||||
|
||||
DirectoryReader ir = DirectoryReader.open(writer, false);
|
||||
WordScorer wordScorer = new LinearInterpoatingScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.5, 0.4, 0.1);
|
||||
WordScorer wordScorer = new LinearInterpoatingScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d, new BytesRef(" "), 0.5, 0.4, 0.1);
|
||||
|
||||
NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker();
|
||||
DirectSpellChecker spellchecker = new DirectSpellChecker();
|
||||
|
@ -343,7 +331,7 @@ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
|
|||
assertThat(corrections.length, equalTo(0));
|
||||
// assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ape"));
|
||||
|
||||
wordScorer = new LinearInterpoatingScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.5, 0.4, 0.1);
|
||||
wordScorer = new LinearInterpoatingScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d, new BytesRef(" "), 0.5, 0.4, 0.1);
|
||||
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 0, 3);
|
||||
assertThat(corrections.length, equalTo(4));
|
||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
|
||||
|
@ -390,16 +378,16 @@ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
|
|||
spellchecker.setMinPrefix(1);
|
||||
spellchecker.setMinQueryLength(1);
|
||||
suggester = new NoisyChannelSpellChecker(0.95);
|
||||
wordScorer = new LinearInterpoatingScorer(ir, "body_ngram", 0.95d, new BytesRef(" "), 0.5, 0.4, 0.1);
|
||||
wordScorer = new LinearInterpoatingScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.95d, new BytesRef(" "), 0.5, 0.4, 0.1);
|
||||
corrections = suggester.getCorrections(analyzer, new BytesRef("captian usa"), generator, 2, 4, ir, "body", wordScorer, 1, 3);
|
||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
|
||||
|
||||
generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.95, 10, null, analyzer);
|
||||
generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.95, 10, null, analyzer, MultiFields.getTerms(ir, "body"));
|
||||
corrections = suggester.getCorrections(analyzer, new BytesRef("captian usw"), generator, 2, 4, ir, "body", wordScorer, 1, 3);
|
||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
|
||||
|
||||
|
||||
wordScorer = new StupidBackoffScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.4);
|
||||
wordScorer = new StupidBackoffScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d, new BytesRef(" "), 0.4);
|
||||
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 2, ir, "body", wordScorer, 0, 3);
|
||||
assertThat(corrections.length, equalTo(2));
|
||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
|
||||
|
|
Loading…
Reference in New Issue