PhraseSuggester: Collate option should allow returning phrases with no matching docs

A new option `prune` has been added to allow users to control phrase suggestion pruning when `collate`
is set. If the new option is set, the phrase suggestion option will contain a boolean `collate_match`
indicating whether the respective result had hits in collation.

Closes
This commit is contained in:
Areek Zillur 2014-07-18 19:16:31 -04:00
parent 0faffcf372
commit f39d4e1f89
8 changed files with 99 additions and 6 deletions
docs/reference/search/suggesters
src

@ -169,9 +169,14 @@ can contain misspellings (See parameter descriptions below).
automatically made available as the `{{suggestion}}` variable, which
should be used in your query/filter. You can still specify your own
template `params` -- the `suggestion` value will be added to the
variables you specify. You can also specify a `preference` to control
variables you specify. You can specify a `preference` to control
on which shards the query is executed (see <<search-request-preference>>).
The default value is `_only_local`.
The default value is `_only_local`. Additionally, you can specify
a `prune` option to control whether all phrase suggestions are
returned. When it is set to `true`, each suggestion has an additional
option `collate_match`, which is `true` if matching documents
for the phrase were found and `false` otherwise. The default value for
`prune` is `false`.
[source,js]
--------------------------------------------------
@ -195,6 +200,7 @@ curl -XPOST 'localhost:9200/_search' -d {
},
"params": {"field_name" : "title"}, <3>
"preference": "_primary", <4>
"prune": true <5>
}
}
}
@ -207,6 +213,9 @@ curl -XPOST 'localhost:9200/_search' -d {
<3> An additional `field_name` variable has been specified in
`params` and is used by the `match` query.
<4> The default `preference` has been changed to `_primary`.
<5> All suggestions will be returned with an extra `collate_match`
option indicating whether the generated phrase matched any
document.
==== Smoothing Models

@ -21,6 +21,7 @@ package org.elasticsearch.search.suggest;
import org.apache.lucene.util.CollectionUtil;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.ElasticsearchIllegalStateException;
import org.elasticsearch.Version;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Streamable;
@ -521,17 +522,24 @@ public class Suggest implements Iterable<Suggest.Suggestion<? extends Entry<? ex
static final XContentBuilderString TEXT = new XContentBuilderString("text");
static final XContentBuilderString HIGHLIGHTED = new XContentBuilderString("highlighted");
static final XContentBuilderString SCORE = new XContentBuilderString("score");
static final XContentBuilderString COLLATE_MATCH = new XContentBuilderString("collate_match");
}
private Text text;
private Text highlighted;
private float score;
private Boolean collateMatch;
public Option(Text text, Text highlighted, float score) {
public Option(Text text, Text highlighted, float score, Boolean collateMatch) {
this.text = text;
this.highlighted = highlighted;
this.score = score;
this.collateMatch = collateMatch;
}
/**
 * Creates an option without a collate result: delegates to the four-argument
 * constructor, passing <code>null</code> as the collateMatch value.
 */
public Option(Text text, Text highlighted, float score) {
this(text, highlighted, score, null);
}
public Option(Text text, float score) {
@ -562,6 +570,14 @@ public class Suggest implements Iterable<Suggest.Suggestion<? extends Entry<? ex
public float getScore() {
return score;
}
/**
 * Reports whether collation found a match for this option.
 *
 * @return the recorded collate result, or <code>true</code> when no
 *         collate result was recorded for this option
 */
public boolean collateMatch() {
    // A null collateMatch means nothing was recorded; that counts as a match.
    return collateMatch == null || collateMatch;
}
protected void setScore(float score) {
this.score = score;
@ -572,6 +588,10 @@ public class Suggest implements Iterable<Suggest.Suggestion<? extends Entry<? ex
text = in.readText();
score = in.readFloat();
highlighted = in.readOptionalText();
if (in.getVersion().onOrAfter(Version.V_1_4_0)) {
collateMatch = in.readOptionalBoolean();
}
}
@Override
@ -579,6 +599,10 @@ public class Suggest implements Iterable<Suggest.Suggestion<? extends Entry<? ex
out.writeText(text);
out.writeFloat(score);
out.writeOptionalText(highlighted);
if (out.getVersion().onOrAfter(Version.V_1_4_0)) {
out.writeOptionalBoolean(collateMatch);
}
}
@Override
@ -595,6 +619,9 @@ public class Suggest implements Iterable<Suggest.Suggestion<? extends Entry<? ex
builder.field(Fields.HIGHLIGHTED, highlighted);
}
builder.field(Fields.SCORE, score);
if (collateMatch != null) {
builder.field(Fields.COLLATE_MATCH, collateMatch.booleanValue());
}
return builder;
}

@ -158,6 +158,12 @@ public final class PhraseSuggestParser implements SuggestContextParser {
suggestion.setPreference(parser.text());
} else if ("params".equals(fieldName)) {
suggestion.setCollateScriptParams(parser.map());
} else if ("prune".equals(fieldName)) {
if (parser.isBooleanValue()) {
suggestion.setCollatePrune(parser.booleanValue());
} else {
throw new ElasticsearchIllegalArgumentException("suggester[phrase][collate] prune must be either 'true' or 'false'");
}
} else {
throw new ElasticsearchIllegalArgumentException(
"suggester[phrase][collate] doesn't support field [" + fieldName + "]");

@ -106,9 +106,12 @@ public final class PhraseSuggester extends Suggester<PhraseSuggestionContext> {
BytesRef byteSpare = new BytesRef();
MultiSearchResponse multiSearchResponse = collate(suggestion, checkerResult, byteSpare, spare);
final boolean collateEnabled = multiSearchResponse != null;
final boolean collatePrune = suggestion.collatePrune();
for (int i = 0; i < checkerResult.corrections.length; i++) {
if (!hasMatchingDocs(multiSearchResponse, i)) {
boolean collateMatch = hasMatchingDocs(multiSearchResponse, i);
if (!collateMatch && !collatePrune) {
continue;
}
Correction correction = checkerResult.corrections[i];
@ -119,7 +122,11 @@ public final class PhraseSuggester extends Suggester<PhraseSuggestionContext> {
UnicodeUtil.UTF8toUTF16(correction.join(SEPARATOR, byteSpare, suggestion.getPreTag(), suggestion.getPostTag()), spare);
highlighted = new StringText(spare.toString());
}
resultEntry.addOption(new Suggestion.Entry.Option(phrase, highlighted, (float) (correction.score)));
if (collateEnabled && collatePrune) {
resultEntry.addOption(new Suggestion.Entry.Option(phrase, highlighted, (float) (correction.score), collateMatch));
} else {
resultEntry.addOption(new Suggestion.Entry.Option(phrase, highlighted, (float) (correction.score)));
}
}
} else {
response.addTerm(buildResultEntry(suggestion, spare, Double.MIN_VALUE));

@ -46,6 +46,7 @@ public final class PhraseSuggestionBuilder extends SuggestionBuilder<PhraseSugge
private String collateFilter;
private String collatePreference;
private Map<String, Object> collateParams;
private Boolean collatePrune;
public PhraseSuggestionBuilder(String name) {
super(name, "phrase");
@ -202,6 +203,14 @@ public final class PhraseSuggestionBuilder extends SuggestionBuilder<PhraseSugge
return this;
}
/**
 * Sets the <code>prune</code> option of the collate request.
 *
 * @param collatePrune value to emit as the <code>prune</code> field when this
 *                     builder is rendered
 * @return this builder, so calls can be chained
 */
public PhraseSuggestionBuilder collatePrune(boolean collatePrune) {
    this.collatePrune = Boolean.valueOf(collatePrune);
    return this;
}
@Override
public XContentBuilder innerToXContent(XContentBuilder builder, Params params) throws IOException {
if (realWordErrorLikelihood != null) {
@ -260,6 +269,9 @@ public final class PhraseSuggestionBuilder extends SuggestionBuilder<PhraseSugge
if (collateParams != null) {
builder.field("params", collateParams);
}
if (collatePrune != null) {
builder.field("prune", collatePrune.booleanValue());
}
builder.endObject();
}
return builder;

@ -52,6 +52,7 @@ class PhraseSuggestionContext extends SuggestionContext {
private WordScorer.WordScorerFactory scorer;
private boolean requireUnigram = true;
private boolean prune = false;
public PhraseSuggestionContext(Suggester<? extends PhraseSuggestionContext> suggester) {
super(suggester);
@ -221,4 +222,12 @@ class PhraseSuggestionContext extends SuggestionContext {
this.collateScriptParams = collateScriptParams;
}
/**
 * Stores the collate <code>prune</code> flag on this suggestion context.
 *
 * @param prune the new value of the flag
 */
void setCollatePrune(boolean prune) {
this.prune = prune;
}
/**
 * @return the collate <code>prune</code> flag; <code>false</code> unless it was
 *         changed through {@link #setCollatePrune(boolean)}
 */
boolean collatePrune() {
return prune;
}
}

@ -1096,7 +1096,7 @@ public class SuggestSearchTests extends ElasticsearchIntegrationTest {
}
@Test
public void suggestPhrasesInIndex() throws InterruptedException, ExecutionException, IOException {
public void testPhraseSuggesterCollate() throws InterruptedException, ExecutionException, IOException {
CreateIndexRequestBuilder builder = prepareCreate("test").setSettings(settingsBuilder()
.put(indexSettings())
.put(SETTING_NUMBER_OF_SHARDS, 1) // A single shard will help to keep the tests repeatable.
@ -1253,6 +1253,13 @@ public class SuggestSearchTests extends ElasticsearchIntegrationTest {
} catch (ElasticsearchException e) {
// expected
}
// collate request with prune set to true
PhraseSuggestionBuilder phraseSuggestWithParamsAndReturn = suggest.collateFilter(null).collateQuery(collateWithParams).collateParams(params).collatePrune(true);
searchSuggest = searchSuggest("united states house of representatives elections in washington 2006", phraseSuggestWithParamsAndReturn);
assertSuggestionSize(searchSuggest, 0, 10, "title");
assertSuggestionPhraseCollateMatchExists(searchSuggest, "title", 2);
}
protected Suggest searchSuggest(SuggestionBuilder<?>... suggestion) {

@ -314,6 +314,22 @@ public class ElasticsearchAssertions {
assertVersionSerializable(searchSuggest);
}
/**
 * Asserts that exactly <code>numberOfPhraseExists</code> options of the first entry of
 * the suggestion named <code>key</code> report a collate match.
 *
 * @param searchSuggest        the suggest response to inspect; must not be null
 * @param key                  name of the suggestion whose first entry is checked
 * @param numberOfPhraseExists expected number of options whose
 *                             <code>collateMatch()</code> returns <code>true</code>
 */
public static void assertSuggestionPhraseCollateMatchExists(Suggest searchSuggest, String key, int numberOfPhraseExists) {
    assertThat(searchSuggest, notNullValue());
    String msg = "Suggest result: " + searchSuggest.toString();
    assertThat(msg, searchSuggest.size(), greaterThanOrEqualTo(1));
    assertThat(msg, searchSuggest.getSuggestion(key).getName(), equalTo(key));
    int collateMatches = 0;
    for (Suggest.Suggestion.Entry.Option option : searchSuggest.getSuggestion(key).getEntries().get(0).getOptions()) {
        if (option.collateMatch()) {
            collateMatches++;
        }
    }
    // Pass msg so a count mismatch shows the full suggest result, as the checks above do.
    assertThat(msg, collateMatches, equalTo(numberOfPhraseExists));
}
public static void assertSuggestion(Suggest searchSuggest, int entry, int ord, String key, String text) {
assertThat(searchSuggest, notNullValue());
String msg = "Suggest result: " + searchSuggest.toString();