mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-03-31 12:28:51 +00:00
PhraseSuggester: Collate option should allow returning phrases with no matching docs
A new option `prune` has been added to allow users to control phrase suggestion pruning when `collate` is set. If the new option is set, the phrase suggestion option will contain a boolean `collate_match` indicating whether the respective result had hits in collation. Closes #6927
This commit is contained in:
parent
0faffcf372
commit
f39d4e1f89
docs/reference/search/suggesters
src
@ -169,9 +169,14 @@ can contain misspellings (See parameter descriptions below).
|
||||
automatically made available as the `{{suggestion}}` variable, which
|
||||
should be used in your query/filter. You can still specify your own
|
||||
template `params` -- the `suggestion` value will be added to the
|
||||
variables you specify. You can also specify a `preference` to control
|
||||
variables you specify. You can specify a `preference` to control
|
||||
on which shards the query is executed (see <<search-request-preference>>).
|
||||
The default value is `_only_local`.
|
||||
The default value is `_only_local`. Additionally, you can specify
|
||||
a `prune` option to control whether all phrase suggestions will be
|
||||
returned; when set to `true`, the suggestions will have an additional
|
||||
option `collate_match`, which will be `true` if matching documents
|
||||
for the phrase were found, `false` otherwise. The default value for
|
||||
`prune` is `false`.
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
@ -195,6 +200,7 @@ curl -XPOST 'localhost:9200/_search' -d {
|
||||
},
|
||||
"params": {"field_name" : "title"}, <3>
|
||||
"preference": "_primary", <4>
|
||||
"prune": true <5>
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -207,6 +213,9 @@ curl -XPOST 'localhost:9200/_search' -d {
|
||||
<3> An additional `field_name` variable has been specified in
|
||||
`params` and is used by the `match` query.
|
||||
<4> The default `preference` has been changed to `_primary`.
|
||||
<5> All suggestions will be returned with an extra `collate_match`
|
||||
option indicating whether the generated phrase matched any
|
||||
document.
|
||||
|
||||
==== Smoothing Models
|
||||
|
||||
|
@ -21,6 +21,7 @@ package org.elasticsearch.search.suggest;
|
||||
import org.apache.lucene.util.CollectionUtil;
|
||||
import org.elasticsearch.ElasticsearchException;
|
||||
import org.elasticsearch.ElasticsearchIllegalStateException;
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.common.io.stream.StreamInput;
|
||||
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||
import org.elasticsearch.common.io.stream.Streamable;
|
||||
@ -521,17 +522,24 @@ public class Suggest implements Iterable<Suggest.Suggestion<? extends Entry<? ex
|
||||
static final XContentBuilderString TEXT = new XContentBuilderString("text");
|
||||
static final XContentBuilderString HIGHLIGHTED = new XContentBuilderString("highlighted");
|
||||
static final XContentBuilderString SCORE = new XContentBuilderString("score");
|
||||
static final XContentBuilderString COLLATE_MATCH = new XContentBuilderString("collate_match");
|
||||
|
||||
}
|
||||
|
||||
private Text text;
|
||||
private Text highlighted;
|
||||
private float score;
|
||||
private Boolean collateMatch;
|
||||
|
||||
public Option(Text text, Text highlighted, float score) {
|
||||
public Option(Text text, Text highlighted, float score, Boolean collateMatch) {
|
||||
this.text = text;
|
||||
this.highlighted = highlighted;
|
||||
this.score = score;
|
||||
this.collateMatch = collateMatch;
|
||||
}
|
||||
|
||||
public Option(Text text, Text highlighted, float score) {
|
||||
this(text, highlighted, score, null);
|
||||
}
|
||||
|
||||
public Option(Text text, float score) {
|
||||
@ -562,6 +570,14 @@ public class Suggest implements Iterable<Suggest.Suggestion<? extends Entry<? ex
|
||||
public float getScore() {
|
||||
return score;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if collation has found a match for the entry.
|
||||
* If collate was not set, the value defaults to <code>true</code>.
|
||||
*/
|
||||
public boolean collateMatch() {
|
||||
return (collateMatch != null) ? collateMatch : true;
|
||||
}
|
||||
|
||||
protected void setScore(float score) {
|
||||
this.score = score;
|
||||
@ -572,6 +588,10 @@ public class Suggest implements Iterable<Suggest.Suggestion<? extends Entry<? ex
|
||||
text = in.readText();
|
||||
score = in.readFloat();
|
||||
highlighted = in.readOptionalText();
|
||||
|
||||
if (in.getVersion().onOrAfter(Version.V_1_4_0)) {
|
||||
collateMatch = in.readOptionalBoolean();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -579,6 +599,10 @@ public class Suggest implements Iterable<Suggest.Suggestion<? extends Entry<? ex
|
||||
out.writeText(text);
|
||||
out.writeFloat(score);
|
||||
out.writeOptionalText(highlighted);
|
||||
|
||||
if (out.getVersion().onOrAfter(Version.V_1_4_0)) {
|
||||
out.writeOptionalBoolean(collateMatch);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -595,6 +619,9 @@ public class Suggest implements Iterable<Suggest.Suggestion<? extends Entry<? ex
|
||||
builder.field(Fields.HIGHLIGHTED, highlighted);
|
||||
}
|
||||
builder.field(Fields.SCORE, score);
|
||||
if (collateMatch != null) {
|
||||
builder.field(Fields.COLLATE_MATCH, collateMatch.booleanValue());
|
||||
}
|
||||
return builder;
|
||||
}
|
||||
|
||||
|
@ -158,6 +158,12 @@ public final class PhraseSuggestParser implements SuggestContextParser {
|
||||
suggestion.setPreference(parser.text());
|
||||
} else if ("params".equals(fieldName)) {
|
||||
suggestion.setCollateScriptParams(parser.map());
|
||||
} else if ("prune".equals(fieldName)) {
|
||||
if (parser.isBooleanValue()) {
|
||||
suggestion.setCollatePrune(parser.booleanValue());
|
||||
} else {
|
||||
throw new ElasticsearchIllegalArgumentException("suggester[phrase][collate] prune must be either 'true' or 'false'");
|
||||
}
|
||||
} else {
|
||||
throw new ElasticsearchIllegalArgumentException(
|
||||
"suggester[phrase][collate] doesn't support field [" + fieldName + "]");
|
||||
|
@ -106,9 +106,12 @@ public final class PhraseSuggester extends Suggester<PhraseSuggestionContext> {
|
||||
BytesRef byteSpare = new BytesRef();
|
||||
|
||||
MultiSearchResponse multiSearchResponse = collate(suggestion, checkerResult, byteSpare, spare);
|
||||
final boolean collateEnabled = multiSearchResponse != null;
|
||||
final boolean collatePrune = suggestion.collatePrune();
|
||||
|
||||
for (int i = 0; i < checkerResult.corrections.length; i++) {
|
||||
if (!hasMatchingDocs(multiSearchResponse, i)) {
|
||||
boolean collateMatch = hasMatchingDocs(multiSearchResponse, i);
|
||||
if (!collateMatch && !collatePrune) {
|
||||
continue;
|
||||
}
|
||||
Correction correction = checkerResult.corrections[i];
|
||||
@ -119,7 +122,11 @@ public final class PhraseSuggester extends Suggester<PhraseSuggestionContext> {
|
||||
UnicodeUtil.UTF8toUTF16(correction.join(SEPARATOR, byteSpare, suggestion.getPreTag(), suggestion.getPostTag()), spare);
|
||||
highlighted = new StringText(spare.toString());
|
||||
}
|
||||
resultEntry.addOption(new Suggestion.Entry.Option(phrase, highlighted, (float) (correction.score)));
|
||||
if (collateEnabled && collatePrune) {
|
||||
resultEntry.addOption(new Suggestion.Entry.Option(phrase, highlighted, (float) (correction.score), collateMatch));
|
||||
} else {
|
||||
resultEntry.addOption(new Suggestion.Entry.Option(phrase, highlighted, (float) (correction.score)));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
response.addTerm(buildResultEntry(suggestion, spare, Double.MIN_VALUE));
|
||||
|
@ -46,6 +46,7 @@ public final class PhraseSuggestionBuilder extends SuggestionBuilder<PhraseSugge
|
||||
private String collateFilter;
|
||||
private String collatePreference;
|
||||
private Map<String, Object> collateParams;
|
||||
private Boolean collatePrune;
|
||||
|
||||
public PhraseSuggestionBuilder(String name) {
|
||||
super(name, "phrase");
|
||||
@ -202,6 +203,14 @@ public final class PhraseSuggestionBuilder extends SuggestionBuilder<PhraseSugge
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets whether to prune suggestions after collation
|
||||
*/
|
||||
public PhraseSuggestionBuilder collatePrune(boolean collatePrune) {
|
||||
this.collatePrune = collatePrune;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public XContentBuilder innerToXContent(XContentBuilder builder, Params params) throws IOException {
|
||||
if (realWordErrorLikelihood != null) {
|
||||
@ -260,6 +269,9 @@ public final class PhraseSuggestionBuilder extends SuggestionBuilder<PhraseSugge
|
||||
if (collateParams != null) {
|
||||
builder.field("params", collateParams);
|
||||
}
|
||||
if (collatePrune != null) {
|
||||
builder.field("prune", collatePrune.booleanValue());
|
||||
}
|
||||
builder.endObject();
|
||||
}
|
||||
return builder;
|
||||
|
@ -52,6 +52,7 @@ class PhraseSuggestionContext extends SuggestionContext {
|
||||
private WordScorer.WordScorerFactory scorer;
|
||||
|
||||
private boolean requireUnigram = true;
|
||||
private boolean prune = false;
|
||||
|
||||
public PhraseSuggestionContext(Suggester<? extends PhraseSuggestionContext> suggester) {
|
||||
super(suggester);
|
||||
@ -221,4 +222,12 @@ class PhraseSuggestionContext extends SuggestionContext {
|
||||
this.collateScriptParams = collateScriptParams;
|
||||
}
|
||||
|
||||
void setCollatePrune(boolean prune) {
|
||||
this.prune = prune;
|
||||
}
|
||||
|
||||
boolean collatePrune() {
|
||||
return prune;
|
||||
}
|
||||
|
||||
}
|
@ -1096,7 +1096,7 @@ public class SuggestSearchTests extends ElasticsearchIntegrationTest {
|
||||
}
|
||||
|
||||
@Test
|
||||
public void suggestPhrasesInIndex() throws InterruptedException, ExecutionException, IOException {
|
||||
public void testPhraseSuggesterCollate() throws InterruptedException, ExecutionException, IOException {
|
||||
CreateIndexRequestBuilder builder = prepareCreate("test").setSettings(settingsBuilder()
|
||||
.put(indexSettings())
|
||||
.put(SETTING_NUMBER_OF_SHARDS, 1) // A single shard will help to keep the tests repeatable.
|
||||
@ -1253,6 +1253,13 @@ public class SuggestSearchTests extends ElasticsearchIntegrationTest {
|
||||
} catch (ElasticsearchException e) {
|
||||
// expected
|
||||
}
|
||||
|
||||
// collate request with prune set to true
|
||||
PhraseSuggestionBuilder phraseSuggestWithParamsAndReturn = suggest.collateFilter(null).collateQuery(collateWithParams).collateParams(params).collatePrune(true);
|
||||
searchSuggest = searchSuggest("united states house of representatives elections in washington 2006", phraseSuggestWithParamsAndReturn);
|
||||
assertSuggestionSize(searchSuggest, 0, 10, "title");
|
||||
assertSuggestionPhraseCollateMatchExists(searchSuggest, "title", 2);
|
||||
|
||||
}
|
||||
|
||||
protected Suggest searchSuggest(SuggestionBuilder<?>... suggestion) {
|
||||
|
@ -314,6 +314,22 @@ public class ElasticsearchAssertions {
|
||||
assertVersionSerializable(searchSuggest);
|
||||
}
|
||||
|
||||
public static void assertSuggestionPhraseCollateMatchExists(Suggest searchSuggest, String key, int numberOfPhraseExists) {
|
||||
int counter = 0;
|
||||
assertThat(searchSuggest, notNullValue());
|
||||
String msg = "Suggest result: " + searchSuggest.toString();
|
||||
assertThat(msg, searchSuggest.size(), greaterThanOrEqualTo(1));
|
||||
assertThat(msg, searchSuggest.getSuggestion(key).getName(), equalTo(key));
|
||||
|
||||
for (Suggest.Suggestion.Entry.Option option : searchSuggest.getSuggestion(key).getEntries().get(0).getOptions()) {
|
||||
if (option.collateMatch()) {
|
||||
counter++;
|
||||
}
|
||||
}
|
||||
|
||||
assertThat(counter, equalTo(numberOfPhraseExists));
|
||||
}
|
||||
|
||||
public static void assertSuggestion(Suggest searchSuggest, int entry, int ord, String key, String text) {
|
||||
assertThat(searchSuggest, notNullValue());
|
||||
String msg = "Suggest result: " + searchSuggest.toString();
|
||||
|
Loading…
x
Reference in New Issue
Block a user