mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-02-17 02:14:54 +00:00
More Like This: renamed ignore_like
to unlike
This changes the parameter name `ignore_like` to the more user friendly name `unlike`. This latter feature generates a query from the terms in `A` but not from the terms in `B`. This translates to a result set which is like `A` but unlike `B`. We could have further negatively boosted any documents that have some `B`, but these documents already do not receive any contribution from having `B`, and would therefore negatively compete with documents having `A`. Closes #11117
This commit is contained in:
parent
90f9b5f60d
commit
3f6dae1a73
@ -51,8 +51,8 @@ public class MoreLikeThisQuery extends Query {
|
||||
|
||||
private String[] likeText;
|
||||
private Fields[] likeFields;
|
||||
private String[] ignoreText;
|
||||
private Fields[] ignoreFields;
|
||||
private String[] unlikeText;
|
||||
private Fields[] unlikeFields;
|
||||
private String[] moreLikeFields;
|
||||
private Analyzer analyzer;
|
||||
private String minimumShouldMatch = DEFAULT_MINIMUM_SHOULD_MATCH;
|
||||
@ -155,10 +155,14 @@ public class MoreLikeThisQuery extends Query {
|
||||
mlt.setBoost(boostTerms);
|
||||
mlt.setBoostFactor(boostTermsFactor);
|
||||
|
||||
if (this.ignoreText != null || this.ignoreFields != null) {
|
||||
handleSkipTerms(mlt, this.ignoreText, this.ignoreFields);
|
||||
if (this.unlikeText != null || this.unlikeFields != null) {
|
||||
handleUnlike(mlt, this.unlikeText, this.unlikeFields);
|
||||
}
|
||||
|
||||
return createQuery(mlt);
|
||||
}
|
||||
|
||||
private Query createQuery(XMoreLikeThis mlt) throws IOException {
|
||||
BooleanQuery bq = new BooleanQuery();
|
||||
if (this.likeFields != null) {
|
||||
Query mltQuery = mlt.like(this.likeFields);
|
||||
@ -177,14 +181,14 @@ public class MoreLikeThisQuery extends Query {
|
||||
}
|
||||
|
||||
bq.setBoost(getBoost());
|
||||
return bq;
|
||||
return bq;
|
||||
}
|
||||
|
||||
private void handleSkipTerms(XMoreLikeThis mlt, String[] ignoreText, Fields[] ignoreFields) throws IOException {
|
||||
private void handleUnlike(XMoreLikeThis mlt, String[] unlikeText, Fields[] unlikeFields) throws IOException {
|
||||
Set<Term> skipTerms = new HashSet<>();
|
||||
// handle like text
|
||||
if (ignoreText != null) {
|
||||
for (String text : ignoreText) {
|
||||
if (unlikeText != null) {
|
||||
for (String text : unlikeText) {
|
||||
// only use the first field to be consistent
|
||||
String fieldName = moreLikeFields[0];
|
||||
try (TokenStream ts = analyzer.tokenStream(fieldName, text)) {
|
||||
@ -198,8 +202,8 @@ public class MoreLikeThisQuery extends Query {
|
||||
}
|
||||
}
|
||||
// handle like fields
|
||||
if (ignoreFields != null) {
|
||||
for (Fields fields : ignoreFields) {
|
||||
if (unlikeFields != null) {
|
||||
for (Fields fields : unlikeFields) {
|
||||
for (String fieldName : fields) {
|
||||
Terms terms = fields.terms(fieldName);
|
||||
final TermsEnum termsEnum = terms.iterator();
|
||||
@ -248,12 +252,12 @@ public class MoreLikeThisQuery extends Query {
|
||||
setLikeText(likeText.toArray(Strings.EMPTY_ARRAY));
|
||||
}
|
||||
|
||||
public void setIgnoreText(Fields... ignoreFields) {
|
||||
this.ignoreFields = ignoreFields;
|
||||
public void setUnlikeText(Fields... ignoreFields) {
|
||||
this.unlikeFields = ignoreFields;
|
||||
}
|
||||
|
||||
public void setIgnoreText(List<String> ignoreText) {
|
||||
this.ignoreText = ignoreText.toArray(Strings.EMPTY_ARRAY);
|
||||
this.unlikeText = ignoreText.toArray(Strings.EMPTY_ARRAY);
|
||||
}
|
||||
|
||||
public String[] getMoreLikeFields() {
|
||||
|
@ -130,7 +130,7 @@ public class MoreLikeThisQueryBuilder extends QueryBuilder implements BoostableQ
|
||||
|
||||
private final String[] fields;
|
||||
private List<Item> docs = new ArrayList<>();
|
||||
private List<Item> ignoreDocs = new ArrayList<>();
|
||||
private List<Item> unlikeDocs = new ArrayList<>();
|
||||
private Boolean include = null;
|
||||
private String minimumShouldMatch = null;
|
||||
private int minTermFreq = -1;
|
||||
@ -189,7 +189,7 @@ public class MoreLikeThisQueryBuilder extends QueryBuilder implements BoostableQ
|
||||
* Sets the documents from which the terms should not be selected.
|
||||
*/
|
||||
public MoreLikeThisQueryBuilder ignoreLike(Item... docs) {
|
||||
this.ignoreDocs = Arrays.asList(docs);
|
||||
this.unlikeDocs = Arrays.asList(docs);
|
||||
return this;
|
||||
}
|
||||
|
||||
@ -197,9 +197,9 @@ public class MoreLikeThisQueryBuilder extends QueryBuilder implements BoostableQ
|
||||
* Sets the text from which the terms should not be selected.
|
||||
*/
|
||||
public MoreLikeThisQueryBuilder ignoreLike(String... likeText) {
|
||||
this.ignoreDocs = new ArrayList<>();
|
||||
this.unlikeDocs = new ArrayList<>();
|
||||
for (String text : likeText) {
|
||||
this.ignoreDocs.add(new Item(text));
|
||||
this.unlikeDocs.add(new Item(text));
|
||||
}
|
||||
return this;
|
||||
}
|
||||
@ -378,8 +378,8 @@ public class MoreLikeThisQueryBuilder extends QueryBuilder implements BoostableQ
|
||||
} else {
|
||||
builder.field(likeFieldName, docs);
|
||||
}
|
||||
if (!ignoreDocs.isEmpty()) {
|
||||
builder.field(MoreLikeThisQueryParser.Fields.LIKE.getPreferredName(), ignoreDocs);
|
||||
if (!unlikeDocs.isEmpty()) {
|
||||
builder.field(MoreLikeThisQueryParser.Fields.UNLIKE.getPreferredName(), unlikeDocs);
|
||||
}
|
||||
if (minimumShouldMatch != null) {
|
||||
builder.field(MoreLikeThisQueryParser.Fields.MINIMUM_SHOULD_MATCH.getPreferredName(), minimumShouldMatch);
|
||||
|
@ -73,7 +73,7 @@ public class MoreLikeThisQueryParser implements QueryParser {
|
||||
public static final ParseField DOCUMENT_IDS = new ParseField("ids").withAllDeprecated("like");
|
||||
public static final ParseField DOCUMENTS = new ParseField("docs").withAllDeprecated("like");
|
||||
public static final ParseField LIKE = new ParseField("like");
|
||||
public static final ParseField IGNORE_LIKE = new ParseField("ignore_like");
|
||||
public static final ParseField UNLIKE = new ParseField("unlike");
|
||||
public static final ParseField INCLUDE = new ParseField("include");
|
||||
}
|
||||
|
||||
@ -109,8 +109,8 @@ public class MoreLikeThisQueryParser implements QueryParser {
|
||||
List<String> likeTexts = new ArrayList<>();
|
||||
MultiTermVectorsRequest likeItems = new MultiTermVectorsRequest();
|
||||
|
||||
List<String> ignoreTexts = new ArrayList<>();
|
||||
MultiTermVectorsRequest ignoreItems = new MultiTermVectorsRequest();
|
||||
List<String> unlikeTexts = new ArrayList<>();
|
||||
MultiTermVectorsRequest unlikeItems = new MultiTermVectorsRequest();
|
||||
|
||||
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
|
||||
if (token == XContentParser.Token.FIELD_NAME) {
|
||||
@ -120,8 +120,8 @@ public class MoreLikeThisQueryParser implements QueryParser {
|
||||
likeTexts.add(parser.text());
|
||||
} else if (Fields.LIKE.match(currentFieldName, parseContext.parseFlags())) {
|
||||
parseLikeField(parser, likeTexts, likeItems);
|
||||
} else if (Fields.IGNORE_LIKE.match(currentFieldName, parseContext.parseFlags())) {
|
||||
parseLikeField(parser, ignoreTexts, ignoreItems);
|
||||
} else if (Fields.UNLIKE.match(currentFieldName, parseContext.parseFlags())) {
|
||||
parseLikeField(parser, unlikeTexts, unlikeItems);
|
||||
} else if (Fields.MIN_TERM_FREQ.match(currentFieldName, parseContext.parseFlags())) {
|
||||
mltQuery.setMinTermFrequency(parser.intValue());
|
||||
} else if (Fields.MAX_QUERY_TERMS.match(currentFieldName, parseContext.parseFlags())) {
|
||||
@ -187,9 +187,9 @@ public class MoreLikeThisQueryParser implements QueryParser {
|
||||
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
|
||||
parseLikeField(parser, likeTexts, likeItems);
|
||||
}
|
||||
} else if (Fields.IGNORE_LIKE.match(currentFieldName, parseContext.parseFlags())) {
|
||||
} else if (Fields.UNLIKE.match(currentFieldName, parseContext.parseFlags())) {
|
||||
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
|
||||
parseLikeField(parser, ignoreTexts, ignoreItems);
|
||||
parseLikeField(parser, unlikeTexts, unlikeItems);
|
||||
}
|
||||
} else {
|
||||
throw new QueryParsingException(parseContext, "[mlt] query does not support [" + currentFieldName + "]");
|
||||
@ -198,8 +198,8 @@ public class MoreLikeThisQueryParser implements QueryParser {
|
||||
if (Fields.LIKE.match(currentFieldName, parseContext.parseFlags())) {
|
||||
parseLikeField(parser, likeTexts, likeItems);
|
||||
}
|
||||
else if (Fields.IGNORE_LIKE.match(currentFieldName, parseContext.parseFlags())) {
|
||||
parseLikeField(parser, ignoreTexts, ignoreItems);
|
||||
else if (Fields.UNLIKE.match(currentFieldName, parseContext.parseFlags())) {
|
||||
parseLikeField(parser, unlikeTexts, unlikeItems);
|
||||
} else {
|
||||
throw new QueryParsingException(parseContext, "[mlt] query does not support [" + currentFieldName + "]");
|
||||
}
|
||||
@ -240,16 +240,15 @@ public class MoreLikeThisQueryParser implements QueryParser {
|
||||
if (!likeTexts.isEmpty()) {
|
||||
mltQuery.setLikeText(likeTexts);
|
||||
}
|
||||
if (!ignoreTexts.isEmpty()) {
|
||||
mltQuery.setIgnoreText(ignoreTexts);
|
||||
if (!unlikeTexts.isEmpty()) {
|
||||
mltQuery.setIgnoreText(unlikeTexts);
|
||||
}
|
||||
|
||||
// handle items
|
||||
if (!likeItems.isEmpty()) {
|
||||
// set default index, type and fields if not specified
|
||||
MultiTermVectorsRequest items = likeItems;
|
||||
|
||||
for (TermVectorsRequest item : ignoreItems) {
|
||||
for (TermVectorsRequest item : unlikeItems) {
|
||||
items.add(item);
|
||||
}
|
||||
|
||||
@ -283,10 +282,10 @@ public class MoreLikeThisQueryParser implements QueryParser {
|
||||
mltQuery.setLikeText(MoreLikeThisFetchService.getFields(responses, likeItems));
|
||||
|
||||
// getting the Fields for ignored items
|
||||
if (!ignoreItems.isEmpty()) {
|
||||
org.apache.lucene.index.Fields[] ignoreFields = MoreLikeThisFetchService.getFields(responses, ignoreItems);
|
||||
if (!unlikeItems.isEmpty()) {
|
||||
org.apache.lucene.index.Fields[] ignoreFields = MoreLikeThisFetchService.getFields(responses, unlikeItems);
|
||||
if (ignoreFields.length > 0) {
|
||||
mltQuery.setIgnoreText(ignoreFields);
|
||||
mltQuery.setUnlikeText(ignoreFields);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -162,15 +162,16 @@ follows a similar syntax to the `per_field_analyzer` parameter of the
|
||||
Additionally, to provide documents not necessarily present in the index,
|
||||
<<docs-termvectors-artificial-doc,artificial documents>> are also supported.
|
||||
|
||||
`unlike`:: coming[2.0]
|
||||
The `unlike` parameter is used in conjunction with `like` in order not to
|
||||
select terms found in a chosen set of documents. In other words, we could ask
|
||||
for documents `like: "Apple"`, but `unlike: "cake crumble tree"`. The syntax
|
||||
is the same as `like`.
|
||||
|
||||
`fields`::
|
||||
A list of fields to fetch and analyze the text from. Defaults to the `_all`
|
||||
field for free text and to all possible fields for document inputs.
|
||||
|
||||
`ignore_like`:: coming[2.0]
|
||||
The `ignore_like` parameter is used to skip the terms found in a chosen set of
|
||||
documents. In other words, we could ask for documents `like: "Apple"`, but
|
||||
`ignore_like: "cake crumble tree"`. The syntax is the same as `like`.
|
||||
|
||||
`like_text`:: deprecated[2.0,Replaced by `like`]
|
||||
The text to find documents like it.
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
---
|
||||
"Basic mlt query with ignore like":
|
||||
"Basic mlt query with unlike":
|
||||
- do:
|
||||
indices.create:
|
||||
index: test_1
|
||||
@ -45,7 +45,7 @@
|
||||
_index: test_1
|
||||
_type: test
|
||||
_id: 1
|
||||
ignore_like:
|
||||
unlike:
|
||||
_index: test_1
|
||||
_type: test
|
||||
_id: 3
|
Loading…
x
Reference in New Issue
Block a user