mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-02-20 03:45:02 +00:00
More Like This: renamed ignore_like
to unlike
This changes the parameter name `ignore_like` to the more user-friendly name `unlike`. This latter feature generates a query from the terms in `A` but not from the terms in `B`. This translates to a result set which is like `A` but unlike `B`. We could have further negatively boosted any documents that have some `B`, but these documents already do not receive any contribution from having `B`, and would therefore negatively compete with documents having `A`. Closes #11117
This commit is contained in:
parent
90f9b5f60d
commit
3f6dae1a73
@ -51,8 +51,8 @@ public class MoreLikeThisQuery extends Query {
|
|||||||
|
|
||||||
private String[] likeText;
|
private String[] likeText;
|
||||||
private Fields[] likeFields;
|
private Fields[] likeFields;
|
||||||
private String[] ignoreText;
|
private String[] unlikeText;
|
||||||
private Fields[] ignoreFields;
|
private Fields[] unlikeFields;
|
||||||
private String[] moreLikeFields;
|
private String[] moreLikeFields;
|
||||||
private Analyzer analyzer;
|
private Analyzer analyzer;
|
||||||
private String minimumShouldMatch = DEFAULT_MINIMUM_SHOULD_MATCH;
|
private String minimumShouldMatch = DEFAULT_MINIMUM_SHOULD_MATCH;
|
||||||
@ -155,10 +155,14 @@ public class MoreLikeThisQuery extends Query {
|
|||||||
mlt.setBoost(boostTerms);
|
mlt.setBoost(boostTerms);
|
||||||
mlt.setBoostFactor(boostTermsFactor);
|
mlt.setBoostFactor(boostTermsFactor);
|
||||||
|
|
||||||
if (this.ignoreText != null || this.ignoreFields != null) {
|
if (this.unlikeText != null || this.unlikeFields != null) {
|
||||||
handleSkipTerms(mlt, this.ignoreText, this.ignoreFields);
|
handleUnlike(mlt, this.unlikeText, this.unlikeFields);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return createQuery(mlt);
|
||||||
|
}
|
||||||
|
|
||||||
|
private Query createQuery(XMoreLikeThis mlt) throws IOException {
|
||||||
BooleanQuery bq = new BooleanQuery();
|
BooleanQuery bq = new BooleanQuery();
|
||||||
if (this.likeFields != null) {
|
if (this.likeFields != null) {
|
||||||
Query mltQuery = mlt.like(this.likeFields);
|
Query mltQuery = mlt.like(this.likeFields);
|
||||||
@ -177,14 +181,14 @@ public class MoreLikeThisQuery extends Query {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bq.setBoost(getBoost());
|
bq.setBoost(getBoost());
|
||||||
return bq;
|
return bq;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void handleSkipTerms(XMoreLikeThis mlt, String[] ignoreText, Fields[] ignoreFields) throws IOException {
|
private void handleUnlike(XMoreLikeThis mlt, String[] unlikeText, Fields[] unlikeFields) throws IOException {
|
||||||
Set<Term> skipTerms = new HashSet<>();
|
Set<Term> skipTerms = new HashSet<>();
|
||||||
// handle like text
|
// handle like text
|
||||||
if (ignoreText != null) {
|
if (unlikeText != null) {
|
||||||
for (String text : ignoreText) {
|
for (String text : unlikeText) {
|
||||||
// only use the first field to be consistent
|
// only use the first field to be consistent
|
||||||
String fieldName = moreLikeFields[0];
|
String fieldName = moreLikeFields[0];
|
||||||
try (TokenStream ts = analyzer.tokenStream(fieldName, text)) {
|
try (TokenStream ts = analyzer.tokenStream(fieldName, text)) {
|
||||||
@ -198,8 +202,8 @@ public class MoreLikeThisQuery extends Query {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
// handle like fields
|
// handle like fields
|
||||||
if (ignoreFields != null) {
|
if (unlikeFields != null) {
|
||||||
for (Fields fields : ignoreFields) {
|
for (Fields fields : unlikeFields) {
|
||||||
for (String fieldName : fields) {
|
for (String fieldName : fields) {
|
||||||
Terms terms = fields.terms(fieldName);
|
Terms terms = fields.terms(fieldName);
|
||||||
final TermsEnum termsEnum = terms.iterator();
|
final TermsEnum termsEnum = terms.iterator();
|
||||||
@ -248,12 +252,12 @@ public class MoreLikeThisQuery extends Query {
|
|||||||
setLikeText(likeText.toArray(Strings.EMPTY_ARRAY));
|
setLikeText(likeText.toArray(Strings.EMPTY_ARRAY));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setIgnoreText(Fields... ignoreFields) {
|
public void setUnlikeText(Fields... ignoreFields) {
|
||||||
this.ignoreFields = ignoreFields;
|
this.unlikeFields = ignoreFields;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setIgnoreText(List<String> ignoreText) {
|
public void setIgnoreText(List<String> ignoreText) {
|
||||||
this.ignoreText = ignoreText.toArray(Strings.EMPTY_ARRAY);
|
this.unlikeText = ignoreText.toArray(Strings.EMPTY_ARRAY);
|
||||||
}
|
}
|
||||||
|
|
||||||
public String[] getMoreLikeFields() {
|
public String[] getMoreLikeFields() {
|
||||||
|
@ -130,7 +130,7 @@ public class MoreLikeThisQueryBuilder extends QueryBuilder implements BoostableQ
|
|||||||
|
|
||||||
private final String[] fields;
|
private final String[] fields;
|
||||||
private List<Item> docs = new ArrayList<>();
|
private List<Item> docs = new ArrayList<>();
|
||||||
private List<Item> ignoreDocs = new ArrayList<>();
|
private List<Item> unlikeDocs = new ArrayList<>();
|
||||||
private Boolean include = null;
|
private Boolean include = null;
|
||||||
private String minimumShouldMatch = null;
|
private String minimumShouldMatch = null;
|
||||||
private int minTermFreq = -1;
|
private int minTermFreq = -1;
|
||||||
@ -189,7 +189,7 @@ public class MoreLikeThisQueryBuilder extends QueryBuilder implements BoostableQ
|
|||||||
* Sets the documents from which the terms should not be selected from.
|
* Sets the documents from which the terms should not be selected from.
|
||||||
*/
|
*/
|
||||||
public MoreLikeThisQueryBuilder ignoreLike(Item... docs) {
|
public MoreLikeThisQueryBuilder ignoreLike(Item... docs) {
|
||||||
this.ignoreDocs = Arrays.asList(docs);
|
this.unlikeDocs = Arrays.asList(docs);
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -197,9 +197,9 @@ public class MoreLikeThisQueryBuilder extends QueryBuilder implements BoostableQ
|
|||||||
* Sets the text from which the terms should not be selected from.
|
* Sets the text from which the terms should not be selected from.
|
||||||
*/
|
*/
|
||||||
public MoreLikeThisQueryBuilder ignoreLike(String... likeText) {
|
public MoreLikeThisQueryBuilder ignoreLike(String... likeText) {
|
||||||
this.ignoreDocs = new ArrayList<>();
|
this.unlikeDocs = new ArrayList<>();
|
||||||
for (String text : likeText) {
|
for (String text : likeText) {
|
||||||
this.ignoreDocs.add(new Item(text));
|
this.unlikeDocs.add(new Item(text));
|
||||||
}
|
}
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
@ -378,8 +378,8 @@ public class MoreLikeThisQueryBuilder extends QueryBuilder implements BoostableQ
|
|||||||
} else {
|
} else {
|
||||||
builder.field(likeFieldName, docs);
|
builder.field(likeFieldName, docs);
|
||||||
}
|
}
|
||||||
if (!ignoreDocs.isEmpty()) {
|
if (!unlikeDocs.isEmpty()) {
|
||||||
builder.field(MoreLikeThisQueryParser.Fields.LIKE.getPreferredName(), ignoreDocs);
|
builder.field(MoreLikeThisQueryParser.Fields.UNLIKE.getPreferredName(), unlikeDocs);
|
||||||
}
|
}
|
||||||
if (minimumShouldMatch != null) {
|
if (minimumShouldMatch != null) {
|
||||||
builder.field(MoreLikeThisQueryParser.Fields.MINIMUM_SHOULD_MATCH.getPreferredName(), minimumShouldMatch);
|
builder.field(MoreLikeThisQueryParser.Fields.MINIMUM_SHOULD_MATCH.getPreferredName(), minimumShouldMatch);
|
||||||
|
@ -73,7 +73,7 @@ public class MoreLikeThisQueryParser implements QueryParser {
|
|||||||
public static final ParseField DOCUMENT_IDS = new ParseField("ids").withAllDeprecated("like");
|
public static final ParseField DOCUMENT_IDS = new ParseField("ids").withAllDeprecated("like");
|
||||||
public static final ParseField DOCUMENTS = new ParseField("docs").withAllDeprecated("like");
|
public static final ParseField DOCUMENTS = new ParseField("docs").withAllDeprecated("like");
|
||||||
public static final ParseField LIKE = new ParseField("like");
|
public static final ParseField LIKE = new ParseField("like");
|
||||||
public static final ParseField IGNORE_LIKE = new ParseField("ignore_like");
|
public static final ParseField UNLIKE = new ParseField("unlike");
|
||||||
public static final ParseField INCLUDE = new ParseField("include");
|
public static final ParseField INCLUDE = new ParseField("include");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -109,8 +109,8 @@ public class MoreLikeThisQueryParser implements QueryParser {
|
|||||||
List<String> likeTexts = new ArrayList<>();
|
List<String> likeTexts = new ArrayList<>();
|
||||||
MultiTermVectorsRequest likeItems = new MultiTermVectorsRequest();
|
MultiTermVectorsRequest likeItems = new MultiTermVectorsRequest();
|
||||||
|
|
||||||
List<String> ignoreTexts = new ArrayList<>();
|
List<String> unlikeTexts = new ArrayList<>();
|
||||||
MultiTermVectorsRequest ignoreItems = new MultiTermVectorsRequest();
|
MultiTermVectorsRequest unlikeItems = new MultiTermVectorsRequest();
|
||||||
|
|
||||||
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
|
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
|
||||||
if (token == XContentParser.Token.FIELD_NAME) {
|
if (token == XContentParser.Token.FIELD_NAME) {
|
||||||
@ -120,8 +120,8 @@ public class MoreLikeThisQueryParser implements QueryParser {
|
|||||||
likeTexts.add(parser.text());
|
likeTexts.add(parser.text());
|
||||||
} else if (Fields.LIKE.match(currentFieldName, parseContext.parseFlags())) {
|
} else if (Fields.LIKE.match(currentFieldName, parseContext.parseFlags())) {
|
||||||
parseLikeField(parser, likeTexts, likeItems);
|
parseLikeField(parser, likeTexts, likeItems);
|
||||||
} else if (Fields.IGNORE_LIKE.match(currentFieldName, parseContext.parseFlags())) {
|
} else if (Fields.UNLIKE.match(currentFieldName, parseContext.parseFlags())) {
|
||||||
parseLikeField(parser, ignoreTexts, ignoreItems);
|
parseLikeField(parser, unlikeTexts, unlikeItems);
|
||||||
} else if (Fields.MIN_TERM_FREQ.match(currentFieldName, parseContext.parseFlags())) {
|
} else if (Fields.MIN_TERM_FREQ.match(currentFieldName, parseContext.parseFlags())) {
|
||||||
mltQuery.setMinTermFrequency(parser.intValue());
|
mltQuery.setMinTermFrequency(parser.intValue());
|
||||||
} else if (Fields.MAX_QUERY_TERMS.match(currentFieldName, parseContext.parseFlags())) {
|
} else if (Fields.MAX_QUERY_TERMS.match(currentFieldName, parseContext.parseFlags())) {
|
||||||
@ -187,9 +187,9 @@ public class MoreLikeThisQueryParser implements QueryParser {
|
|||||||
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
|
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
|
||||||
parseLikeField(parser, likeTexts, likeItems);
|
parseLikeField(parser, likeTexts, likeItems);
|
||||||
}
|
}
|
||||||
} else if (Fields.IGNORE_LIKE.match(currentFieldName, parseContext.parseFlags())) {
|
} else if (Fields.UNLIKE.match(currentFieldName, parseContext.parseFlags())) {
|
||||||
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
|
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
|
||||||
parseLikeField(parser, ignoreTexts, ignoreItems);
|
parseLikeField(parser, unlikeTexts, unlikeItems);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
throw new QueryParsingException(parseContext, "[mlt] query does not support [" + currentFieldName + "]");
|
throw new QueryParsingException(parseContext, "[mlt] query does not support [" + currentFieldName + "]");
|
||||||
@ -198,8 +198,8 @@ public class MoreLikeThisQueryParser implements QueryParser {
|
|||||||
if (Fields.LIKE.match(currentFieldName, parseContext.parseFlags())) {
|
if (Fields.LIKE.match(currentFieldName, parseContext.parseFlags())) {
|
||||||
parseLikeField(parser, likeTexts, likeItems);
|
parseLikeField(parser, likeTexts, likeItems);
|
||||||
}
|
}
|
||||||
else if (Fields.IGNORE_LIKE.match(currentFieldName, parseContext.parseFlags())) {
|
else if (Fields.UNLIKE.match(currentFieldName, parseContext.parseFlags())) {
|
||||||
parseLikeField(parser, ignoreTexts, ignoreItems);
|
parseLikeField(parser, unlikeTexts, unlikeItems);
|
||||||
} else {
|
} else {
|
||||||
throw new QueryParsingException(parseContext, "[mlt] query does not support [" + currentFieldName + "]");
|
throw new QueryParsingException(parseContext, "[mlt] query does not support [" + currentFieldName + "]");
|
||||||
}
|
}
|
||||||
@ -240,16 +240,15 @@ public class MoreLikeThisQueryParser implements QueryParser {
|
|||||||
if (!likeTexts.isEmpty()) {
|
if (!likeTexts.isEmpty()) {
|
||||||
mltQuery.setLikeText(likeTexts);
|
mltQuery.setLikeText(likeTexts);
|
||||||
}
|
}
|
||||||
if (!ignoreTexts.isEmpty()) {
|
if (!unlikeTexts.isEmpty()) {
|
||||||
mltQuery.setIgnoreText(ignoreTexts);
|
mltQuery.setIgnoreText(unlikeTexts);
|
||||||
}
|
}
|
||||||
|
|
||||||
// handle items
|
// handle items
|
||||||
if (!likeItems.isEmpty()) {
|
if (!likeItems.isEmpty()) {
|
||||||
// set default index, type and fields if not specified
|
// set default index, type and fields if not specified
|
||||||
MultiTermVectorsRequest items = likeItems;
|
MultiTermVectorsRequest items = likeItems;
|
||||||
|
for (TermVectorsRequest item : unlikeItems) {
|
||||||
for (TermVectorsRequest item : ignoreItems) {
|
|
||||||
items.add(item);
|
items.add(item);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -283,10 +282,10 @@ public class MoreLikeThisQueryParser implements QueryParser {
|
|||||||
mltQuery.setLikeText(MoreLikeThisFetchService.getFields(responses, likeItems));
|
mltQuery.setLikeText(MoreLikeThisFetchService.getFields(responses, likeItems));
|
||||||
|
|
||||||
// getting the Fields for ignored items
|
// getting the Fields for ignored items
|
||||||
if (!ignoreItems.isEmpty()) {
|
if (!unlikeItems.isEmpty()) {
|
||||||
org.apache.lucene.index.Fields[] ignoreFields = MoreLikeThisFetchService.getFields(responses, ignoreItems);
|
org.apache.lucene.index.Fields[] ignoreFields = MoreLikeThisFetchService.getFields(responses, unlikeItems);
|
||||||
if (ignoreFields.length > 0) {
|
if (ignoreFields.length > 0) {
|
||||||
mltQuery.setIgnoreText(ignoreFields);
|
mltQuery.setUnlikeText(ignoreFields);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -162,15 +162,16 @@ follows a similar syntax to the `per_field_analyzer` parameter of the
|
|||||||
Additionally, to provide documents not necessarily present in the index,
|
Additionally, to provide documents not necessarily present in the index,
|
||||||
<<docs-termvectors-artificial-doc,artificial documents>> are also supported.
|
<<docs-termvectors-artificial-doc,artificial documents>> are also supported.
|
||||||
|
|
||||||
|
`unlike`:: coming[2.0]
|
||||||
|
The `unlike` parameter is used in conjunction with `like` in order not to
|
||||||
|
select terms found in a chosen set of documents. In other words, we could ask
|
||||||
|
for documents `like: "Apple"`, but `unlike: "cake crumble tree"`. The syntax
|
||||||
|
is the same as `like`.
|
||||||
|
|
||||||
`fields`::
|
`fields`::
|
||||||
A list of fields to fetch and analyze the text from. Defaults to the `_all`
|
A list of fields to fetch and analyze the text from. Defaults to the `_all`
|
||||||
field for free text and to all possible fields for document inputs.
|
field for free text and to all possible fields for document inputs.
|
||||||
|
|
||||||
`ignore_like`:: coming[2.0]
|
|
||||||
The `ignore_like` parameter is used to skip the terms found in a chosen set of
|
|
||||||
documents. In other words, we could ask for documents `like: "Apple"`, but
|
|
||||||
`ignore_like: "cake crumble tree"`. The syntax is the same as `like`.
|
|
||||||
|
|
||||||
`like_text`:: deprecated[2.0,Replaced by `like`]
|
`like_text`:: deprecated[2.0,Replaced by `like`]
|
||||||
The text to find documents like it.
|
The text to find documents like it.
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
---
|
---
|
||||||
"Basic mlt query with ignore like":
|
"Basic mlt query with unlike":
|
||||||
- do:
|
- do:
|
||||||
indices.create:
|
indices.create:
|
||||||
index: test_1
|
index: test_1
|
||||||
@ -45,7 +45,7 @@
|
|||||||
_index: test_1
|
_index: test_1
|
||||||
_type: test
|
_type: test
|
||||||
_id: 1
|
_id: 1
|
||||||
ignore_like:
|
unlike:
|
||||||
_index: test_1
|
_index: test_1
|
||||||
_type: test
|
_type: test
|
||||||
_id: 3
|
_id: 3
|
Loading…
x
Reference in New Issue
Block a user