More Like This: renamed ignore_like to unlike

This changes the parameter name `ignore_like` to the more user-friendly name
`unlike`. This latter feature generates a query from the terms in `A` but not
from the terms in `B`. This translates to a result set which is like `A` but
unlike `B`. We could have further negatively boosted any documents that have
some `B`, but these documents already do not receive any contribution from
having `B`, and would therefore negatively compete with documents having `A`.

Closes #11117
This commit is contained in:
Alex Ksikes 2015-05-12 14:31:26 +02:00
parent 90f9b5f60d
commit 3f6dae1a73
5 changed files with 46 additions and 42 deletions

View File

@ -51,8 +51,8 @@ public class MoreLikeThisQuery extends Query {
private String[] likeText;
private Fields[] likeFields;
private String[] ignoreText;
private Fields[] ignoreFields;
private String[] unlikeText;
private Fields[] unlikeFields;
private String[] moreLikeFields;
private Analyzer analyzer;
private String minimumShouldMatch = DEFAULT_MINIMUM_SHOULD_MATCH;
@ -155,10 +155,14 @@ public class MoreLikeThisQuery extends Query {
mlt.setBoost(boostTerms);
mlt.setBoostFactor(boostTermsFactor);
if (this.ignoreText != null || this.ignoreFields != null) {
handleSkipTerms(mlt, this.ignoreText, this.ignoreFields);
if (this.unlikeText != null || this.unlikeFields != null) {
handleUnlike(mlt, this.unlikeText, this.unlikeFields);
}
return createQuery(mlt);
}
private Query createQuery(XMoreLikeThis mlt) throws IOException {
BooleanQuery bq = new BooleanQuery();
if (this.likeFields != null) {
Query mltQuery = mlt.like(this.likeFields);
@ -177,14 +181,14 @@ public class MoreLikeThisQuery extends Query {
}
bq.setBoost(getBoost());
return bq;
return bq;
}
private void handleSkipTerms(XMoreLikeThis mlt, String[] ignoreText, Fields[] ignoreFields) throws IOException {
private void handleUnlike(XMoreLikeThis mlt, String[] unlikeText, Fields[] unlikeFields) throws IOException {
Set<Term> skipTerms = new HashSet<>();
// handle like text
if (ignoreText != null) {
for (String text : ignoreText) {
if (unlikeText != null) {
for (String text : unlikeText) {
// only use the first field to be consistent
String fieldName = moreLikeFields[0];
try (TokenStream ts = analyzer.tokenStream(fieldName, text)) {
@ -198,8 +202,8 @@ public class MoreLikeThisQuery extends Query {
}
}
// handle like fields
if (ignoreFields != null) {
for (Fields fields : ignoreFields) {
if (unlikeFields != null) {
for (Fields fields : unlikeFields) {
for (String fieldName : fields) {
Terms terms = fields.terms(fieldName);
final TermsEnum termsEnum = terms.iterator();
@ -248,12 +252,12 @@ public class MoreLikeThisQuery extends Query {
setLikeText(likeText.toArray(Strings.EMPTY_ARRAY));
}
public void setIgnoreText(Fields... ignoreFields) {
this.ignoreFields = ignoreFields;
public void setUnlikeText(Fields... ignoreFields) {
this.unlikeFields = ignoreFields;
}
public void setIgnoreText(List<String> ignoreText) {
this.ignoreText = ignoreText.toArray(Strings.EMPTY_ARRAY);
this.unlikeText = ignoreText.toArray(Strings.EMPTY_ARRAY);
}
public String[] getMoreLikeFields() {

View File

@ -130,7 +130,7 @@ public class MoreLikeThisQueryBuilder extends QueryBuilder implements BoostableQ
private final String[] fields;
private List<Item> docs = new ArrayList<>();
private List<Item> ignoreDocs = new ArrayList<>();
private List<Item> unlikeDocs = new ArrayList<>();
private Boolean include = null;
private String minimumShouldMatch = null;
private int minTermFreq = -1;
@ -189,7 +189,7 @@ public class MoreLikeThisQueryBuilder extends QueryBuilder implements BoostableQ
* Sets the documents from which the terms should not be selected.
*/
public MoreLikeThisQueryBuilder ignoreLike(Item... docs) {
this.ignoreDocs = Arrays.asList(docs);
this.unlikeDocs = Arrays.asList(docs);
return this;
}
@ -197,9 +197,9 @@ public class MoreLikeThisQueryBuilder extends QueryBuilder implements BoostableQ
* Sets the text from which the terms should not be selected.
*/
public MoreLikeThisQueryBuilder ignoreLike(String... likeText) {
this.ignoreDocs = new ArrayList<>();
this.unlikeDocs = new ArrayList<>();
for (String text : likeText) {
this.ignoreDocs.add(new Item(text));
this.unlikeDocs.add(new Item(text));
}
return this;
}
@ -378,8 +378,8 @@ public class MoreLikeThisQueryBuilder extends QueryBuilder implements BoostableQ
} else {
builder.field(likeFieldName, docs);
}
if (!ignoreDocs.isEmpty()) {
builder.field(MoreLikeThisQueryParser.Fields.LIKE.getPreferredName(), ignoreDocs);
if (!unlikeDocs.isEmpty()) {
builder.field(MoreLikeThisQueryParser.Fields.UNLIKE.getPreferredName(), unlikeDocs);
}
if (minimumShouldMatch != null) {
builder.field(MoreLikeThisQueryParser.Fields.MINIMUM_SHOULD_MATCH.getPreferredName(), minimumShouldMatch);

View File

@ -73,7 +73,7 @@ public class MoreLikeThisQueryParser implements QueryParser {
public static final ParseField DOCUMENT_IDS = new ParseField("ids").withAllDeprecated("like");
public static final ParseField DOCUMENTS = new ParseField("docs").withAllDeprecated("like");
public static final ParseField LIKE = new ParseField("like");
public static final ParseField IGNORE_LIKE = new ParseField("ignore_like");
public static final ParseField UNLIKE = new ParseField("unlike");
public static final ParseField INCLUDE = new ParseField("include");
}
@ -109,8 +109,8 @@ public class MoreLikeThisQueryParser implements QueryParser {
List<String> likeTexts = new ArrayList<>();
MultiTermVectorsRequest likeItems = new MultiTermVectorsRequest();
List<String> ignoreTexts = new ArrayList<>();
MultiTermVectorsRequest ignoreItems = new MultiTermVectorsRequest();
List<String> unlikeTexts = new ArrayList<>();
MultiTermVectorsRequest unlikeItems = new MultiTermVectorsRequest();
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) {
@ -120,8 +120,8 @@ public class MoreLikeThisQueryParser implements QueryParser {
likeTexts.add(parser.text());
} else if (Fields.LIKE.match(currentFieldName, parseContext.parseFlags())) {
parseLikeField(parser, likeTexts, likeItems);
} else if (Fields.IGNORE_LIKE.match(currentFieldName, parseContext.parseFlags())) {
parseLikeField(parser, ignoreTexts, ignoreItems);
} else if (Fields.UNLIKE.match(currentFieldName, parseContext.parseFlags())) {
parseLikeField(parser, unlikeTexts, unlikeItems);
} else if (Fields.MIN_TERM_FREQ.match(currentFieldName, parseContext.parseFlags())) {
mltQuery.setMinTermFrequency(parser.intValue());
} else if (Fields.MAX_QUERY_TERMS.match(currentFieldName, parseContext.parseFlags())) {
@ -187,9 +187,9 @@ public class MoreLikeThisQueryParser implements QueryParser {
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
parseLikeField(parser, likeTexts, likeItems);
}
} else if (Fields.IGNORE_LIKE.match(currentFieldName, parseContext.parseFlags())) {
} else if (Fields.UNLIKE.match(currentFieldName, parseContext.parseFlags())) {
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
parseLikeField(parser, ignoreTexts, ignoreItems);
parseLikeField(parser, unlikeTexts, unlikeItems);
}
} else {
throw new QueryParsingException(parseContext, "[mlt] query does not support [" + currentFieldName + "]");
@ -198,8 +198,8 @@ public class MoreLikeThisQueryParser implements QueryParser {
if (Fields.LIKE.match(currentFieldName, parseContext.parseFlags())) {
parseLikeField(parser, likeTexts, likeItems);
}
else if (Fields.IGNORE_LIKE.match(currentFieldName, parseContext.parseFlags())) {
parseLikeField(parser, ignoreTexts, ignoreItems);
else if (Fields.UNLIKE.match(currentFieldName, parseContext.parseFlags())) {
parseLikeField(parser, unlikeTexts, unlikeItems);
} else {
throw new QueryParsingException(parseContext, "[mlt] query does not support [" + currentFieldName + "]");
}
@ -240,16 +240,15 @@ public class MoreLikeThisQueryParser implements QueryParser {
if (!likeTexts.isEmpty()) {
mltQuery.setLikeText(likeTexts);
}
if (!ignoreTexts.isEmpty()) {
mltQuery.setIgnoreText(ignoreTexts);
if (!unlikeTexts.isEmpty()) {
mltQuery.setIgnoreText(unlikeTexts);
}
// handle items
if (!likeItems.isEmpty()) {
// set default index, type and fields if not specified
MultiTermVectorsRequest items = likeItems;
for (TermVectorsRequest item : ignoreItems) {
for (TermVectorsRequest item : unlikeItems) {
items.add(item);
}
@ -283,10 +282,10 @@ public class MoreLikeThisQueryParser implements QueryParser {
mltQuery.setLikeText(MoreLikeThisFetchService.getFields(responses, likeItems));
// getting the Fields for ignored items
if (!ignoreItems.isEmpty()) {
org.apache.lucene.index.Fields[] ignoreFields = MoreLikeThisFetchService.getFields(responses, ignoreItems);
if (!unlikeItems.isEmpty()) {
org.apache.lucene.index.Fields[] ignoreFields = MoreLikeThisFetchService.getFields(responses, unlikeItems);
if (ignoreFields.length > 0) {
mltQuery.setIgnoreText(ignoreFields);
mltQuery.setUnlikeText(ignoreFields);
}
}

View File

@ -162,15 +162,16 @@ follows a similar syntax to the `per_field_analyzer` parameter of the
Additionally, to provide documents not necessarily present in the index,
<<docs-termvectors-artificial-doc,artificial documents>> are also supported.
`unlike`:: coming[2.0]
The `unlike` parameter is used in conjunction with `like` in order not to
select terms found in a chosen set of documents. In other words, we could ask
for documents `like: "Apple"`, but `unlike: "cake crumble tree"`. The syntax
is the same as `like`.
`fields`::
A list of fields to fetch and analyze the text from. Defaults to the `_all`
field for free text and to all possible fields for document inputs.
`ignore_like`:: coming[2.0]
The `ignore_like` parameter is used to skip the terms found in a chosen set of
documents. In other words, we could ask for documents `like: "Apple"`, but
`ignore_like: "cake crumble tree"`. The syntax is the same as `like`.
`like_text`:: deprecated[2.0,Replaced by `like`]
The text to find documents like it.

View File

@ -1,5 +1,5 @@
---
"Basic mlt query with ignore like":
"Basic mlt query with unlike":
- do:
indices.create:
index: test_1
@ -45,7 +45,7 @@
_index: test_1
_type: test
_id: 1
ignore_like:
unlike:
_index: test_1
_type: test
_id: 3