More Like This: renamed ignore_like to unlike

This changes the parameter name `ignore_like` to the more user-friendly name
`unlike`. This latter feature generates a query from the terms in `A` but not
from the terms in `B`. This translates to a result set which is like `A` but
unlike `B`. We could have further negatively boosted any documents that have
some `B`, but these documents already do not receive any contribution from
having `B`, and would therefore negatively compete with documents having `A`.

Closes #11117
This commit is contained in:
Alex Ksikes 2015-05-12 14:31:26 +02:00
parent 90f9b5f60d
commit 3f6dae1a73
5 changed files with 46 additions and 42 deletions

View File

@ -51,8 +51,8 @@ public class MoreLikeThisQuery extends Query {
private String[] likeText; private String[] likeText;
private Fields[] likeFields; private Fields[] likeFields;
private String[] ignoreText; private String[] unlikeText;
private Fields[] ignoreFields; private Fields[] unlikeFields;
private String[] moreLikeFields; private String[] moreLikeFields;
private Analyzer analyzer; private Analyzer analyzer;
private String minimumShouldMatch = DEFAULT_MINIMUM_SHOULD_MATCH; private String minimumShouldMatch = DEFAULT_MINIMUM_SHOULD_MATCH;
@ -155,10 +155,14 @@ public class MoreLikeThisQuery extends Query {
mlt.setBoost(boostTerms); mlt.setBoost(boostTerms);
mlt.setBoostFactor(boostTermsFactor); mlt.setBoostFactor(boostTermsFactor);
if (this.ignoreText != null || this.ignoreFields != null) { if (this.unlikeText != null || this.unlikeFields != null) {
handleSkipTerms(mlt, this.ignoreText, this.ignoreFields); handleUnlike(mlt, this.unlikeText, this.unlikeFields);
} }
return createQuery(mlt);
}
private Query createQuery(XMoreLikeThis mlt) throws IOException {
BooleanQuery bq = new BooleanQuery(); BooleanQuery bq = new BooleanQuery();
if (this.likeFields != null) { if (this.likeFields != null) {
Query mltQuery = mlt.like(this.likeFields); Query mltQuery = mlt.like(this.likeFields);
@ -177,14 +181,14 @@ public class MoreLikeThisQuery extends Query {
} }
bq.setBoost(getBoost()); bq.setBoost(getBoost());
return bq; return bq;
} }
private void handleSkipTerms(XMoreLikeThis mlt, String[] ignoreText, Fields[] ignoreFields) throws IOException { private void handleUnlike(XMoreLikeThis mlt, String[] unlikeText, Fields[] unlikeFields) throws IOException {
Set<Term> skipTerms = new HashSet<>(); Set<Term> skipTerms = new HashSet<>();
// handle like text // handle like text
if (ignoreText != null) { if (unlikeText != null) {
for (String text : ignoreText) { for (String text : unlikeText) {
// only use the first field to be consistent // only use the first field to be consistent
String fieldName = moreLikeFields[0]; String fieldName = moreLikeFields[0];
try (TokenStream ts = analyzer.tokenStream(fieldName, text)) { try (TokenStream ts = analyzer.tokenStream(fieldName, text)) {
@ -198,8 +202,8 @@ public class MoreLikeThisQuery extends Query {
} }
} }
// handle like fields // handle like fields
if (ignoreFields != null) { if (unlikeFields != null) {
for (Fields fields : ignoreFields) { for (Fields fields : unlikeFields) {
for (String fieldName : fields) { for (String fieldName : fields) {
Terms terms = fields.terms(fieldName); Terms terms = fields.terms(fieldName);
final TermsEnum termsEnum = terms.iterator(); final TermsEnum termsEnum = terms.iterator();
@ -248,12 +252,12 @@ public class MoreLikeThisQuery extends Query {
setLikeText(likeText.toArray(Strings.EMPTY_ARRAY)); setLikeText(likeText.toArray(Strings.EMPTY_ARRAY));
} }
public void setIgnoreText(Fields... ignoreFields) { public void setUnlikeText(Fields... ignoreFields) {
this.ignoreFields = ignoreFields; this.unlikeFields = ignoreFields;
} }
public void setIgnoreText(List<String> ignoreText) { public void setIgnoreText(List<String> ignoreText) {
this.ignoreText = ignoreText.toArray(Strings.EMPTY_ARRAY); this.unlikeText = ignoreText.toArray(Strings.EMPTY_ARRAY);
} }
public String[] getMoreLikeFields() { public String[] getMoreLikeFields() {

View File

@ -130,7 +130,7 @@ public class MoreLikeThisQueryBuilder extends QueryBuilder implements BoostableQ
private final String[] fields; private final String[] fields;
private List<Item> docs = new ArrayList<>(); private List<Item> docs = new ArrayList<>();
private List<Item> ignoreDocs = new ArrayList<>(); private List<Item> unlikeDocs = new ArrayList<>();
private Boolean include = null; private Boolean include = null;
private String minimumShouldMatch = null; private String minimumShouldMatch = null;
private int minTermFreq = -1; private int minTermFreq = -1;
@ -189,7 +189,7 @@ public class MoreLikeThisQueryBuilder extends QueryBuilder implements BoostableQ
* Sets the documents from which the terms should not be selected from. * Sets the documents from which the terms should not be selected from.
*/ */
public MoreLikeThisQueryBuilder ignoreLike(Item... docs) { public MoreLikeThisQueryBuilder ignoreLike(Item... docs) {
this.ignoreDocs = Arrays.asList(docs); this.unlikeDocs = Arrays.asList(docs);
return this; return this;
} }
@ -197,9 +197,9 @@ public class MoreLikeThisQueryBuilder extends QueryBuilder implements BoostableQ
* Sets the text from which the terms should not be selected from. * Sets the text from which the terms should not be selected from.
*/ */
public MoreLikeThisQueryBuilder ignoreLike(String... likeText) { public MoreLikeThisQueryBuilder ignoreLike(String... likeText) {
this.ignoreDocs = new ArrayList<>(); this.unlikeDocs = new ArrayList<>();
for (String text : likeText) { for (String text : likeText) {
this.ignoreDocs.add(new Item(text)); this.unlikeDocs.add(new Item(text));
} }
return this; return this;
} }
@ -378,8 +378,8 @@ public class MoreLikeThisQueryBuilder extends QueryBuilder implements BoostableQ
} else { } else {
builder.field(likeFieldName, docs); builder.field(likeFieldName, docs);
} }
if (!ignoreDocs.isEmpty()) { if (!unlikeDocs.isEmpty()) {
builder.field(MoreLikeThisQueryParser.Fields.LIKE.getPreferredName(), ignoreDocs); builder.field(MoreLikeThisQueryParser.Fields.UNLIKE.getPreferredName(), unlikeDocs);
} }
if (minimumShouldMatch != null) { if (minimumShouldMatch != null) {
builder.field(MoreLikeThisQueryParser.Fields.MINIMUM_SHOULD_MATCH.getPreferredName(), minimumShouldMatch); builder.field(MoreLikeThisQueryParser.Fields.MINIMUM_SHOULD_MATCH.getPreferredName(), minimumShouldMatch);

View File

@ -73,7 +73,7 @@ public class MoreLikeThisQueryParser implements QueryParser {
public static final ParseField DOCUMENT_IDS = new ParseField("ids").withAllDeprecated("like"); public static final ParseField DOCUMENT_IDS = new ParseField("ids").withAllDeprecated("like");
public static final ParseField DOCUMENTS = new ParseField("docs").withAllDeprecated("like"); public static final ParseField DOCUMENTS = new ParseField("docs").withAllDeprecated("like");
public static final ParseField LIKE = new ParseField("like"); public static final ParseField LIKE = new ParseField("like");
public static final ParseField IGNORE_LIKE = new ParseField("ignore_like"); public static final ParseField UNLIKE = new ParseField("unlike");
public static final ParseField INCLUDE = new ParseField("include"); public static final ParseField INCLUDE = new ParseField("include");
} }
@ -109,8 +109,8 @@ public class MoreLikeThisQueryParser implements QueryParser {
List<String> likeTexts = new ArrayList<>(); List<String> likeTexts = new ArrayList<>();
MultiTermVectorsRequest likeItems = new MultiTermVectorsRequest(); MultiTermVectorsRequest likeItems = new MultiTermVectorsRequest();
List<String> ignoreTexts = new ArrayList<>(); List<String> unlikeTexts = new ArrayList<>();
MultiTermVectorsRequest ignoreItems = new MultiTermVectorsRequest(); MultiTermVectorsRequest unlikeItems = new MultiTermVectorsRequest();
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) { if (token == XContentParser.Token.FIELD_NAME) {
@ -120,8 +120,8 @@ public class MoreLikeThisQueryParser implements QueryParser {
likeTexts.add(parser.text()); likeTexts.add(parser.text());
} else if (Fields.LIKE.match(currentFieldName, parseContext.parseFlags())) { } else if (Fields.LIKE.match(currentFieldName, parseContext.parseFlags())) {
parseLikeField(parser, likeTexts, likeItems); parseLikeField(parser, likeTexts, likeItems);
} else if (Fields.IGNORE_LIKE.match(currentFieldName, parseContext.parseFlags())) { } else if (Fields.UNLIKE.match(currentFieldName, parseContext.parseFlags())) {
parseLikeField(parser, ignoreTexts, ignoreItems); parseLikeField(parser, unlikeTexts, unlikeItems);
} else if (Fields.MIN_TERM_FREQ.match(currentFieldName, parseContext.parseFlags())) { } else if (Fields.MIN_TERM_FREQ.match(currentFieldName, parseContext.parseFlags())) {
mltQuery.setMinTermFrequency(parser.intValue()); mltQuery.setMinTermFrequency(parser.intValue());
} else if (Fields.MAX_QUERY_TERMS.match(currentFieldName, parseContext.parseFlags())) { } else if (Fields.MAX_QUERY_TERMS.match(currentFieldName, parseContext.parseFlags())) {
@ -187,9 +187,9 @@ public class MoreLikeThisQueryParser implements QueryParser {
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
parseLikeField(parser, likeTexts, likeItems); parseLikeField(parser, likeTexts, likeItems);
} }
} else if (Fields.IGNORE_LIKE.match(currentFieldName, parseContext.parseFlags())) { } else if (Fields.UNLIKE.match(currentFieldName, parseContext.parseFlags())) {
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
parseLikeField(parser, ignoreTexts, ignoreItems); parseLikeField(parser, unlikeTexts, unlikeItems);
} }
} else { } else {
throw new QueryParsingException(parseContext, "[mlt] query does not support [" + currentFieldName + "]"); throw new QueryParsingException(parseContext, "[mlt] query does not support [" + currentFieldName + "]");
@ -198,8 +198,8 @@ public class MoreLikeThisQueryParser implements QueryParser {
if (Fields.LIKE.match(currentFieldName, parseContext.parseFlags())) { if (Fields.LIKE.match(currentFieldName, parseContext.parseFlags())) {
parseLikeField(parser, likeTexts, likeItems); parseLikeField(parser, likeTexts, likeItems);
} }
else if (Fields.IGNORE_LIKE.match(currentFieldName, parseContext.parseFlags())) { else if (Fields.UNLIKE.match(currentFieldName, parseContext.parseFlags())) {
parseLikeField(parser, ignoreTexts, ignoreItems); parseLikeField(parser, unlikeTexts, unlikeItems);
} else { } else {
throw new QueryParsingException(parseContext, "[mlt] query does not support [" + currentFieldName + "]"); throw new QueryParsingException(parseContext, "[mlt] query does not support [" + currentFieldName + "]");
} }
@ -240,16 +240,15 @@ public class MoreLikeThisQueryParser implements QueryParser {
if (!likeTexts.isEmpty()) { if (!likeTexts.isEmpty()) {
mltQuery.setLikeText(likeTexts); mltQuery.setLikeText(likeTexts);
} }
if (!ignoreTexts.isEmpty()) { if (!unlikeTexts.isEmpty()) {
mltQuery.setIgnoreText(ignoreTexts); mltQuery.setIgnoreText(unlikeTexts);
} }
// handle items // handle items
if (!likeItems.isEmpty()) { if (!likeItems.isEmpty()) {
// set default index, type and fields if not specified // set default index, type and fields if not specified
MultiTermVectorsRequest items = likeItems; MultiTermVectorsRequest items = likeItems;
for (TermVectorsRequest item : unlikeItems) {
for (TermVectorsRequest item : ignoreItems) {
items.add(item); items.add(item);
} }
@ -283,10 +282,10 @@ public class MoreLikeThisQueryParser implements QueryParser {
mltQuery.setLikeText(MoreLikeThisFetchService.getFields(responses, likeItems)); mltQuery.setLikeText(MoreLikeThisFetchService.getFields(responses, likeItems));
// getting the Fields for ignored items // getting the Fields for ignored items
if (!ignoreItems.isEmpty()) { if (!unlikeItems.isEmpty()) {
org.apache.lucene.index.Fields[] ignoreFields = MoreLikeThisFetchService.getFields(responses, ignoreItems); org.apache.lucene.index.Fields[] ignoreFields = MoreLikeThisFetchService.getFields(responses, unlikeItems);
if (ignoreFields.length > 0) { if (ignoreFields.length > 0) {
mltQuery.setIgnoreText(ignoreFields); mltQuery.setUnlikeText(ignoreFields);
} }
} }

View File

@ -162,15 +162,16 @@ follows a similar syntax to the `per_field_analyzer` parameter of the
Additionally, to provide documents not necessarily present in the index, Additionally, to provide documents not necessarily present in the index,
<<docs-termvectors-artificial-doc,artificial documents>> are also supported. <<docs-termvectors-artificial-doc,artificial documents>> are also supported.
`unlike`:: coming[2.0]
The `unlike` parameter is used in conjunction with `like` in order not to
select terms found in a chosen set of documents. In other words, we could ask
for documents `like: "Apple"`, but `unlike: "cake crumble tree"`. The syntax
is the same as `like`.
`fields`:: `fields`::
A list of fields to fetch and analyze the text from. Defaults to the `_all` A list of fields to fetch and analyze the text from. Defaults to the `_all`
field for free text and to all possible fields for document inputs. field for free text and to all possible fields for document inputs.
`ignore_like`:: coming[2.0]
The `ignore_like` parameter is used to skip the terms found in a chosen set of
documents. In other words, we could ask for documents `like: "Apple"`, but
`ignore_like: "cake crumble tree"`. The syntax is the same as `like`.
`like_text`:: deprecated[2.0,Replaced by `like`] `like_text`:: deprecated[2.0,Replaced by `like`]
The text to find documents like it. The text to find documents like it.

View File

@ -1,5 +1,5 @@
--- ---
"Basic mlt query with ignore like": "Basic mlt query with unlike":
- do: - do:
indices.create: indices.create:
index: test_1 index: test_1
@ -45,7 +45,7 @@
_index: test_1 _index: test_1
_type: test _type: test
_id: 1 _id: 1
ignore_like: unlike:
_index: test_1 _index: test_1
_type: test _type: test
_id: 3 _id: 3