From 3f6dae1a7304bbf8961efd603360f774a0444055 Mon Sep 17 00:00:00 2001 From: Alex Ksikes Date: Tue, 12 May 2015 14:31:26 +0200 Subject: [PATCH] More Like This: renamed `ignore_like` to `unlike` This changes the parameter name `ignore_like` to the more user-friendly name `unlike`. This latter feature generates a query from the terms in `A` but not from the terms in `B`. This translates to a result set which is like `A` but unlike `B`. We could have further negatively boosted any documents that have some `B`, but these documents already do not receive any contribution from having `B`, and would therefore negatively compete with documents having `A`. Closes #11117 --- .../lucene/search/MoreLikeThisQuery.java | 30 ++++++++++-------- .../index/query/MoreLikeThisQueryBuilder.java | 12 +++---- .../index/query/MoreLikeThisQueryParser.java | 31 +++++++++---------- docs/reference/query-dsl/mlt-query.asciidoc | 11 ++++--- .../mlt/{30_ignore.yaml => 30_unlike.yaml} | 4 +-- 5 files changed, 46 insertions(+), 42 deletions(-) rename rest-api-spec/test/mlt/{30_ignore.yaml => 30_unlike.yaml} (94%) diff --git a/core/src/main/java/org/elasticsearch/common/lucene/search/MoreLikeThisQuery.java b/core/src/main/java/org/elasticsearch/common/lucene/search/MoreLikeThisQuery.java index 970cb21a2b1..1a5d2687565 100644 --- a/core/src/main/java/org/elasticsearch/common/lucene/search/MoreLikeThisQuery.java +++ b/core/src/main/java/org/elasticsearch/common/lucene/search/MoreLikeThisQuery.java @@ -51,8 +51,8 @@ public class MoreLikeThisQuery extends Query { private String[] likeText; private Fields[] likeFields; - private String[] ignoreText; - private Fields[] ignoreFields; + private String[] unlikeText; + private Fields[] unlikeFields; private String[] moreLikeFields; private Analyzer analyzer; private String minimumShouldMatch = DEFAULT_MINIMUM_SHOULD_MATCH; @@ -155,10 +155,14 @@ public class MoreLikeThisQuery extends Query { mlt.setBoost(boostTerms); mlt.setBoostFactor(boostTermsFactor); - if 
(this.ignoreText != null || this.ignoreFields != null) { - handleSkipTerms(mlt, this.ignoreText, this.ignoreFields); + if (this.unlikeText != null || this.unlikeFields != null) { + handleUnlike(mlt, this.unlikeText, this.unlikeFields); } + + return createQuery(mlt); + } + private Query createQuery(XMoreLikeThis mlt) throws IOException { BooleanQuery bq = new BooleanQuery(); if (this.likeFields != null) { Query mltQuery = mlt.like(this.likeFields); @@ -177,14 +181,14 @@ public class MoreLikeThisQuery extends Query { } bq.setBoost(getBoost()); - return bq; + return bq; } - private void handleSkipTerms(XMoreLikeThis mlt, String[] ignoreText, Fields[] ignoreFields) throws IOException { + private void handleUnlike(XMoreLikeThis mlt, String[] unlikeText, Fields[] unlikeFields) throws IOException { Set skipTerms = new HashSet<>(); // handle like text - if (ignoreText != null) { - for (String text : ignoreText) { + if (unlikeText != null) { + for (String text : unlikeText) { // only use the first field to be consistent String fieldName = moreLikeFields[0]; try (TokenStream ts = analyzer.tokenStream(fieldName, text)) { @@ -198,8 +202,8 @@ public class MoreLikeThisQuery extends Query { } } // handle like fields - if (ignoreFields != null) { - for (Fields fields : ignoreFields) { + if (unlikeFields != null) { + for (Fields fields : unlikeFields) { for (String fieldName : fields) { Terms terms = fields.terms(fieldName); final TermsEnum termsEnum = terms.iterator(); @@ -248,12 +252,12 @@ public class MoreLikeThisQuery extends Query { setLikeText(likeText.toArray(Strings.EMPTY_ARRAY)); } - public void setIgnoreText(Fields... ignoreFields) { - this.ignoreFields = ignoreFields; + public void setUnlikeText(Fields... 
ignoreFields) { + this.unlikeFields = ignoreFields; } public void setIgnoreText(List ignoreText) { - this.ignoreText = ignoreText.toArray(Strings.EMPTY_ARRAY); + this.unlikeText = ignoreText.toArray(Strings.EMPTY_ARRAY); } public String[] getMoreLikeFields() { diff --git a/core/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryBuilder.java b/core/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryBuilder.java index a4d803878bf..19d65d91d27 100644 --- a/core/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryBuilder.java +++ b/core/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryBuilder.java @@ -130,7 +130,7 @@ public class MoreLikeThisQueryBuilder extends QueryBuilder implements BoostableQ private final String[] fields; private List docs = new ArrayList<>(); - private List ignoreDocs = new ArrayList<>(); + private List unlikeDocs = new ArrayList<>(); private Boolean include = null; private String minimumShouldMatch = null; private int minTermFreq = -1; @@ -189,7 +189,7 @@ public class MoreLikeThisQueryBuilder extends QueryBuilder implements BoostableQ * Sets the documents from which the terms should not be selected from. */ public MoreLikeThisQueryBuilder ignoreLike(Item... docs) { - this.ignoreDocs = Arrays.asList(docs); + this.unlikeDocs = Arrays.asList(docs); return this; } @@ -197,9 +197,9 @@ public class MoreLikeThisQueryBuilder extends QueryBuilder implements BoostableQ * Sets the text from which the terms should not be selected from. */ public MoreLikeThisQueryBuilder ignoreLike(String... 
likeText) { - this.ignoreDocs = new ArrayList<>(); + this.unlikeDocs = new ArrayList<>(); for (String text : likeText) { - this.ignoreDocs.add(new Item(text)); + this.unlikeDocs.add(new Item(text)); } return this; } @@ -378,8 +378,8 @@ public class MoreLikeThisQueryBuilder extends QueryBuilder implements BoostableQ } else { builder.field(likeFieldName, docs); } - if (!ignoreDocs.isEmpty()) { - builder.field(MoreLikeThisQueryParser.Fields.LIKE.getPreferredName(), ignoreDocs); + if (!unlikeDocs.isEmpty()) { + builder.field(MoreLikeThisQueryParser.Fields.UNLIKE.getPreferredName(), unlikeDocs); } if (minimumShouldMatch != null) { builder.field(MoreLikeThisQueryParser.Fields.MINIMUM_SHOULD_MATCH.getPreferredName(), minimumShouldMatch); diff --git a/core/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryParser.java b/core/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryParser.java index cd6367937e6..b882be1ae2c 100644 --- a/core/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryParser.java +++ b/core/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryParser.java @@ -73,7 +73,7 @@ public class MoreLikeThisQueryParser implements QueryParser { public static final ParseField DOCUMENT_IDS = new ParseField("ids").withAllDeprecated("like"); public static final ParseField DOCUMENTS = new ParseField("docs").withAllDeprecated("like"); public static final ParseField LIKE = new ParseField("like"); - public static final ParseField IGNORE_LIKE = new ParseField("ignore_like"); + public static final ParseField UNLIKE = new ParseField("unlike"); public static final ParseField INCLUDE = new ParseField("include"); } @@ -109,8 +109,8 @@ public class MoreLikeThisQueryParser implements QueryParser { List likeTexts = new ArrayList<>(); MultiTermVectorsRequest likeItems = new MultiTermVectorsRequest(); - List ignoreTexts = new ArrayList<>(); - MultiTermVectorsRequest ignoreItems = new MultiTermVectorsRequest(); + List unlikeTexts = new ArrayList<>(); 
+ MultiTermVectorsRequest unlikeItems = new MultiTermVectorsRequest(); while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { if (token == XContentParser.Token.FIELD_NAME) { @@ -120,8 +120,8 @@ public class MoreLikeThisQueryParser implements QueryParser { likeTexts.add(parser.text()); } else if (Fields.LIKE.match(currentFieldName, parseContext.parseFlags())) { parseLikeField(parser, likeTexts, likeItems); - } else if (Fields.IGNORE_LIKE.match(currentFieldName, parseContext.parseFlags())) { - parseLikeField(parser, ignoreTexts, ignoreItems); + } else if (Fields.UNLIKE.match(currentFieldName, parseContext.parseFlags())) { + parseLikeField(parser, unlikeTexts, unlikeItems); } else if (Fields.MIN_TERM_FREQ.match(currentFieldName, parseContext.parseFlags())) { mltQuery.setMinTermFrequency(parser.intValue()); } else if (Fields.MAX_QUERY_TERMS.match(currentFieldName, parseContext.parseFlags())) { @@ -187,9 +187,9 @@ public class MoreLikeThisQueryParser implements QueryParser { while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { parseLikeField(parser, likeTexts, likeItems); } - } else if (Fields.IGNORE_LIKE.match(currentFieldName, parseContext.parseFlags())) { + } else if (Fields.UNLIKE.match(currentFieldName, parseContext.parseFlags())) { while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { - parseLikeField(parser, ignoreTexts, ignoreItems); + parseLikeField(parser, unlikeTexts, unlikeItems); } } else { throw new QueryParsingException(parseContext, "[mlt] query does not support [" + currentFieldName + "]"); @@ -198,8 +198,8 @@ public class MoreLikeThisQueryParser implements QueryParser { if (Fields.LIKE.match(currentFieldName, parseContext.parseFlags())) { parseLikeField(parser, likeTexts, likeItems); } - else if (Fields.IGNORE_LIKE.match(currentFieldName, parseContext.parseFlags())) { - parseLikeField(parser, ignoreTexts, ignoreItems); + else if (Fields.UNLIKE.match(currentFieldName, parseContext.parseFlags())) 
{ + parseLikeField(parser, unlikeTexts, unlikeItems); } else { throw new QueryParsingException(parseContext, "[mlt] query does not support [" + currentFieldName + "]"); } @@ -240,16 +240,15 @@ public class MoreLikeThisQueryParser implements QueryParser { if (!likeTexts.isEmpty()) { mltQuery.setLikeText(likeTexts); } - if (!ignoreTexts.isEmpty()) { - mltQuery.setIgnoreText(ignoreTexts); + if (!unlikeTexts.isEmpty()) { + mltQuery.setIgnoreText(unlikeTexts); } // handle items if (!likeItems.isEmpty()) { // set default index, type and fields if not specified MultiTermVectorsRequest items = likeItems; - - for (TermVectorsRequest item : ignoreItems) { + for (TermVectorsRequest item : unlikeItems) { items.add(item); } @@ -283,10 +282,10 @@ public class MoreLikeThisQueryParser implements QueryParser { mltQuery.setLikeText(MoreLikeThisFetchService.getFields(responses, likeItems)); // getting the Fields for ignored items - if (!ignoreItems.isEmpty()) { - org.apache.lucene.index.Fields[] ignoreFields = MoreLikeThisFetchService.getFields(responses, ignoreItems); + if (!unlikeItems.isEmpty()) { + org.apache.lucene.index.Fields[] ignoreFields = MoreLikeThisFetchService.getFields(responses, unlikeItems); if (ignoreFields.length > 0) { - mltQuery.setIgnoreText(ignoreFields); + mltQuery.setUnlikeText(ignoreFields); } } diff --git a/docs/reference/query-dsl/mlt-query.asciidoc b/docs/reference/query-dsl/mlt-query.asciidoc index 37246db5023..79fc4a288b2 100644 --- a/docs/reference/query-dsl/mlt-query.asciidoc +++ b/docs/reference/query-dsl/mlt-query.asciidoc @@ -162,15 +162,16 @@ follows a similar syntax to the `per_field_analyzer` parameter of the Additionally, to provide documents not necessarily present in the index, <> are also supported. +`unlike`:: coming[2.0] +The `unlike` parameter is used in conjunction with `like` in order not to +select terms found in a chosen set of documents. In other words, we could ask +for documents `like: "Apple"`, but `unlike: "cake crumble tree"`. 
The syntax +is the same as `like`. + `fields`:: A list of fields to fetch and analyze the text from. Defaults to the `_all` field for free text and to all possible fields for document inputs. -`ignore_like`:: coming[2.0] -The `ignore_like` parameter is used to skip the terms found in a chosen set of -documents. In other words, we could ask for documents `like: "Apple"`, but -`ignore_like: "cake crumble tree"`. The syntax is the same as `like`. - `like_text`:: deprecated[2.0,Replaced by `like`] The text to find documents like it. diff --git a/rest-api-spec/test/mlt/30_ignore.yaml b/rest-api-spec/test/mlt/30_unlike.yaml similarity index 94% rename from rest-api-spec/test/mlt/30_ignore.yaml rename to rest-api-spec/test/mlt/30_unlike.yaml index e91b3ae518e..01cd372e8cf 100644 --- a/rest-api-spec/test/mlt/30_ignore.yaml +++ b/rest-api-spec/test/mlt/30_unlike.yaml @@ -1,5 +1,5 @@ --- -"Basic mlt query with ignore like": +"Basic mlt query with unlike": - do: indices.create: index: test_1 @@ -45,7 +45,7 @@ _index: test_1 _type: test _id: 1 - ignore_like: + unlike: _index: test_1 _type: test _id: 3