More Like This: renamed ignore_like to unlike

This changes the parameter name `ignore_like` to the more user-friendly name `unlike`. The `unlike` feature generates a query from the terms in `A` but not from the terms in `B`. This translates to a result set which is like `A` but unlike `B`. We could have further negatively boosted any documents that have some terms of `B`, but these documents already do not receive any contribution from having `B`, and would therefore negatively compete with documents having `A`. Closes #11117
2025-02-17 02:14:54 +00:00 · 2015-05-12 14:31:26 +02:00 · 2015-05-12 14:31:26 +02:00 · 3f6dae1a73
commit 3f6dae1a73
parent 90f9b5f60d
5 changed files with 46 additions and 42 deletions
--- a/core/src/main/java/org/elasticsearch/common/lucene/search/MoreLikeThisQuery.java
+++ b/core/src/main/java/org/elasticsearch/common/lucene/search/MoreLikeThisQuery.java
@ -51,8 +51,8 @@ public class MoreLikeThisQuery extends Query {

    private String[] likeText;
    private Fields[] likeFields;
-    private String[] ignoreText;
-    private Fields[] ignoreFields;
+    private String[] unlikeText;
+    private Fields[] unlikeFields;
    private String[] moreLikeFields;
    private Analyzer analyzer;
    private String minimumShouldMatch = DEFAULT_MINIMUM_SHOULD_MATCH;
@ -155,10 +155,14 @@ public class MoreLikeThisQuery extends Query {
        mlt.setBoost(boostTerms);
        mlt.setBoostFactor(boostTermsFactor);

-        if (this.ignoreText != null || this.ignoreFields != null) {
-            handleSkipTerms(mlt, this.ignoreText, this.ignoreFields);
+        if (this.unlikeText != null || this.unlikeFields != null) {
+            handleUnlike(mlt, this.unlikeText, this.unlikeFields);
        }
+        
+        return createQuery(mlt);
+    }

+    private Query createQuery(XMoreLikeThis mlt) throws IOException {
        BooleanQuery bq = new BooleanQuery();
        if (this.likeFields != null) {
            Query mltQuery = mlt.like(this.likeFields);
@ -177,14 +181,14 @@ public class MoreLikeThisQuery extends Query {
        }

        bq.setBoost(getBoost());
-        return bq;
+        return bq;    
    }

-    private void handleSkipTerms(XMoreLikeThis mlt, String[] ignoreText, Fields[] ignoreFields) throws IOException {
+    private void handleUnlike(XMoreLikeThis mlt, String[] unlikeText, Fields[] unlikeFields) throws IOException {
        Set<Term> skipTerms = new HashSet<>();
        // handle like text
-        if (ignoreText != null) {
-            for (String text : ignoreText) {
+        if (unlikeText != null) {
+            for (String text : unlikeText) {
                // only use the first field to be consistent
                String fieldName = moreLikeFields[0];
                try (TokenStream ts = analyzer.tokenStream(fieldName, text)) {
@ -198,8 +202,8 @@ public class MoreLikeThisQuery extends Query {
            }
        }
        // handle like fields
-        if (ignoreFields != null) {
-            for (Fields fields : ignoreFields) {
+        if (unlikeFields != null) {
+            for (Fields fields : unlikeFields) {
                for (String fieldName : fields) {
                    Terms terms = fields.terms(fieldName);
                    final TermsEnum termsEnum = terms.iterator();
@ -248,12 +252,12 @@ public class MoreLikeThisQuery extends Query {
        setLikeText(likeText.toArray(Strings.EMPTY_ARRAY));
    }

-    public void setIgnoreText(Fields... ignoreFields) {
-        this.ignoreFields = ignoreFields;
+    public void setUnlikeText(Fields... ignoreFields) {
+        this.unlikeFields = ignoreFields;
    }

    public void setIgnoreText(List<String> ignoreText) {
-        this.ignoreText = ignoreText.toArray(Strings.EMPTY_ARRAY);
+        this.unlikeText = ignoreText.toArray(Strings.EMPTY_ARRAY);
    }

    public String[] getMoreLikeFields() {
--- a/core/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryBuilder.java
+++ b/core/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryBuilder.java
@ -130,7 +130,7 @@ public class MoreLikeThisQueryBuilder extends QueryBuilder implements BoostableQ

    private final String[] fields;
    private List<Item> docs = new ArrayList<>();
-    private List<Item> ignoreDocs = new ArrayList<>();
+    private List<Item> unlikeDocs = new ArrayList<>();
    private Boolean include = null;
    private String minimumShouldMatch = null;
    private int minTermFreq = -1;
@ -189,7 +189,7 @@ public class MoreLikeThisQueryBuilder extends QueryBuilder implements BoostableQ
     * Sets the documents from which the terms should not be selected from.
     */
    public MoreLikeThisQueryBuilder ignoreLike(Item... docs) {
-        this.ignoreDocs = Arrays.asList(docs);
+        this.unlikeDocs = Arrays.asList(docs);
        return this;
    }

@ -197,9 +197,9 @@ public class MoreLikeThisQueryBuilder extends QueryBuilder implements BoostableQ
     * Sets the text from which the terms should not be selected from.
     */
    public MoreLikeThisQueryBuilder ignoreLike(String... likeText) {
-        this.ignoreDocs = new ArrayList<>();
+        this.unlikeDocs = new ArrayList<>();
        for (String text : likeText) {
-            this.ignoreDocs.add(new Item(text));
+            this.unlikeDocs.add(new Item(text));
        }
        return this;
    }
@ -378,8 +378,8 @@ public class MoreLikeThisQueryBuilder extends QueryBuilder implements BoostableQ
        } else {
            builder.field(likeFieldName, docs);
        }
-        if (!ignoreDocs.isEmpty()) {
-            builder.field(MoreLikeThisQueryParser.Fields.LIKE.getPreferredName(), ignoreDocs);
+        if (!unlikeDocs.isEmpty()) {
+            builder.field(MoreLikeThisQueryParser.Fields.UNLIKE.getPreferredName(), unlikeDocs);
        }
        if (minimumShouldMatch != null) {
            builder.field(MoreLikeThisQueryParser.Fields.MINIMUM_SHOULD_MATCH.getPreferredName(), minimumShouldMatch);
--- a/core/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryParser.java
+++ b/core/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryParser.java
@ -73,7 +73,7 @@ public class MoreLikeThisQueryParser implements QueryParser {
        public static final ParseField DOCUMENT_IDS = new ParseField("ids").withAllDeprecated("like");
        public static final ParseField DOCUMENTS = new ParseField("docs").withAllDeprecated("like");
        public static final ParseField LIKE = new ParseField("like");
-        public static final ParseField IGNORE_LIKE = new ParseField("ignore_like");
+        public static final ParseField UNLIKE = new ParseField("unlike");
        public static final ParseField INCLUDE = new ParseField("include");
    }

@ -109,8 +109,8 @@ public class MoreLikeThisQueryParser implements QueryParser {
        List<String> likeTexts = new ArrayList<>();
        MultiTermVectorsRequest likeItems = new MultiTermVectorsRequest();

-        List<String> ignoreTexts = new ArrayList<>();
-        MultiTermVectorsRequest ignoreItems = new MultiTermVectorsRequest();
+        List<String> unlikeTexts = new ArrayList<>();
+        MultiTermVectorsRequest unlikeItems = new MultiTermVectorsRequest();

        while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
            if (token == XContentParser.Token.FIELD_NAME) {
@ -120,8 +120,8 @@ public class MoreLikeThisQueryParser implements QueryParser {
                    likeTexts.add(parser.text());
                } else if (Fields.LIKE.match(currentFieldName, parseContext.parseFlags())) {
                    parseLikeField(parser, likeTexts, likeItems);
-                } else if (Fields.IGNORE_LIKE.match(currentFieldName, parseContext.parseFlags())) {
-                    parseLikeField(parser, ignoreTexts, ignoreItems);
+                } else if (Fields.UNLIKE.match(currentFieldName, parseContext.parseFlags())) {
+                    parseLikeField(parser, unlikeTexts, unlikeItems);
                } else if (Fields.MIN_TERM_FREQ.match(currentFieldName, parseContext.parseFlags())) {
                    mltQuery.setMinTermFrequency(parser.intValue());
                } else if (Fields.MAX_QUERY_TERMS.match(currentFieldName, parseContext.parseFlags())) {
@ -187,9 +187,9 @@ public class MoreLikeThisQueryParser implements QueryParser {
                    while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
                        parseLikeField(parser, likeTexts, likeItems);
                    }
-                } else if (Fields.IGNORE_LIKE.match(currentFieldName, parseContext.parseFlags())) {
+                } else if (Fields.UNLIKE.match(currentFieldName, parseContext.parseFlags())) {
                    while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
-                        parseLikeField(parser, ignoreTexts, ignoreItems);
+                        parseLikeField(parser, unlikeTexts, unlikeItems);
                    }
                } else {
                    throw new QueryParsingException(parseContext, "[mlt] query does not support [" + currentFieldName + "]");
@ -198,8 +198,8 @@ public class MoreLikeThisQueryParser implements QueryParser {
                if (Fields.LIKE.match(currentFieldName, parseContext.parseFlags())) {
                    parseLikeField(parser, likeTexts, likeItems);
                }
-                else if (Fields.IGNORE_LIKE.match(currentFieldName, parseContext.parseFlags())) {
-                    parseLikeField(parser, ignoreTexts, ignoreItems);
+                else if (Fields.UNLIKE.match(currentFieldName, parseContext.parseFlags())) {
+                    parseLikeField(parser, unlikeTexts, unlikeItems);
                } else {
                    throw new QueryParsingException(parseContext, "[mlt] query does not support [" + currentFieldName + "]");
                }
@ -240,16 +240,15 @@ public class MoreLikeThisQueryParser implements QueryParser {
        if (!likeTexts.isEmpty()) {
            mltQuery.setLikeText(likeTexts);
        }
-        if (!ignoreTexts.isEmpty()) {
-            mltQuery.setIgnoreText(ignoreTexts);
+        if (!unlikeTexts.isEmpty()) {
+            mltQuery.setIgnoreText(unlikeTexts);
        }

        // handle items
        if (!likeItems.isEmpty()) {
            // set default index, type and fields if not specified
            MultiTermVectorsRequest items = likeItems;
-
-            for (TermVectorsRequest item : ignoreItems) {
+            for (TermVectorsRequest item : unlikeItems) {
                items.add(item);
            }

@ -283,10 +282,10 @@ public class MoreLikeThisQueryParser implements QueryParser {
            mltQuery.setLikeText(MoreLikeThisFetchService.getFields(responses, likeItems));

            // getting the Fields for ignored items
-            if (!ignoreItems.isEmpty()) {
-                org.apache.lucene.index.Fields[] ignoreFields = MoreLikeThisFetchService.getFields(responses, ignoreItems);
+            if (!unlikeItems.isEmpty()) {
+                org.apache.lucene.index.Fields[] ignoreFields = MoreLikeThisFetchService.getFields(responses, unlikeItems);
                if (ignoreFields.length > 0) {
-                    mltQuery.setIgnoreText(ignoreFields);
+                    mltQuery.setUnlikeText(ignoreFields);
                }
            }

--- a/docs/reference/query-dsl/mlt-query.asciidoc
+++ b/docs/reference/query-dsl/mlt-query.asciidoc
@ -162,15 +162,16 @@ follows a similar syntax to the `per_field_analyzer` parameter of the
 Additionally, to provide documents not necessarily present in the index,
 <<docs-termvectors-artificial-doc,artificial documents>> are also supported.

+`unlike`:: coming[2.0] 
+The `unlike` parameter is used in conjunction with `like` in order not to
+select terms found in a chosen set of documents. In other words, we could ask
+for documents `like: "Apple"`, but `unlike: "cake crumble tree"`. The syntax
+is the same as `like`.
+
 `fields`::
 A list of fields to fetch and analyze the text from. Defaults to the `_all`
 field for free text and to all possible fields for document inputs.

-`ignore_like`:: coming[2.0]
-The `ignore_like` parameter is used to skip the terms found in a chosen set of
-documents. In other words, we could ask for documents `like: "Apple"`, but
-`ignore_like: "cake crumble tree"`. The syntax is the same as `like`.
-
 `like_text`:: deprecated[2.0,Replaced by `like`]
 The text to find documents like it.

--- a/rest-api-spec/test/mlt/30_unlike.yaml
+++ b/rest-api-spec/test/mlt/30_unlike.yaml
@ -1,5 +1,5 @@
 ---
-"Basic mlt query with ignore like":
+"Basic mlt query with unlike":
  - do:
      indices.create:
        index: test_1
@ -45,7 +45,7 @@
                    _index: test_1
                    _type: test
                    _id: 1
-                ignore_like:
+                unlike:
                    _index: test_1
                    _type: test
                    _id: 3