From 0b92d524a7d906c6f1ec8aa2af30c751b4117919 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christoph=20B=C3=BCscher?= <christoph@elastic.co>
Date: Fri, 2 Sep 2016 18:40:59 +0200
Subject: [PATCH] Add threshold for document ratings for PrecisionAtN and
 ReciprocalRank

PrecisionAtN and ReciprocalRank are binary evaluation metrics by default that
only distinguish between relevant/irrelevant search results. So far we assumed
that relevant documents are labeled with 1 (irrelevant docs with 0) in the
evaluation request, but this is cumbersome if the ratings are provided on a
larger integer scale and would need to get mapped to a 0/1 value.

This change introduces a threshold parameter on the PrecisionAtN and
ReciprocalRank metrics that can be used to set the rating at or above which a
document is considered "relevant". It defaults to 1, so in case of 0/1 ratings
the threshold doesn't have to be set and only ratings with value 0 are
considered to be irrelevant.
---
 .../index/rankeval/PrecisionAtN.java          | 37 ++++++++++++++----
 .../index/rankeval/ReciprocalRank.java        | 38 ++++++++++++++-----
 .../index/rankeval/PrecisionAtNTests.java     | 25 +++++++++++-
 .../index/rankeval/ReciprocalRankTests.java   | 25 ++++++++++++
 4 files changed, 106 insertions(+), 19 deletions(-)

diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/PrecisionAtN.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/PrecisionAtN.java
index 5c49e0f16ea..9aa04185e35 100644
--- a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/PrecisionAtN.java
+++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/PrecisionAtN.java
@@ -37,16 +37,30 @@ import javax.naming.directory.SearchResult;
 
 /**
  * Evaluate Precision at N, N being the number of search results to consider for precision calculation.
- *
  * Documents of unkonwn quality are ignored in the precision at n computation and returned by document id.
+ * By default documents with a rating equal to or greater than 1 are considered to be "relevant" for the precision
+ * calculation. This value can be changed using the "relevant_rating_threshold" parameter.
  * */
 public class PrecisionAtN extends RankedListQualityMetric {
 
     /** Number of results to check against a given set of relevant results. */
     private int n;
 
+    /** ratings equal to or above this value will be considered relevant. */
+    private int relevantRatingThreshhold = 1;
+
     public static final String NAME = "precisionatn";
 
+    private static final ParseField SIZE_FIELD = new ParseField("size");
+    private static final ParseField RELEVANT_RATING_FIELD = new ParseField("relevant_rating_threshold");
+    private static final ConstructingObjectParser<PrecisionAtN, ParseFieldMatcherSupplier> PARSER = new ConstructingObjectParser<>(
+            "precision_at", a -> new PrecisionAtN((Integer) a[0]));
+
+    static {
+        PARSER.declareInt(ConstructingObjectParser.constructorArg(), SIZE_FIELD);
+        PARSER.declareInt(PrecisionAtN::setRelevantRatingThreshhold, RELEVANT_RATING_FIELD);
+    }
+
     public PrecisionAtN(StreamInput in) throws IOException {
         n = in.readInt();
     }
@@ -82,12 +96,19 @@ public class PrecisionAtN extends RankedListQualityMetric {
         return n;
     }
 
-    private static final ParseField SIZE_FIELD = new ParseField("size");
-    private static final ConstructingObjectParser<PrecisionAtN, ParseFieldMatcherSupplier> PARSER = new ConstructingObjectParser<>(
-            "precision_at", a -> new PrecisionAtN((Integer) a[0]));
+    /**
+     * Sets the rating threshold at or above which ratings are considered to be "relevant" for this metric.
+     * */
+    public void setRelevantRatingThreshhold(int threshold) {
+        this.relevantRatingThreshhold = threshold;
+    }
 
-    static {
-        PARSER.declareInt(ConstructingObjectParser.constructorArg(), SIZE_FIELD);
+    /**
+     * Returns the rating threshold at or above which ratings are considered to be "relevant" for this metric.
+     * Defaults to 1.
+     * */
+    public int getRelevantRatingThreshold() {
+        return relevantRatingThreshhold ;
     }
 
     public static PrecisionAtN fromXContent(XContentParser parser, ParseFieldMatcherSupplier matcher) {
@@ -103,9 +124,9 @@ public class PrecisionAtN extends RankedListQualityMetric {
         Collection<RatedDocumentKey> relevantDocIds = new ArrayList<>();
         Collection<RatedDocumentKey> irrelevantDocIds = new ArrayList<>();
         for (RatedDocument doc : ratedDocs) {
-            if (Rating.RELEVANT.equals(RatingMapping.mapTo(doc.getRating()))) {
+            if (doc.getRating() >= this.relevantRatingThreshhold) {
                 relevantDocIds.add(doc.getKey());
-            } else if (Rating.IRRELEVANT.equals(RatingMapping.mapTo(doc.getRating()))) {
+            } else {
                 irrelevantDocIds.add(doc.getKey());
             }
         }
diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/ReciprocalRank.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/ReciprocalRank.java
index dd4e710859b..3279c40734c 100644
--- a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/ReciprocalRank.java
+++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/ReciprocalRank.java
@@ -26,8 +26,6 @@ import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.xcontent.ObjectParser;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentParser;
-import org.elasticsearch.index.rankeval.PrecisionAtN.Rating;
-import org.elasticsearch.index.rankeval.PrecisionAtN.RatingMapping;
 import org.elasticsearch.search.SearchHit;
 
 import java.io.IOException;
@@ -41,6 +39,8 @@ import javax.naming.directory.SearchResult;
 
 /**
  * Evaluate reciprocal rank.
+ * By default documents with a rating equal to or greater than 1 are considered to be "relevant" for the reciprocal rank
+ * calculation. This value can be changed using the "relevant_rating_threshold" parameter.
  * */
 public class ReciprocalRank extends RankedListQualityMetric {
 
@@ -48,6 +48,9 @@ public class ReciprocalRank extends RankedListQualityMetric {
     public static final int DEFAULT_MAX_ACCEPTABLE_RANK = 10;
     private int maxAcceptableRank = DEFAULT_MAX_ACCEPTABLE_RANK;
 
+    /** ratings equal to or above this value will be considered relevant. */
+    private int relevantRatingThreshhold = 1;
+
     /**
      * Initializes maxAcceptableRank with 10
      */
@@ -90,6 +93,21 @@ public class ReciprocalRank extends RankedListQualityMetric {
         return this.maxAcceptableRank;
     }
 
+    /**
+     * Sets the rating threshold at or above which ratings are considered to be "relevant" for this metric.
+     * */
+    public void setRelevantRatingThreshhold(int threshold) {
+        this.relevantRatingThreshhold = threshold;
+    }
+
+    /**
+     * Returns the rating threshold at or above which ratings are considered to be "relevant" for this metric.
+     * Defaults to 1.
+     * */
+    public int getRelevantRatingThreshold() {
+        return relevantRatingThreshhold ;
+    }
+
     /**
      * Compute ReciprocalRank based on provided relevant document IDs.
      * @return reciprocal Rank for above {@link SearchResult} list.
@@ -99,9 +117,9 @@ public class ReciprocalRank extends RankedListQualityMetric {
         Set<RatedDocumentKey> relevantDocIds = new HashSet<>();
         Set<RatedDocumentKey> irrelevantDocIds = new HashSet<>();
         for (RatedDocument doc : ratedDocs) {
-            if (Rating.RELEVANT.equals(RatingMapping.mapTo(doc.getRating()))) {
+            if (doc.getRating() >= this.relevantRatingThreshhold) {
                 relevantDocIds.add(doc.getKey());
-            } else if (Rating.IRRELEVANT.equals(RatingMapping.mapTo(doc.getRating()))) {
+            } else {
                 irrelevantDocIds.add(doc.getKey());
             }
         }
@@ -110,16 +128,14 @@ public class ReciprocalRank extends RankedListQualityMetric {
         int firstRelevant = -1;
         boolean found = false;
         for (int i = 0; i < hits.length; i++) {
-            // TODO here we use index/type/id triple not for a rated document but an unrated document in the search hits. Maybe rename?
-            RatedDocumentKey id = new RatedDocumentKey(hits[i].getIndex(), hits[i].getType(), hits[i].getId());
-            if (relevantDocIds.contains(id)) {
+            RatedDocumentKey key = new RatedDocumentKey(hits[i].getIndex(), hits[i].getType(), hits[i].getId());
+            if (relevantDocIds.contains(key)) {
                 if (found == false && i < maxAcceptableRank) {
-                    firstRelevant = i + 1; // add one because rank is not
-                                           // 0-based
+                    firstRelevant = i + 1; // add one because rank is not 0-based
                     found = true;
                 }
             } else {
-                unknownDocIds.add(id);
+                unknownDocIds.add(key);
             }
         }
 
@@ -133,11 +149,13 @@ public class ReciprocalRank extends RankedListQualityMetric {
     }
 
     private static final ParseField MAX_RANK_FIELD = new ParseField("max_acceptable_rank");
+    private static final ParseField RELEVANT_RATING_FIELD = new ParseField("relevant_rating_threshold");
     private static final ObjectParser<ReciprocalRank, ParseFieldMatcherSupplier> PARSER = new ObjectParser<>(
             "reciprocal_rank", () -> new ReciprocalRank());
 
     static {
         PARSER.declareInt(ReciprocalRank::setMaxAcceptableRank, MAX_RANK_FIELD);
+        PARSER.declareInt(ReciprocalRank::setRelevantRatingThreshhold, RELEVANT_RATING_FIELD);
     }
 
     public static ReciprocalRank fromXContent(XContentParser parser, ParseFieldMatcherSupplier matcher) {
diff --git a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/PrecisionAtNTests.java b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/PrecisionAtNTests.java
index d668b21630e..381ddc10023 100644
--- a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/PrecisionAtNTests.java
+++ b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/PrecisionAtNTests.java
@@ -64,6 +64,27 @@ public class PrecisionAtNTests extends ESTestCase {
         assertEquals((double) 4 / 5, (new PrecisionAtN(5)).evaluate(hits, rated).getQualityLevel(), 0.00001);
     }
 
+    /**
+     * test that the relevant rating threshold can be set to something larger than 1.
+     * e.g. we set it to 2 here and expect docs 0-1 to be not relevant, docs 2-4 to be relevant
+     */
+    public void testPrecisionAtFiveRelevanceThreshold() throws IOException, InterruptedException, ExecutionException {
+        List<RatedDocument> rated = new ArrayList<>();
+        rated.add(new RatedDocument(new RatedDocumentKey("test", "testtype", "0"), 0));
+        rated.add(new RatedDocument(new RatedDocumentKey("test", "testtype", "1"), 1));
+        rated.add(new RatedDocument(new RatedDocumentKey("test", "testtype", "2"), 2));
+        rated.add(new RatedDocument(new RatedDocumentKey("test", "testtype", "3"), 3));
+        rated.add(new RatedDocument(new RatedDocumentKey("test", "testtype", "4"), 4));
+        InternalSearchHit[] hits = new InternalSearchHit[5];
+        for (int i = 0; i < 5; i++) {
+            hits[i] = new InternalSearchHit(i, i+"", new Text("testtype"), Collections.emptyMap());
+            hits[i].shard(new SearchShardTarget("testnode", new Index("test", "uuid"), 0));
+        }
+        PrecisionAtN precisionAtN = new PrecisionAtN(5);
+        precisionAtN.setRelevantRatingThreshhold(2);
+        assertEquals((double) 3 / 5, precisionAtN.evaluate(hits, rated).getQualityLevel(), 0.00001);
+    }
+
     public void testPrecisionAtFiveCorrectIndex() throws IOException, InterruptedException, ExecutionException {
         List<RatedDocument> rated = new ArrayList<>();
         rated.add(new RatedDocument(new RatedDocumentKey("test_other", "testtype", "0"), Rating.RELEVANT.ordinal()));
@@ -96,11 +117,13 @@ public class PrecisionAtNTests extends ESTestCase {
 
     public void testParseFromXContent() throws IOException {
         String xContent = " {\n"
-         + "   \"size\": 10\n"
+         + "   \"size\": 10,\n"
+         + "   \"relevant_rating_threshold\" : 2"
          + "}";
         XContentParser parser = XContentFactory.xContent(xContent).createParser(xContent);
         PrecisionAtN precicionAt = PrecisionAtN.fromXContent(parser, () -> ParseFieldMatcher.STRICT);
         assertEquals(10, precicionAt.getN());
+        assertEquals(2, precicionAt.getRelevantRatingThreshold());
     }
 
     public void testCombine() {
diff --git a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/ReciprocalRankTests.java b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/ReciprocalRankTests.java
index b524e763dc7..44c90d1eca3 100644
--- a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/ReciprocalRankTests.java
+++ b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/ReciprocalRankTests.java
@@ -26,10 +26,12 @@ import org.elasticsearch.search.SearchShardTarget;
 import org.elasticsearch.search.internal.InternalSearchHit;
 import org.elasticsearch.test.ESTestCase;
 
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
 import java.util.Vector;
+import java.util.concurrent.ExecutionException;
 
 import static java.util.Collections.emptyList;
 
@@ -103,6 +105,29 @@ public class ReciprocalRankTests extends ESTestCase {
         assertEquals(1.0 / (relevantAt + 1), evaluation.getQualityLevel(), Double.MIN_VALUE);
     }
 
+    /**
+     * test that the relevant rating threshold can be set to something larger than 1.
+     * e.g. we set it to 2 here and expect docs 0-1 to be not relevant, so the first relevant doc (doc 2)
+     * has the third ranking position, so RR should be 1/3
+     */
+    public void testPrecisionAtFiveRelevanceThreshold() throws IOException, InterruptedException, ExecutionException {
+        List<RatedDocument> rated = new ArrayList<>();
+        rated.add(new RatedDocument(new RatedDocumentKey("test", "testtype", "0"), 0));
+        rated.add(new RatedDocument(new RatedDocumentKey("test", "testtype", "1"), 1));
+        rated.add(new RatedDocument(new RatedDocumentKey("test", "testtype", "2"), 2));
+        rated.add(new RatedDocument(new RatedDocumentKey("test", "testtype", "3"), 3));
+        rated.add(new RatedDocument(new RatedDocumentKey("test", "testtype", "4"), 4));
+        InternalSearchHit[] hits = new InternalSearchHit[5];
+        for (int i = 0; i < 5; i++) {
+            hits[i] = new InternalSearchHit(i, i+"", new Text("testtype"), Collections.emptyMap());
+            hits[i].shard(new SearchShardTarget("testnode", new Index("test", "uuid"), 0));
+        }
+
+        ReciprocalRank reciprocalRank = new ReciprocalRank();
+        reciprocalRank.setRelevantRatingThreshhold(2);
+        assertEquals((double) 1 / 3, reciprocalRank.evaluate(hits, rated).getQualityLevel(), 0.00001);
+    }
+
     public void testCombine() {
         ReciprocalRank reciprocalRank = new ReciprocalRank();
         Vector<EvalQueryQuality> partialResults = new Vector<>(3);