From 0b92d524a7d906c6f1ec8aa2af30c751b4117919 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Fri, 2 Sep 2016 18:40:59 +0200 Subject: [PATCH] Add threshold for document ratings for PrecisionAtN and ReciprocalRank PrecisionAtN and ReciprocalRank are binary evaluation metrics by default that only distinguish between relevant/irrelevant search results. So far we assumed that relevant documents are labeled with 1 (irrelevant docs with 0) in the evaluation request, but this is cumbersome if the ratings are provided on a larger integer scale and would need to get mapped to a 0/1 value. This change introduces a threshold parameter on the PrecisionAtN and ReciprocalRank metrics that can be used to set the rating at or above which a document is considered "relevant". It defaults to 1, so in case of 0/1 ratings the threshold doesn't have to be set and only ratings with value 0 are considered to be irrelevant. --- .../index/rankeval/PrecisionAtN.java | 37 ++++++++++++++---- .../index/rankeval/ReciprocalRank.java | 38 ++++++++++++++----- .../index/rankeval/PrecisionAtNTests.java | 25 +++++++++++- .../index/rankeval/ReciprocalRankTests.java | 25 ++++++++++++ 4 files changed, 106 insertions(+), 19 deletions(-) diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/PrecisionAtN.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/PrecisionAtN.java index 5c49e0f16ea..9aa04185e35 100644 --- a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/PrecisionAtN.java +++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/PrecisionAtN.java @@ -37,16 +37,30 @@ import javax.naming.directory.SearchResult; /** * Evaluate Precision at N, N being the number of search results to consider for precision calculation. - * * Documents of unkonwn quality are ignored in the precision at n computation and returned by document id. 
+ * By default documents with a rating equal to or greater than 1 are considered to be "relevant" for the precision + * calculation. This value can be changed using the "relevant_rating_threshold" parameter. * */ public class PrecisionAtN extends RankedListQualityMetric { /** Number of results to check against a given set of relevant results. */ private int n; + /** ratings equal to or above this value will be considered relevant. */ + private int relevantRatingThreshhold = 1; + public static final String NAME = "precisionatn"; + private static final ParseField SIZE_FIELD = new ParseField("size"); + private static final ParseField RELEVANT_RATING_FIELD = new ParseField("relevant_rating_threshold"); + private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( + "precision_at", a -> new PrecisionAtN((Integer) a[0])); + + static { + PARSER.declareInt(ConstructingObjectParser.constructorArg(), SIZE_FIELD); + PARSER.declareInt(PrecisionAtN::setRelevantRatingThreshhold, RELEVANT_RATING_FIELD); + } + public PrecisionAtN(StreamInput in) throws IOException { n = in.readInt(); } @@ -82,12 +96,19 @@ public class PrecisionAtN extends RankedListQualityMetric { return n; } - private static final ParseField SIZE_FIELD = new ParseField("size"); - private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( - "precision_at", a -> new PrecisionAtN((Integer) a[0])); + /** + * Sets the rating threshold at or above which ratings are considered to be "relevant" for this metric. + * */ + public void setRelevantRatingThreshhold(int threshold) { + this.relevantRatingThreshhold = threshold; + } - static { - PARSER.declareInt(ConstructingObjectParser.constructorArg(), SIZE_FIELD); + /** + * Returns the rating threshold at or above which ratings are considered to be "relevant" for this metric. + * Defaults to 1. 
+ * */ + public int getRelevantRatingThreshold() { + return relevantRatingThreshhold ; } public static PrecisionAtN fromXContent(XContentParser parser, ParseFieldMatcherSupplier matcher) { @@ -103,9 +124,9 @@ public class PrecisionAtN extends RankedListQualityMetric { Collection relevantDocIds = new ArrayList<>(); Collection irrelevantDocIds = new ArrayList<>(); for (RatedDocument doc : ratedDocs) { - if (Rating.RELEVANT.equals(RatingMapping.mapTo(doc.getRating()))) { + if (doc.getRating() >= this.relevantRatingThreshhold) { relevantDocIds.add(doc.getKey()); - } else if (Rating.IRRELEVANT.equals(RatingMapping.mapTo(doc.getRating()))) { + } else { irrelevantDocIds.add(doc.getKey()); } } diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/ReciprocalRank.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/ReciprocalRank.java index dd4e710859b..3279c40734c 100644 --- a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/ReciprocalRank.java +++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/ReciprocalRank.java @@ -26,8 +26,6 @@ import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.xcontent.ObjectParser; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; -import org.elasticsearch.index.rankeval.PrecisionAtN.Rating; -import org.elasticsearch.index.rankeval.PrecisionAtN.RatingMapping; import org.elasticsearch.search.SearchHit; import java.io.IOException; @@ -41,6 +39,8 @@ import javax.naming.directory.SearchResult; /** * Evaluate reciprocal rank. + * By default documents with a rating equal to or greater than 1 are considered to be "relevant" for the reciprocal rank + * calculation. This value can be changed using the "relevant_rating_threshold" parameter. 
* */ public class ReciprocalRank extends RankedListQualityMetric { @@ -48,6 +48,9 @@ public class ReciprocalRank extends RankedListQualityMetric { public static final int DEFAULT_MAX_ACCEPTABLE_RANK = 10; private int maxAcceptableRank = DEFAULT_MAX_ACCEPTABLE_RANK; + /** ratings equal to or above this value will be considered relevant. */ + private int relevantRatingThreshhold = 1; + /** * Initializes maxAcceptableRank with 10 */ @@ -90,6 +93,21 @@ public class ReciprocalRank extends RankedListQualityMetric { return this.maxAcceptableRank; } + /** + * Sets the rating threshold at or above which ratings are considered to be "relevant" for this metric. + * */ + public void setRelevantRatingThreshhold(int threshold) { + this.relevantRatingThreshhold = threshold; + } + + /** + * Returns the rating threshold at or above which ratings are considered to be "relevant" for this metric. + * Defaults to 1. + * */ + public int getRelevantRatingThreshold() { + return relevantRatingThreshhold ; + } + /** * Compute ReciprocalRank based on provided relevant document IDs. * @return reciprocal Rank for above {@link SearchResult} list. @@ -99,9 +117,9 @@ public class ReciprocalRank extends RankedListQualityMetric { Set relevantDocIds = new HashSet<>(); Set irrelevantDocIds = new HashSet<>(); for (RatedDocument doc : ratedDocs) { - if (Rating.RELEVANT.equals(RatingMapping.mapTo(doc.getRating()))) { + if (doc.getRating() >= this.relevantRatingThreshhold) { relevantDocIds.add(doc.getKey()); - } else if (Rating.IRRELEVANT.equals(RatingMapping.mapTo(doc.getRating()))) { + } else { irrelevantDocIds.add(doc.getKey()); } } @@ -110,16 +128,14 @@ public class ReciprocalRank extends RankedListQualityMetric { int firstRelevant = -1; boolean found = false; for (int i = 0; i < hits.length; i++) { - // TODO here we use index/type/id triple not for a rated document but an unrated document in the search hits. Maybe rename? 
- RatedDocumentKey id = new RatedDocumentKey(hits[i].getIndex(), hits[i].getType(), hits[i].getId()); - if (relevantDocIds.contains(id)) { + RatedDocumentKey key = new RatedDocumentKey(hits[i].getIndex(), hits[i].getType(), hits[i].getId()); + if (relevantDocIds.contains(key)) { if (found == false && i < maxAcceptableRank) { - firstRelevant = i + 1; // add one because rank is not - // 0-based + firstRelevant = i + 1; // add one because rank is not 0-based found = true; } } else { - unknownDocIds.add(id); + unknownDocIds.add(key); } } @@ -133,11 +149,13 @@ public class ReciprocalRank extends RankedListQualityMetric { } private static final ParseField MAX_RANK_FIELD = new ParseField("max_acceptable_rank"); + private static final ParseField RELEVANT_RATING_FIELD = new ParseField("relevant_rating_threshold"); private static final ObjectParser PARSER = new ObjectParser<>( "reciprocal_rank", () -> new ReciprocalRank()); static { PARSER.declareInt(ReciprocalRank::setMaxAcceptableRank, MAX_RANK_FIELD); + PARSER.declareInt(ReciprocalRank::setRelevantRatingThreshhold, RELEVANT_RATING_FIELD); } public static ReciprocalRank fromXContent(XContentParser parser, ParseFieldMatcherSupplier matcher) { diff --git a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/PrecisionAtNTests.java b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/PrecisionAtNTests.java index d668b21630e..381ddc10023 100644 --- a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/PrecisionAtNTests.java +++ b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/PrecisionAtNTests.java @@ -64,6 +64,27 @@ public class PrecisionAtNTests extends ESTestCase { assertEquals((double) 4 / 5, (new PrecisionAtN(5)).evaluate(hits, rated).getQualityLevel(), 0.00001); } + /** + * test that the relevant rating threshold can be set to something larger than 1. + * e.g. 
we set it to 2 here and expect docs 0-1 to be not relevant, docs 2-4 to be relevant + */ + public void testPrecisionAtFiveRelevanceThreshold() throws IOException, InterruptedException, ExecutionException { + List rated = new ArrayList<>(); + rated.add(new RatedDocument(new RatedDocumentKey("test", "testtype", "0"), 0)); + rated.add(new RatedDocument(new RatedDocumentKey("test", "testtype", "1"), 1)); + rated.add(new RatedDocument(new RatedDocumentKey("test", "testtype", "2"), 2)); + rated.add(new RatedDocument(new RatedDocumentKey("test", "testtype", "3"), 3)); + rated.add(new RatedDocument(new RatedDocumentKey("test", "testtype", "4"), 4)); + InternalSearchHit[] hits = new InternalSearchHit[5]; + for (int i = 0; i < 5; i++) { + hits[i] = new InternalSearchHit(i, i+"", new Text("testtype"), Collections.emptyMap()); + hits[i].shard(new SearchShardTarget("testnode", new Index("test", "uuid"), 0)); + } + PrecisionAtN precisionAtN = new PrecisionAtN(5); + precisionAtN.setRelevantRatingThreshhold(2); + assertEquals((double) 3 / 5, precisionAtN.evaluate(hits, rated).getQualityLevel(), 0.00001); + } + public void testPrecisionAtFiveCorrectIndex() throws IOException, InterruptedException, ExecutionException { List rated = new ArrayList<>(); rated.add(new RatedDocument(new RatedDocumentKey("test_other", "testtype", "0"), Rating.RELEVANT.ordinal())); @@ -96,11 +117,13 @@ public class PrecisionAtNTests extends ESTestCase { public void testParseFromXContent() throws IOException { String xContent = " {\n" - + " \"size\": 10\n" + + " \"size\": 10,\n" + + " \"relevant_rating_threshold\" : 2" + "}"; XContentParser parser = XContentFactory.xContent(xContent).createParser(xContent); PrecisionAtN precicionAt = PrecisionAtN.fromXContent(parser, () -> ParseFieldMatcher.STRICT); assertEquals(10, precicionAt.getN()); + assertEquals(2, precicionAt.getRelevantRatingThreshold()); } public void testCombine() { diff --git 
a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/ReciprocalRankTests.java b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/ReciprocalRankTests.java index b524e763dc7..44c90d1eca3 100644 --- a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/ReciprocalRankTests.java +++ b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/ReciprocalRankTests.java @@ -26,10 +26,12 @@ import org.elasticsearch.search.SearchShardTarget; import org.elasticsearch.search.internal.InternalSearchHit; import org.elasticsearch.test.ESTestCase; +import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Vector; +import java.util.concurrent.ExecutionException; import static java.util.Collections.emptyList; @@ -103,6 +105,29 @@ public class ReciprocalRankTests extends ESTestCase { assertEquals(1.0 / (relevantAt + 1), evaluation.getQualityLevel(), Double.MIN_VALUE); } + /** + * test that the relevant rating threshold can be set to something larger than 1. + * e.g. 
we set it to 2 here and expect docs 0-1 to be not relevant, so first relevant doc has + * third ranking position, so RR should be 1/3 + */ + public void testPrecisionAtFiveRelevanceThreshold() throws IOException, InterruptedException, ExecutionException { + List rated = new ArrayList<>(); + rated.add(new RatedDocument(new RatedDocumentKey("test", "testtype", "0"), 0)); + rated.add(new RatedDocument(new RatedDocumentKey("test", "testtype", "1"), 1)); + rated.add(new RatedDocument(new RatedDocumentKey("test", "testtype", "2"), 2)); + rated.add(new RatedDocument(new RatedDocumentKey("test", "testtype", "3"), 3)); + rated.add(new RatedDocument(new RatedDocumentKey("test", "testtype", "4"), 4)); + InternalSearchHit[] hits = new InternalSearchHit[5]; + for (int i = 0; i < 5; i++) { + hits[i] = new InternalSearchHit(i, i+"", new Text("testtype"), Collections.emptyMap()); + hits[i].shard(new SearchShardTarget("testnode", new Index("test", "uuid"), 0)); + } + + ReciprocalRank reciprocalRank = new ReciprocalRank(); + reciprocalRank.setRelevantRatingThreshhold(2); + assertEquals((double) 1 / 3, reciprocalRank.evaluate(hits, rated).getQualityLevel(), 0.00001); + } + public void testCombine() { ReciprocalRank reciprocalRank = new ReciprocalRank(); Vector partialResults = new Vector<>(3);