diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/ReciprocalRank.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/ReciprocalRank.java
index 31597507b27..4162896ef7d 100644
--- a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/ReciprocalRank.java
+++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/ReciprocalRank.java
@@ -19,7 +19,9 @@
 
 package org.elasticsearch.index.rankeval;
 
+import org.elasticsearch.common.ParseField;
 import org.elasticsearch.common.ParseFieldMatcherSupplier;
+import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.xcontent.ObjectParser;
 import org.elasticsearch.common.xcontent.XContentParser;
@@ -42,15 +44,57 @@ import javax.naming.directory.SearchResult;
 public class ReciprocalRank extends RankedListQualityMetric {
 
     public static final String NAME = "reciprocal_rank";
-    // the rank to use if the result list does not contain any relevant document
-    // TODO decide on better default or make configurable
-    private static final int RANK_IF_NOT_FOUND = Integer.MAX_VALUE;
+    public static final int DEFAULT_MAX_ACCEPTABLE_RANK = 10;
+    private int maxAcceptableRank = DEFAULT_MAX_ACCEPTABLE_RANK;
+
+    /**
+     * Initializes maxAcceptableRank with 10
+     */
+    public ReciprocalRank() {
+        // use defaults
+    }
+
+    /**
+     * @param maxAcceptableRank
+     *            maximal acceptable rank. Must be positive.
+     */
+    public ReciprocalRank(int maxAcceptableRank) {
+        if (maxAcceptableRank <= 0) {
+            throw new IllegalArgumentException("maximal acceptable rank needs to be positive but was [" + maxAcceptableRank + "]");
+        }
+        this.maxAcceptableRank = maxAcceptableRank;
+    }
+
+    /**
+     * Reads the metric from a stream; must mirror what {@link #writeTo(StreamOutput)} writes.
+     */
+    public ReciprocalRank(StreamInput in) throws IOException {
+        this.maxAcceptableRank = in.readVInt();
+    }
 
     @Override
     public String getWriteableName() {
         return NAME;
     }
 
+    /**
+     * @param maxAcceptableRank
+     *            maximal acceptable rank. Must be positive.
+     */
+    public void setMaxAcceptableRank(int maxAcceptableRank) {
+        if (maxAcceptableRank <= 0) {
+            throw new IllegalArgumentException("maximal acceptable rank needs to be positive but was [" + maxAcceptableRank + "]");
+        }
+        this.maxAcceptableRank = maxAcceptableRank;
+    }
+
+    /**
+     * @return the maximal rank up to which a relevant document is still taken into account
+     */
+    public int getMaxAcceptableRank() {
+        return this.maxAcceptableRank;
+    }
+
     /**
      * Compute ReciprocalRank based on provided relevant document IDs.
      * @return reciprocal Rank for above {@link SearchResult} list.
@@ -67,31 +111,40 @@ public class ReciprocalRank extends RankedListQualityMetric {
             }
         }
 
-        Collection unknownDocIds = new ArrayList();
-        int firstRelevant = RANK_IF_NOT_FOUND;
+        Collection<String> unknownDocIds = new ArrayList<>();
+        int firstRelevant = -1;
         boolean found = false;
         for (int i = 0; i < hits.length; i++) {
             String id = hits[i].getId();
-            if (relevantDocIds.contains(id) && found == false) {
-                firstRelevant = i + 1; // add one because rank is not 0-based
-                found = true;
-                continue;
+            if (relevantDocIds.contains(id)) {
+                if (found == false && i < maxAcceptableRank) {
+                    firstRelevant = i + 1; // add one because rank is not
+                                           // 0-based
+                    found = true;
+                }
             } else if (irrelevantDocIds.contains(id) == false) {
+                // neither rated relevant nor irrelevant: report the hit as unknown
                 unknownDocIds.add(id);
             }
         }
 
-        double reciprocalRank = 1.0d / firstRelevant;
+        double reciprocalRank = (firstRelevant == -1) ? 0 : 1.0d / firstRelevant;
         return new EvalQueryQuality(reciprocalRank, unknownDocIds);
     }
 
     @Override
     public void writeTo(StreamOutput out) throws IOException {
+        out.writeVInt(maxAcceptableRank);
     }
 
+    private static final ParseField MAX_RANK_FIELD = new ParseField("max_acceptable_rank");
     private static final ObjectParser PARSER = new ObjectParser<>(
             "reciprocal_rank", () -> new ReciprocalRank());
 
+    static {
+        PARSER.declareInt(ReciprocalRank::setMaxAcceptableRank, MAX_RANK_FIELD);
+    }
+
     public static ReciprocalRank fromXContent(XContentParser parser, ParseFieldMatcherSupplier matcher) {
         return PARSER.apply(parser, matcher);
     }
diff --git a/modules/rank-eval/src/test/java/org/elasticsearch/action/quality/ReciprocalRankTests.java b/modules/rank-eval/src/test/java/org/elasticsearch/action/quality/ReciprocalRankTests.java
index db16972e452..d51b8074757 100644
--- a/modules/rank-eval/src/test/java/org/elasticsearch/action/quality/ReciprocalRankTests.java
+++ b/modules/rank-eval/src/test/java/org/elasticsearch/action/quality/ReciprocalRankTests.java
@@ -34,6 +34,38 @@ import java.util.List;
 
 public class ReciprocalRankTests extends ESTestCase {
 
+    public void testMaxAcceptableRank() {
+        ReciprocalRank reciprocalRank = new ReciprocalRank();
+        assertEquals(ReciprocalRank.DEFAULT_MAX_ACCEPTABLE_RANK, reciprocalRank.getMaxAcceptableRank());
+
+        int relevantAt = 5;
+        int rankAtFirstRelevant = relevantAt + 1;
+        // the random limit must not cut off the relevant hit, otherwise the first evaluation below yields 0
+        int maxRank = randomIntBetween(rankAtFirstRelevant, 100);
+        reciprocalRank.setMaxAcceptableRank(maxRank);
+        assertEquals(maxRank, reciprocalRank.getMaxAcceptableRank());
+
+        SearchHit[] hits = new SearchHit[10];
+        for (int i = 0; i < 10; i++) {
+            hits[i] = new InternalSearchHit(i, Integer.toString(i), new Text("type"), Collections.emptyMap());
+        }
+        List<RatedDocument> ratedDocs = new ArrayList<>();
+        for (int i = 0; i < 10; i++) {
+            if (i == relevantAt) {
+                ratedDocs.add(new RatedDocument(Integer.toString(i), Rating.RELEVANT.ordinal()));
+            } else {
+                ratedDocs.add(new RatedDocument(Integer.toString(i), Rating.IRRELEVANT.ordinal()));
+            }
+        }
+
+        EvalQueryQuality evaluation = reciprocalRank.evaluate(hits, ratedDocs);
+        assertEquals(1.0 / rankAtFirstRelevant, evaluation.getQualityLevel(), Double.MIN_VALUE);
+
+        reciprocalRank = new ReciprocalRank(rankAtFirstRelevant - 1);
+        evaluation = reciprocalRank.evaluate(hits, ratedDocs);
+        assertEquals(0.0, evaluation.getQualityLevel(), Double.MIN_VALUE);
+    }
+
     public void testEvaluationOneRelevantInResults() {
         ReciprocalRank reciprocalRank = new ReciprocalRank();
         SearchHit[] hits = new SearchHit[10];
@@ -63,6 +95,6 @@ public class ReciprocalRankTests extends ESTestCase {
         }
         List ratedDocs = new ArrayList<>();
         EvalQueryQuality evaluation = reciprocalRank.evaluate(hits, ratedDocs);
-        assertEquals(1.0 / Integer.MAX_VALUE, evaluation.getQualityLevel(), Double.MIN_VALUE);
+        assertEquals(0.0, evaluation.getQualityLevel(), Double.MIN_VALUE);
     }
 }
diff --git a/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/10_basic.yaml b/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/10_basic.yaml
index 6707416101f..f2d6fbaf767 100644
--- a/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/10_basic.yaml
+++ b/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/10_basic.yaml
@@ -124,5 +124,34 @@
         }
 
   - match: {rank_eval.spec_id: "cities_qa_queries"}
-  # average is (1/3 + 1/2)/2 = 5/12 ~ 0.41666666666666663
+# average is (1/3 + 1/2)/2 = 5/12 ~ 0.41666666666666663
   - match: {rank_eval.quality_level: 0.41666666666666663}
+
+  - do:
+      rank_eval:
+        body: {
+            "spec_id" : "cities_qa_queries",
+            "requests" : [
+                {
+                    "id": "amsterdam_query",
+                    "request": { "query": { "match" : {"text" : "amsterdam" }}},
+                    # doc4 should be returned in third position, beyond max_acceptable_rank 2, so reciprocal rank is 0
+                    "ratings": [{ "doc4": 1}]
+                },
+                {
+                    "id" : "berlin_query",
+                    "request": { "query": { "match" : { "text" : "berlin" } }, "size" : 10 },
+                    # doc1 should be returned in first position, doc3 in second, so reciprocal rank is 1/2
+                    "ratings": [{"doc4": 1}]
+                }
+            ],
+            "metric" : {
+                "reciprocal_rank": {
+                    "max_acceptable_rank" : 2
+                }
+            }
+        }
+
+  - match: {rank_eval.spec_id: "cities_qa_queries"}
+# average is (0 + 1/2)/2 = 1/4
+  - match: {rank_eval.quality_level: 0.25}