diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/EvalQueryQuality.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/EvalQueryQuality.java index 54edd722126..98bc7f0d809 100644 --- a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/EvalQueryQuality.java +++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/EvalQueryQuality.java @@ -21,9 +21,11 @@ package org.elasticsearch.index.rankeval; import java.util.Collection; -/** Returned for each search specification. Summarizes the measured quality metric for this search request - * and adds the document ids found that were in the search result but not annotated in the original request. - * */ +/** + * Returned for each search specification. Summarizes the measured quality + * metric for this search request and adds the document ids found that were in + * the search result but not annotated in the original request. + */ public class EvalQueryQuality { private double qualityLevel; diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankedListQualityMetric.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankedListQualityMetric.java index 829be8a9bc5..f36cca9cef5 100644 --- a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankedListQualityMetric.java +++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankedListQualityMetric.java @@ -58,6 +58,9 @@ public abstract class RankedListQualityMetric implements NamedWriteable { case PrecisionAtN.NAME: rc = PrecisionAtN.fromXContent(parser, context); break; + case ReciprocalRank.NAME: + rc = ReciprocalRank.fromXContent(parser, context); + break; default: throw new ParsingException(parser.getTokenLocation(), "[_na] unknown query metric name [{}]", metricName); } diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/ReciprocalRank.java 
b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/ReciprocalRank.java new file mode 100644 index 00000000000..31597507b27 --- /dev/null +++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/ReciprocalRank.java @@ -0,0 +1,98 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.rankeval; + +import org.elasticsearch.common.ParseFieldMatcherSupplier; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.xcontent.ObjectParser; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.index.rankeval.PrecisionAtN.Rating; +import org.elasticsearch.index.rankeval.PrecisionAtN.RatingMapping; +import org.elasticsearch.search.SearchHit; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import javax.naming.directory.SearchResult; + +/** + * Evaluate reciprocal rank. 
+ * <p>
+ * The quality level of a result list is {@code 1 / rank}, where {@code rank}
+ * is the 1-based position of the first hit that is rated relevant.
+ */
+public class ReciprocalRank extends RankedListQualityMetric {
+
+    /** Name returned by {@link #getWriteableName()} and used as the parser name. */
+    public static final String NAME = "reciprocal_rank";
+    // the rank to use if the result list does not contain any relevant document
+    // TODO decide on better default or make configurable
+    private static final int RANK_IF_NOT_FOUND = Integer.MAX_VALUE;
+
+    private static final ObjectParser<ReciprocalRank, ParseFieldMatcherSupplier> PARSER = new ObjectParser<>(
+            NAME, ReciprocalRank::new);
+
+    @Override
+    public String getWriteableName() {
+        return NAME;
+    }
+
+    /**
+     * Computes the reciprocal rank of the first relevant hit.
+     * <p>
+     * Ratings are mapped through {@link RatingMapping}. Rated documents that map
+     * to neither {@link Rating#RELEVANT} nor {@link Rating#IRRELEVANT} are
+     * treated like unrated documents. Hits are ranked by array position,
+     * starting at rank 1. If no hit is rated relevant, {@code RANK_IF_NOT_FOUND}
+     * is used as the rank.
+     *
+     * @param hits the {@link SearchHit}s to evaluate, best hit first
+     * @param ratedDocs the document ratings to evaluate the hits against
+     * @return the reciprocal rank together with the ids of all hits that are
+     *         rated neither relevant nor irrelevant
+     **/
+    @Override
+    public EvalQueryQuality evaluate(SearchHit[] hits, List<RatedDocument> ratedDocs) {
+        Set<String> relevantDocIds = new HashSet<>();
+        Set<String> irrelevantDocIds = new HashSet<>();
+        for (RatedDocument doc : ratedDocs) {
+            if (Rating.RELEVANT.equals(RatingMapping.mapTo(doc.getRating()))) {
+                relevantDocIds.add(doc.getDocID());
+            } else if (Rating.IRRELEVANT.equals(RatingMapping.mapTo(doc.getRating()))) {
+                irrelevantDocIds.add(doc.getDocID());
+            }
+        }
+
+        Collection<String> unknownDocIds = new ArrayList<>();
+        int firstRelevant = RANK_IF_NOT_FOUND;
+        boolean found = false;
+        for (int i = 0; i < hits.length; i++) {
+            String id = hits[i].getId();
+            if (relevantDocIds.contains(id)) {
+                // Only the first relevant hit determines the rank. Later relevant
+                // hits are still rated documents, so they must not fall through
+                // to the unknown-document branch below.
+                if (found == false) {
+                    firstRelevant = i + 1; // add one because rank is not 0-based
+                    found = true;
+                }
+            } else if (irrelevantDocIds.contains(id) == false) {
+                unknownDocIds.add(id);
+            }
+        }
+
+        double reciprocalRank = 1.0d / firstRelevant;
+        return new EvalQueryQuality(reciprocalRank, unknownDocIds);
+    }
+
+    /**
+     * Writes nothing: this metric has no instance state to serialize.
+     */
+    @Override
+    public void writeTo(StreamOutput out) throws IOException {
+    }
+
+    /**
+     * Parses a {@code reciprocal_rank} metric definition.
+     *
+     * @param parser the parser positioned at the metric definition
+     * @param matcher the context handed to the underlying {@link ObjectParser}
+     * @return the parsed metric
+     */
+    public static ReciprocalRank fromXContent(XContentParser parser, ParseFieldMatcherSupplier matcher) {
+        return PARSER.apply(parser, matcher);
+    }
+}
diff --git a/modules/rank-eval/src/test/java/org/elasticsearch/action/quality/ReciprocalRankTests.java
b/modules/rank-eval/src/test/java/org/elasticsearch/action/quality/ReciprocalRankTests.java new file mode 100644 index 00000000000..db16972e452 --- /dev/null +++ b/modules/rank-eval/src/test/java/org/elasticsearch/action/quality/ReciprocalRankTests.java @@ -0,0 +1,68 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+package org.elasticsearch.action.quality;
+
+import org.elasticsearch.common.text.Text;
+import org.elasticsearch.index.rankeval.EvalQueryQuality;
+import org.elasticsearch.index.rankeval.PrecisionAtN.Rating;
+import org.elasticsearch.index.rankeval.RatedDocument;
+import org.elasticsearch.index.rankeval.ReciprocalRank;
+import org.elasticsearch.search.SearchHit;
+import org.elasticsearch.search.internal.InternalSearchHit;
+import org.elasticsearch.test.ESTestCase;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Unit tests for {@link ReciprocalRank#evaluate(SearchHit[], List)}.
+ */
+public class ReciprocalRankTests extends ESTestCase {
+
+    /** Number of hits in the simulated result list; hit ids are "0" to "9". */
+    private static final int HIT_COUNT = 10;
+
+    /**
+     * Builds {@code count} hits of type "type" whose id is their position in
+     * the returned array.
+     */
+    private static SearchHit[] createHits(int count) {
+        SearchHit[] result = new SearchHit[count];
+        for (int position = 0; position < count; position++) {
+            result[position] = new InternalSearchHit(position, Integer.toString(position), new Text("type"),
+                    Collections.emptyMap());
+        }
+        return result;
+    }
+
+    /**
+     * With exactly one relevant document among the hits, the quality level is
+     * one over that document's 1-based position.
+     */
+    public void testEvaluationOneRelevantInResults() {
+        SearchHit[] hits = createHits(HIT_COUNT);
+        // mark one of the ten docs relevant, every other rated doc irrelevant
+        int relevantPosition = randomIntBetween(0, HIT_COUNT - 1);
+        List<RatedDocument> ratings = new ArrayList<>();
+        for (int docId = 0; docId <= 20; docId++) {
+            Rating rating = docId == relevantPosition ? Rating.RELEVANT : Rating.IRRELEVANT;
+            ratings.add(new RatedDocument(Integer.toString(docId), rating.ordinal()));
+        }
+
+        EvalQueryQuality result = new ReciprocalRank().evaluate(hits, ratings);
+        double expected = 1.0 / (relevantPosition + 1);
+        assertEquals(expected, result.getQualityLevel(), Double.MIN_VALUE);
+    }
+
+    /**
+     * Without any rated document, the metric falls back to a rank of
+     * {@link Integer#MAX_VALUE}.
+     */
+    public void testEvaluationNoRelevantInResults() {
+        SearchHit[] hits = createHits(HIT_COUNT);
+        List<RatedDocument> noRatings = new ArrayList<>();
+
+        EvalQueryQuality result = new ReciprocalRank().evaluate(hits, noRatings);
+        assertEquals(1.0 / Integer.MAX_VALUE, result.getQualityLevel(), Double.MIN_VALUE);
+    }
+}
diff --git
a/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/10_basic.yaml b/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/10_basic.yaml index d3487082b6a..6707416101f 100644 --- a/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/10_basic.yaml +++ b/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/10_basic.yaml @@ -1,6 +1,12 @@ --- "Response format": - + - do: + indices.create: + index: foo + body: + settings: + index: + number_of_shards: 1 - do: index: index: foo @@ -55,3 +61,68 @@ - match: {rank_eval.quality_level: 1} - match: {rank_eval.unknown_docs.0.amsterdam_query: [ "doc4"]} - match: {rank_eval.unknown_docs.1.berlin_query: [ "doc4"]} +--- +"Reciprocal Rank": + + - do: + indices.create: + index: foo + body: + settings: + index: + number_of_shards: 1 + - do: + index: + index: foo + type: bar + id: doc1 + body: { "text": "berlin" } + + - do: + index: + index: foo + type: bar + id: doc2 + body: { "text": "amsterdam" } + + - do: + index: + index: foo + type: bar + id: doc3 + body: { "text": "amsterdam" } + + - do: + index: + index: foo + type: bar + id: doc4 + body: { "text": "something about amsterdam and berlin" } + + - do: + indices.refresh: {} + + - do: + rank_eval: + body: { + "spec_id" : "cities_qa_queries", + "requests" : [ + { + "id": "amsterdam_query", + "request": { "query": { "match" : {"text" : "amsterdam" }}}, + # doc4 should be returned in third position, so reciprocal rank is 1/3 + "ratings": [{ "doc4": 1}] + }, + { + "id" : "berlin_query", + "request": { "query": { "match" : { "text" : "berlin" } }, "size" : 10 }, + # doc1 should be returned in first position, doc4 in second, so reciprocal rank is 1/2 + "ratings": [{"doc4": 1}] + } + ], + "metric" : { "reciprocal_rank": {} } + } + + - match: {rank_eval.spec_id: "cities_qa_queries"} + # average is (1/3 + 1/2)/2 = 5/12 ~ 0.41666666666666663 + - match: {rank_eval.quality_level: 0.41666666666666663}