Add option for maximally allowed rank to Reciprocal Rank metric

This commit is contained in:
Christoph Büscher 2016-07-27 16:24:41 +02:00
parent ad87bacf91
commit 71c0d59e95
3 changed files with 119 additions and 13 deletions

View File

@ -19,7 +19,9 @@
package org.elasticsearch.index.rankeval;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.ParseFieldMatcherSupplier;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.xcontent.ObjectParser;
import org.elasticsearch.common.xcontent.XContentParser;
@ -42,15 +44,51 @@ import javax.naming.directory.SearchResult;
public class ReciprocalRank extends RankedListQualityMetric {
public static final String NAME = "reciprocal_rank";
// the rank to use if the result list does not contain any relevant document
// TODO decide on better default or make configurable
private static final int RANK_IF_NOT_FOUND = Integer.MAX_VALUE;
// Rank limit used when no explicit value is configured.
public static final int DEFAULT_MAX_ACCEPTABLE_RANK = 10;
// Highest rank at which a relevant document is still taken into account by this metric.
// The int constructor and the setter reject non-positive values.
private int maxAcceptableRank = DEFAULT_MAX_ACCEPTABLE_RANK;
/**
 * Creates the metric with the maximal acceptable rank set to
 * {@link #DEFAULT_MAX_ACCEPTABLE_RANK}.
 */
public ReciprocalRank() {
    this(DEFAULT_MAX_ACCEPTABLE_RANK);
}
/**
 * Creates the metric with an explicit rank limit.
 *
 * @param maxAcceptableRank
 *            maximal acceptable rank. Must be positive.
 * @throws IllegalArgumentException
 *             if the given rank is zero or negative
 */
public ReciprocalRank(int maxAcceptableRank) {
    if (maxAcceptableRank < 1) {
        String message = "maximal acceptable rank needs to be positive but was [" + maxAcceptableRank + "]";
        throw new IllegalArgumentException(message);
    }
    this.maxAcceptableRank = maxAcceptableRank;
}
/**
 * Reads the metric from a stream.
 *
 * The maximal acceptable rank is written by {@link #writeTo(StreamOutput)} as a variable-length
 * int, so it has to be read back with the matching variable-length read. A fixed-width
 * {@code readInt()} would consume a different number of bytes and yield a wrong value.
 *
 * @param in
 *            stream to read the maximal acceptable rank from
 * @throws IOException
 *             if reading from the stream fails
 */
public ReciprocalRank(StreamInput in) throws IOException {
    this.maxAcceptableRank = in.readVInt();
}
/**
 * @return {@link #NAME}, the writeable name of this metric
 */
@Override
public String getWriteableName() {
    return ReciprocalRank.NAME;
}
/**
 * Changes the rank limit of this metric.
 *
 * @param maxAcceptableRank
 *            maximal acceptable rank. Must be positive.
 * @throws IllegalArgumentException
 *             if the given rank is zero or negative
 */
public void setMaxAcceptableRank(int maxAcceptableRank) {
    if (maxAcceptableRank > 0) {
        this.maxAcceptableRank = maxAcceptableRank;
        return;
    }
    throw new IllegalArgumentException("maximal acceptable rank needs to be positive but was [" + maxAcceptableRank + "]");
}
/**
 * @return the maximal acceptable rank currently configured for this metric
 */
public int getMaxAcceptableRank() {
    return maxAcceptableRank;
}
/**
* Compute ReciprocalRank based on provided relevant document IDs.
* @return reciprocal Rank for above {@link SearchResult} list.
@ -67,31 +105,39 @@ public class ReciprocalRank extends RankedListQualityMetric {
}
}
Collection<String> unknownDocIds = new ArrayList<String>();
int firstRelevant = RANK_IF_NOT_FOUND;
Collection<String> unknownDocIds = new ArrayList<>();
int firstRelevant = -1;
boolean found = false;
for (int i = 0; i < hits.length; i++) {
String id = hits[i].getId();
if (relevantDocIds.contains(id) && found == false) {
firstRelevant = i + 1; // add one because rank is not 0-based
found = true;
continue;
} else if (irrelevantDocIds.contains(id) == false) {
if (relevantDocIds.contains(id)) {
if (found == false && i < maxAcceptableRank) {
firstRelevant = i + 1; // add one because rank is not
// 0-based
found = true;
}
} else {
unknownDocIds.add(id);
}
}
double reciprocalRank = 1.0d / firstRelevant;
double reciprocalRank = (firstRelevant == -1) ? 0 : 1.0d / firstRelevant;
return new EvalQueryQuality(reciprocalRank, unknownDocIds);
}
/**
 * Serializes this metric by writing the maximal acceptable rank as a variable-length int.
 *
 * @param out
 *            stream to write to
 * @throws IOException
 *             if writing to the stream fails
 */
@Override
public void writeTo(StreamOutput out) throws IOException {
    out.writeVInt(this.maxAcceptableRank);
}
// Name of the field that carries the rank limit in the metric's x-content form.
private static final ParseField MAX_RANK_FIELD = new ParseField("max_acceptable_rank");

// Parser that starts from a default-constructed metric and applies the declared fields to it.
private static final ObjectParser<ReciprocalRank, ParseFieldMatcherSupplier> PARSER =
        new ObjectParser<>("reciprocal_rank", ReciprocalRank::new);

static {
    // values go through the setter, so its positivity check also applies to parsed input
    PARSER.declareInt(ReciprocalRank::setMaxAcceptableRank, MAX_RANK_FIELD);
}
/**
 * Builds a {@link ReciprocalRank} from x-content by delegating to {@link #PARSER}.
 *
 * @param parser
 *            parser to read the metric definition from
 * @param matcher
 *            context object handed through to the parser
 * @return the parsed metric
 */
public static ReciprocalRank fromXContent(XContentParser parser, ParseFieldMatcherSupplier matcher) {
    ReciprocalRank parsedMetric = PARSER.apply(parser, matcher);
    return parsedMetric;
}

View File

@ -34,6 +34,37 @@ import java.util.List;
public class ReciprocalRankTests extends ESTestCase {
/**
 * Checks that the maximal acceptable rank can be read and changed, and that a relevant hit is
 * only counted when its rank does not exceed that limit.
 */
public void testMaxAcceptableRank() {
    ReciprocalRank reciprocalRank = new ReciprocalRank();
    assertEquals(ReciprocalRank.DEFAULT_MAX_ACCEPTABLE_RANK, reciprocalRank.getMaxAcceptableRank());

    // setter / getter round trip with an arbitrary valid value
    int maxRank = randomIntBetween(1, 100);
    reciprocalRank.setMaxAcceptableRank(maxRank);
    assertEquals(maxRank, reciprocalRank.getMaxAcceptableRank());

    SearchHit[] hits = new SearchHit[10];
    for (int i = 0; i < 10; i++) {
        hits[i] = new InternalSearchHit(i, Integer.toString(i), new Text("type"), Collections.emptyMap());
    }
    List<RatedDocument> ratedDocs = new ArrayList<>();
    int relevantAt = 5;
    for (int i = 0; i < 10; i++) {
        if (i == relevantAt) {
            ratedDocs.add(new RatedDocument(Integer.toString(i), Rating.RELEVANT.ordinal()));
        } else {
            ratedDocs.add(new RatedDocument(Integer.toString(i), Rating.IRRELEVANT.ordinal()));
        }
    }
    int rankAtFirstRelevant = relevantAt + 1;

    // The random limit chosen above may be smaller than the rank of the relevant hit, in which
    // case the metric correctly returns 0. Use a limit that is guaranteed to include the hit so
    // the expectation below holds for every random seed.
    reciprocalRank.setMaxAcceptableRank(randomIntBetween(rankAtFirstRelevant, 100));
    EvalQueryQuality evaluation = reciprocalRank.evaluate(hits, ratedDocs);
    assertEquals(1.0 / rankAtFirstRelevant, evaluation.getQualityLevel(), Double.MIN_VALUE);

    // a limit equal to the rank of the relevant hit still counts it
    reciprocalRank = new ReciprocalRank(rankAtFirstRelevant);
    evaluation = reciprocalRank.evaluate(hits, ratedDocs);
    assertEquals(1.0 / rankAtFirstRelevant, evaluation.getQualityLevel(), Double.MIN_VALUE);

    // a limit one below the rank of the relevant hit ignores it
    reciprocalRank = new ReciprocalRank(rankAtFirstRelevant - 1);
    evaluation = reciprocalRank.evaluate(hits, ratedDocs);
    assertEquals(0.0, evaluation.getQualityLevel(), Double.MIN_VALUE);
}
public void testEvaluationOneRelevantInResults() {
ReciprocalRank reciprocalRank = new ReciprocalRank();
SearchHit[] hits = new SearchHit[10];
@ -63,6 +94,6 @@ public class ReciprocalRankTests extends ESTestCase {
}
List<RatedDocument> ratedDocs = new ArrayList<>();
EvalQueryQuality evaluation = reciprocalRank.evaluate(hits, ratedDocs);
assertEquals(1.0 / Integer.MAX_VALUE, evaluation.getQualityLevel(), Double.MIN_VALUE);
assertEquals(0.0, evaluation.getQualityLevel(), Double.MIN_VALUE);
}
}

View File

@ -124,5 +124,34 @@
}
- match: {rank_eval.spec_id: "cities_qa_queries"}
# average is (1/3 + 1/2)/2 = 5/12 ~ 0.41666666666666663
# average is (1/3 + 1/2)/2 = 5/12 ~ 0.41666666666666663
- match: {rank_eval.quality_level: 0.41666666666666663}
- do:
rank_eval:
body: {
"spec_id" : "cities_qa_queries",
"requests" : [
{
"id": "amsterdam_query",
"request": { "query": { "match" : {"text" : "amsterdam" }}},
# doc4 should be returned in third position, which is beyond max_acceptable_rank of 2, so reciprocal rank is 0
"ratings": [{ "doc4": 1}]
},
{
"id" : "berlin_query",
"request": { "query": { "match" : { "text" : "berlin" } }, "size" : 10 },
# doc1 should be returned in first position, doc3 in second, so reciprocal rank is 1/2
"ratings": [{"doc4": 1}]
}
],
"metric" : {
"reciprocal_rank": {
"max_acceptable_rank" : 2
}
}
}
- match: {rank_eval.spec_id: "cities_qa_queries"}
# average is (0 + 1/2)/2 = 1/4
- match: {rank_eval.quality_level: 0.25}