Add option for maximally allowed rank to Reciprocal Rank metric
This commit is contained in:
parent
ad87bacf91
commit
71c0d59e95
|
@ -19,7 +19,9 @@
|
|||
|
||||
package org.elasticsearch.index.rankeval;
|
||||
|
||||
import org.elasticsearch.common.ParseField;
|
||||
import org.elasticsearch.common.ParseFieldMatcherSupplier;
|
||||
import org.elasticsearch.common.io.stream.StreamInput;
|
||||
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||
import org.elasticsearch.common.xcontent.ObjectParser;
|
||||
import org.elasticsearch.common.xcontent.XContentParser;
|
||||
|
@ -42,15 +44,51 @@ import javax.naming.directory.SearchResult;
|
|||
public class ReciprocalRank extends RankedListQualityMetric {
|
||||
|
||||
public static final String NAME = "reciprocal_rank";
|
||||
// the rank to use if the result list does not contain any relevant document
|
||||
// TODO decide on better default or make configurable
|
||||
private static final int RANK_IF_NOT_FOUND = Integer.MAX_VALUE;
|
||||
public static final int DEFAULT_MAX_ACCEPTABLE_RANK = 10;
|
||||
private int maxAcceptableRank = DEFAULT_MAX_ACCEPTABLE_RANK;
|
||||
|
||||
/**
|
||||
* Initializes maxAcceptableRank with 10
|
||||
*/
|
||||
public ReciprocalRank() {
|
||||
// use defaults
|
||||
}
|
||||
|
||||
/**
|
||||
* @param maxAcceptableRank
|
||||
* maximal acceptable rank. Must be positive.
|
||||
*/
|
||||
public ReciprocalRank(int maxAcceptableRank) {
|
||||
// fail fast on zero or negative cutoffs instead of storing a value that violates the documented "must be positive" contract
if (maxAcceptableRank <= 0) {
|
||||
throw new IllegalArgumentException("maximal acceptable rank needs to be positive but was [" + maxAcceptableRank + "]");
|
||||
}
|
||||
this.maxAcceptableRank = maxAcceptableRank;
|
||||
}
|
||||
|
||||
/**
 * Reads the metric from a stream, mirroring {@link #writeTo(StreamOutput)}.
 *
 * @param in
 *            stream to read the maximal acceptable rank from
 * @throws IOException
 *             if reading from the stream fails
 */
public ReciprocalRank(StreamInput in) throws IOException {
    // writeTo serializes the value with writeVInt, so it has to be decoded as a
    // variable-length int; readInt would consume a fixed four bytes and
    // produce a different value (and misalign the rest of the stream)
    this.maxAcceptableRank = in.readVInt();
}
|
||||
|
||||
@Override
|
||||
public String getWriteableName() {
|
||||
return NAME;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param maxAcceptableRank
|
||||
* maximal acceptable rank. Must be positive.
|
||||
*/
|
||||
public void setMaxAcceptableRank(int maxAcceptableRank) {
|
||||
// same validation as the int constructor: only strictly positive ranks are accepted, the field is left unchanged otherwise
if (maxAcceptableRank <= 0) {
|
||||
throw new IllegalArgumentException("maximal acceptable rank needs to be positive but was [" + maxAcceptableRank + "]");
|
||||
}
|
||||
this.maxAcceptableRank = maxAcceptableRank;
|
||||
}
|
||||
|
||||
public int getMaxAcceptableRank() {
|
||||
return this.maxAcceptableRank;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute ReciprocalRank based on provided relevant document IDs.
|
||||
* @return reciprocal Rank for above {@link SearchResult} list.
|
||||
|
@ -67,31 +105,39 @@ public class ReciprocalRank extends RankedListQualityMetric {
|
|||
}
|
||||
}
|
||||
|
||||
Collection<String> unknownDocIds = new ArrayList<String>();
|
||||
int firstRelevant = RANK_IF_NOT_FOUND;
|
||||
Collection<String> unknownDocIds = new ArrayList<>();
|
||||
int firstRelevant = -1;
|
||||
boolean found = false;
|
||||
for (int i = 0; i < hits.length; i++) {
|
||||
String id = hits[i].getId();
|
||||
if (relevantDocIds.contains(id) && found == false) {
|
||||
firstRelevant = i + 1; // add one because rank is not 0-based
|
||||
found = true;
|
||||
continue;
|
||||
} else if (irrelevantDocIds.contains(id) == false) {
|
||||
if (relevantDocIds.contains(id)) {
|
||||
if (found == false && i < maxAcceptableRank) {
|
||||
firstRelevant = i + 1; // add one because rank is not
|
||||
// 0-based
|
||||
found = true;
|
||||
}
|
||||
} else {
|
||||
unknownDocIds.add(id);
|
||||
}
|
||||
}
|
||||
|
||||
double reciprocalRank = 1.0d / firstRelevant;
|
||||
double reciprocalRank = (firstRelevant == -1) ? 0 : 1.0d / firstRelevant;
|
||||
return new EvalQueryQuality(reciprocalRank, unknownDocIds);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeTo(StreamOutput out) throws IOException {
|
||||
// maxAcceptableRank is the only serialized state; it is written as a vInt, so the StreamInput constructor must read it back with readVInt (not readInt)
out.writeVInt(maxAcceptableRank);
|
||||
}
|
||||
|
||||
private static final ParseField MAX_RANK_FIELD = new ParseField("max_acceptable_rank");
|
||||
private static final ObjectParser<ReciprocalRank, ParseFieldMatcherSupplier> PARSER = new ObjectParser<>(
|
||||
"reciprocal_rank", () -> new ReciprocalRank());
|
||||
|
||||
static {
|
||||
PARSER.declareInt(ReciprocalRank::setMaxAcceptableRank, MAX_RANK_FIELD);
|
||||
}
|
||||
|
||||
public static ReciprocalRank fromXContent(XContentParser parser, ParseFieldMatcherSupplier matcher) {
|
||||
return PARSER.apply(parser, matcher);
|
||||
}
|
||||
|
|
|
@ -34,6 +34,37 @@ import java.util.List;
|
|||
|
||||
public class ReciprocalRankTests extends ESTestCase {
|
||||
|
||||
public void testMaxAcceptableRank() {
|
||||
ReciprocalRank reciprocalRank = new ReciprocalRank();
|
||||
assertEquals(ReciprocalRank.DEFAULT_MAX_ACCEPTABLE_RANK, reciprocalRank.getMaxAcceptableRank());
|
||||
|
||||
int maxRank = randomIntBetween(1, 100);
|
||||
reciprocalRank.setMaxAcceptableRank(maxRank);
|
||||
assertEquals(maxRank, reciprocalRank.getMaxAcceptableRank());
|
||||
|
||||
SearchHit[] hits = new SearchHit[10];
|
||||
for (int i = 0; i < 10; i++) {
|
||||
hits[i] = new InternalSearchHit(i, Integer.toString(i), new Text("type"), Collections.emptyMap());
|
||||
}
|
||||
List<RatedDocument> ratedDocs = new ArrayList<>();
|
||||
int relevantAt = 5;
|
||||
for (int i = 0; i < 10; i++) {
|
||||
if (i == relevantAt) {
|
||||
ratedDocs.add(new RatedDocument(Integer.toString(i), Rating.RELEVANT.ordinal()));
|
||||
} else {
|
||||
ratedDocs.add(new RatedDocument(Integer.toString(i), Rating.IRRELEVANT.ordinal()));
|
||||
}
|
||||
}
|
||||
|
||||
int rankAtFirstRelevant = relevantAt + 1;
|
||||
EvalQueryQuality evaluation = reciprocalRank.evaluate(hits, ratedDocs);
|
||||
assertEquals(1.0 / rankAtFirstRelevant, evaluation.getQualityLevel(), Double.MIN_VALUE);
|
||||
|
||||
reciprocalRank = new ReciprocalRank(rankAtFirstRelevant - 1);
|
||||
evaluation = reciprocalRank.evaluate(hits, ratedDocs);
|
||||
assertEquals(0.0, evaluation.getQualityLevel(), Double.MIN_VALUE);
|
||||
}
|
||||
|
||||
public void testEvaluationOneRelevantInResults() {
|
||||
ReciprocalRank reciprocalRank = new ReciprocalRank();
|
||||
SearchHit[] hits = new SearchHit[10];
|
||||
|
@ -63,6 +94,6 @@ public class ReciprocalRankTests extends ESTestCase {
|
|||
}
|
||||
List<RatedDocument> ratedDocs = new ArrayList<>();
|
||||
EvalQueryQuality evaluation = reciprocalRank.evaluate(hits, ratedDocs);
|
||||
assertEquals(1.0 / Integer.MAX_VALUE, evaluation.getQualityLevel(), Double.MIN_VALUE);
|
||||
assertEquals(0.0, evaluation.getQualityLevel(), Double.MIN_VALUE);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -124,5 +124,34 @@
|
|||
}
|
||||
|
||||
- match: {rank_eval.spec_id: "cities_qa_queries"}
|
||||
# average is (1/3 + 1/2)/2 = 5/12 ~ 0.41666666666666663
|
||||
# average is (1/3 + 1/2)/2 = 5/12 ~ 0.41666666666666663
|
||||
- match: {rank_eval.quality_level: 0.41666666666666663}
|
||||
|
||||
- do:
|
||||
rank_eval:
|
||||
body: {
|
||||
"spec_id" : "cities_qa_queries",
|
||||
"requests" : [
|
||||
{
|
||||
"id": "amsterdam_query",
|
||||
"request": { "query": { "match" : {"text" : "amsterdam" }}},
|
||||
# doc4 should be returned in third position, so reciprocal rank is 1/3
|
||||
"ratings": [{ "doc4": 1}]
|
||||
},
|
||||
{
|
||||
"id" : "berlin_query",
|
||||
"request": { "query": { "match" : { "text" : "berlin" } }, "size" : 10 },
|
||||
# doc1 should be returned in first position, doc3 in second, so reciprocal rank is 1/2
|
||||
"ratings": [{"doc4": 1}]
|
||||
}
|
||||
],
|
||||
"metric" : {
|
||||
"reciprocal_rank": {
|
||||
"max_acceptable_rank" : 2
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
- match: {rank_eval.spec_id: "cities_qa_queries"}
|
||||
# average is (0 + 1/2)/2 = 1/4
|
||||
- match: {rank_eval.quality_level: 0.25}
|
||||
|
|
Loading…
Reference in New Issue