mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-02-17 02:14:54 +00:00
Add threshold for document ratings for PrecisionAtN and ReciprocalRank
PrecisionAtN and ReciprocalRank are binary evaluation metrics by default that only distinguish between relevant/irrelevant search results. So far we assumed that relevant documents are labeled with 1 (irrelevant docs with 0) in the evaluation request, but this is cumbersome if the ratings are provided on a larger integer scale and would need to get mapped to a 0/1 value. This change introduces a threshold parameter on the PrecisionAtN and ReciprocalRank metrics that can be used to set the rating at or above which a document is considered "relevant". It defaults to 1, so in case of 0/1 ratings the threshold doesn't have to be set and only ratings with value 0 are considered to be irrelevant.
This commit is contained in:
parent
8910864a18
commit
0b92d524a7
@ -37,16 +37,30 @@ import javax.naming.directory.SearchResult;
|
||||
|
||||
/**
|
||||
* Evaluate Precision at N, N being the number of search results to consider for precision calculation.
|
||||
*
|
||||
* Documents of unknown quality are ignored in the precision at n computation and returned by document id.
|
||||
* By default documents with a rating equal or bigger than 1 are considered to be "relevant" for the precision
|
||||
* calculation. This value can be changed using the "relevant_rating_threshold" parameter.
|
||||
* */
|
||||
public class PrecisionAtN extends RankedListQualityMetric {
|
||||
|
||||
/** Number of results to check against a given set of relevant results. */
|
||||
private int n;
|
||||
|
||||
/** ratings equal or above this value will be considered relevant. */
|
||||
private int relevantRatingThreshhold = 1;
|
||||
|
||||
public static final String NAME = "precisionatn";
|
||||
|
||||
private static final ParseField SIZE_FIELD = new ParseField("size");
|
||||
private static final ParseField RELEVANT_RATING_FIELD = new ParseField("relevant_rating_threshold");
|
||||
private static final ConstructingObjectParser<PrecisionAtN, ParseFieldMatcherSupplier> PARSER = new ConstructingObjectParser<>(
|
||||
"precision_at", a -> new PrecisionAtN((Integer) a[0]));
|
||||
|
||||
static {
|
||||
PARSER.declareInt(ConstructingObjectParser.constructorArg(), SIZE_FIELD);
|
||||
PARSER.declareInt(PrecisionAtN::setRelevantRatingThreshhold, RELEVANT_RATING_FIELD);
|
||||
}
|
||||
|
||||
public PrecisionAtN(StreamInput in) throws IOException {
|
||||
n = in.readInt();
|
||||
}
|
||||
@ -82,12 +96,19 @@ public class PrecisionAtN extends RankedListQualityMetric {
|
||||
return n;
|
||||
}
|
||||
|
||||
private static final ParseField SIZE_FIELD = new ParseField("size");
|
||||
private static final ConstructingObjectParser<PrecisionAtN, ParseFieldMatcherSupplier> PARSER = new ConstructingObjectParser<>(
|
||||
"precision_at", a -> new PrecisionAtN((Integer) a[0]));
|
||||
/**
|
||||
* Sets the rating threshold at or above which ratings are considered to be "relevant" for this metric.
|
||||
* */
|
||||
public void setRelevantRatingThreshhold(int threshold) {
|
||||
this.relevantRatingThreshhold = threshold;
|
||||
}
|
||||
|
||||
static {
|
||||
PARSER.declareInt(ConstructingObjectParser.constructorArg(), SIZE_FIELD);
|
||||
/**
|
||||
* Returns the rating threshold at or above which ratings are considered to be "relevant" for this metric.
|
||||
* Defaults to 1.
|
||||
* */
|
||||
public int getRelevantRatingThreshold() {
|
||||
return relevantRatingThreshhold ;
|
||||
}
|
||||
|
||||
public static PrecisionAtN fromXContent(XContentParser parser, ParseFieldMatcherSupplier matcher) {
|
||||
@ -103,9 +124,9 @@ public class PrecisionAtN extends RankedListQualityMetric {
|
||||
Collection<RatedDocumentKey> relevantDocIds = new ArrayList<>();
|
||||
Collection<RatedDocumentKey> irrelevantDocIds = new ArrayList<>();
|
||||
for (RatedDocument doc : ratedDocs) {
|
||||
if (Rating.RELEVANT.equals(RatingMapping.mapTo(doc.getRating()))) {
|
||||
if (doc.getRating() >= this.relevantRatingThreshhold) {
|
||||
relevantDocIds.add(doc.getKey());
|
||||
} else if (Rating.IRRELEVANT.equals(RatingMapping.mapTo(doc.getRating()))) {
|
||||
} else {
|
||||
irrelevantDocIds.add(doc.getKey());
|
||||
}
|
||||
}
|
||||
|
@ -26,8 +26,6 @@ import org.elasticsearch.common.io.stream.StreamOutput;
|
||||
import org.elasticsearch.common.xcontent.ObjectParser;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.common.xcontent.XContentParser;
|
||||
import org.elasticsearch.index.rankeval.PrecisionAtN.Rating;
|
||||
import org.elasticsearch.index.rankeval.PrecisionAtN.RatingMapping;
|
||||
import org.elasticsearch.search.SearchHit;
|
||||
|
||||
import java.io.IOException;
|
||||
@ -41,6 +39,8 @@ import javax.naming.directory.SearchResult;
|
||||
|
||||
/**
|
||||
* Evaluate reciprocal rank.
|
||||
* By default documents with a rating equal or bigger than 1 are considered to be "relevant" for the reciprocal rank
|
||||
* calculation. This value can be changed using the "relevant_rating_threshold" parameter.
|
||||
* */
|
||||
public class ReciprocalRank extends RankedListQualityMetric {
|
||||
|
||||
@ -48,6 +48,9 @@ public class ReciprocalRank extends RankedListQualityMetric {
|
||||
public static final int DEFAULT_MAX_ACCEPTABLE_RANK = 10;
|
||||
private int maxAcceptableRank = DEFAULT_MAX_ACCEPTABLE_RANK;
|
||||
|
||||
/** ratings equal or above this value will be considered relevant. */
|
||||
private int relevantRatingThreshhold = 1;
|
||||
|
||||
/**
|
||||
* Initializes maxAcceptableRank with 10
|
||||
*/
|
||||
@ -90,6 +93,21 @@ public class ReciprocalRank extends RankedListQualityMetric {
|
||||
return this.maxAcceptableRank;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the rating threshold at or above which ratings are considered to be "relevant" for this metric.
|
||||
* */
|
||||
public void setRelevantRatingThreshhold(int threshold) {
|
||||
this.relevantRatingThreshhold = threshold;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the rating threshold at or above which ratings are considered to be "relevant" for this metric.
|
||||
* Defaults to 1.
|
||||
* */
|
||||
public int getRelevantRatingThreshold() {
|
||||
return relevantRatingThreshhold ;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute ReciprocalRank based on provided relevant document IDs.
|
||||
* @return reciprocal Rank for above {@link SearchResult} list.
|
||||
@ -99,9 +117,9 @@ public class ReciprocalRank extends RankedListQualityMetric {
|
||||
Set<RatedDocumentKey> relevantDocIds = new HashSet<>();
|
||||
Set<RatedDocumentKey> irrelevantDocIds = new HashSet<>();
|
||||
for (RatedDocument doc : ratedDocs) {
|
||||
if (Rating.RELEVANT.equals(RatingMapping.mapTo(doc.getRating()))) {
|
||||
if (doc.getRating() >= this.relevantRatingThreshhold) {
|
||||
relevantDocIds.add(doc.getKey());
|
||||
} else if (Rating.IRRELEVANT.equals(RatingMapping.mapTo(doc.getRating()))) {
|
||||
} else {
|
||||
irrelevantDocIds.add(doc.getKey());
|
||||
}
|
||||
}
|
||||
@ -110,16 +128,14 @@ public class ReciprocalRank extends RankedListQualityMetric {
|
||||
int firstRelevant = -1;
|
||||
boolean found = false;
|
||||
for (int i = 0; i < hits.length; i++) {
|
||||
// TODO here we use index/type/id triple not for a rated document but an unrated document in the search hits. Maybe rename?
|
||||
RatedDocumentKey id = new RatedDocumentKey(hits[i].getIndex(), hits[i].getType(), hits[i].getId());
|
||||
if (relevantDocIds.contains(id)) {
|
||||
RatedDocumentKey key = new RatedDocumentKey(hits[i].getIndex(), hits[i].getType(), hits[i].getId());
|
||||
if (relevantDocIds.contains(key)) {
|
||||
if (found == false && i < maxAcceptableRank) {
|
||||
firstRelevant = i + 1; // add one because rank is not
|
||||
// 0-based
|
||||
firstRelevant = i + 1; // add one because rank is not 0-based
|
||||
found = true;
|
||||
}
|
||||
} else {
|
||||
unknownDocIds.add(id);
|
||||
unknownDocIds.add(key);
|
||||
}
|
||||
}
|
||||
|
||||
@ -133,11 +149,13 @@ public class ReciprocalRank extends RankedListQualityMetric {
|
||||
}
|
||||
|
||||
private static final ParseField MAX_RANK_FIELD = new ParseField("max_acceptable_rank");
|
||||
private static final ParseField RELEVANT_RATING_FIELD = new ParseField("relevant_rating_threshold");
|
||||
private static final ObjectParser<ReciprocalRank, ParseFieldMatcherSupplier> PARSER = new ObjectParser<>(
|
||||
"reciprocal_rank", () -> new ReciprocalRank());
|
||||
|
||||
static {
|
||||
PARSER.declareInt(ReciprocalRank::setMaxAcceptableRank, MAX_RANK_FIELD);
|
||||
PARSER.declareInt(ReciprocalRank::setRelevantRatingThreshhold, RELEVANT_RATING_FIELD);
|
||||
}
|
||||
|
||||
public static ReciprocalRank fromXContent(XContentParser parser, ParseFieldMatcherSupplier matcher) {
|
||||
|
@ -64,6 +64,27 @@ public class PrecisionAtNTests extends ESTestCase {
|
||||
assertEquals((double) 4 / 5, (new PrecisionAtN(5)).evaluate(hits, rated).getQualityLevel(), 0.00001);
|
||||
}
|
||||
|
||||
/**
|
||||
* test that the relevant rating threshold can be set to something larger than 1.
|
||||
* e.g. we set it to 2 here and expect docs 0-1 to be not relevant, docs 2, 3 and 4 to be relevant
|
||||
*/
|
||||
public void testPrecisionAtFiveRelevanceThreshold() throws IOException, InterruptedException, ExecutionException {
|
||||
List<RatedDocument> rated = new ArrayList<>();
|
||||
rated.add(new RatedDocument(new RatedDocumentKey("test", "testtype", "0"), 0));
|
||||
rated.add(new RatedDocument(new RatedDocumentKey("test", "testtype", "1"), 1));
|
||||
rated.add(new RatedDocument(new RatedDocumentKey("test", "testtype", "2"), 2));
|
||||
rated.add(new RatedDocument(new RatedDocumentKey("test", "testtype", "3"), 3));
|
||||
rated.add(new RatedDocument(new RatedDocumentKey("test", "testtype", "4"), 4));
|
||||
InternalSearchHit[] hits = new InternalSearchHit[5];
|
||||
for (int i = 0; i < 5; i++) {
|
||||
hits[i] = new InternalSearchHit(i, i+"", new Text("testtype"), Collections.emptyMap());
|
||||
hits[i].shard(new SearchShardTarget("testnode", new Index("test", "uuid"), 0));
|
||||
}
|
||||
PrecisionAtN precisionAtN = new PrecisionAtN(5);
|
||||
precisionAtN.setRelevantRatingThreshhold(2);
|
||||
assertEquals((double) 3 / 5, precisionAtN.evaluate(hits, rated).getQualityLevel(), 0.00001);
|
||||
}
|
||||
|
||||
public void testPrecisionAtFiveCorrectIndex() throws IOException, InterruptedException, ExecutionException {
|
||||
List<RatedDocument> rated = new ArrayList<>();
|
||||
rated.add(new RatedDocument(new RatedDocumentKey("test_other", "testtype", "0"), Rating.RELEVANT.ordinal()));
|
||||
@ -96,11 +117,13 @@ public class PrecisionAtNTests extends ESTestCase {
|
||||
|
||||
public void testParseFromXContent() throws IOException {
|
||||
String xContent = " {\n"
|
||||
+ " \"size\": 10\n"
|
||||
+ " \"size\": 10,\n"
|
||||
+ " \"relevant_rating_threshold\" : 2"
|
||||
+ "}";
|
||||
XContentParser parser = XContentFactory.xContent(xContent).createParser(xContent);
|
||||
PrecisionAtN precicionAt = PrecisionAtN.fromXContent(parser, () -> ParseFieldMatcher.STRICT);
|
||||
assertEquals(10, precicionAt.getN());
|
||||
assertEquals(2, precicionAt.getRelevantRatingThreshold());
|
||||
}
|
||||
|
||||
public void testCombine() {
|
||||
|
@ -26,10 +26,12 @@ import org.elasticsearch.search.SearchShardTarget;
|
||||
import org.elasticsearch.search.internal.InternalSearchHit;
|
||||
import org.elasticsearch.test.ESTestCase;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Vector;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
|
||||
import static java.util.Collections.emptyList;
|
||||
|
||||
@ -103,6 +105,29 @@ public class ReciprocalRankTests extends ESTestCase {
|
||||
assertEquals(1.0 / (relevantAt + 1), evaluation.getQualityLevel(), Double.MIN_VALUE);
|
||||
}
|
||||
|
||||
/**
|
||||
* test that the relevant rating threshold can be set to something larger than 1.
|
||||
* e.g. we set it to 2 here and expect docs 0-1 to be not relevant, so the first relevant doc (doc 2) has
|
||||
* third ranking position, so RR should be 1/3
|
||||
*/
|
||||
public void testPrecisionAtFiveRelevanceThreshold() throws IOException, InterruptedException, ExecutionException {
|
||||
List<RatedDocument> rated = new ArrayList<>();
|
||||
rated.add(new RatedDocument(new RatedDocumentKey("test", "testtype", "0"), 0));
|
||||
rated.add(new RatedDocument(new RatedDocumentKey("test", "testtype", "1"), 1));
|
||||
rated.add(new RatedDocument(new RatedDocumentKey("test", "testtype", "2"), 2));
|
||||
rated.add(new RatedDocument(new RatedDocumentKey("test", "testtype", "3"), 3));
|
||||
rated.add(new RatedDocument(new RatedDocumentKey("test", "testtype", "4"), 4));
|
||||
InternalSearchHit[] hits = new InternalSearchHit[5];
|
||||
for (int i = 0; i < 5; i++) {
|
||||
hits[i] = new InternalSearchHit(i, i+"", new Text("testtype"), Collections.emptyMap());
|
||||
hits[i].shard(new SearchShardTarget("testnode", new Index("test", "uuid"), 0));
|
||||
}
|
||||
|
||||
ReciprocalRank reciprocalRank = new ReciprocalRank();
|
||||
reciprocalRank.setRelevantRatingThreshhold(2);
|
||||
assertEquals((double) 1 / 3, reciprocalRank.evaluate(hits, rated).getQualityLevel(), 0.00001);
|
||||
}
|
||||
|
||||
public void testCombine() {
|
||||
ReciprocalRank reciprocalRank = new ReciprocalRank();
|
||||
Vector<EvalQueryQuality> partialResults = new Vector<>(3);
|
||||
|
Loading…
x
Reference in New Issue
Block a user