Rename ranking evaluation response section (#32166)
Currently the ranking evaluation response contains an `unknown_docs` section for each search use case in the evaluation set. It contains document IDs for results in the search hits that currently don't have a quality rating. This change renames it to `unrated_docs`, which better reflects its purpose.
This commit is contained in:
parent
c5cde96691
commit
5cbd9ad177
|
@ -40,7 +40,7 @@ import java.util.Map;
|
|||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import static org.elasticsearch.index.rankeval.EvaluationMetric.filterUnknownDocuments;
|
||||
import static org.elasticsearch.index.rankeval.EvaluationMetric.filterUnratedDocuments;
|
||||
|
||||
public class RankEvalIT extends ESRestHighLevelClientTestCase {
|
||||
|
||||
|
@ -84,7 +84,7 @@ public class RankEvalIT extends ESRestHighLevelClientTestCase {
|
|||
Map<String, EvalQueryQuality> partialResults = response.getPartialResults();
|
||||
assertEquals(2, partialResults.size());
|
||||
EvalQueryQuality amsterdamQueryQuality = partialResults.get("amsterdam_query");
|
||||
assertEquals(2, filterUnknownDocuments(amsterdamQueryQuality.getHitsAndRatings()).size());
|
||||
assertEquals(2, filterUnratedDocuments(amsterdamQueryQuality.getHitsAndRatings()).size());
|
||||
List<RatedSearchHit> hitsAndRatings = amsterdamQueryQuality.getHitsAndRatings();
|
||||
assertEquals(7, hitsAndRatings.size());
|
||||
for (RatedSearchHit hit : hitsAndRatings) {
|
||||
|
@ -96,7 +96,7 @@ public class RankEvalIT extends ESRestHighLevelClientTestCase {
|
|||
}
|
||||
}
|
||||
EvalQueryQuality berlinQueryQuality = partialResults.get("berlin_query");
|
||||
assertEquals(6, filterUnknownDocuments(berlinQueryQuality.getHitsAndRatings()).size());
|
||||
assertEquals(6, filterUnratedDocuments(berlinQueryQuality.getHitsAndRatings()).size());
|
||||
hitsAndRatings = berlinQueryQuality.getHitsAndRatings();
|
||||
assertEquals(7, hitsAndRatings.size());
|
||||
for (RatedSearchHit hit : hitsAndRatings) {
|
||||
|
|
|
@ -274,7 +274,7 @@ that shows potential errors of individual queries. The response has the followin
|
|||
"details": {
|
||||
"my_query_id1": { <2>
|
||||
"quality_level": 0.6, <3>
|
||||
"unknown_docs": [ <4>
|
||||
"unrated_docs": [ <4>
|
||||
{
|
||||
"_index": "my_index",
|
||||
"_id": "1960795"
|
||||
|
@ -309,7 +309,7 @@ that shows potential errors of individual queries. The response has the followin
|
|||
<1> the overall evaluation quality calculated by the defined metric
|
||||
<2> the `details` section contains one entry for every query in the original `requests` section, keyed by the search request id
|
||||
<3> the `quality_level` in the `details` section shows the contribution of this query to the global quality score
|
||||
<4> the `unknown_docs` section contains an `_index` and `_id` entry for each document in the search result for this
|
||||
<4> the `unrated_docs` section contains an `_index` and `_id` entry for each document in the search result for this
|
||||
query that didn't have a ratings value. This can be used to ask the user to supply ratings for these documents
|
||||
<5> the `hits` section shows a grouping of the search results with their supplied rating
|
||||
<6> the `metric_details` give additional information about the calculated quality metric (e.g. how many of the retrieved
|
||||
|
|
|
@ -102,8 +102,8 @@ public class EvalQueryQuality implements ToXContentFragment, Writeable {
|
|||
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
|
||||
builder.startObject(queryId);
|
||||
builder.field(QUALITY_LEVEL_FIELD.getPreferredName(), this.evaluationResult);
|
||||
builder.startArray(UNKNOWN_DOCS_FIELD.getPreferredName());
|
||||
for (DocumentKey key : EvaluationMetric.filterUnknownDocuments(ratedHits)) {
|
||||
builder.startArray(UNRATED_DOCS_FIELD.getPreferredName());
|
||||
for (DocumentKey key : EvaluationMetric.filterUnratedDocuments(ratedHits)) {
|
||||
builder.startObject();
|
||||
builder.field(RatedDocument.INDEX_FIELD.getPreferredName(), key.getIndex());
|
||||
builder.field(RatedDocument.DOC_ID_FIELD.getPreferredName(), key.getDocId());
|
||||
|
@ -123,7 +123,7 @@ public class EvalQueryQuality implements ToXContentFragment, Writeable {
|
|||
}
|
||||
|
||||
private static final ParseField QUALITY_LEVEL_FIELD = new ParseField("quality_level");
|
||||
private static final ParseField UNKNOWN_DOCS_FIELD = new ParseField("unknown_docs");
|
||||
private static final ParseField UNRATED_DOCS_FIELD = new ParseField("unrated_docs");
|
||||
private static final ParseField HITS_FIELD = new ParseField("hits");
|
||||
private static final ParseField METRIC_DETAILS_FIELD = new ParseField("metric_details");
|
||||
private static final ObjectParser<ParsedEvalQueryQuality, Void> PARSER = new ObjectParser<>("eval_query_quality",
|
||||
|
|
|
@ -76,10 +76,9 @@ public interface EvaluationMetric extends ToXContentObject, NamedWriteable {
|
|||
/**
|
||||
* filter @link {@link RatedSearchHit} that don't have a rating
|
||||
*/
|
||||
static List<DocumentKey> filterUnknownDocuments(List<RatedSearchHit> ratedHits) {
|
||||
List<DocumentKey> unknownDocs = ratedHits.stream().filter(hit -> hit.getRating().isPresent() == false)
|
||||
static List<DocumentKey> filterUnratedDocuments(List<RatedSearchHit> ratedHits) {
|
||||
return ratedHits.stream().filter(hit -> hit.getRating().isPresent() == false)
|
||||
.map(hit -> new DocumentKey(hit.getSearchHit().getIndex(), hit.getSearchHit().getId())).collect(Collectors.toList());
|
||||
return unknownDocs;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -40,7 +40,7 @@ import java.util.ArrayList;
|
|||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
import static org.elasticsearch.index.rankeval.EvaluationMetric.filterUnknownDocuments;
|
||||
import static org.elasticsearch.index.rankeval.EvaluationMetric.filterUnratedDocuments;
|
||||
import static org.elasticsearch.test.EqualsHashCodeTestUtils.checkEqualsAndHashCode;
|
||||
import static org.elasticsearch.test.XContentTestUtils.insertRandomFields;
|
||||
import static org.hamcrest.CoreMatchers.containsString;
|
||||
|
@ -128,7 +128,7 @@ public class DiscountedCumulativeGainTests extends ESTestCase {
|
|||
DiscountedCumulativeGain dcg = new DiscountedCumulativeGain();
|
||||
EvalQueryQuality result = dcg.evaluate("id", hits, rated);
|
||||
assertEquals(12.779642067948913, result.getQualityLevel(), DELTA);
|
||||
assertEquals(2, filterUnknownDocuments(result.getHitsAndRatings()).size());
|
||||
assertEquals(2, filterUnratedDocuments(result.getHitsAndRatings()).size());
|
||||
|
||||
/**
|
||||
* Check with normalization: to get the maximal possible dcg, sort documents by
|
||||
|
@ -185,7 +185,7 @@ public class DiscountedCumulativeGainTests extends ESTestCase {
|
|||
DiscountedCumulativeGain dcg = new DiscountedCumulativeGain();
|
||||
EvalQueryQuality result = dcg.evaluate("id", hits, ratedDocs);
|
||||
assertEquals(12.392789260714371, result.getQualityLevel(), DELTA);
|
||||
assertEquals(1, filterUnknownDocuments(result.getHitsAndRatings()).size());
|
||||
assertEquals(1, filterUnratedDocuments(result.getHitsAndRatings()).size());
|
||||
|
||||
/**
|
||||
* Check with normalization: to get the maximal possible dcg, sort documents by
|
||||
|
@ -224,13 +224,13 @@ public class DiscountedCumulativeGainTests extends ESTestCase {
|
|||
DiscountedCumulativeGain dcg = new DiscountedCumulativeGain();
|
||||
EvalQueryQuality result = dcg.evaluate("id", hits, ratedDocs);
|
||||
assertEquals(0.0d, result.getQualityLevel(), DELTA);
|
||||
assertEquals(0, filterUnknownDocuments(result.getHitsAndRatings()).size());
|
||||
assertEquals(0, filterUnratedDocuments(result.getHitsAndRatings()).size());
|
||||
|
||||
// also check normalized
|
||||
dcg = new DiscountedCumulativeGain(true, null, 10);
|
||||
result = dcg.evaluate("id", hits, ratedDocs);
|
||||
assertEquals(0.0d, result.getQualityLevel(), DELTA);
|
||||
assertEquals(0, filterUnknownDocuments(result.getHitsAndRatings()).size());
|
||||
assertEquals(0, filterUnratedDocuments(result.getHitsAndRatings()).size());
|
||||
}
|
||||
|
||||
public void testParseFromXContent() throws IOException {
|
||||
|
|
|
@ -26,7 +26,6 @@ import org.elasticsearch.common.xcontent.ToXContent;
|
|||
import org.elasticsearch.common.xcontent.XContentParser;
|
||||
import org.elasticsearch.common.xcontent.XContentType;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.rankeval.RatedDocument.DocumentKey;
|
||||
import org.elasticsearch.search.SearchShardTarget;
|
||||
import org.elasticsearch.test.ESTestCase;
|
||||
|
||||
|
@ -52,11 +51,6 @@ public class EvalQueryQualityTests extends ESTestCase {
|
|||
}
|
||||
|
||||
public static EvalQueryQuality randomEvalQueryQuality() {
|
||||
List<DocumentKey> unknownDocs = new ArrayList<>();
|
||||
int numberOfUnknownDocs = randomInt(5);
|
||||
for (int i = 0; i < numberOfUnknownDocs; i++) {
|
||||
unknownDocs.add(new DocumentKey(randomAlphaOfLength(10), randomAlphaOfLength(10)));
|
||||
}
|
||||
int numberOfSearchHits = randomInt(5);
|
||||
List<RatedSearchHit> ratedHits = new ArrayList<>();
|
||||
for (int i = 0; i < numberOfSearchHits; i++) {
|
||||
|
|
|
@ -40,7 +40,7 @@ import java.util.List;
|
|||
import java.util.Map.Entry;
|
||||
import java.util.Set;
|
||||
|
||||
import static org.elasticsearch.index.rankeval.EvaluationMetric.filterUnknownDocuments;
|
||||
import static org.elasticsearch.index.rankeval.EvaluationMetric.filterUnratedDocuments;
|
||||
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
|
||||
import static org.hamcrest.Matchers.instanceOf;
|
||||
|
||||
|
@ -120,7 +120,7 @@ public class RankEvalRequestIT extends ESIntegTestCase {
|
|||
for (Entry<String, EvalQueryQuality> entry : entrySet) {
|
||||
EvalQueryQuality quality = entry.getValue();
|
||||
if (entry.getKey() == "amsterdam_query") {
|
||||
assertEquals(2, filterUnknownDocuments(quality.getHitsAndRatings()).size());
|
||||
assertEquals(2, filterUnratedDocuments(quality.getHitsAndRatings()).size());
|
||||
List<RatedSearchHit> hitsAndRatings = quality.getHitsAndRatings();
|
||||
assertEquals(6, hitsAndRatings.size());
|
||||
for (RatedSearchHit hit : hitsAndRatings) {
|
||||
|
@ -133,7 +133,7 @@ public class RankEvalRequestIT extends ESIntegTestCase {
|
|||
}
|
||||
}
|
||||
if (entry.getKey() == "berlin_query") {
|
||||
assertEquals(5, filterUnknownDocuments(quality.getHitsAndRatings()).size());
|
||||
assertEquals(5, filterUnratedDocuments(quality.getHitsAndRatings()).size());
|
||||
List<RatedSearchHit> hitsAndRatings = quality.getHitsAndRatings();
|
||||
assertEquals(6, hitsAndRatings.size());
|
||||
for (RatedSearchHit hit : hitsAndRatings) {
|
||||
|
|
|
@ -158,7 +158,7 @@ public class RankEvalResponseTests extends ESTestCase {
|
|||
" \"details\": {" +
|
||||
" \"coffee_query\": {" +
|
||||
" \"quality_level\": 0.1," +
|
||||
" \"unknown_docs\": [{\"_index\":\"index\",\"_id\":\"456\"}]," +
|
||||
" \"unrated_docs\": [{\"_index\":\"index\",\"_id\":\"456\"}]," +
|
||||
" \"hits\":[{\"hit\":{\"_index\":\"index\",\"_type\":\"\",\"_id\":\"123\",\"_score\":1.0}," +
|
||||
" \"rating\":5}," +
|
||||
" {\"hit\":{\"_index\":\"index\",\"_type\":\"\",\"_id\":\"456\",\"_score\":1.0}," +
|
||||
|
|
|
@ -73,7 +73,7 @@ setup:
|
|||
|
||||
- match: { quality_level: 1}
|
||||
- match: { details.amsterdam_query.quality_level: 1.0}
|
||||
- match: { details.amsterdam_query.unknown_docs: [ {"_index": "foo", "_id": "doc4"}]}
|
||||
- match: { details.amsterdam_query.unrated_docs: [ {"_index": "foo", "_id": "doc4"}]}
|
||||
- match: { details.amsterdam_query.metric_details.precision: {"relevant_docs_retrieved": 2, "docs_retrieved": 2}}
|
||||
|
||||
- length: { details.amsterdam_query.hits: 3}
|
||||
|
@ -85,7 +85,7 @@ setup:
|
|||
- is_false: details.amsterdam_query.hits.2.rating
|
||||
|
||||
- match: { details.berlin_query.quality_level: 1.0}
|
||||
- match: { details.berlin_query.unknown_docs: [ {"_index": "foo", "_id": "doc4"}]}
|
||||
- match: { details.berlin_query.unrated_docs: [ {"_index": "foo", "_id": "doc4"}]}
|
||||
- match: { details.berlin_query.metric_details.precision: {"relevant_docs_retrieved": 1, "docs_retrieved": 1}}
|
||||
- length: { details.berlin_query.hits: 2}
|
||||
- match: { details.berlin_query.hits.0.hit._id: "doc1" }
|
||||
|
@ -155,9 +155,9 @@ setup:
|
|||
- gt: {details.amsterdam_query.quality_level: 0.333}
|
||||
- lt: {details.amsterdam_query.quality_level: 0.334}
|
||||
- match: {details.amsterdam_query.metric_details.mean_reciprocal_rank: {"first_relevant": 3}}
|
||||
- match: {details.amsterdam_query.unknown_docs: [ {"_index": "foo", "_id": "doc2"},
|
||||
- match: {details.amsterdam_query.unrated_docs: [ {"_index": "foo", "_id": "doc2"},
|
||||
{"_index": "foo", "_id": "doc3"} ]}
|
||||
- match: {details.berlin_query.quality_level: 0.5}
|
||||
- match: {details.berlin_query.metric_details.mean_reciprocal_rank: {"first_relevant": 2}}
|
||||
- match: {details.berlin_query.unknown_docs: [ {"_index": "foo", "_id": "doc1"}]}
|
||||
- match: {details.berlin_query.unrated_docs: [ {"_index": "foo", "_id": "doc1"}]}
|
||||
|
||||
|
|
|
@ -73,7 +73,7 @@
|
|||
- lt: {quality_level: 13.848264 }
|
||||
- gt: {details.dcg_query.quality_level: 13.848263}
|
||||
- lt: {details.dcg_query.quality_level: 13.848264}
|
||||
- match: {details.dcg_query.unknown_docs: [ ]}
|
||||
- match: {details.dcg_query.unrated_docs: [ ]}
|
||||
|
||||
# reverse the order in which the results are returned (less relevant docs first)
|
||||
|
||||
|
@ -100,7 +100,7 @@
|
|||
- lt: {quality_level: 10.299675}
|
||||
- gt: {details.dcg_query_reverse.quality_level: 10.299674}
|
||||
- lt: {details.dcg_query_reverse.quality_level: 10.299675}
|
||||
- match: {details.dcg_query_reverse.unknown_docs: [ ]}
|
||||
- match: {details.dcg_query_reverse.unrated_docs: [ ]}
|
||||
|
||||
# if we mix both, we should get the average
|
||||
|
||||
|
@ -138,7 +138,7 @@
|
|||
- lt: {quality_level: 12.073970}
|
||||
- gt: {details.dcg_query.quality_level: 13.848263}
|
||||
- lt: {details.dcg_query.quality_level: 13.848264}
|
||||
- match: {details.dcg_query.unknown_docs: [ ]}
|
||||
- match: {details.dcg_query.unrated_docs: [ ]}
|
||||
- gt: {details.dcg_query_reverse.quality_level: 10.299674}
|
||||
- lt: {details.dcg_query_reverse.quality_level: 10.299675}
|
||||
- match: {details.dcg_query_reverse.unknown_docs: [ ]}
|
||||
- match: {details.dcg_query_reverse.unrated_docs: [ ]}
|
||||
|
|
|
@ -36,7 +36,7 @@
|
|||
|
||||
- match: { quality_level: 1}
|
||||
- match: { details.amsterdam_query.quality_level: 1.0}
|
||||
- match: { details.amsterdam_query.unknown_docs: [ ]}
|
||||
- match: { details.amsterdam_query.unrated_docs: [ ]}
|
||||
- match: { details.amsterdam_query.metric_details.precision: {"relevant_docs_retrieved": 1, "docs_retrieved": 1}}
|
||||
|
||||
- is_true: failures.invalid_query
|
||||
|
|
|
@ -85,7 +85,7 @@ setup:
|
|||
}
|
||||
|
||||
- match: {quality_level: 0.9}
|
||||
- match: {details.amsterdam_query.unknown_docs.0._id: "6"}
|
||||
- match: {details.amsterdam_query.unrated_docs.0._id: "6"}
|
||||
|
||||
---
|
||||
"Test illegal request parts":
|
||||
|
|
Loading…
Reference in New Issue