Rename ranking evaluation `quality_level` to `metric_score` (#32168)
The notion of "quality" is an overloaded term in the search ranking evaluation context. It is usually used to describe levels of "good" vs. "bad" of a search result with respect to the user's information need. We currently report the result of the ranking evaluation as `quality_level`, which is a bit misleading. This changes the response parameter name to `metric_score`, which fits better.
This commit is contained in:
parent 1b1aa4ecff
commit fe6bb75eb4
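For client code, the rename surfaces as new accessors on the response classes. A minimal usage sketch, not part of this commit: it assumes the 6.x high-level REST client's rankEval method and a RankEvalRequest built elsewhere; the hypothetical class and method names are for illustration only.

import java.io.IOException;
import java.util.Map;

import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.index.rankeval.EvalQueryQuality;
import org.elasticsearch.index.rankeval.RankEvalRequest;
import org.elasticsearch.index.rankeval.RankEvalResponse;

public class RankEvalRenameSketch {
    // Hypothetical helper: prints the renamed scores for a prepared request.
    static void printScores(RestHighLevelClient client, RankEvalRequest request) throws IOException {
        RankEvalResponse response = client.rankEval(request, RequestOptions.DEFAULT);
        // before this commit: response.getEvaluationResult(), "quality_level" in JSON
        double overall = response.getMetricScore();
        System.out.println("overall metric_score: " + overall);
        for (Map.Entry<String, EvalQueryQuality> entry : response.getPartialResults().entrySet()) {
            // before this commit: entry.getValue().getQualityLevel()
            System.out.println(entry.getKey() + " metric_score: " + entry.getValue().metricScore());
        }
    }
}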
@@ -80,7 +80,7 @@ public class RankEvalIT extends ESRestHighLevelClientTestCase {
         RankEvalResponse response = execute(rankEvalRequest, highLevelClient()::rankEval, highLevelClient()::rankEvalAsync);
         // the expected Prec@ for the first query is 5/7 and the expected Prec@ for the second is 1/7, divided by 2 to get the average
         double expectedPrecision = (1.0 / 7.0 + 5.0 / 7.0) / 2.0;
-        assertEquals(expectedPrecision, response.getEvaluationResult(), Double.MIN_VALUE);
+        assertEquals(expectedPrecision, response.getMetricScore(), Double.MIN_VALUE);
         Map<String, EvalQueryQuality> partialResults = response.getPartialResults();
         assertEquals(2, partialResults.size());
         EvalQueryQuality amsterdamQueryQuality = partialResults.get("amsterdam_query");
@@ -1136,14 +1136,14 @@ public class SearchDocumentationIT extends ESRestHighLevelClientTestCase {
             // end::rank-eval-execute

             // tag::rank-eval-response
-            double evaluationResult = response.getEvaluationResult(); // <1>
+            double evaluationResult = response.getMetricScore(); // <1>
             assertEquals(1.0 / 3.0, evaluationResult, 0.0);
             Map<String, EvalQueryQuality> partialResults =
                     response.getPartialResults();
             EvalQueryQuality evalQuality =
                     partialResults.get("kimchy_query"); // <2>
             assertEquals("kimchy_query", evalQuality.getId());
-            double qualityLevel = evalQuality.getQualityLevel(); // <3>
+            double qualityLevel = evalQuality.metricScore(); // <3>
             assertEquals(1.0 / 3.0, qualityLevel, 0.0);
             List<RatedSearchHit> hitsAndRatings = evalQuality.getHitsAndRatings();
             RatedSearchHit ratedSearchHit = hitsAndRatings.get(2);
@@ -270,10 +270,10 @@ that shows potential errors of individual queries. The response has the followin
 --------------------------------
 {
     "rank_eval": {
-        "quality_level": 0.4, <1>
+        "metric_score": 0.4, <1>
         "details": {
             "my_query_id1": { <2>
-                "quality_level": 0.6, <3>
+                "metric_score": 0.6, <3>
                 "unrated_docs": [ <4>
                 {
                     "_index": "my_index",
@@ -308,7 +308,7 @@ that shows potential errors of individual queries. The response has the followin

 <1> the overall evaluation quality calculated by the defined metric
 <2> the `details` section contains one entry for every query in the original `requests` section, keyed by the search request id
-<3> the `quality_level` in the `details` section shows the contribution of this query to the global quality score
+<3> the `metric_score` in the `details` section shows the contribution of this query to the global quality metric score
 <4> the `unrated_docs` section contains an `_index` and `_id` entry for each document in the search result for this
 query that didn't have a ratings value. This can be used to ask the user to supply ratings for these documents
 <5> the `hits` section shows a grouping of the search results with their supplied rating
@@ -126,8 +126,6 @@ public class DiscountedCumulativeGain implements EvaluationMetric {
     @Override
     public EvalQueryQuality evaluate(String taskId, SearchHit[] hits,
             List<RatedDocument> ratedDocs) {
-        List<Integer> allRatings = ratedDocs.stream().mapToInt(RatedDocument::getRating).boxed()
-                .collect(Collectors.toList());
         List<RatedSearchHit> ratedHits = joinHitsWithRatings(hits, ratedDocs);
         List<Integer> ratingsInSearchHits = new ArrayList<>(ratedHits.size());
         int unratedResults = 0;
@@ -144,6 +142,8 @@ public class DiscountedCumulativeGain implements EvaluationMetric {
         double idcg = 0;

         if (normalize) {
+            List<Integer> allRatings = ratedDocs.stream().mapToInt(RatedDocument::getRating).boxed()
+                    .collect(Collectors.toList());
             Collections.sort(allRatings, Comparator.nullsLast(Collections.reverseOrder()));
             idcg = computeDCG(allRatings.subList(0, Math.min(ratingsInSearchHits.size(), allRatings.size())));
             if (idcg != 0) {
@@ -41,19 +41,19 @@ import java.util.Objects;
 public class EvalQueryQuality implements ToXContentFragment, Writeable {

     private final String queryId;
-    private final double evaluationResult;
+    private final double metricScore;
     private MetricDetail optionalMetricDetails;
     private final List<RatedSearchHit> ratedHits;

-    public EvalQueryQuality(String id, double evaluationResult) {
+    public EvalQueryQuality(String id, double metricScore) {
         this.queryId = id;
-        this.evaluationResult = evaluationResult;
+        this.metricScore = metricScore;
         this.ratedHits = new ArrayList<>();
     }

     public EvalQueryQuality(StreamInput in) throws IOException {
         this.queryId = in.readString();
-        this.evaluationResult = in.readDouble();
+        this.metricScore = in.readDouble();
         this.ratedHits = in.readList(RatedSearchHit::new);
         this.optionalMetricDetails = in.readOptionalNamedWriteable(MetricDetail.class);
     }
@@ -61,7 +61,7 @@ public class EvalQueryQuality implements ToXContentFragment, Writeable {
     // only used for parsing internally
     private EvalQueryQuality(String queryId, ParsedEvalQueryQuality builder) {
         this.queryId = queryId;
-        this.evaluationResult = builder.evaluationResult;
+        this.metricScore = builder.evaluationResult;
         this.optionalMetricDetails = builder.optionalMetricDetails;
         this.ratedHits = builder.ratedHits;
     }
@@ -69,7 +69,7 @@ public class EvalQueryQuality implements ToXContentFragment, Writeable {
     @Override
     public void writeTo(StreamOutput out) throws IOException {
         out.writeString(queryId);
-        out.writeDouble(evaluationResult);
+        out.writeDouble(metricScore);
         out.writeList(ratedHits);
         out.writeOptionalNamedWriteable(this.optionalMetricDetails);
     }
@@ -78,8 +78,8 @@ public class EvalQueryQuality implements ToXContentFragment, Writeable {
         return queryId;
     }

-    public double getQualityLevel() {
-        return evaluationResult;
+    public double metricScore() {
+        return metricScore;
     }

     public void setMetricDetails(MetricDetail breakdown) {
@@ -101,7 +101,7 @@ public class EvalQueryQuality implements ToXContentFragment, Writeable {
     @Override
     public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
         builder.startObject(queryId);
-        builder.field(QUALITY_LEVEL_FIELD.getPreferredName(), this.evaluationResult);
+        builder.field(METRIC_SCORE_FIELD.getPreferredName(), this.metricScore);
         builder.startArray(UNRATED_DOCS_FIELD.getPreferredName());
         for (DocumentKey key : EvaluationMetric.filterUnratedDocuments(ratedHits)) {
             builder.startObject();
@@ -122,7 +122,7 @@ public class EvalQueryQuality implements ToXContentFragment, Writeable {
         return builder;
     }

-    private static final ParseField QUALITY_LEVEL_FIELD = new ParseField("quality_level");
+    static final ParseField METRIC_SCORE_FIELD = new ParseField("metric_score");
     private static final ParseField UNRATED_DOCS_FIELD = new ParseField("unrated_docs");
     private static final ParseField HITS_FIELD = new ParseField("hits");
     private static final ParseField METRIC_DETAILS_FIELD = new ParseField("metric_details");
@@ -136,7 +136,7 @@ public class EvalQueryQuality implements ToXContentFragment, Writeable {
     }

     static {
-        PARSER.declareDouble((obj, value) -> obj.evaluationResult = value, QUALITY_LEVEL_FIELD);
+        PARSER.declareDouble((obj, value) -> obj.evaluationResult = value, METRIC_SCORE_FIELD);
         PARSER.declareObject((obj, value) -> obj.optionalMetricDetails = value, (p, c) -> parseMetricDetail(p),
                 METRIC_DETAILS_FIELD);
         PARSER.declareObjectArray((obj, list) -> obj.ratedHits = list, (p, c) -> RatedSearchHit.parse(p), HITS_FIELD);
@@ -164,13 +164,13 @@ public class EvalQueryQuality implements ToXContentFragment, Writeable {
         }
         EvalQueryQuality other = (EvalQueryQuality) obj;
         return Objects.equals(queryId, other.queryId) &&
-                Objects.equals(evaluationResult, other.evaluationResult) &&
+                Objects.equals(metricScore, other.metricScore) &&
                 Objects.equals(ratedHits, other.ratedHits) &&
                 Objects.equals(optionalMetricDetails, other.optionalMetricDetails);
     }

     @Override
     public final int hashCode() {
-        return Objects.hash(queryId, evaluationResult, ratedHits, optionalMetricDetails);
+        return Objects.hash(queryId, metricScore, ratedHits, optionalMetricDetails);
     }
 }
@@ -39,23 +39,22 @@ import java.util.stream.Collectors;
 public interface EvaluationMetric extends ToXContentObject, NamedWriteable {

     /**
-     * Returns a single metric representing the ranking quality of a set of returned
-     * documents wrt. to a set of document ids labeled as relevant for this search.
+     * Evaluates a single ranking evaluation case.
      *
      * @param taskId
-     *            the id of the query for which the ranking is currently evaluated
+     *            an identifier of the query for which the search ranking is
+     *            evaluated
      * @param hits
-     *            the result hits as returned by a search request
+     *            the search result hits
      * @param ratedDocs
-     *            the documents that were ranked by human annotators for this query
-     *            case
-     * @return some metric representing the quality of the result hit list wrt. to
-     *         relevant doc ids.
+     *            the documents that contain the document rating for this query case
+     * @return an {@link EvalQueryQuality} instance that contains the metric score
+     *         with respect to the provided search hits and ratings
      */
     EvalQueryQuality evaluate(String taskId, SearchHit[] hits, List<RatedDocument> ratedDocs);

     /**
-     * join hits with rated documents using the joint _index/_id document key
+     * Joins hits with rated documents using the joint _index/_id document key.
      */
     static List<RatedSearchHit> joinHitsWithRatings(SearchHit[] hits, List<RatedDocument> ratedDocs) {
         Map<DocumentKey, RatedDocument> ratedDocumentMap = ratedDocs.stream()
@@ -74,7 +73,7 @@ public interface EvaluationMetric extends ToXContentObject, NamedWriteable {
     }

     /**
-     * filter @link {@link RatedSearchHit} that don't have a rating
+     * Filter {@link RatedSearchHit}s that do not have a rating.
      */
     static List<DocumentKey> filterUnratedDocuments(List<RatedSearchHit> ratedHits) {
         return ratedHits.stream().filter(hit -> hit.getRating().isPresent() == false)
@@ -82,11 +81,11 @@ public interface EvaluationMetric extends ToXContentObject, NamedWriteable {
     }

     /**
-     * how evaluation metrics for particular search queries get combined for the overall evaluation score.
-     * Defaults to averaging over the partial results.
+     * Combine several {@link EvalQueryQuality} results into the overall evaluation score.
+     * This defaults to averaging over the partial results, but can be overwritten to obtain a different behavior.
      */
     default double combine(Collection<EvalQueryQuality> partialResults) {
-        return partialResults.stream().mapToDouble(EvalQueryQuality::getQualityLevel).sum() / partialResults.size();
+        return partialResults.stream().mapToDouble(EvalQueryQuality::metricScore).sum() / partialResults.size();
     }

     /**
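To make the default combine() averaging concrete, here is a small standalone sketch (not part of the commit) that reproduces its behavior with the renamed accessor; the two scores and query ids are borrowed from the Prec@ test comment near the top of this diff (5/7 and 1/7).

import java.util.Arrays;
import java.util.List;

import org.elasticsearch.index.rankeval.EvalQueryQuality;

public class CombineSketch {
    public static void main(String[] args) {
        // per-query partial results, constructed with the renamed second argument
        List<EvalQueryQuality> partials = Arrays.asList(
                new EvalQueryQuality("amsterdam_query", 5.0 / 7.0),
                new EvalQueryQuality("berlin_query", 1.0 / 7.0));
        // same computation as EvaluationMetric#combine: average of metricScore()
        double overall = partials.stream()
                .mapToDouble(EvalQueryQuality::metricScore)
                .sum() / partials.size();
        System.out.println(overall); // (5/7 + 1/7) / 2 = 3/7 ~ 0.42857
    }
}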
@@ -110,8 +110,7 @@ public class MeanReciprocalRank implements EvaluationMetric {
      * Compute ReciprocalRank based on provided relevant document IDs.
      **/
     @Override
-    public EvalQueryQuality evaluate(String taskId, SearchHit[] hits,
-            List<RatedDocument> ratedDocs) {
+    public EvalQueryQuality evaluate(String taskId, SearchHit[] hits, List<RatedDocument> ratedDocs) {
         List<RatedSearchHit> ratedHits = joinHitsWithRatings(hits, ratedDocs);
         int firstRelevant = -1;
         int rank = 1;
@@ -48,15 +48,15 @@ public class RankEvalResponse extends ActionResponse implements ToXContentObject
 public class RankEvalResponse extends ActionResponse implements ToXContentObject {

     /** The overall evaluation result. */
-    private double evaluationResult;
+    private double metricScore;
     /** details about individual ranking evaluation queries, keyed by their id */
     private Map<String, EvalQueryQuality> details;
     /** exceptions for specific ranking evaluation queries, keyed by their id */
     private Map<String, Exception> failures;

-    public RankEvalResponse(double qualityLevel, Map<String, EvalQueryQuality> partialResults,
+    public RankEvalResponse(double metricScore, Map<String, EvalQueryQuality> partialResults,
             Map<String, Exception> failures) {
-        this.evaluationResult = qualityLevel;
+        this.metricScore = metricScore;
         this.details = new HashMap<>(partialResults);
         this.failures = new HashMap<>(failures);
     }
@@ -65,8 +65,8 @@ public class RankEvalResponse extends ActionResponse implements ToXContentObject
         // only used in RankEvalAction#newResponse()
     }

-    public double getEvaluationResult() {
-        return evaluationResult;
+    public double getMetricScore() {
+        return metricScore;
     }

     public Map<String, EvalQueryQuality> getPartialResults() {
@@ -85,7 +85,7 @@ public class RankEvalResponse extends ActionResponse implements ToXContentObject
     @Override
     public void writeTo(StreamOutput out) throws IOException {
         super.writeTo(out);
-        out.writeDouble(evaluationResult);
+        out.writeDouble(metricScore);
         out.writeVInt(details.size());
         for (String queryId : details.keySet()) {
             out.writeString(queryId);
@@ -101,7 +101,7 @@ public class RankEvalResponse extends ActionResponse implements ToXContentObject
     @Override
     public void readFrom(StreamInput in) throws IOException {
         super.readFrom(in);
-        this.evaluationResult = in.readDouble();
+        this.metricScore = in.readDouble();
         int partialResultSize = in.readVInt();
         this.details = new HashMap<>(partialResultSize);
         for (int i = 0; i < partialResultSize; i++) {
@@ -120,7 +120,7 @@ public class RankEvalResponse extends ActionResponse implements ToXContentObject
     @Override
     public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
         builder.startObject();
-        builder.field("quality_level", evaluationResult);
+        builder.field("metric_score", metricScore);
         builder.startObject("details");
         for (String key : details.keySet()) {
             details.get(key).toXContent(builder, params);
@@ -137,7 +137,6 @@ public class RankEvalResponse extends ActionResponse implements ToXContentObject
         return builder;
     }

-    private static final ParseField QUALITY_LEVEL_FIELD = new ParseField("quality_level");
     private static final ParseField DETAILS_FIELD = new ParseField("details");
     private static final ParseField FAILURES_FIELD = new ParseField("failures");
     @SuppressWarnings("unchecked")
@@ -147,7 +146,7 @@ public class RankEvalResponse extends ActionResponse implements ToXContentObject
             ((List<EvalQueryQuality>) a[1]).stream().collect(Collectors.toMap(EvalQueryQuality::getId, Function.identity())),
             ((List<Tuple<String, Exception>>) a[2]).stream().collect(Collectors.toMap(Tuple::v1, Tuple::v2))));
     static {
-        PARSER.declareDouble(ConstructingObjectParser.constructorArg(), QUALITY_LEVEL_FIELD);
+        PARSER.declareDouble(ConstructingObjectParser.constructorArg(), EvalQueryQuality.METRIC_SCORE_FIELD);
         PARSER.declareNamedObjects(ConstructingObjectParser.optionalConstructorArg(), (p, c, n) -> EvalQueryQuality.fromXContent(p, n),
                 DETAILS_FIELD);
         PARSER.declareNamedObjects(ConstructingObjectParser.optionalConstructorArg(), (p, c, n) -> {
@@ -76,7 +76,7 @@ public class DiscountedCumulativeGainTests extends ESTestCase {
             hits[i].shard(new SearchShardTarget("testnode", new Index("index", "uuid"), 0, null));
         }
         DiscountedCumulativeGain dcg = new DiscountedCumulativeGain();
-        assertEquals(EXPECTED_DCG, dcg.evaluate("id", hits, rated).getQualityLevel(), DELTA);
+        assertEquals(EXPECTED_DCG, dcg.evaluate("id", hits, rated).metricScore(), DELTA);

         /**
          * Check with normalization: to get the maximal possible dcg, sort documents by
@@ -94,7 +94,7 @@
          * idcg = 14.595390756454922 (sum of last column)
          */
         dcg = new DiscountedCumulativeGain(true, null, 10);
-        assertEquals(EXPECTED_NDCG, dcg.evaluate("id", hits, rated).getQualityLevel(), DELTA);
+        assertEquals(EXPECTED_NDCG, dcg.evaluate("id", hits, rated).metricScore(), DELTA);
     }

     /**
@@ -127,7 +127,7 @@
         }
         DiscountedCumulativeGain dcg = new DiscountedCumulativeGain();
         EvalQueryQuality result = dcg.evaluate("id", hits, rated);
-        assertEquals(12.779642067948913, result.getQualityLevel(), DELTA);
+        assertEquals(12.779642067948913, result.metricScore(), DELTA);
         assertEquals(2, filterUnratedDocuments(result.getHitsAndRatings()).size());

         /**
@@ -146,7 +146,7 @@
          * idcg = 13.347184833073591 (sum of last column)
          */
         dcg = new DiscountedCumulativeGain(true, null, 10);
-        assertEquals(12.779642067948913 / 13.347184833073591, dcg.evaluate("id", hits, rated).getQualityLevel(), DELTA);
+        assertEquals(12.779642067948913 / 13.347184833073591, dcg.evaluate("id", hits, rated).metricScore(), DELTA);
     }

     /**
@@ -184,7 +184,7 @@
         }
         DiscountedCumulativeGain dcg = new DiscountedCumulativeGain();
         EvalQueryQuality result = dcg.evaluate("id", hits, ratedDocs);
-        assertEquals(12.392789260714371, result.getQualityLevel(), DELTA);
+        assertEquals(12.392789260714371, result.metricScore(), DELTA);
         assertEquals(1, filterUnratedDocuments(result.getHitsAndRatings()).size());

         /**
@@ -204,7 +204,7 @@
          * idcg = 13.347184833073591 (sum of last column)
          */
         dcg = new DiscountedCumulativeGain(true, null, 10);
-        assertEquals(12.392789260714371 / 13.347184833073591, dcg.evaluate("id", hits, ratedDocs).getQualityLevel(), DELTA);
+        assertEquals(12.392789260714371 / 13.347184833073591, dcg.evaluate("id", hits, ratedDocs).metricScore(), DELTA);
     }

     /**
@@ -223,13 +223,13 @@
         SearchHit[] hits = new SearchHit[0];
         DiscountedCumulativeGain dcg = new DiscountedCumulativeGain();
         EvalQueryQuality result = dcg.evaluate("id", hits, ratedDocs);
-        assertEquals(0.0d, result.getQualityLevel(), DELTA);
+        assertEquals(0.0d, result.metricScore(), DELTA);
         assertEquals(0, filterUnratedDocuments(result.getHitsAndRatings()).size());

         // also check normalized
         dcg = new DiscountedCumulativeGain(true, null, 10);
         result = dcg.evaluate("id", hits, ratedDocs);
-        assertEquals(0.0d, result.getQualityLevel(), DELTA);
+        assertEquals(0.0d, result.metricScore(), DELTA);
         assertEquals(0, filterUnratedDocuments(result.getHitsAndRatings()).size());
     }

@@ -129,7 +129,7 @@ public class EvalQueryQualityTests extends ESTestCase {

     private static EvalQueryQuality mutateTestItem(EvalQueryQuality original) {
         String id = original.getId();
-        double qualityLevel = original.getQualityLevel();
+        double metricScore = original.metricScore();
         List<RatedSearchHit> ratedHits = new ArrayList<>(original.getHitsAndRatings());
         MetricDetail metricDetails = original.getMetricDetails();
         switch (randomIntBetween(0, 3)) {
@@ -137,7 +137,7 @@
             id = id + "_";
             break;
         case 1:
-            qualityLevel = qualityLevel + 0.1;
+            metricScore = metricScore + 0.1;
             break;
         case 2:
             if (metricDetails == null) {
@@ -152,7 +152,7 @@
         default:
             throw new IllegalStateException("The test should only allow four parameters mutated");
         }
-        EvalQueryQuality evalQueryQuality = new EvalQueryQuality(id, qualityLevel);
+        EvalQueryQuality evalQueryQuality = new EvalQueryQuality(id, metricScore);
         evalQueryQuality.setMetricDetails(metricDetails);
         evalQueryQuality.addHitsAndRatings(ratedHits);
         return evalQueryQuality;
@@ -76,10 +76,10 @@ public class ExpectedReciprocalRankTests extends ESTestCase {
         Integer[] relevanceRatings = new Integer[] { 3, 2, 0, 1};
         SearchHit[] hits = createSearchHits(rated, relevanceRatings);
         ExpectedReciprocalRank err = new ExpectedReciprocalRank(3, 0, 3);
-        assertEquals(0.8984375, err.evaluate("id", hits, rated).getQualityLevel(), DELTA);
+        assertEquals(0.8984375, err.evaluate("id", hits, rated).metricScore(), DELTA);
         // take 4th rank into window
         err = new ExpectedReciprocalRank(3, 0, 4);
-        assertEquals(0.8984375 + 0.00244140625, err.evaluate("id", hits, rated).getQualityLevel(), DELTA);
+        assertEquals(0.8984375 + 0.00244140625, err.evaluate("id", hits, rated).metricScore(), DELTA);
     }

     /**
@@ -102,11 +102,11 @@
         SearchHit[] hits = createSearchHits(rated, relevanceRatings);
         ExpectedReciprocalRank err = new ExpectedReciprocalRank(3, null, 4);
         EvalQueryQuality evaluation = err.evaluate("id", hits, rated);
-        assertEquals(0.875 + 0.00390625, evaluation.getQualityLevel(), DELTA);
+        assertEquals(0.875 + 0.00390625, evaluation.metricScore(), DELTA);
         assertEquals(1, ((ExpectedReciprocalRank.Detail) evaluation.getMetricDetails()).getUnratedDocs());
         // if we supply e.g. 2 as unknown docs rating, it should be the same as in the other test above
         err = new ExpectedReciprocalRank(3, 2, 4);
-        assertEquals(0.8984375 + 0.00244140625, err.evaluate("id", hits, rated).getQualityLevel(), DELTA);
+        assertEquals(0.8984375 + 0.00244140625, err.evaluate("id", hits, rated).metricScore(), DELTA);
     }

     private SearchHit[] createSearchHits(List<RatedDocument> rated, Integer[] relevanceRatings) {
@@ -126,7 +126,7 @@
      */
     public void testNoResults() throws Exception {
         ExpectedReciprocalRank err = new ExpectedReciprocalRank(5, 0, 10);
-        assertEquals(0.0, err.evaluate("id", new SearchHit[0], Collections.emptyList()).getQualityLevel(), DELTA);
+        assertEquals(0.0, err.evaluate("id", new SearchHit[0], Collections.emptyList()).metricScore(), DELTA);
     }

     public void testParseFromXContent() throws IOException {
@@ -95,14 +95,14 @@ public class MeanReciprocalRankTests extends ESTestCase {

         int rankAtFirstRelevant = relevantAt + 1;
         EvalQueryQuality evaluation = reciprocalRank.evaluate("id", hits, ratedDocs);
-        assertEquals(1.0 / rankAtFirstRelevant, evaluation.getQualityLevel(), Double.MIN_VALUE);
+        assertEquals(1.0 / rankAtFirstRelevant, evaluation.metricScore(), Double.MIN_VALUE);
         assertEquals(rankAtFirstRelevant, ((MeanReciprocalRank.Detail) evaluation.getMetricDetails()).getFirstRelevantRank());

         // check that if we have fewer search hits than relevant doc position,
-        // we don't find any result and get 0.0 quality level
+        // we don't find any result and get 0.0 score
         reciprocalRank = new MeanReciprocalRank();
         evaluation = reciprocalRank.evaluate("id", Arrays.copyOfRange(hits, 0, relevantAt), ratedDocs);
-        assertEquals(0.0, evaluation.getQualityLevel(), Double.MIN_VALUE);
+        assertEquals(0.0, evaluation.metricScore(), Double.MIN_VALUE);
     }

     public void testEvaluationOneRelevantInResults() {
@@ -120,7 +120,7 @@
         }

         EvalQueryQuality evaluation = reciprocalRank.evaluate("id", hits, ratedDocs);
-        assertEquals(1.0 / (relevantAt + 1), evaluation.getQualityLevel(), Double.MIN_VALUE);
+        assertEquals(1.0 / (relevantAt + 1), evaluation.metricScore(), Double.MIN_VALUE);
         assertEquals(relevantAt + 1, ((MeanReciprocalRank.Detail) evaluation.getMetricDetails()).getFirstRelevantRank());
     }

@@ -140,7 +140,7 @@

         MeanReciprocalRank reciprocalRank = new MeanReciprocalRank(2, 10);
         EvalQueryQuality evaluation = reciprocalRank.evaluate("id", hits, rated);
-        assertEquals((double) 1 / 3, evaluation.getQualityLevel(), 0.00001);
+        assertEquals((double) 1 / 3, evaluation.metricScore(), 0.00001);
         assertEquals(3, ((MeanReciprocalRank.Detail) evaluation.getMetricDetails()).getFirstRelevantRank());
     }

@@ -158,13 +158,13 @@
         SearchHit[] hits = createSearchHits(0, 9, "test");
         List<RatedDocument> ratedDocs = new ArrayList<>();
         EvalQueryQuality evaluation = reciprocalRank.evaluate("id", hits, ratedDocs);
-        assertEquals(0.0, evaluation.getQualityLevel(), Double.MIN_VALUE);
+        assertEquals(0.0, evaluation.metricScore(), Double.MIN_VALUE);
     }

     public void testNoResults() throws Exception {
         SearchHit[] hits = new SearchHit[0];
         EvalQueryQuality evaluated = (new MeanReciprocalRank()).evaluate("id", hits, Collections.emptyList());
-        assertEquals(0.0d, evaluated.getQualityLevel(), 0.00001);
+        assertEquals(0.0d, evaluated.metricScore(), 0.00001);
         assertEquals(-1, ((MeanReciprocalRank.Detail) evaluated.getMetricDetails()).getFirstRelevantRank());
     }

@@ -53,7 +53,7 @@ public class PrecisionAtKTests extends ESTestCase {
         List<RatedDocument> rated = new ArrayList<>();
         rated.add(createRatedDoc("test", "0", RELEVANT_RATING_1));
         EvalQueryQuality evaluated = (new PrecisionAtK()).evaluate("id", toSearchHits(rated, "test"), rated);
-        assertEquals(1, evaluated.getQualityLevel(), 0.00001);
+        assertEquals(1, evaluated.metricScore(), 0.00001);
         assertEquals(1, ((PrecisionAtK.Detail) evaluated.getMetricDetails()).getRelevantRetrieved());
         assertEquals(1, ((PrecisionAtK.Detail) evaluated.getMetricDetails()).getRetrieved());
     }
@@ -66,7 +66,7 @@
         rated.add(createRatedDoc("test", "3", RELEVANT_RATING_1));
         rated.add(createRatedDoc("test", "4", IRRELEVANT_RATING_0));
         EvalQueryQuality evaluated = (new PrecisionAtK()).evaluate("id", toSearchHits(rated, "test"), rated);
-        assertEquals((double) 4 / 5, evaluated.getQualityLevel(), 0.00001);
+        assertEquals((double) 4 / 5, evaluated.metricScore(), 0.00001);
         assertEquals(4, ((PrecisionAtK.Detail) evaluated.getMetricDetails()).getRelevantRetrieved());
         assertEquals(5, ((PrecisionAtK.Detail) evaluated.getMetricDetails()).getRetrieved());
     }
@@ -85,7 +85,7 @@
         rated.add(createRatedDoc("test", "4", 4));
         PrecisionAtK precisionAtN = new PrecisionAtK(2, false, 5);
         EvalQueryQuality evaluated = precisionAtN.evaluate("id", toSearchHits(rated, "test"), rated);
-        assertEquals((double) 3 / 5, evaluated.getQualityLevel(), 0.00001);
+        assertEquals((double) 3 / 5, evaluated.metricScore(), 0.00001);
         assertEquals(3, ((PrecisionAtK.Detail) evaluated.getMetricDetails()).getRelevantRetrieved());
         assertEquals(5, ((PrecisionAtK.Detail) evaluated.getMetricDetails()).getRetrieved());
     }
@@ -99,7 +99,7 @@
         rated.add(createRatedDoc("test", "2", IRRELEVANT_RATING_0));
         // the following search hits contain only the last three documents
         EvalQueryQuality evaluated = (new PrecisionAtK()).evaluate("id", toSearchHits(rated.subList(2, 5), "test"), rated);
-        assertEquals((double) 2 / 3, evaluated.getQualityLevel(), 0.00001);
+        assertEquals((double) 2 / 3, evaluated.metricScore(), 0.00001);
         assertEquals(2, ((PrecisionAtK.Detail) evaluated.getMetricDetails()).getRelevantRetrieved());
         assertEquals(3, ((PrecisionAtK.Detail) evaluated.getMetricDetails()).getRetrieved());
     }
@@ -114,14 +114,14 @@
         searchHits[2].shard(new SearchShardTarget("testnode", new Index("index", "uuid"), 0, null));

         EvalQueryQuality evaluated = (new PrecisionAtK()).evaluate("id", searchHits, rated);
-        assertEquals((double) 2 / 3, evaluated.getQualityLevel(), 0.00001);
+        assertEquals((double) 2 / 3, evaluated.metricScore(), 0.00001);
         assertEquals(2, ((PrecisionAtK.Detail) evaluated.getMetricDetails()).getRelevantRetrieved());
         assertEquals(3, ((PrecisionAtK.Detail) evaluated.getMetricDetails()).getRetrieved());

         // also try with setting `ignore_unlabeled`
         PrecisionAtK prec = new PrecisionAtK(1, true, 10);
         evaluated = prec.evaluate("id", searchHits, rated);
-        assertEquals((double) 2 / 2, evaluated.getQualityLevel(), 0.00001);
+        assertEquals((double) 2 / 2, evaluated.metricScore(), 0.00001);
         assertEquals(2, ((PrecisionAtK.Detail) evaluated.getMetricDetails()).getRelevantRetrieved());
         assertEquals(2, ((PrecisionAtK.Detail) evaluated.getMetricDetails()).getRetrieved());
     }
@@ -133,14 +133,14 @@
             hits[i].shard(new SearchShardTarget("testnode", new Index("index", "uuid"), 0, null));
         }
         EvalQueryQuality evaluated = (new PrecisionAtK()).evaluate("id", hits, Collections.emptyList());
-        assertEquals(0.0d, evaluated.getQualityLevel(), 0.00001);
+        assertEquals(0.0d, evaluated.metricScore(), 0.00001);
         assertEquals(0, ((PrecisionAtK.Detail) evaluated.getMetricDetails()).getRelevantRetrieved());
         assertEquals(5, ((PrecisionAtK.Detail) evaluated.getMetricDetails()).getRetrieved());

         // also try with setting `ignore_unlabeled`
         PrecisionAtK prec = new PrecisionAtK(1, true, 10);
         evaluated = prec.evaluate("id", hits, Collections.emptyList());
-        assertEquals(0.0d, evaluated.getQualityLevel(), 0.00001);
+        assertEquals(0.0d, evaluated.metricScore(), 0.00001);
         assertEquals(0, ((PrecisionAtK.Detail) evaluated.getMetricDetails()).getRelevantRetrieved());
         assertEquals(0, ((PrecisionAtK.Detail) evaluated.getMetricDetails()).getRetrieved());
     }
@@ -148,7 +148,7 @@
     public void testNoResults() throws Exception {
         SearchHit[] hits = new SearchHit[0];
         EvalQueryQuality evaluated = (new PrecisionAtK()).evaluate("id", hits, Collections.emptyList());
-        assertEquals(0.0d, evaluated.getQualityLevel(), 0.00001);
+        assertEquals(0.0d, evaluated.metricScore(), 0.00001);
         assertEquals(0, ((PrecisionAtK.Detail) evaluated.getMetricDetails()).getRelevantRetrieved());
         assertEquals(0, ((PrecisionAtK.Detail) evaluated.getMetricDetails()).getRetrieved());
     }
@@ -114,7 +114,7 @@ public class RankEvalRequestIT extends ESIntegTestCase {
         // the expected Prec@ for the first query is 4/6 and the expected Prec@ for the
         // second is 1/6, divided by 2 to get the average
         double expectedPrecision = (1.0 / 6.0 + 4.0 / 6.0) / 2.0;
-        assertEquals(expectedPrecision, response.getEvaluationResult(), Double.MIN_VALUE);
+        assertEquals(expectedPrecision, response.getMetricScore(), Double.MIN_VALUE);
         Set<Entry<String, EvalQueryQuality>> entrySet = response.getPartialResults().entrySet();
         assertEquals(2, entrySet.size());
         for (Entry<String, EvalQueryQuality> entry : entrySet) {
@@ -157,7 +157,7 @@
         // if we look only at top 3 documente, the expected P@3 for the first query is
         // 2/3 and the expected Prec@ for the second is 1/3, divided by 2 to get the average
         expectedPrecision = (1.0 / 3.0 + 2.0 / 3.0) / 2.0;
-        assertEquals(expectedPrecision, response.getEvaluationResult(), Double.MIN_VALUE);
+        assertEquals(expectedPrecision, response.getMetricScore(), Double.MIN_VALUE);
     }

     /**
@@ -186,7 +186,7 @@
                 new RankEvalRequest(task, new String[] { TEST_INDEX }));

         RankEvalResponse response = client().execute(RankEvalAction.INSTANCE, builder.request()).actionGet();
-        assertEquals(DiscountedCumulativeGainTests.EXPECTED_DCG, response.getEvaluationResult(), 10E-14);
+        assertEquals(DiscountedCumulativeGainTests.EXPECTED_DCG, response.getMetricScore(), 10E-14);

         // test that a different window size k affects the result
         metric = new DiscountedCumulativeGain(false, null, 3);
@@ -195,7 +195,7 @@
         builder = new RankEvalRequestBuilder(client(), RankEvalAction.INSTANCE, new RankEvalRequest(task, new String[] { TEST_INDEX }));

         response = client().execute(RankEvalAction.INSTANCE, builder.request()).actionGet();
-        assertEquals(12.39278926071437, response.getEvaluationResult(), 10E-14);
+        assertEquals(12.39278926071437, response.getMetricScore(), 10E-14);
     }

     public void testMRRRequest() {
@@ -218,7 +218,7 @@
         // the expected reciprocal rank for the berlin_query is 1/1
         // dividing by 2 to get the average
         double expectedMRR = (1.0 + 1.0 / 5.0) / 2.0;
-        assertEquals(expectedMRR, response.getEvaluationResult(), 0.0);
+        assertEquals(expectedMRR, response.getMetricScore(), 0.0);

         // test that a different window size k affects the result
         metric = new MeanReciprocalRank(1, 3);
@@ -231,7 +231,7 @@
         // the reciprocal rank for the berlin_query is 1/1
         // dividing by 2 to get the average
         expectedMRR = 1.0 / 2.0;
-        assertEquals(expectedMRR, response.getEvaluationResult(), 0.0);
+        assertEquals(expectedMRR, response.getMetricScore(), 0.0);
     }

     /**
@@ -102,7 +102,7 @@ public class RankEvalResponseTests extends ESTestCase {
         try (StreamInput in = output.bytes().streamInput()) {
             RankEvalResponse deserializedResponse = new RankEvalResponse();
             deserializedResponse.readFrom(in);
-            assertEquals(randomResponse.getEvaluationResult(), deserializedResponse.getEvaluationResult(), Double.MIN_VALUE);
+            assertEquals(randomResponse.getMetricScore(), deserializedResponse.getMetricScore(), Double.MIN_VALUE);
             assertEquals(randomResponse.getPartialResults(), deserializedResponse.getPartialResults());
             assertEquals(randomResponse.getFailures().keySet(), deserializedResponse.getFailures().keySet());
             assertNotSame(randomResponse, deserializedResponse);
@@ -130,7 +130,7 @@
         assertNotSame(testItem, parsedItem);
         // We cannot check equality of object here because some information (e.g.
         // SearchHit#shard) cannot fully be parsed back.
-        assertEquals(testItem.getEvaluationResult(), parsedItem.getEvaluationResult(), 0.0);
+        assertEquals(testItem.getMetricScore(), parsedItem.getMetricScore(), 0.0);
         assertEquals(testItem.getPartialResults().keySet(), parsedItem.getPartialResults().keySet());
         for (EvalQueryQuality metricDetail : testItem.getPartialResults().values()) {
             EvalQueryQuality parsedEvalQueryQuality = parsedItem.getPartialResults().get(metricDetail.getId());
@@ -154,10 +154,10 @@
         XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON);
         String xContent = BytesReference.bytes(response.toXContent(builder, ToXContent.EMPTY_PARAMS)).utf8ToString();
         assertEquals(("{" +
-                " \"quality_level\": 0.123," +
+                " \"metric_score\": 0.123," +
                 " \"details\": {" +
                 " \"coffee_query\": {" +
-                " \"quality_level\": 0.1," +
+                " \"metric_score\": 0.1," +
                 " \"unrated_docs\": [{\"_index\":\"index\",\"_id\":\"456\"}]," +
                 " \"hits\":[{\"hit\":{\"_index\":\"index\",\"_type\":\"\",\"_id\":\"123\",\"_score\":1.0}," +
                 " \"rating\":5}," +
@@ -71,8 +71,8 @@ setup:
           "metric" : { "precision": { "ignore_unlabeled" : true }}
         }

-  - match: { quality_level: 1}
-  - match: { details.amsterdam_query.quality_level: 1.0}
+  - match: { metric_score: 1}
+  - match: { details.amsterdam_query.metric_score: 1.0}
   - match: { details.amsterdam_query.unrated_docs: [ {"_index": "foo", "_id": "doc4"}]}
   - match: { details.amsterdam_query.metric_details.precision: {"relevant_docs_retrieved": 2, "docs_retrieved": 2}}

@@ -84,7 +84,7 @@ setup:
   - match: { details.amsterdam_query.hits.2.hit._id: "doc4"}
   - is_false: details.amsterdam_query.hits.2.rating

-  - match: { details.berlin_query.quality_level: 1.0}
+  - match: { details.berlin_query.metric_score: 1.0}
   - match: { details.berlin_query.unrated_docs: [ {"_index": "foo", "_id": "doc4"}]}
   - match: { details.berlin_query.metric_details.precision: {"relevant_docs_retrieved": 1, "docs_retrieved": 1}}
   - length: { details.berlin_query.hits: 2}
@@ -118,9 +118,9 @@
           "metric" : { "precision": { "ignore_unlabeled" : true }}
         }

-  - match: { quality_level: 1}
-  - match: { details.amsterdam_query.quality_level: 1.0}
-  - match: { details.berlin_query.quality_level: 1.0}
+  - match: { metric_score: 1}
+  - match: { details.amsterdam_query.metric_score: 1.0}
+  - match: { details.berlin_query.metric_score: 1.0}

 ---
 "Mean Reciprocal Rank":
@@ -150,14 +150,14 @@
         }

   # average is (1/3 + 1/2)/2 = 5/12 ~ 0.41666666666666663
-  - gt: {quality_level: 0.416}
-  - lt: {quality_level: 0.417}
-  - gt: {details.amsterdam_query.quality_level: 0.333}
-  - lt: {details.amsterdam_query.quality_level: 0.334}
+  - gt: {metric_score: 0.416}
+  - lt: {metric_score: 0.417}
+  - gt: {details.amsterdam_query.metric_score: 0.333}
+  - lt: {details.amsterdam_query.metric_score: 0.334}
   - match: {details.amsterdam_query.metric_details.mean_reciprocal_rank: {"first_relevant": 3}}
   - match: {details.amsterdam_query.unrated_docs: [ {"_index": "foo", "_id": "doc2"},
                                                     {"_index": "foo", "_id": "doc3"} ]}
-  - match: {details.berlin_query.quality_level: 0.5}
+  - match: {details.berlin_query.metric_score: 0.5}
   - match: {details.berlin_query.metric_details.mean_reciprocal_rank: {"first_relevant": 2}}
   - match: {details.berlin_query.unrated_docs: [ {"_index": "foo", "_id": "doc1"}]}

@@ -69,10 +69,10 @@
           "metric" : { "dcg": {}}
         }

-  - gt: {quality_level: 13.848263 }
-  - lt: {quality_level: 13.848264 }
-  - gt: {details.dcg_query.quality_level: 13.848263}
-  - lt: {details.dcg_query.quality_level: 13.848264}
+  - gt: {metric_score: 13.848263 }
+  - lt: {metric_score: 13.848264 }
+  - gt: {details.dcg_query.metric_score: 13.848263}
+  - lt: {details.dcg_query.metric_score: 13.848264}
   - match: {details.dcg_query.unrated_docs: [ ]}

   # reverse the order in which the results are returned (less relevant docs first)
@@ -96,10 +96,10 @@
           "metric" : { "dcg": { }}
         }

-  - gt: {quality_level: 10.299674}
-  - lt: {quality_level: 10.299675}
-  - gt: {details.dcg_query_reverse.quality_level: 10.299674}
-  - lt: {details.dcg_query_reverse.quality_level: 10.299675}
+  - gt: {metric_score: 10.299674}
+  - lt: {metric_score: 10.299675}
+  - gt: {details.dcg_query_reverse.metric_score: 10.299674}
+  - lt: {details.dcg_query_reverse.metric_score: 10.299675}
   - match: {details.dcg_query_reverse.unrated_docs: [ ]}

   # if we mix both, we should get the average
@@ -134,11 +134,11 @@
           "metric" : { "dcg": { }}
         }

-  - gt: {quality_level: 12.073969}
-  - lt: {quality_level: 12.073970}
-  - gt: {details.dcg_query.quality_level: 13.848263}
-  - lt: {details.dcg_query.quality_level: 13.848264}
+  - gt: {metric_score: 12.073969}
+  - lt: {metric_score: 12.073970}
+  - gt: {details.dcg_query.metric_score: 13.848263}
+  - lt: {details.dcg_query.metric_score: 13.848264}
   - match: {details.dcg_query.unrated_docs: [ ]}
-  - gt: {details.dcg_query_reverse.quality_level: 10.299674}
-  - lt: {details.dcg_query_reverse.quality_level: 10.299675}
+  - gt: {details.dcg_query_reverse.metric_score: 10.299674}
+  - lt: {details.dcg_query_reverse.metric_score: 10.299675}
   - match: {details.dcg_query_reverse.unrated_docs: [ ]}
@@ -34,8 +34,8 @@
           "metric" : { "precision": { "ignore_unlabeled" : true }}
         }

-  - match: { quality_level: 1}
-  - match: { details.amsterdam_query.quality_level: 1.0}
+  - match: { metric_score: 1}
+  - match: { details.amsterdam_query.metric_score: 1.0}
   - match: { details.amsterdam_query.unrated_docs: [ ]}
   - match: { details.amsterdam_query.metric_details.precision: {"relevant_docs_retrieved": 1, "docs_retrieved": 1}}

@@ -84,7 +84,7 @@ setup:
           "metric" : { "precision": { }}
         }

-  - match: {quality_level: 0.9}
+  - match: {metric_score: 0.9}
   - match: {details.amsterdam_query.unrated_docs.0._id: "6"}

 ---
@@ -52,5 +52,5 @@
           "metric" : { "precision": { "ignore_unlabeled" : true }}
         }

-  - match: { quality_level: 1 }
+  - match: { metric_score: 1 }
