diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/RankEvalIT.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/RankEvalIT.java index 2890257b236..330afafd9ef 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/RankEvalIT.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/RankEvalIT.java @@ -80,7 +80,7 @@ public class RankEvalIT extends ESRestHighLevelClientTestCase { RankEvalResponse response = execute(rankEvalRequest, highLevelClient()::rankEval, highLevelClient()::rankEvalAsync); // the expected Prec@ for the first query is 5/7 and the expected Prec@ for the second is 1/7, divided by 2 to get the average double expectedPrecision = (1.0 / 7.0 + 5.0 / 7.0) / 2.0; - assertEquals(expectedPrecision, response.getEvaluationResult(), Double.MIN_VALUE); + assertEquals(expectedPrecision, response.getMetricScore(), Double.MIN_VALUE); Map partialResults = response.getPartialResults(); assertEquals(2, partialResults.size()); EvalQueryQuality amsterdamQueryQuality = partialResults.get("amsterdam_query"); diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/SearchDocumentationIT.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/SearchDocumentationIT.java index c60f2d4c92b..2f743c786ba 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/SearchDocumentationIT.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/SearchDocumentationIT.java @@ -1136,14 +1136,14 @@ public class SearchDocumentationIT extends ESRestHighLevelClientTestCase { // end::rank-eval-execute // tag::rank-eval-response - double evaluationResult = response.getEvaluationResult(); // <1> + double evaluationResult = response.getMetricScore(); // <1> assertEquals(1.0 / 3.0, evaluationResult, 0.0); Map partialResults = response.getPartialResults(); EvalQueryQuality evalQuality = 
partialResults.get("kimchy_query"); // <2> assertEquals("kimchy_query", evalQuality.getId()); - double qualityLevel = evalQuality.getQualityLevel(); // <3> + double qualityLevel = evalQuality.metricScore(); // <3> assertEquals(1.0 / 3.0, qualityLevel, 0.0); List hitsAndRatings = evalQuality.getHitsAndRatings(); RatedSearchHit ratedSearchHit = hitsAndRatings.get(2); diff --git a/docs/reference/search/rank-eval.asciidoc b/docs/reference/search/rank-eval.asciidoc index cf13b9f7b06..ef715dfca8c 100644 --- a/docs/reference/search/rank-eval.asciidoc +++ b/docs/reference/search/rank-eval.asciidoc @@ -270,10 +270,10 @@ that shows potential errors of individual queries. The response has the followin -------------------------------- { "rank_eval": { - "quality_level": 0.4, <1> + "metric_score": 0.4, <1> "details": { "my_query_id1": { <2> - "quality_level": 0.6, <3> + "metric_score": 0.6, <3> "unrated_docs": [ <4> { "_index": "my_index", @@ -308,7 +308,7 @@ that shows potential errors of individual queries. The response has the followin <1> the overall evaluation quality calculated by the defined metric <2> the `details` section contains one entry for every query in the original `requests` section, keyed by the search request id -<3> the `quality_level` in the `details` section shows the contribution of this query to the global quality score +<3> the `metric_score` in the `details` section shows the contribution of this query to the global metric score <4> the `unrated_docs` section contains an `_index` and `_id` entry for each document in the search result for this query that didn't have a ratings value. 
This can be used to ask the user to supply ratings for these documents <5> the `hits` section shows a grouping of the search results with their supplied rating diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/DiscountedCumulativeGain.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/DiscountedCumulativeGain.java index cab32377323..a6a6830a99c 100644 --- a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/DiscountedCumulativeGain.java +++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/DiscountedCumulativeGain.java @@ -126,8 +126,6 @@ public class DiscountedCumulativeGain implements EvaluationMetric { @Override public EvalQueryQuality evaluate(String taskId, SearchHit[] hits, List ratedDocs) { - List allRatings = ratedDocs.stream().mapToInt(RatedDocument::getRating).boxed() - .collect(Collectors.toList()); List ratedHits = joinHitsWithRatings(hits, ratedDocs); List ratingsInSearchHits = new ArrayList<>(ratedHits.size()); int unratedResults = 0; @@ -144,6 +142,8 @@ public class DiscountedCumulativeGain implements EvaluationMetric { double idcg = 0; if (normalize) { + List allRatings = ratedDocs.stream().mapToInt(RatedDocument::getRating).boxed() + .collect(Collectors.toList()); Collections.sort(allRatings, Comparator.nullsLast(Collections.reverseOrder())); idcg = computeDCG(allRatings.subList(0, Math.min(ratingsInSearchHits.size(), allRatings.size()))); if (idcg != 0) { diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/EvalQueryQuality.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/EvalQueryQuality.java index 91ba1ce6169..f065a34787c 100644 --- a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/EvalQueryQuality.java +++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/EvalQueryQuality.java @@ -41,19 +41,19 @@ import java.util.Objects; public class EvalQueryQuality implements ToXContentFragment, 
Writeable { private final String queryId; - private final double evaluationResult; + private final double metricScore; private MetricDetail optionalMetricDetails; private final List ratedHits; - public EvalQueryQuality(String id, double evaluationResult) { + public EvalQueryQuality(String id, double metricScore) { this.queryId = id; - this.evaluationResult = evaluationResult; + this.metricScore = metricScore; this.ratedHits = new ArrayList<>(); } public EvalQueryQuality(StreamInput in) throws IOException { this.queryId = in.readString(); - this.evaluationResult = in.readDouble(); + this.metricScore = in.readDouble(); this.ratedHits = in.readList(RatedSearchHit::new); this.optionalMetricDetails = in.readOptionalNamedWriteable(MetricDetail.class); } @@ -61,7 +61,7 @@ public class EvalQueryQuality implements ToXContentFragment, Writeable { // only used for parsing internally private EvalQueryQuality(String queryId, ParsedEvalQueryQuality builder) { this.queryId = queryId; - this.evaluationResult = builder.evaluationResult; + this.metricScore = builder.evaluationResult; this.optionalMetricDetails = builder.optionalMetricDetails; this.ratedHits = builder.ratedHits; } @@ -69,7 +69,7 @@ public class EvalQueryQuality implements ToXContentFragment, Writeable { @Override public void writeTo(StreamOutput out) throws IOException { out.writeString(queryId); - out.writeDouble(evaluationResult); + out.writeDouble(metricScore); out.writeList(ratedHits); out.writeOptionalNamedWriteable(this.optionalMetricDetails); } @@ -78,8 +78,8 @@ public class EvalQueryQuality implements ToXContentFragment, Writeable { return queryId; } - public double getQualityLevel() { - return evaluationResult; + public double metricScore() { + return metricScore; } public void setMetricDetails(MetricDetail breakdown) { @@ -101,7 +101,7 @@ public class EvalQueryQuality implements ToXContentFragment, Writeable { @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws 
IOException { builder.startObject(queryId); - builder.field(QUALITY_LEVEL_FIELD.getPreferredName(), this.evaluationResult); + builder.field(METRIC_SCORE_FIELD.getPreferredName(), this.metricScore); builder.startArray(UNRATED_DOCS_FIELD.getPreferredName()); for (DocumentKey key : EvaluationMetric.filterUnratedDocuments(ratedHits)) { builder.startObject(); @@ -122,7 +122,7 @@ public class EvalQueryQuality implements ToXContentFragment, Writeable { return builder; } - private static final ParseField QUALITY_LEVEL_FIELD = new ParseField("quality_level"); + static final ParseField METRIC_SCORE_FIELD = new ParseField("metric_score"); private static final ParseField UNRATED_DOCS_FIELD = new ParseField("unrated_docs"); private static final ParseField HITS_FIELD = new ParseField("hits"); private static final ParseField METRIC_DETAILS_FIELD = new ParseField("metric_details"); @@ -136,7 +136,7 @@ public class EvalQueryQuality implements ToXContentFragment, Writeable { } static { - PARSER.declareDouble((obj, value) -> obj.evaluationResult = value, QUALITY_LEVEL_FIELD); + PARSER.declareDouble((obj, value) -> obj.evaluationResult = value, METRIC_SCORE_FIELD); PARSER.declareObject((obj, value) -> obj.optionalMetricDetails = value, (p, c) -> parseMetricDetail(p), METRIC_DETAILS_FIELD); PARSER.declareObjectArray((obj, list) -> obj.ratedHits = list, (p, c) -> RatedSearchHit.parse(p), HITS_FIELD); @@ -164,13 +164,13 @@ public class EvalQueryQuality implements ToXContentFragment, Writeable { } EvalQueryQuality other = (EvalQueryQuality) obj; return Objects.equals(queryId, other.queryId) && - Objects.equals(evaluationResult, other.evaluationResult) && + Objects.equals(metricScore, other.metricScore) && Objects.equals(ratedHits, other.ratedHits) && Objects.equals(optionalMetricDetails, other.optionalMetricDetails); } @Override public final int hashCode() { - return Objects.hash(queryId, evaluationResult, ratedHits, optionalMetricDetails); + return Objects.hash(queryId, metricScore, 
ratedHits, optionalMetricDetails); } } diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/EvaluationMetric.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/EvaluationMetric.java index 37898fd9516..d1e89890477 100644 --- a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/EvaluationMetric.java +++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/EvaluationMetric.java @@ -39,23 +39,22 @@ import java.util.stream.Collectors; public interface EvaluationMetric extends ToXContentObject, NamedWriteable { /** - * Returns a single metric representing the ranking quality of a set of returned - * documents wrt. to a set of document ids labeled as relevant for this search. + * Evaluates a single ranking evaluation case. * * @param taskId - * the id of the query for which the ranking is currently evaluated + * an identifier of the query for which the search ranking is + * evaluated * @param hits - * the result hits as returned by a search request + * the search result hits * @param ratedDocs - * the documents that were ranked by human annotators for this query - * case - * @return some metric representing the quality of the result hit list wrt. to - * relevant doc ids. + * the documents that contain the document rating for this query case + * @return an {@link EvalQueryQuality} instance that contains the metric score + * with respect to the provided search hits and ratings */ EvalQueryQuality evaluate(String taskId, SearchHit[] hits, List ratedDocs); /** - * join hits with rated documents using the joint _index/_id document key + * Joins hits with rated documents using the joint _index/_id document key. 
*/ static List joinHitsWithRatings(SearchHit[] hits, List ratedDocs) { Map ratedDocumentMap = ratedDocs.stream() @@ -74,7 +73,7 @@ public interface EvaluationMetric extends ToXContentObject, NamedWriteable { } /** - * filter @link {@link RatedSearchHit} that don't have a rating + * Filter {@link RatedSearchHit}s that do not have a rating. */ static List filterUnratedDocuments(List ratedHits) { return ratedHits.stream().filter(hit -> hit.getRating().isPresent() == false) @@ -82,11 +81,11 @@ public interface EvaluationMetric extends ToXContentObject, NamedWriteable { } /** - * how evaluation metrics for particular search queries get combined for the overall evaluation score. - * Defaults to averaging over the partial results. + * Combine several {@link EvalQueryQuality} results into the overall evaluation score. + * This defaults to averaging over the partial results, but can be overridden to obtain a different behavior. */ default double combine(Collection partialResults) { - return partialResults.stream().mapToDouble(EvalQueryQuality::getQualityLevel).sum() / partialResults.size(); + return partialResults.stream().mapToDouble(EvalQueryQuality::metricScore).sum() / partialResults.size(); } /** diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/MeanReciprocalRank.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/MeanReciprocalRank.java index eb20dc8c680..5781f13dafe 100644 --- a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/MeanReciprocalRank.java +++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/MeanReciprocalRank.java @@ -110,8 +110,7 @@ public class MeanReciprocalRank implements EvaluationMetric { * Compute ReciprocalRank based on provided relevant document IDs. 
**/ @Override - public EvalQueryQuality evaluate(String taskId, SearchHit[] hits, - List ratedDocs) { + public EvalQueryQuality evaluate(String taskId, SearchHit[] hits, List ratedDocs) { List ratedHits = joinHitsWithRatings(hits, ratedDocs); int firstRelevant = -1; int rank = 1; diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalResponse.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalResponse.java index 6dd3c1338fa..6efff154b62 100644 --- a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalResponse.java +++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalResponse.java @@ -48,15 +48,15 @@ import java.util.stream.Collectors; public class RankEvalResponse extends ActionResponse implements ToXContentObject { /** The overall evaluation result. */ - private double evaluationResult; + private double metricScore; /** details about individual ranking evaluation queries, keyed by their id */ private Map details; /** exceptions for specific ranking evaluation queries, keyed by their id */ private Map failures; - public RankEvalResponse(double qualityLevel, Map partialResults, + public RankEvalResponse(double metricScore, Map partialResults, Map failures) { - this.evaluationResult = qualityLevel; + this.metricScore = metricScore; this.details = new HashMap<>(partialResults); this.failures = new HashMap<>(failures); } @@ -65,8 +65,8 @@ public class RankEvalResponse extends ActionResponse implements ToXContentObject // only used in RankEvalAction#newResponse() } - public double getEvaluationResult() { - return evaluationResult; + public double getMetricScore() { + return metricScore; } public Map getPartialResults() { @@ -85,7 +85,7 @@ public class RankEvalResponse extends ActionResponse implements ToXContentObject @Override public void writeTo(StreamOutput out) throws IOException { super.writeTo(out); - out.writeDouble(evaluationResult); + 
out.writeDouble(metricScore); out.writeVInt(details.size()); for (String queryId : details.keySet()) { out.writeString(queryId); @@ -101,7 +101,7 @@ public class RankEvalResponse extends ActionResponse implements ToXContentObject @Override public void readFrom(StreamInput in) throws IOException { super.readFrom(in); - this.evaluationResult = in.readDouble(); + this.metricScore = in.readDouble(); int partialResultSize = in.readVInt(); this.details = new HashMap<>(partialResultSize); for (int i = 0; i < partialResultSize; i++) { @@ -120,7 +120,7 @@ public class RankEvalResponse extends ActionResponse implements ToXContentObject @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(); - builder.field("quality_level", evaluationResult); + builder.field("metric_score", metricScore); builder.startObject("details"); for (String key : details.keySet()) { details.get(key).toXContent(builder, params); @@ -137,7 +137,6 @@ public class RankEvalResponse extends ActionResponse implements ToXContentObject return builder; } - private static final ParseField QUALITY_LEVEL_FIELD = new ParseField("quality_level"); private static final ParseField DETAILS_FIELD = new ParseField("details"); private static final ParseField FAILURES_FIELD = new ParseField("failures"); @SuppressWarnings("unchecked") @@ -147,7 +146,7 @@ public class RankEvalResponse extends ActionResponse implements ToXContentObject ((List) a[1]).stream().collect(Collectors.toMap(EvalQueryQuality::getId, Function.identity())), ((List>) a[2]).stream().collect(Collectors.toMap(Tuple::v1, Tuple::v2)))); static { - PARSER.declareDouble(ConstructingObjectParser.constructorArg(), QUALITY_LEVEL_FIELD); + PARSER.declareDouble(ConstructingObjectParser.constructorArg(), EvalQueryQuality.METRIC_SCORE_FIELD); PARSER.declareNamedObjects(ConstructingObjectParser.optionalConstructorArg(), (p, c, n) -> EvalQueryQuality.fromXContent(p, n), DETAILS_FIELD); 
PARSER.declareNamedObjects(ConstructingObjectParser.optionalConstructorArg(), (p, c, n) -> { diff --git a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/DiscountedCumulativeGainTests.java b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/DiscountedCumulativeGainTests.java index e768c297333..468a1ac2e57 100644 --- a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/DiscountedCumulativeGainTests.java +++ b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/DiscountedCumulativeGainTests.java @@ -76,7 +76,7 @@ public class DiscountedCumulativeGainTests extends ESTestCase { hits[i].shard(new SearchShardTarget("testnode", new Index("index", "uuid"), 0, null)); } DiscountedCumulativeGain dcg = new DiscountedCumulativeGain(); - assertEquals(EXPECTED_DCG, dcg.evaluate("id", hits, rated).getQualityLevel(), DELTA); + assertEquals(EXPECTED_DCG, dcg.evaluate("id", hits, rated).metricScore(), DELTA); /** * Check with normalization: to get the maximal possible dcg, sort documents by @@ -94,7 +94,7 @@ public class DiscountedCumulativeGainTests extends ESTestCase { * idcg = 14.595390756454922 (sum of last column) */ dcg = new DiscountedCumulativeGain(true, null, 10); - assertEquals(EXPECTED_NDCG, dcg.evaluate("id", hits, rated).getQualityLevel(), DELTA); + assertEquals(EXPECTED_NDCG, dcg.evaluate("id", hits, rated).metricScore(), DELTA); } /** @@ -127,7 +127,7 @@ public class DiscountedCumulativeGainTests extends ESTestCase { } DiscountedCumulativeGain dcg = new DiscountedCumulativeGain(); EvalQueryQuality result = dcg.evaluate("id", hits, rated); - assertEquals(12.779642067948913, result.getQualityLevel(), DELTA); + assertEquals(12.779642067948913, result.metricScore(), DELTA); assertEquals(2, filterUnratedDocuments(result.getHitsAndRatings()).size()); /** @@ -146,7 +146,7 @@ public class DiscountedCumulativeGainTests extends ESTestCase { * idcg = 13.347184833073591 (sum of last column) */ dcg = new 
DiscountedCumulativeGain(true, null, 10); - assertEquals(12.779642067948913 / 13.347184833073591, dcg.evaluate("id", hits, rated).getQualityLevel(), DELTA); + assertEquals(12.779642067948913 / 13.347184833073591, dcg.evaluate("id", hits, rated).metricScore(), DELTA); } /** @@ -184,7 +184,7 @@ public class DiscountedCumulativeGainTests extends ESTestCase { } DiscountedCumulativeGain dcg = new DiscountedCumulativeGain(); EvalQueryQuality result = dcg.evaluate("id", hits, ratedDocs); - assertEquals(12.392789260714371, result.getQualityLevel(), DELTA); + assertEquals(12.392789260714371, result.metricScore(), DELTA); assertEquals(1, filterUnratedDocuments(result.getHitsAndRatings()).size()); /** @@ -204,7 +204,7 @@ public class DiscountedCumulativeGainTests extends ESTestCase { * idcg = 13.347184833073591 (sum of last column) */ dcg = new DiscountedCumulativeGain(true, null, 10); - assertEquals(12.392789260714371 / 13.347184833073591, dcg.evaluate("id", hits, ratedDocs).getQualityLevel(), DELTA); + assertEquals(12.392789260714371 / 13.347184833073591, dcg.evaluate("id", hits, ratedDocs).metricScore(), DELTA); } /** @@ -223,13 +223,13 @@ public class DiscountedCumulativeGainTests extends ESTestCase { SearchHit[] hits = new SearchHit[0]; DiscountedCumulativeGain dcg = new DiscountedCumulativeGain(); EvalQueryQuality result = dcg.evaluate("id", hits, ratedDocs); - assertEquals(0.0d, result.getQualityLevel(), DELTA); + assertEquals(0.0d, result.metricScore(), DELTA); assertEquals(0, filterUnratedDocuments(result.getHitsAndRatings()).size()); // also check normalized dcg = new DiscountedCumulativeGain(true, null, 10); result = dcg.evaluate("id", hits, ratedDocs); - assertEquals(0.0d, result.getQualityLevel(), DELTA); + assertEquals(0.0d, result.metricScore(), DELTA); assertEquals(0, filterUnratedDocuments(result.getHitsAndRatings()).size()); } diff --git a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/EvalQueryQualityTests.java 
b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/EvalQueryQualityTests.java index c9251bb8090..7424542ac26 100644 --- a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/EvalQueryQualityTests.java +++ b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/EvalQueryQualityTests.java @@ -129,7 +129,7 @@ public class EvalQueryQualityTests extends ESTestCase { private static EvalQueryQuality mutateTestItem(EvalQueryQuality original) { String id = original.getId(); - double qualityLevel = original.getQualityLevel(); + double metricScore = original.metricScore(); List ratedHits = new ArrayList<>(original.getHitsAndRatings()); MetricDetail metricDetails = original.getMetricDetails(); switch (randomIntBetween(0, 3)) { @@ -137,7 +137,7 @@ public class EvalQueryQualityTests extends ESTestCase { id = id + "_"; break; case 1: - qualityLevel = qualityLevel + 0.1; + metricScore = metricScore + 0.1; break; case 2: if (metricDetails == null) { @@ -152,7 +152,7 @@ public class EvalQueryQualityTests extends ESTestCase { default: throw new IllegalStateException("The test should only allow four parameters mutated"); } - EvalQueryQuality evalQueryQuality = new EvalQueryQuality(id, qualityLevel); + EvalQueryQuality evalQueryQuality = new EvalQueryQuality(id, metricScore); evalQueryQuality.setMetricDetails(metricDetails); evalQueryQuality.addHitsAndRatings(ratedHits); return evalQueryQuality; diff --git a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/ExpectedReciprocalRankTests.java b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/ExpectedReciprocalRankTests.java index e2be8696e66..fe33c246f7d 100644 --- a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/ExpectedReciprocalRankTests.java +++ b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/ExpectedReciprocalRankTests.java @@ -76,10 +76,10 @@ public class ExpectedReciprocalRankTests extends ESTestCase { Integer[] 
relevanceRatings = new Integer[] { 3, 2, 0, 1}; SearchHit[] hits = createSearchHits(rated, relevanceRatings); ExpectedReciprocalRank err = new ExpectedReciprocalRank(3, 0, 3); - assertEquals(0.8984375, err.evaluate("id", hits, rated).getQualityLevel(), DELTA); + assertEquals(0.8984375, err.evaluate("id", hits, rated).metricScore(), DELTA); // take 4th rank into window err = new ExpectedReciprocalRank(3, 0, 4); - assertEquals(0.8984375 + 0.00244140625, err.evaluate("id", hits, rated).getQualityLevel(), DELTA); + assertEquals(0.8984375 + 0.00244140625, err.evaluate("id", hits, rated).metricScore(), DELTA); } /** @@ -102,11 +102,11 @@ public class ExpectedReciprocalRankTests extends ESTestCase { SearchHit[] hits = createSearchHits(rated, relevanceRatings); ExpectedReciprocalRank err = new ExpectedReciprocalRank(3, null, 4); EvalQueryQuality evaluation = err.evaluate("id", hits, rated); - assertEquals(0.875 + 0.00390625, evaluation.getQualityLevel(), DELTA); + assertEquals(0.875 + 0.00390625, evaluation.metricScore(), DELTA); assertEquals(1, ((ExpectedReciprocalRank.Detail) evaluation.getMetricDetails()).getUnratedDocs()); // if we supply e.g. 
2 as unknown docs rating, it should be the same as in the other test above err = new ExpectedReciprocalRank(3, 2, 4); - assertEquals(0.8984375 + 0.00244140625, err.evaluate("id", hits, rated).getQualityLevel(), DELTA); + assertEquals(0.8984375 + 0.00244140625, err.evaluate("id", hits, rated).metricScore(), DELTA); } private SearchHit[] createSearchHits(List rated, Integer[] relevanceRatings) { @@ -126,7 +126,7 @@ public class ExpectedReciprocalRankTests extends ESTestCase { */ public void testNoResults() throws Exception { ExpectedReciprocalRank err = new ExpectedReciprocalRank(5, 0, 10); - assertEquals(0.0, err.evaluate("id", new SearchHit[0], Collections.emptyList()).getQualityLevel(), DELTA); + assertEquals(0.0, err.evaluate("id", new SearchHit[0], Collections.emptyList()).metricScore(), DELTA); } public void testParseFromXContent() throws IOException { diff --git a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/MeanReciprocalRankTests.java b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/MeanReciprocalRankTests.java index f88b0cc6634..fdb64806d5c 100644 --- a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/MeanReciprocalRankTests.java +++ b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/MeanReciprocalRankTests.java @@ -95,14 +95,14 @@ public class MeanReciprocalRankTests extends ESTestCase { int rankAtFirstRelevant = relevantAt + 1; EvalQueryQuality evaluation = reciprocalRank.evaluate("id", hits, ratedDocs); - assertEquals(1.0 / rankAtFirstRelevant, evaluation.getQualityLevel(), Double.MIN_VALUE); + assertEquals(1.0 / rankAtFirstRelevant, evaluation.metricScore(), Double.MIN_VALUE); assertEquals(rankAtFirstRelevant, ((MeanReciprocalRank.Detail) evaluation.getMetricDetails()).getFirstRelevantRank()); // check that if we have fewer search hits than relevant doc position, - // we don't find any result and get 0.0 quality level + // we don't find any result and get 0.0 score reciprocalRank = 
new MeanReciprocalRank(); evaluation = reciprocalRank.evaluate("id", Arrays.copyOfRange(hits, 0, relevantAt), ratedDocs); - assertEquals(0.0, evaluation.getQualityLevel(), Double.MIN_VALUE); + assertEquals(0.0, evaluation.metricScore(), Double.MIN_VALUE); } public void testEvaluationOneRelevantInResults() { @@ -120,7 +120,7 @@ public class MeanReciprocalRankTests extends ESTestCase { } EvalQueryQuality evaluation = reciprocalRank.evaluate("id", hits, ratedDocs); - assertEquals(1.0 / (relevantAt + 1), evaluation.getQualityLevel(), Double.MIN_VALUE); + assertEquals(1.0 / (relevantAt + 1), evaluation.metricScore(), Double.MIN_VALUE); assertEquals(relevantAt + 1, ((MeanReciprocalRank.Detail) evaluation.getMetricDetails()).getFirstRelevantRank()); } @@ -140,7 +140,7 @@ public class MeanReciprocalRankTests extends ESTestCase { MeanReciprocalRank reciprocalRank = new MeanReciprocalRank(2, 10); EvalQueryQuality evaluation = reciprocalRank.evaluate("id", hits, rated); - assertEquals((double) 1 / 3, evaluation.getQualityLevel(), 0.00001); + assertEquals((double) 1 / 3, evaluation.metricScore(), 0.00001); assertEquals(3, ((MeanReciprocalRank.Detail) evaluation.getMetricDetails()).getFirstRelevantRank()); } @@ -158,13 +158,13 @@ public class MeanReciprocalRankTests extends ESTestCase { SearchHit[] hits = createSearchHits(0, 9, "test"); List ratedDocs = new ArrayList<>(); EvalQueryQuality evaluation = reciprocalRank.evaluate("id", hits, ratedDocs); - assertEquals(0.0, evaluation.getQualityLevel(), Double.MIN_VALUE); + assertEquals(0.0, evaluation.metricScore(), Double.MIN_VALUE); } public void testNoResults() throws Exception { SearchHit[] hits = new SearchHit[0]; EvalQueryQuality evaluated = (new MeanReciprocalRank()).evaluate("id", hits, Collections.emptyList()); - assertEquals(0.0d, evaluated.getQualityLevel(), 0.00001); + assertEquals(0.0d, evaluated.metricScore(), 0.00001); assertEquals(-1, ((MeanReciprocalRank.Detail) evaluated.getMetricDetails()).getFirstRelevantRank()); 
} diff --git a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/PrecisionAtKTests.java b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/PrecisionAtKTests.java index c0035d5dbb7..73149d5a8aa 100644 --- a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/PrecisionAtKTests.java +++ b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/PrecisionAtKTests.java @@ -53,7 +53,7 @@ public class PrecisionAtKTests extends ESTestCase { List rated = new ArrayList<>(); rated.add(createRatedDoc("test", "0", RELEVANT_RATING_1)); EvalQueryQuality evaluated = (new PrecisionAtK()).evaluate("id", toSearchHits(rated, "test"), rated); - assertEquals(1, evaluated.getQualityLevel(), 0.00001); + assertEquals(1, evaluated.metricScore(), 0.00001); assertEquals(1, ((PrecisionAtK.Detail) evaluated.getMetricDetails()).getRelevantRetrieved()); assertEquals(1, ((PrecisionAtK.Detail) evaluated.getMetricDetails()).getRetrieved()); } @@ -66,7 +66,7 @@ public class PrecisionAtKTests extends ESTestCase { rated.add(createRatedDoc("test", "3", RELEVANT_RATING_1)); rated.add(createRatedDoc("test", "4", IRRELEVANT_RATING_0)); EvalQueryQuality evaluated = (new PrecisionAtK()).evaluate("id", toSearchHits(rated, "test"), rated); - assertEquals((double) 4 / 5, evaluated.getQualityLevel(), 0.00001); + assertEquals((double) 4 / 5, evaluated.metricScore(), 0.00001); assertEquals(4, ((PrecisionAtK.Detail) evaluated.getMetricDetails()).getRelevantRetrieved()); assertEquals(5, ((PrecisionAtK.Detail) evaluated.getMetricDetails()).getRetrieved()); } @@ -85,7 +85,7 @@ public class PrecisionAtKTests extends ESTestCase { rated.add(createRatedDoc("test", "4", 4)); PrecisionAtK precisionAtN = new PrecisionAtK(2, false, 5); EvalQueryQuality evaluated = precisionAtN.evaluate("id", toSearchHits(rated, "test"), rated); - assertEquals((double) 3 / 5, evaluated.getQualityLevel(), 0.00001); + assertEquals((double) 3 / 5, evaluated.metricScore(), 0.00001); 
assertEquals(3, ((PrecisionAtK.Detail) evaluated.getMetricDetails()).getRelevantRetrieved()); assertEquals(5, ((PrecisionAtK.Detail) evaluated.getMetricDetails()).getRetrieved()); } @@ -99,7 +99,7 @@ public class PrecisionAtKTests extends ESTestCase { rated.add(createRatedDoc("test", "2", IRRELEVANT_RATING_0)); // the following search hits contain only the last three documents EvalQueryQuality evaluated = (new PrecisionAtK()).evaluate("id", toSearchHits(rated.subList(2, 5), "test"), rated); - assertEquals((double) 2 / 3, evaluated.getQualityLevel(), 0.00001); + assertEquals((double) 2 / 3, evaluated.metricScore(), 0.00001); assertEquals(2, ((PrecisionAtK.Detail) evaluated.getMetricDetails()).getRelevantRetrieved()); assertEquals(3, ((PrecisionAtK.Detail) evaluated.getMetricDetails()).getRetrieved()); } @@ -114,14 +114,14 @@ public class PrecisionAtKTests extends ESTestCase { searchHits[2].shard(new SearchShardTarget("testnode", new Index("index", "uuid"), 0, null)); EvalQueryQuality evaluated = (new PrecisionAtK()).evaluate("id", searchHits, rated); - assertEquals((double) 2 / 3, evaluated.getQualityLevel(), 0.00001); + assertEquals((double) 2 / 3, evaluated.metricScore(), 0.00001); assertEquals(2, ((PrecisionAtK.Detail) evaluated.getMetricDetails()).getRelevantRetrieved()); assertEquals(3, ((PrecisionAtK.Detail) evaluated.getMetricDetails()).getRetrieved()); // also try with setting `ignore_unlabeled` PrecisionAtK prec = new PrecisionAtK(1, true, 10); evaluated = prec.evaluate("id", searchHits, rated); - assertEquals((double) 2 / 2, evaluated.getQualityLevel(), 0.00001); + assertEquals((double) 2 / 2, evaluated.metricScore(), 0.00001); assertEquals(2, ((PrecisionAtK.Detail) evaluated.getMetricDetails()).getRelevantRetrieved()); assertEquals(2, ((PrecisionAtK.Detail) evaluated.getMetricDetails()).getRetrieved()); } @@ -133,14 +133,14 @@ public class PrecisionAtKTests extends ESTestCase { hits[i].shard(new SearchShardTarget("testnode", new Index("index", "uuid"), 0, 
null)); } EvalQueryQuality evaluated = (new PrecisionAtK()).evaluate("id", hits, Collections.emptyList()); - assertEquals(0.0d, evaluated.getQualityLevel(), 0.00001); + assertEquals(0.0d, evaluated.metricScore(), 0.00001); assertEquals(0, ((PrecisionAtK.Detail) evaluated.getMetricDetails()).getRelevantRetrieved()); assertEquals(5, ((PrecisionAtK.Detail) evaluated.getMetricDetails()).getRetrieved()); // also try with setting `ignore_unlabeled` PrecisionAtK prec = new PrecisionAtK(1, true, 10); evaluated = prec.evaluate("id", hits, Collections.emptyList()); - assertEquals(0.0d, evaluated.getQualityLevel(), 0.00001); + assertEquals(0.0d, evaluated.metricScore(), 0.00001); assertEquals(0, ((PrecisionAtK.Detail) evaluated.getMetricDetails()).getRelevantRetrieved()); assertEquals(0, ((PrecisionAtK.Detail) evaluated.getMetricDetails()).getRetrieved()); } @@ -148,7 +148,7 @@ public class PrecisionAtKTests extends ESTestCase { public void testNoResults() throws Exception { SearchHit[] hits = new SearchHit[0]; EvalQueryQuality evaluated = (new PrecisionAtK()).evaluate("id", hits, Collections.emptyList()); - assertEquals(0.0d, evaluated.getQualityLevel(), 0.00001); + assertEquals(0.0d, evaluated.metricScore(), 0.00001); assertEquals(0, ((PrecisionAtK.Detail) evaluated.getMetricDetails()).getRelevantRetrieved()); assertEquals(0, ((PrecisionAtK.Detail) evaluated.getMetricDetails()).getRetrieved()); } diff --git a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/RankEvalRequestIT.java b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/RankEvalRequestIT.java index 28200e7d5a0..7d594c852da 100644 --- a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/RankEvalRequestIT.java +++ b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/RankEvalRequestIT.java @@ -114,7 +114,7 @@ public class RankEvalRequestIT extends ESIntegTestCase { // the expected Prec@ for the first query is 4/6 and the expected Prec@ for the // second is 
1/6, divided by 2 to get the average double expectedPrecision = (1.0 / 6.0 + 4.0 / 6.0) / 2.0; - assertEquals(expectedPrecision, response.getEvaluationResult(), Double.MIN_VALUE); + assertEquals(expectedPrecision, response.getMetricScore(), Double.MIN_VALUE); Set> entrySet = response.getPartialResults().entrySet(); assertEquals(2, entrySet.size()); for (Entry entry : entrySet) { @@ -157,7 +157,7 @@ public class RankEvalRequestIT extends ESIntegTestCase { // if we look only at top 3 documente, the expected P@3 for the first query is // 2/3 and the expected Prec@ for the second is 1/3, divided by 2 to get the average expectedPrecision = (1.0 / 3.0 + 2.0 / 3.0) / 2.0; - assertEquals(expectedPrecision, response.getEvaluationResult(), Double.MIN_VALUE); + assertEquals(expectedPrecision, response.getMetricScore(), Double.MIN_VALUE); } /** @@ -186,7 +186,7 @@ public class RankEvalRequestIT extends ESIntegTestCase { new RankEvalRequest(task, new String[] { TEST_INDEX })); RankEvalResponse response = client().execute(RankEvalAction.INSTANCE, builder.request()).actionGet(); - assertEquals(DiscountedCumulativeGainTests.EXPECTED_DCG, response.getEvaluationResult(), 10E-14); + assertEquals(DiscountedCumulativeGainTests.EXPECTED_DCG, response.getMetricScore(), 10E-14); // test that a different window size k affects the result metric = new DiscountedCumulativeGain(false, null, 3); @@ -195,7 +195,7 @@ public class RankEvalRequestIT extends ESIntegTestCase { builder = new RankEvalRequestBuilder(client(), RankEvalAction.INSTANCE, new RankEvalRequest(task, new String[] { TEST_INDEX })); response = client().execute(RankEvalAction.INSTANCE, builder.request()).actionGet(); - assertEquals(12.39278926071437, response.getEvaluationResult(), 10E-14); + assertEquals(12.39278926071437, response.getMetricScore(), 10E-14); } public void testMRRRequest() { @@ -218,7 +218,7 @@ public class RankEvalRequestIT extends ESIntegTestCase { // the expected reciprocal rank for the berlin_query is 1/1 // 
dividing by 2 to get the average double expectedMRR = (1.0 + 1.0 / 5.0) / 2.0; - assertEquals(expectedMRR, response.getEvaluationResult(), 0.0); + assertEquals(expectedMRR, response.getMetricScore(), 0.0); // test that a different window size k affects the result metric = new MeanReciprocalRank(1, 3); @@ -231,7 +231,7 @@ public class RankEvalRequestIT extends ESIntegTestCase { // the reciprocal rank for the berlin_query is 1/1 // dividing by 2 to get the average expectedMRR = 1.0 / 2.0; - assertEquals(expectedMRR, response.getEvaluationResult(), 0.0); + assertEquals(expectedMRR, response.getMetricScore(), 0.0); } /** diff --git a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/RankEvalResponseTests.java b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/RankEvalResponseTests.java index 1e94e869d25..673808f8369 100644 --- a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/RankEvalResponseTests.java +++ b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/RankEvalResponseTests.java @@ -102,7 +102,7 @@ public class RankEvalResponseTests extends ESTestCase { try (StreamInput in = output.bytes().streamInput()) { RankEvalResponse deserializedResponse = new RankEvalResponse(); deserializedResponse.readFrom(in); - assertEquals(randomResponse.getEvaluationResult(), deserializedResponse.getEvaluationResult(), Double.MIN_VALUE); + assertEquals(randomResponse.getMetricScore(), deserializedResponse.getMetricScore(), Double.MIN_VALUE); assertEquals(randomResponse.getPartialResults(), deserializedResponse.getPartialResults()); assertEquals(randomResponse.getFailures().keySet(), deserializedResponse.getFailures().keySet()); assertNotSame(randomResponse, deserializedResponse); @@ -130,7 +130,7 @@ public class RankEvalResponseTests extends ESTestCase { assertNotSame(testItem, parsedItem); // We cannot check equality of object here because some information (e.g. // SearchHit#shard) cannot fully be parsed back. 
- assertEquals(testItem.getEvaluationResult(), parsedItem.getEvaluationResult(), 0.0); + assertEquals(testItem.getMetricScore(), parsedItem.getMetricScore(), 0.0); assertEquals(testItem.getPartialResults().keySet(), parsedItem.getPartialResults().keySet()); for (EvalQueryQuality metricDetail : testItem.getPartialResults().values()) { EvalQueryQuality parsedEvalQueryQuality = parsedItem.getPartialResults().get(metricDetail.getId()); @@ -154,10 +154,10 @@ public class RankEvalResponseTests extends ESTestCase { XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON); String xContent = BytesReference.bytes(response.toXContent(builder, ToXContent.EMPTY_PARAMS)).utf8ToString(); assertEquals(("{" + - " \"quality_level\": 0.123," + + " \"metric_score\": 0.123," + " \"details\": {" + " \"coffee_query\": {" + - " \"quality_level\": 0.1," + + " \"metric_score\": 0.1," + " \"unrated_docs\": [{\"_index\":\"index\",\"_id\":\"456\"}]," + " \"hits\":[{\"hit\":{\"_index\":\"index\",\"_type\":\"\",\"_id\":\"123\",\"_score\":1.0}," + " \"rating\":5}," + diff --git a/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/10_basic.yml b/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/10_basic.yml index 62c246fb320..fe877b37a68 100644 --- a/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/10_basic.yml +++ b/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/10_basic.yml @@ -71,8 +71,8 @@ setup: "metric" : { "precision": { "ignore_unlabeled" : true }} } - - match: { quality_level: 1} - - match: { details.amsterdam_query.quality_level: 1.0} + - match: { metric_score: 1} + - match: { details.amsterdam_query.metric_score: 1.0} - match: { details.amsterdam_query.unrated_docs: [ {"_index": "foo", "_id": "doc4"}]} - match: { details.amsterdam_query.metric_details.precision: {"relevant_docs_retrieved": 2, "docs_retrieved": 2}} @@ -84,7 +84,7 @@ setup: - match: { details.amsterdam_query.hits.2.hit._id: "doc4"} - 
is_false: details.amsterdam_query.hits.2.rating - - match: { details.berlin_query.quality_level: 1.0} + - match: { details.berlin_query.metric_score: 1.0} - match: { details.berlin_query.unrated_docs: [ {"_index": "foo", "_id": "doc4"}]} - match: { details.berlin_query.metric_details.precision: {"relevant_docs_retrieved": 1, "docs_retrieved": 1}} - length: { details.berlin_query.hits: 2} @@ -118,9 +118,9 @@ setup: "metric" : { "precision": { "ignore_unlabeled" : true }} } - - match: { quality_level: 1} - - match: { details.amsterdam_query.quality_level: 1.0} - - match: { details.berlin_query.quality_level: 1.0} + - match: { metric_score: 1} + - match: { details.amsterdam_query.metric_score: 1.0} + - match: { details.berlin_query.metric_score: 1.0} --- "Mean Reciprocal Rank": @@ -150,14 +150,14 @@ setup: } # average is (1/3 + 1/2)/2 = 5/12 ~ 0.41666666666666663 - - gt: {quality_level: 0.416} - - lt: {quality_level: 0.417} - - gt: {details.amsterdam_query.quality_level: 0.333} - - lt: {details.amsterdam_query.quality_level: 0.334} + - gt: {metric_score: 0.416} + - lt: {metric_score: 0.417} + - gt: {details.amsterdam_query.metric_score: 0.333} + - lt: {details.amsterdam_query.metric_score: 0.334} - match: {details.amsterdam_query.metric_details.mean_reciprocal_rank: {"first_relevant": 3}} - match: {details.amsterdam_query.unrated_docs: [ {"_index": "foo", "_id": "doc2"}, {"_index": "foo", "_id": "doc3"} ]} - - match: {details.berlin_query.quality_level: 0.5} + - match: {details.berlin_query.metric_score: 0.5} - match: {details.berlin_query.metric_details.mean_reciprocal_rank: {"first_relevant": 2}} - match: {details.berlin_query.unrated_docs: [ {"_index": "foo", "_id": "doc1"}]} diff --git a/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/20_dcg.yml b/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/20_dcg.yml index baf10f1542c..1b159775d5c 100644 --- a/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/20_dcg.yml +++ 
b/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/20_dcg.yml @@ -69,10 +69,10 @@ "metric" : { "dcg": {}} } - - gt: {quality_level: 13.848263 } - - lt: {quality_level: 13.848264 } - - gt: {details.dcg_query.quality_level: 13.848263} - - lt: {details.dcg_query.quality_level: 13.848264} + - gt: {metric_score: 13.848263 } + - lt: {metric_score: 13.848264 } + - gt: {details.dcg_query.metric_score: 13.848263} + - lt: {details.dcg_query.metric_score: 13.848264} - match: {details.dcg_query.unrated_docs: [ ]} # reverse the order in which the results are returned (less relevant docs first) @@ -96,10 +96,10 @@ "metric" : { "dcg": { }} } - - gt: {quality_level: 10.299674} - - lt: {quality_level: 10.299675} - - gt: {details.dcg_query_reverse.quality_level: 10.299674} - - lt: {details.dcg_query_reverse.quality_level: 10.299675} + - gt: {metric_score: 10.299674} + - lt: {metric_score: 10.299675} + - gt: {details.dcg_query_reverse.metric_score: 10.299674} + - lt: {details.dcg_query_reverse.metric_score: 10.299675} - match: {details.dcg_query_reverse.unrated_docs: [ ]} # if we mix both, we should get the average @@ -134,11 +134,11 @@ "metric" : { "dcg": { }} } - - gt: {quality_level: 12.073969} - - lt: {quality_level: 12.073970} - - gt: {details.dcg_query.quality_level: 13.848263} - - lt: {details.dcg_query.quality_level: 13.848264} + - gt: {metric_score: 12.073969} + - lt: {metric_score: 12.073970} + - gt: {details.dcg_query.metric_score: 13.848263} + - lt: {details.dcg_query.metric_score: 13.848264} - match: {details.dcg_query.unrated_docs: [ ]} - - gt: {details.dcg_query_reverse.quality_level: 10.299674} - - lt: {details.dcg_query_reverse.quality_level: 10.299675} + - gt: {details.dcg_query_reverse.metric_score: 10.299674} + - lt: {details.dcg_query_reverse.metric_score: 10.299675} - match: {details.dcg_query_reverse.unrated_docs: [ ]} diff --git a/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/30_failures.yml 
b/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/30_failures.yml index d6119ad3a9e..42627a2590e 100644 --- a/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/30_failures.yml +++ b/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/30_failures.yml @@ -34,8 +34,8 @@ "metric" : { "precision": { "ignore_unlabeled" : true }} } - - match: { quality_level: 1} - - match: { details.amsterdam_query.quality_level: 1.0} + - match: { metric_score: 1} + - match: { details.amsterdam_query.metric_score: 1.0} - match: { details.amsterdam_query.unrated_docs: [ ]} - match: { details.amsterdam_query.metric_details.precision: {"relevant_docs_retrieved": 1, "docs_retrieved": 1}} diff --git a/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/40_rank_eval_templated.yml b/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/40_rank_eval_templated.yml index 5e0082d213c..fef25c3fc41 100644 --- a/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/40_rank_eval_templated.yml +++ b/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/40_rank_eval_templated.yml @@ -84,7 +84,7 @@ setup: "metric" : { "precision": { }} } - - match: {quality_level: 0.9} + - match: {metric_score: 0.9} - match: {details.amsterdam_query.unrated_docs.0._id: "6"} --- diff --git a/x-pack/qa/core-rest-tests-with-security/src/test/resources/rest-api-spec/test/rankeval/10_rankeval.yml b/x-pack/qa/core-rest-tests-with-security/src/test/resources/rest-api-spec/test/rankeval/10_rankeval.yml index 6dae2bb2a67..47203779093 100644 --- a/x-pack/qa/core-rest-tests-with-security/src/test/resources/rest-api-spec/test/rankeval/10_rankeval.yml +++ b/x-pack/qa/core-rest-tests-with-security/src/test/resources/rest-api-spec/test/rankeval/10_rankeval.yml @@ -52,5 +52,5 @@ "metric" : { "precision": { "ignore_unlabeled" : true }} } - - match: { quality_level: 1 } + - match: { metric_score: 1 }