From 29b07bb6c4de5a59c0ade78c5565982674d5f5af Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christoph=20B=C3=BCscher?=
Date: Wed, 3 Jan 2018 17:23:00 +0100
Subject: [PATCH] [Test] Fix scores for dcg in RankEvalRequestIT and
 RankEvalYamlIT

Allow small deviations when asserting ranking scores, otherwise some tests
break on floating point calculation differences e.g. when running on ARM.
---
 .../index/rankeval/RankEvalRequestIT.java       |  8 ++++----
 .../rest-api-spec/test/rank_eval/10_basic.yml   |  6 ++++--
 .../rest-api-spec/test/rank_eval/20_dcg.yml     | 21 ++++++++++++-------
 3 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/RankEvalRequestIT.java b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/RankEvalRequestIT.java
index 1c10da61fa1..e0108a9e416 100644
--- a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/RankEvalRequestIT.java
+++ b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/RankEvalRequestIT.java
@@ -171,7 +171,7 @@ public class RankEvalRequestIT extends ESIntegTestCase {
         builder.setRankEvalSpec(task);
 
         RankEvalResponse response = client().execute(RankEvalAction.INSTANCE, builder.request()).actionGet();
-        assertEquals(DiscountedCumulativeGainTests.EXPECTED_DCG, response.getEvaluationResult(), Double.MIN_VALUE);
+        assertEquals(DiscountedCumulativeGainTests.EXPECTED_DCG, response.getEvaluationResult(), 10E-14);
 
         // test that a different window size k affects the result
         metric = new DiscountedCumulativeGain(false, null, 3);
@@ -182,7 +182,7 @@ public class RankEvalRequestIT extends ESIntegTestCase {
         builder.setRankEvalSpec(task);
 
         response = client().execute(RankEvalAction.INSTANCE, builder.request()).actionGet();
-        assertEquals(12.392789260714371, response.getEvaluationResult(), Double.MIN_VALUE);
+        assertEquals(12.39278926071437, response.getEvaluationResult(), 10E-14);
     }
 
     public void testMRRRequest() {
@@ -205,7 +205,7 @@ public class RankEvalRequestIT extends ESIntegTestCase {
         // the expected reciprocal rank for the amsterdam_query is 1/5
         // the expected reciprocal rank for the berlin_query is 1/1
         // dividing by 2 to get the average
-        double expectedMRR = (1.0 / 1.0 + 1.0 / 5.0) / 2.0;
+        double expectedMRR = (1.0 + 1.0 / 5.0) / 2.0;
         assertEquals(expectedMRR, response.getEvaluationResult(), 0.0);
 
         // test that a different window size k affects the result
@@ -220,7 +220,7 @@ public class RankEvalRequestIT extends ESIntegTestCase {
         // limiting to top 3 results, the amsterdam_query has no relevant document in it
         // the reciprocal rank for the berlin_query is 1/1
         // dividing by 2 to get the average
-        expectedMRR = (1.0/ 1.0) / 2.0;
+        expectedMRR = 1.0 / 2.0;
         assertEquals(expectedMRR, response.getEvaluationResult(), 0.0);
     }
 
diff --git a/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/10_basic.yml b/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/10_basic.yml
index dc91f63420c..a81df5fa3fa 100644
--- a/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/10_basic.yml
+++ b/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/10_basic.yml
@@ -152,8 +152,10 @@
             }
 
 # average is (1/3 + 1/2)/2 = 5/12 ~ 0.41666666666666663
-  - match: {rank_eval.quality_level: 0.41666666666666663}
-  - match: {rank_eval.details.amsterdam_query.quality_level: 0.3333333333333333}
+  - gt: {rank_eval.quality_level: 0.416}
+  - lt: {rank_eval.quality_level: 0.417}
+  - gt: {rank_eval.details.amsterdam_query.quality_level: 0.333}
+  - lt: {rank_eval.details.amsterdam_query.quality_level: 0.334}
   - match: {rank_eval.details.amsterdam_query.metric_details: {"first_relevant": 3}}
   - match: {rank_eval.details.amsterdam_query.unknown_docs: [ {"_index": "foo", "_id": "doc2"},
                                                               {"_index": "foo", "_id": "doc3"} ]}
diff --git a/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/20_dcg.yml b/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/20_dcg.yml
index 37c0b5897f5..0aca6fdde9e 100644
--- a/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/20_dcg.yml
+++ b/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/20_dcg.yml
@@ -69,8 +69,10 @@
             "metric" : { "dcg": {}}
         }
 
-  - match: {rank_eval.quality_level: 13.84826362927298}
-  - match: {rank_eval.details.dcg_query.quality_level: 13.84826362927298}
+  - gt: {rank_eval.quality_level: 13.848263 }
+  - lt: {rank_eval.quality_level: 13.848264 }
+  - gt: {rank_eval.details.dcg_query.quality_level: 13.848263}
+  - lt: {rank_eval.details.dcg_query.quality_level: 13.848264}
   - match: {rank_eval.details.dcg_query.unknown_docs: [ ]}
 
 # reverse the order in which the results are returned (less relevant docs first)
@@ -94,8 +96,10 @@
             "metric" : { "dcg": { }}
         }
 
-  - match: {rank_eval.quality_level: 10.29967439154499}
-  - match: {rank_eval.details.dcg_query_reverse.quality_level: 10.29967439154499}
+  - gt: {rank_eval.quality_level: 10.299674}
+  - lt: {rank_eval.quality_level: 10.299675}
+  - gt: {rank_eval.details.dcg_query_reverse.quality_level: 10.299674}
+  - lt: {rank_eval.details.dcg_query_reverse.quality_level: 10.299675}
   - match: {rank_eval.details.dcg_query_reverse.unknown_docs: [ ]}
 
 # if we mix both, we should get the average
@@ -130,8 +134,11 @@
             "metric" : { "dcg": { }}
         }
 
-  - match: {rank_eval.quality_level: 12.073969010408984}
-  - match: {rank_eval.details.dcg_query.quality_level: 13.84826362927298}
+  - gt: {rank_eval.quality_level: 12.073969}
+  - lt: {rank_eval.quality_level: 12.073970}
+  - gt: {rank_eval.details.dcg_query.quality_level: 13.848263}
+  - lt: {rank_eval.details.dcg_query.quality_level: 13.848264}
   - match: {rank_eval.details.dcg_query.unknown_docs: [ ]}
-  - match: {rank_eval.details.dcg_query_reverse.quality_level: 10.29967439154499}
+  - gt: {rank_eval.details.dcg_query_reverse.quality_level: 10.299674}
+  - lt: {rank_eval.details.dcg_query_reverse.quality_level: 10.299675}
   - match: {rank_eval.details.dcg_query_reverse.unknown_docs: [ ]}
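
The rationale for the tolerance change can be seen in a small standalone sketch below (not part of the patch; the class name is hypothetical, only JUnit's org.junit.Assert.assertEquals(double, double, double) is assumed). Re-associating a floating point sum, as can happen when ranking scores are accumulated in a different order or on different hardware such as ARM, shifts the last bits of the result, so a delta of Double.MIN_VALUE amounts to exact equality, while 10E-14 absorbs the rounding noise without masking real scoring regressions.

    import static org.junit.Assert.assertEquals;

    public class FloatingPointToleranceExample {
        public static void main(String[] args) {
            // The same three terms, summed with a different association order,
            // differ in the last bits of the result:
            double left = (0.1 + 0.2) + 0.3;   // 0.6000000000000001
            double right = 0.1 + (0.2 + 0.3);  // 0.6

            // Would fail: Double.MIN_VALUE (~4.9E-324) effectively demands
            // bit-for-bit equality of the two doubles.
            // assertEquals(left, right, Double.MIN_VALUE);

            // Passes: a tolerance of 10E-14 is far larger than the rounding
            // difference but far smaller than any meaningful scoring change.
            assertEquals(left, right, 10E-14);
        }
    }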