[Test] Fix scores for dcg in RankEvalRequestIT and RankEvalYamlIT

Allow small deviations when asserting ranking scores; otherwise some tests break on floating-point calculation differences, e.g. when running on ARM.
2018-01-03 17:23:00 +01:00 · 2018-01-03 17:23:00 +01:00 · 29b07bb6c4
parent 556d77c9ad
commit 29b07bb6c4
3 changed files with 22 additions and 13 deletions
--- a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/RankEvalRequestIT.java
+++ b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/RankEvalRequestIT.java
@ -171,7 +171,7 @@ public class RankEvalRequestIT extends ESIntegTestCase {
        builder.setRankEvalSpec(task);

        RankEvalResponse response = client().execute(RankEvalAction.INSTANCE, builder.request()).actionGet();
-        assertEquals(DiscountedCumulativeGainTests.EXPECTED_DCG, response.getEvaluationResult(), Double.MIN_VALUE);
+        assertEquals(DiscountedCumulativeGainTests.EXPECTED_DCG, response.getEvaluationResult(), 10E-14);

        // test that a different window size k affects the result
        metric = new DiscountedCumulativeGain(false, null, 3);
@ -182,7 +182,7 @@ public class RankEvalRequestIT extends ESIntegTestCase {
        builder.setRankEvalSpec(task);

        response = client().execute(RankEvalAction.INSTANCE, builder.request()).actionGet();
-        assertEquals(12.392789260714371, response.getEvaluationResult(), Double.MIN_VALUE);
+        assertEquals(12.39278926071437, response.getEvaluationResult(), 10E-14);
    }

    public void testMRRRequest() {
@ -205,7 +205,7 @@ public class RankEvalRequestIT extends ESIntegTestCase {
        // the expected reciprocal rank for the amsterdam_query is 1/5
        // the expected reciprocal rank for the berlin_query is 1/1
        // dividing by 2 to get the average
-        double expectedMRR = (1.0 / 1.0 + 1.0 / 5.0) / 2.0;
+        double expectedMRR = (1.0 + 1.0 / 5.0) / 2.0;
        assertEquals(expectedMRR, response.getEvaluationResult(), 0.0);

        // test that a different window size k affects the result
@ -220,7 +220,7 @@ public class RankEvalRequestIT extends ESIntegTestCase {
        // limiting to top 3 results, the amsterdam_query has no relevant document in it
        // the reciprocal rank for the berlin_query is 1/1
        // dividing by 2 to get the average
-        expectedMRR = (1.0/ 1.0) / 2.0;
+        expectedMRR = 1.0 / 2.0;
        assertEquals(expectedMRR, response.getEvaluationResult(), 0.0);
    }

--- a/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/10_basic.yml
+++ b/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/10_basic.yml
@ -152,8 +152,10 @@
        }

 # average is (1/3 + 1/2)/2 = 5/12 ~ 0.41666666666666663
-  - match: {rank_eval.quality_level: 0.41666666666666663}
-  - match: {rank_eval.details.amsterdam_query.quality_level: 0.3333333333333333}
+  - gt: {rank_eval.quality_level: 0.416}
+  - lt: {rank_eval.quality_level: 0.417}
+  - gt: {rank_eval.details.amsterdam_query.quality_level: 0.333}
+  - lt: {rank_eval.details.amsterdam_query.quality_level: 0.334}
  - match: {rank_eval.details.amsterdam_query.metric_details: {"first_relevant": 3}}
  - match: {rank_eval.details.amsterdam_query.unknown_docs:  [ {"_index": "foo", "_id": "doc2"},
                                                               {"_index": "foo", "_id": "doc3"} ]}
--- a/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/20_dcg.yml
+++ b/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/20_dcg.yml
@ -69,8 +69,10 @@
          "metric" : { "dcg": {}}
        }

-  - match: {rank_eval.quality_level: 13.84826362927298}
-  - match: {rank_eval.details.dcg_query.quality_level: 13.84826362927298}
+  - gt: {rank_eval.quality_level: 13.848263 }
+  - lt: {rank_eval.quality_level: 13.848264 }
+  - gt: {rank_eval.details.dcg_query.quality_level: 13.848263}
+  - lt: {rank_eval.details.dcg_query.quality_level: 13.848264}
  - match: {rank_eval.details.dcg_query.unknown_docs: [ ]}

 # reverse the order in which the results are returned (less relevant docs first)
@ -94,8 +96,10 @@
          "metric" : { "dcg": { }}
        }

-  - match: {rank_eval.quality_level: 10.29967439154499}
-  - match: {rank_eval.details.dcg_query_reverse.quality_level: 10.29967439154499}
+  - gt: {rank_eval.quality_level: 10.299674}
+  - lt: {rank_eval.quality_level: 10.299675}
+  - gt: {rank_eval.details.dcg_query_reverse.quality_level: 10.299674}
+  - lt: {rank_eval.details.dcg_query_reverse.quality_level: 10.299675}
  - match: {rank_eval.details.dcg_query_reverse.unknown_docs: [ ]}

 # if we mix both, we should get the average
@ -130,8 +134,11 @@
          "metric" : { "dcg": { }}
        }

-  - match: {rank_eval.quality_level: 12.073969010408984}
-  - match: {rank_eval.details.dcg_query.quality_level: 13.84826362927298}
+  - gt: {rank_eval.quality_level: 12.073969}
+  - lt: {rank_eval.quality_level: 12.073970}
+  - gt: {rank_eval.details.dcg_query.quality_level: 13.848263}
+  - lt: {rank_eval.details.dcg_query.quality_level: 13.848264}
  - match: {rank_eval.details.dcg_query.unknown_docs: [ ]}
-  - match: {rank_eval.details.dcg_query_reverse.quality_level: 10.29967439154499}
+  - gt: {rank_eval.details.dcg_query_reverse.quality_level: 10.299674}
+  - lt: {rank_eval.details.dcg_query_reverse.quality_level: 10.299675}
  - match: {rank_eval.details.dcg_query_reverse.unknown_docs: [ ]}