[Test] Fix scores for dcg in RankEvalRequestIT and RankEvalYamlIT

Allow small deviations when asserting ranking scores; otherwise some tests break
due to floating point calculation differences, e.g. when running on ARM.
Christoph Büscher 2018-01-03 17:23:00 +01:00
parent 556d77c9ad
commit 29b07bb6c4
3 changed files with 22 additions and 13 deletions
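The new delta matters because Double.MIN_VALUE (about 4.9E-324) is smaller than any real rounding error, so it effectively demands bit-for-bit equality, while 10E-14 (i.e. 1E-13) absorbs the last-digit drift that different platforms or summation orders produce. A minimal standalone sketch (not part of the commit) illustrating the difference:

// Standalone sketch: why Double.MIN_VALUE is too strict a delta for assertEquals.
public class DeltaDemo {
    public static void main(String[] args) {
        double expected = 12.39278926071437;
        // Simulate a platform that is off by one unit in the last place (ulp),
        // e.g. because the DCG terms were summed in a slightly different order.
        double actual = expected + Math.ulp(expected);

        // Double.MIN_VALUE is below any rounding error, so this check fails.
        System.out.println(Math.abs(expected - actual) <= Double.MIN_VALUE); // false
        // 10E-14 comfortably absorbs a few ulps of drift, so this check passes.
        System.out.println(Math.abs(expected - actual) <= 10E-14);           // true
    }
}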

View File

@@ -171,7 +171,7 @@ public class RankEvalRequestIT extends ESIntegTestCase {
         builder.setRankEvalSpec(task);
         RankEvalResponse response = client().execute(RankEvalAction.INSTANCE, builder.request()).actionGet();
-        assertEquals(DiscountedCumulativeGainTests.EXPECTED_DCG, response.getEvaluationResult(), Double.MIN_VALUE);
+        assertEquals(DiscountedCumulativeGainTests.EXPECTED_DCG, response.getEvaluationResult(), 10E-14);
         // test that a different window size k affects the result
         metric = new DiscountedCumulativeGain(false, null, 3);
@@ -182,7 +182,7 @@ public class RankEvalRequestIT extends ESIntegTestCase {
         builder.setRankEvalSpec(task);
         response = client().execute(RankEvalAction.INSTANCE, builder.request()).actionGet();
-        assertEquals(12.392789260714371, response.getEvaluationResult(), Double.MIN_VALUE);
+        assertEquals(12.39278926071437, response.getEvaluationResult(), 10E-14);
     }

     public void testMRRRequest() {
@@ -205,7 +205,7 @@ public class RankEvalRequestIT extends ESIntegTestCase {
         // the expected reciprocal rank for the amsterdam_query is 1/5
         // the expected reciprocal rank for the berlin_query is 1/1
         // dividing by 2 to get the average
-        double expectedMRR = (1.0 / 1.0 + 1.0 / 5.0) / 2.0;
+        double expectedMRR = (1.0 + 1.0 / 5.0) / 2.0;
         assertEquals(expectedMRR, response.getEvaluationResult(), 0.0);
         // test that a different window size k affects the result
@@ -220,7 +220,7 @@ public class RankEvalRequestIT extends ESIntegTestCase {
         // limiting to top 3 results, the amsterdam_query has no relevant document in it
         // the reciprocal rank for the berlin_query is 1/1
         // dividing by 2 to get the average
-        expectedMRR = (1.0/ 1.0) / 2.0;
+        expectedMRR = 1.0 / 2.0;
         assertEquals(expectedMRR, response.getEvaluationResult(), 0.0);
     }
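For reference, the two expected MRR values asserted above follow directly from the ranks given in the comments; a standalone sketch (not part of the commit) of the arithmetic:

public class MrrDemo {
    public static void main(String[] args) {
        // Reciprocal ranks from the comments above: amsterdam_query at rank 5, berlin_query at rank 1.
        double mrrFullWindow = (1.0 + 1.0 / 5.0) / 2.0; // 0.6
        // With window size k = 3 the amsterdam_query has no relevant hit, so it contributes 0.
        double mrrTopThree = (1.0 + 0.0) / 2.0;         // 0.5
        System.out.println(mrrFullWindow + " " + mrrTopThree);
    }
}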

View File

@@ -152,8 +152,10 @@
     }
   # average is (1/3 + 1/2)/2 = 5/12 ~ 0.41666666666666663
-  - match: {rank_eval.quality_level: 0.41666666666666663}
-  - match: {rank_eval.details.amsterdam_query.quality_level: 0.3333333333333333}
+  - gt: {rank_eval.quality_level: 0.416}
+  - lt: {rank_eval.quality_level: 0.417}
+  - gt: {rank_eval.details.amsterdam_query.quality_level: 0.333}
+  - lt: {rank_eval.details.amsterdam_query.quality_level: 0.334}
   - match: {rank_eval.details.amsterdam_query.metric_details: {"first_relevant": 3}}
   - match: {rank_eval.details.amsterdam_query.unknown_docs: [ {"_index": "foo", "_id": "doc2"},
                                                               {"_index": "foo", "_id": "doc3"} ]}

View File

@@ -69,8 +69,10 @@
         "metric" : { "dcg": {}}
     }
-  - match: {rank_eval.quality_level: 13.84826362927298}
-  - match: {rank_eval.details.dcg_query.quality_level: 13.84826362927298}
+  - gt: {rank_eval.quality_level: 13.848263 }
+  - lt: {rank_eval.quality_level: 13.848264 }
+  - gt: {rank_eval.details.dcg_query.quality_level: 13.848263}
+  - lt: {rank_eval.details.dcg_query.quality_level: 13.848264}
   - match: {rank_eval.details.dcg_query.unknown_docs: [ ]}

 # reverse the order in which the results are returned (less relevant docs first)
@@ -94,8 +96,10 @@
         "metric" : { "dcg": { }}
     }
-  - match: {rank_eval.quality_level: 10.29967439154499}
-  - match: {rank_eval.details.dcg_query_reverse.quality_level: 10.29967439154499}
+  - gt: {rank_eval.quality_level: 10.299674}
+  - lt: {rank_eval.quality_level: 10.299675}
+  - gt: {rank_eval.details.dcg_query_reverse.quality_level: 10.299674}
+  - lt: {rank_eval.details.dcg_query_reverse.quality_level: 10.299675}
   - match: {rank_eval.details.dcg_query_reverse.unknown_docs: [ ]}

 # if we mix both, we should get the average
@@ -130,8 +134,11 @@
         "metric" : { "dcg": { }}
     }
-  - match: {rank_eval.quality_level: 12.073969010408984}
-  - match: {rank_eval.details.dcg_query.quality_level: 13.84826362927298}
+  - gt: {rank_eval.quality_level: 12.073969}
+  - lt: {rank_eval.quality_level: 12.073970}
+  - gt: {rank_eval.details.dcg_query.quality_level: 13.848263}
+  - lt: {rank_eval.details.dcg_query.quality_level: 13.848264}
   - match: {rank_eval.details.dcg_query.unknown_docs: [ ]}
-  - match: {rank_eval.details.dcg_query_reverse.quality_level: 10.29967439154499}
+  - gt: {rank_eval.details.dcg_query_reverse.quality_level: 10.299674}
+  - lt: {rank_eval.details.dcg_query_reverse.quality_level: 10.299675}
   - match: {rank_eval.details.dcg_query_reverse.unknown_docs: [ ]}
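The bracketed DCG values can be reproduced with the standard formula, summing (2^rel - 1) / log2(rank + 1) over the result list. A standalone sketch (not part of the commit), assuming the six documents carry the relevance ratings 3, 2, 3, 0, 1, 2 in result order, which reproduces the expected values above; the reversed order gives the dcg_query_reverse value, and averaging both gives the mixed case:

public class DcgDemo {
    public static void main(String[] args) {
        int[] ratings = {3, 2, 3, 0, 1, 2};  // assumed ratings, in result order
        int[] reversed = {2, 1, 0, 3, 2, 3}; // same docs, least relevant first

        double dcg = dcg(ratings);
        double dcgReverse = dcg(reversed);
        System.out.println(dcg);                    // ≈ 13.8482636
        System.out.println(dcgReverse);             // ≈ 10.2996744
        System.out.println((dcg + dcgReverse) / 2); // ≈ 12.0739690, the mixed-case value
    }

    // DCG = sum over ranks of (2^relevance - 1) / log2(rank + 1)
    static double dcg(int[] ratings) {
        double sum = 0;
        for (int rank = 1; rank <= ratings.length; rank++) {
            sum += (Math.pow(2, ratings[rank - 1]) - 1) / (Math.log(rank + 1) / Math.log(2));
        }
        return sum;
    }
}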