[Test] Fix scores for dcg in RankEvalRequestIT and RankEvalYamlIT
Allow small deviations when asserting ranking scores; otherwise some tests break because of floating-point calculation differences, e.g. when running on ARM.
This commit is contained in:
parent
556d77c9ad
commit
29b07bb6c4
|
@ -171,7 +171,7 @@ public class RankEvalRequestIT extends ESIntegTestCase {
|
||||||
builder.setRankEvalSpec(task);
|
builder.setRankEvalSpec(task);
|
||||||
|
|
||||||
RankEvalResponse response = client().execute(RankEvalAction.INSTANCE, builder.request()).actionGet();
|
RankEvalResponse response = client().execute(RankEvalAction.INSTANCE, builder.request()).actionGet();
|
||||||
assertEquals(DiscountedCumulativeGainTests.EXPECTED_DCG, response.getEvaluationResult(), Double.MIN_VALUE);
|
assertEquals(DiscountedCumulativeGainTests.EXPECTED_DCG, response.getEvaluationResult(), 10E-14);
|
||||||
|
|
||||||
// test that a different window size k affects the result
|
// test that a different window size k affects the result
|
||||||
metric = new DiscountedCumulativeGain(false, null, 3);
|
metric = new DiscountedCumulativeGain(false, null, 3);
|
||||||
|
@ -182,7 +182,7 @@ public class RankEvalRequestIT extends ESIntegTestCase {
|
||||||
builder.setRankEvalSpec(task);
|
builder.setRankEvalSpec(task);
|
||||||
|
|
||||||
response = client().execute(RankEvalAction.INSTANCE, builder.request()).actionGet();
|
response = client().execute(RankEvalAction.INSTANCE, builder.request()).actionGet();
|
||||||
assertEquals(12.392789260714371, response.getEvaluationResult(), Double.MIN_VALUE);
|
assertEquals(12.39278926071437, response.getEvaluationResult(), 10E-14);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testMRRRequest() {
|
public void testMRRRequest() {
|
||||||
|
@ -205,7 +205,7 @@ public class RankEvalRequestIT extends ESIntegTestCase {
|
||||||
// the expected reciprocal rank for the amsterdam_query is 1/5
|
// the expected reciprocal rank for the amsterdam_query is 1/5
|
||||||
// the expected reciprocal rank for the berlin_query is 1/1
|
// the expected reciprocal rank for the berlin_query is 1/1
|
||||||
// dividing by 2 to get the average
|
// dividing by 2 to get the average
|
||||||
double expectedMRR = (1.0 / 1.0 + 1.0 / 5.0) / 2.0;
|
double expectedMRR = (1.0 + 1.0 / 5.0) / 2.0;
|
||||||
assertEquals(expectedMRR, response.getEvaluationResult(), 0.0);
|
assertEquals(expectedMRR, response.getEvaluationResult(), 0.0);
|
||||||
|
|
||||||
// test that a different window size k affects the result
|
// test that a different window size k affects the result
|
||||||
|
@ -220,7 +220,7 @@ public class RankEvalRequestIT extends ESIntegTestCase {
|
||||||
// limiting to top 3 results, the amsterdam_query has no relevant document in it
|
// limiting to top 3 results, the amsterdam_query has no relevant document in it
|
||||||
// the reciprocal rank for the berlin_query is 1/1
|
// the reciprocal rank for the berlin_query is 1/1
|
||||||
// dividing by 2 to get the average
|
// dividing by 2 to get the average
|
||||||
expectedMRR = (1.0/ 1.0) / 2.0;
|
expectedMRR = 1.0 / 2.0;
|
||||||
assertEquals(expectedMRR, response.getEvaluationResult(), 0.0);
|
assertEquals(expectedMRR, response.getEvaluationResult(), 0.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -152,8 +152,10 @@
|
||||||
}
|
}
|
||||||
|
|
||||||
# average is (1/3 + 1/2)/2 = 5/12 ~ 0.41666666666666663
|
# average is (1/3 + 1/2)/2 = 5/12 ~ 0.41666666666666663
|
||||||
- match: {rank_eval.quality_level: 0.41666666666666663}
|
- gt: {rank_eval.quality_level: 0.416}
|
||||||
- match: {rank_eval.details.amsterdam_query.quality_level: 0.3333333333333333}
|
- lt: {rank_eval.quality_level: 0.417}
|
||||||
|
- gt: {rank_eval.details.amsterdam_query.quality_level: 0.333}
|
||||||
|
- lt: {rank_eval.details.amsterdam_query.quality_level: 0.334}
|
||||||
- match: {rank_eval.details.amsterdam_query.metric_details: {"first_relevant": 3}}
|
- match: {rank_eval.details.amsterdam_query.metric_details: {"first_relevant": 3}}
|
||||||
- match: {rank_eval.details.amsterdam_query.unknown_docs: [ {"_index": "foo", "_id": "doc2"},
|
- match: {rank_eval.details.amsterdam_query.unknown_docs: [ {"_index": "foo", "_id": "doc2"},
|
||||||
{"_index": "foo", "_id": "doc3"} ]}
|
{"_index": "foo", "_id": "doc3"} ]}
|
||||||
|
|
|
@ -69,8 +69,10 @@
|
||||||
"metric" : { "dcg": {}}
|
"metric" : { "dcg": {}}
|
||||||
}
|
}
|
||||||
|
|
||||||
- match: {rank_eval.quality_level: 13.84826362927298}
|
- gt: {rank_eval.quality_level: 13.848263 }
|
||||||
- match: {rank_eval.details.dcg_query.quality_level: 13.84826362927298}
|
- lt: {rank_eval.quality_level: 13.848264 }
|
||||||
|
- gt: {rank_eval.details.dcg_query.quality_level: 13.848263}
|
||||||
|
- lt: {rank_eval.details.dcg_query.quality_level: 13.848264}
|
||||||
- match: {rank_eval.details.dcg_query.unknown_docs: [ ]}
|
- match: {rank_eval.details.dcg_query.unknown_docs: [ ]}
|
||||||
|
|
||||||
# reverse the order in which the results are returned (less relevant docs first)
|
# reverse the order in which the results are returned (less relevant docs first)
|
||||||
|
@ -94,8 +96,10 @@
|
||||||
"metric" : { "dcg": { }}
|
"metric" : { "dcg": { }}
|
||||||
}
|
}
|
||||||
|
|
||||||
- match: {rank_eval.quality_level: 10.29967439154499}
|
- gt: {rank_eval.quality_level: 10.299674}
|
||||||
- match: {rank_eval.details.dcg_query_reverse.quality_level: 10.29967439154499}
|
- lt: {rank_eval.quality_level: 10.299675}
|
||||||
|
- gt: {rank_eval.details.dcg_query_reverse.quality_level: 10.299674}
|
||||||
|
- lt: {rank_eval.details.dcg_query_reverse.quality_level: 10.299675}
|
||||||
- match: {rank_eval.details.dcg_query_reverse.unknown_docs: [ ]}
|
- match: {rank_eval.details.dcg_query_reverse.unknown_docs: [ ]}
|
||||||
|
|
||||||
# if we mix both, we should get the average
|
# if we mix both, we should get the average
|
||||||
|
@ -130,8 +134,11 @@
|
||||||
"metric" : { "dcg": { }}
|
"metric" : { "dcg": { }}
|
||||||
}
|
}
|
||||||
|
|
||||||
- match: {rank_eval.quality_level: 12.073969010408984}
|
- gt: {rank_eval.quality_level: 12.073969}
|
||||||
- match: {rank_eval.details.dcg_query.quality_level: 13.84826362927298}
|
- lt: {rank_eval.quality_level: 12.073970}
|
||||||
|
- gt: {rank_eval.details.dcg_query.quality_level: 13.848263}
|
||||||
|
- lt: {rank_eval.details.dcg_query.quality_level: 13.848264}
|
||||||
- match: {rank_eval.details.dcg_query.unknown_docs: [ ]}
|
- match: {rank_eval.details.dcg_query.unknown_docs: [ ]}
|
||||||
- match: {rank_eval.details.dcg_query_reverse.quality_level: 10.29967439154499}
|
- gt: {rank_eval.details.dcg_query_reverse.quality_level: 10.299674}
|
||||||
|
- lt: {rank_eval.details.dcg_query_reverse.quality_level: 10.299675}
|
||||||
- match: {rank_eval.details.dcg_query_reverse.unknown_docs: [ ]}
|
- match: {rank_eval.details.dcg_query_reverse.unknown_docs: [ ]}
|
||||||
|
|
Loading…
Reference in New Issue