[7.x][ML] Avoid classification integ test training on single class (#50072) (#50078)

The `ClassificationIT.testTwoJobsWithSameRandomizeSeedUseSameTrainingSet` test was previously set up to just have 10 rows. With `training_percent` of 50%, only 5 rows will be used for training. There is a good chance that all 5 rows will be of one class which results to failure. This commit increases the rows to 100. Now 50 rows should be used for training and the chance of failure should be very small. Backport of #50072
2019-12-11 18:50:26 +02:00 · 2019-12-11 18:50:26 +02:00 · 03ecaae221
parent d19c8db4e4
commit 03ecaae221
2 changed files with 5 additions and 2 deletions
--- a/x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/ClassificationIT.java
+++ b/x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/ClassificationIT.java
@ -273,7 +273,10 @@ public class ClassificationIT extends MlNativeDataFrameAnalyticsIntegTestCase {
    public void testTwoJobsWithSameRandomizeSeedUseSameTrainingSet() throws Exception {
        String sourceIndex = "classification_two_jobs_with_same_randomize_seed_source";
        String dependentVariable = KEYWORD_FIELD;
-        indexData(sourceIndex, 10, 0, dependentVariable);
+
+        // We use 100 rows as we can't set this too low. If too low it is possible
+        // we only train with rows of one of the two classes which leads to a failure.
+        indexData(sourceIndex, 100, 0, dependentVariable);

        String firstJobId = "classification_two_jobs_with_same_randomize_seed_1";
        String firstJobDestIndex = firstJobId + "_dest";
--- a/x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/MlNativeDataFrameAnalyticsIntegTestCase.java
+++ b/x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/MlNativeDataFrameAnalyticsIntegTestCase.java
@ -259,7 +259,7 @@ abstract class MlNativeDataFrameAnalyticsIntegTestCase extends MlNativeIntegTest

    protected static Set<String> getTrainingRowsIds(String index) {
        Set<String> trainingRowsIds = new HashSet<>();
-        SearchResponse hits = client().prepareSearch(index).get();
+        SearchResponse hits = client().prepareSearch(index).setSize(10000).get();
        for (SearchHit hit : hits.getHits()) {
            Map<String, Object> sourceAsMap = hit.getSourceAsMap();
            assertThat(sourceAsMap.containsKey("ml"), is(true));