[ML] Fix bug, add tests, improve estimates for estimate_model_memory (#54508)

This PR:

1. Fixes the bug where a cardinality estimate of zero could cause
   a 500 status
2. Adds tests for that scenario and a few others
3. Adds sensible estimates for the cases that were previously TODO

Backport of #54462
This commit is contained in:
David Roberts 2020-03-31 17:59:38 +01:00 committed by GitHub
parent 0b25e3b66c
commit b8f06df53f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 210 additions and 81 deletions

View File

@ -96,6 +96,8 @@ integTest.runner {
'ml/data_frame_analytics_crud/Test put classification given num_top_classes is greater than 1k',
'ml/data_frame_analytics_crud/Test put classification given training_percent is less than one',
'ml/data_frame_analytics_crud/Test put classification given training_percent is greater than hundred',
'ml/estimate_model_memory/Test missing overall cardinality',
'ml/estimate_model_memory/Test missing max bucket cardinality',
'ml/evaluate_data_frame/Test given missing index',
'ml/evaluate_data_frame/Test given index does not exist',
'ml/evaluate_data_frame/Test given missing evaluation',

View File

@ -5,8 +5,6 @@
*/
package org.elasticsearch.xpack.ml.action;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.support.ActionFilters;
import org.elasticsearch.action.support.HandledTransportAction;
@ -37,8 +35,6 @@ import java.util.Set;
public class TransportEstimateModelMemoryAction
extends HandledTransportAction<EstimateModelMemoryAction.Request, EstimateModelMemoryAction.Response> {
private static final Logger logger = LogManager.getLogger(TransportEstimateModelMemoryAction.class);
static final ByteSizeValue BASIC_REQUIREMENT = new ByteSizeValue(10, ByteSizeUnit.MB);
static final long BYTES_PER_INFLUENCER_VALUE = new ByteSizeValue(10, ByteSizeUnit.KB).getBytes();
private static final long BYTES_IN_MB = new ByteSizeValue(1, ByteSizeUnit.MB).getBytes();
@ -67,16 +63,24 @@ public class TransportEstimateModelMemoryAction
}
static long calculateDetectorsRequirementBytes(AnalysisConfig analysisConfig, Map<String, Long> overallCardinality) {
return analysisConfig.getDetectors().stream().map(detector -> calculateDetectorRequirementBytes(detector, overallCardinality))
long bucketSpanSeconds = analysisConfig.getBucketSpan().getSeconds();
return analysisConfig.getDetectors().stream()
.map(detector -> calculateDetectorRequirementBytes(detector, bucketSpanSeconds, overallCardinality))
.reduce(0L, TransportEstimateModelMemoryAction::addNonNegativeLongsWithMaxValueCap);
}
static long calculateDetectorRequirementBytes(Detector detector, Map<String, Long> overallCardinality) {
@SuppressWarnings("fallthrough")
static long calculateDetectorRequirementBytes(Detector detector, long bucketSpanSeconds, Map<String, Long> overallCardinality) {
long answer = 0;
boolean addFieldValueWorkspace = false;
// These values for detectors assume splitting is via a partition field
switch (detector.getFunction()) {
case DISTINCT_COUNT:
case LOW_DISTINCT_COUNT:
case HIGH_DISTINCT_COUNT:
addFieldValueWorkspace = true;
case COUNT:
case LOW_COUNT:
case HIGH_COUNT:
@ -85,24 +89,14 @@ public class TransportEstimateModelMemoryAction
case HIGH_NON_ZERO_COUNT:
answer = new ByteSizeValue(32, ByteSizeUnit.KB).getBytes();
break;
case DISTINCT_COUNT:
case LOW_DISTINCT_COUNT:
case HIGH_DISTINCT_COUNT:
answer = 1; // TODO add realistic number
break;
case RARE:
case FREQ_RARE:
answer = 1; // TODO add realistic number
answer = new ByteSizeValue(2, ByteSizeUnit.KB).getBytes();
break;
case INFO_CONTENT:
case LOW_INFO_CONTENT:
case HIGH_INFO_CONTENT:
answer = 1; // TODO add realistic number
break;
case METRIC:
// metric analyses mean, min and max simultaneously, and uses about 2.5 times the memory of one of these
answer = new ByteSizeValue(160, ByteSizeUnit.KB).getBytes();
break;
addFieldValueWorkspace = true;
case MEAN:
case LOW_MEAN:
case HIGH_MEAN:
@ -117,48 +111,70 @@ public class TransportEstimateModelMemoryAction
case NON_NULL_SUM:
case LOW_NON_NULL_SUM:
case HIGH_NON_NULL_SUM:
case MEDIAN:
case LOW_MEDIAN:
case HIGH_MEDIAN:
case VARP:
case LOW_VARP:
case HIGH_VARP:
// 64 comes from https://github.com/elastic/kibana/issues/18722
answer = new ByteSizeValue(48, ByteSizeUnit.KB).getBytes();
break;
case METRIC:
// metric analyses mean, min and max simultaneously, and uses about 2.5 times the memory of one of these
answer = new ByteSizeValue(120, ByteSizeUnit.KB).getBytes();
break;
case MEDIAN:
case LOW_MEDIAN:
case HIGH_MEDIAN:
answer = new ByteSizeValue(64, ByteSizeUnit.KB).getBytes();
break;
case TIME_OF_DAY:
case TIME_OF_WEEK:
answer = 1; // TODO add realistic number
answer = new ByteSizeValue(10, ByteSizeUnit.KB).getBytes();
break;
case LAT_LONG:
answer = 1; // TODO add realistic number
answer = new ByteSizeValue(64, ByteSizeUnit.KB).getBytes();
break;
default:
assert false : "unhandled detector function: " + detector.getFunction().getFullName();
}
long partitionFieldCardinalityEstimate = 1;
String partitionFieldName = detector.getPartitionFieldName();
if (partitionFieldName != null) {
partitionFieldCardinalityEstimate = Math.max(1,
cardinalityEstimate(Detector.PARTITION_FIELD_NAME_FIELD.getPreferredName(), partitionFieldName, overallCardinality, true));
}
String byFieldName = detector.getByFieldName();
if (byFieldName != null) {
long cardinalityEstimate =
long byFieldCardinalityEstimate =
cardinalityEstimate(Detector.BY_FIELD_NAME_FIELD.getPreferredName(), byFieldName, overallCardinality, true);
// Assume the number of by field values in each partition reduces if the cardinality of both by and partition fields is high
// The memory cost of a by field is about 2/3rds that of a partition field
long multiplier = addNonNegativeLongsWithMaxValueCap(cardinalityEstimate, 2) / 3 * 2;
answer = multiplyNonNegativeLongsWithMaxValueCap(answer, multiplier);
double multiplier =
Math.ceil(reducedCardinality(byFieldCardinalityEstimate, partitionFieldCardinalityEstimate, bucketSpanSeconds) * 2.0 / 3.0);
answer = multiplyNonNegativeLongsWithMaxValueCap(answer, (long) multiplier);
}
String overFieldName = detector.getOverFieldName();
if (overFieldName != null) {
long cardinalityEstimate =
long overFieldCardinalityEstimate =
cardinalityEstimate(Detector.OVER_FIELD_NAME_FIELD.getPreferredName(), overFieldName, overallCardinality, true);
// Over fields don't multiply the whole estimate, just add a small amount (estimate 512 bytes) per value
answer = addNonNegativeLongsWithMaxValueCap(answer, multiplyNonNegativeLongsWithMaxValueCap(cardinalityEstimate, 512));
// Assume the number of over field values in each partition reduces if the cardinality of both over and partition fields is high
double multiplier =
Math.ceil(reducedCardinality(overFieldCardinalityEstimate, partitionFieldCardinalityEstimate, bucketSpanSeconds));
// Over fields don't multiply the whole estimate, just add a small amount (estimate 768 bytes) per value
answer = addNonNegativeLongsWithMaxValueCap(answer, multiplyNonNegativeLongsWithMaxValueCap(768, (long) multiplier));
}
String partitionFieldName = detector.getPartitionFieldName();
if (partitionFieldName != null) {
long multiplier =
cardinalityEstimate(Detector.PARTITION_FIELD_NAME_FIELD.getPreferredName(), partitionFieldName, overallCardinality, true);
answer = multiplyNonNegativeLongsWithMaxValueCap(answer, multiplier);
answer = multiplyNonNegativeLongsWithMaxValueCap(answer, partitionFieldCardinalityEstimate);
}
if (addFieldValueWorkspace) {
// The field value workspace should really be the maximum over all buckets of the
// length of all the distinct values of the function field concatenated in the bucket.
// However, that would be very expensive and complex for the caller to calculate so
// we just allow a fixed amount.
answer = addNonNegativeLongsWithMaxValueCap(answer, new ByteSizeValue(5, ByteSizeUnit.MB).getBytes());
}
return answer;
@ -180,11 +196,12 @@ public class TransportEstimateModelMemoryAction
static long calculateCategorizationRequirementBytes(AnalysisConfig analysisConfig) {
if (analysisConfig.getCategorizationFieldName() != null) {
return 1; // TODO add realistic number
} else {
if (analysisConfig.getCategorizationFieldName() == null) {
return 0;
}
// 5MB is a pretty conservative estimate of the memory requirement for categorization.
// Often it is considerably less, but it's very hard to predict from simple statistics.
return new ByteSizeValue(5, ByteSizeUnit.MB).getBytes();
}
static long cardinalityEstimate(String description, String fieldName, Map<String, Long> suppliedCardinailityEstimates,
@ -197,9 +214,8 @@ public class TransportEstimateModelMemoryAction
if (AnalysisConfig.ML_CATEGORY_FIELD.equals(fieldName)) {
return isOverall ? 500 : 50;
}
logger.warn("[{}] cardinality estimate required for [{}] [{}] but not supplied",
isOverall ? "Overall" : "Bucket max", description, fieldName);
return 0;
throw new IllegalArgumentException("[" + (isOverall ? "Overall" : "Bucket max") + "] cardinality estimate required for [" +
description + "] [" + fieldName + "] but not supplied");
}
static ByteSizeValue roundUpToNextMb(long bytes) {
@ -207,7 +223,28 @@ public class TransportEstimateModelMemoryAction
return new ByteSizeValue(addNonNegativeLongsWithMaxValueCap(bytes, BYTES_IN_MB - 1) / BYTES_IN_MB, ByteSizeUnit.MB);
}
private static long addNonNegativeLongsWithMaxValueCap(long a, long b) {
/**
* The idea here is to reduce a by or over field cardinality to reflect the likelihood that only a subset of by or
* over field values will exist in any partition in a given bucket. The principles are:
* 1. The greater the partition field cardinality, the greater the reduction
* 2. The shorter the bucket span, the greater the reduction
* A partition field cardinality of 1 means no reduction. (And remember usenull is effectively always true for partition
* fields, so there will be at least one partition even if the partition field doesn't exist in any input documents.)
* A bucket span of 15 minutes means the cardinality to be reduced is divided by approximately the square root of the
* smaller of the two cardinalities.
*/
static double reducedCardinality(long cardinalityToReduce, long partitionFieldCardinalityEstimate, long bucketSpanSeconds) {
    // Contract checks: cardinality may be zero (no values seen), but the partition
    // cardinality is always at least 1 and the bucket span is always positive.
    assert cardinalityToReduce >= 0 : "negative cardinality to reduce " + cardinalityToReduce;
    assert partitionFieldCardinalityEstimate > 0 : "non-positive partition field cardinality " + partitionFieldCardinalityEstimate;
    assert bucketSpanSeconds > 0 : "non-positive bucket span " + bucketSpanSeconds;
    // Nothing to reduce - avoids pointless pow() work on the zero case.
    if (cardinalityToReduce == 0) {
        return 0;
    }
    // The exponent grows with the log of the bucket span and is capped at 1.
    // At the cap the reduction is a full division by the divisor base below;
    // for a 15 minute span it works out close to a square root.
    final double exponent = Math.min(1.0, (Math.log10(bucketSpanSeconds) + 1.0) / 8.0);
    // Divide by (a power of) the smaller of the two cardinalities, so a partition
    // cardinality of 1 leaves the input unchanged (pow(1, x) == 1).
    final long divisorBase = Math.min(cardinalityToReduce, partitionFieldCardinalityEstimate);
    return cardinalityToReduce / Math.pow(divisorBase, exponent);
}
static long addNonNegativeLongsWithMaxValueCap(long a, long b) {
assert a >= 0;
assert b >= 0;
if (Long.MAX_VALUE - a - b < 0) {
@ -216,9 +253,12 @@ public class TransportEstimateModelMemoryAction
return a + b;
}
private static long multiplyNonNegativeLongsWithMaxValueCap(long a, long b) {
static long multiplyNonNegativeLongsWithMaxValueCap(long a, long b) {
assert a >= 0;
assert b >= 0;
if (a == 0 || b == 0) {
return 0;
}
if (Long.MAX_VALUE / a < b) {
return Long.MAX_VALUE;
}

View File

@ -18,6 +18,8 @@ import java.util.Map;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.closeTo;
import static org.hamcrest.Matchers.lessThan;
public class TransportEstimateModelMemoryActionTests extends ESTestCase {
@ -31,20 +33,20 @@ public class TransportEstimateModelMemoryActionTests extends ESTestCase {
String function = randomFrom("mean", "min", "max", "sum");
Detector noSplit = createDetector(function, "field", null, null, null);
assertThat(TransportEstimateModelMemoryAction.calculateDetectorRequirementBytes(noSplit,
overallCardinality), is(65536L));
assertThat(TransportEstimateModelMemoryAction.calculateDetectorRequirementBytes(noSplit, 900,
overallCardinality), is(49152L));
Detector withByField = createDetector(function, "field", "buy", null, null);
assertThat(TransportEstimateModelMemoryAction.calculateDetectorRequirementBytes(withByField,
overallCardinality), is(134 * 65536L));
assertThat(TransportEstimateModelMemoryAction.calculateDetectorRequirementBytes(withByField, 900,
overallCardinality), is(134 * 49152L));
Detector withPartitionField = createDetector(function, "field", null, null, "part");
assertThat(TransportEstimateModelMemoryAction.calculateDetectorRequirementBytes(withPartitionField,
overallCardinality), is(100 * 65536L));
assertThat(TransportEstimateModelMemoryAction.calculateDetectorRequirementBytes(withPartitionField, 900,
overallCardinality), is(100 * 49152L));
Detector withByAndPartitionFields = createDetector(function, "field", "buy", null, "part");
assertThat(TransportEstimateModelMemoryAction.calculateDetectorRequirementBytes(withByAndPartitionFields,
overallCardinality), is(134 * 100 * 65536L));
assertThat(TransportEstimateModelMemoryAction.calculateDetectorRequirementBytes(withByAndPartitionFields, 900,
overallCardinality), is((long) Math.ceil(200 / Math.sqrt(100) * 2 / 3) * 100 * 49152L));
}
public void testCalculateInfluencerRequirementBytes() {
@ -81,7 +83,8 @@ public class TransportEstimateModelMemoryActionTests extends ESTestCase {
assertThat(TransportEstimateModelMemoryAction.calculateCategorizationRequirementBytes(analysisConfigWithoutCategorization), is(0L));
AnalysisConfig analysisConfigWithCategorization = createCountAnalysisConfig(randomAlphaOfLength(10), null);
assertThat(TransportEstimateModelMemoryAction.calculateCategorizationRequirementBytes(analysisConfigWithCategorization), is(1L));
assertThat(TransportEstimateModelMemoryAction.calculateCategorizationRequirementBytes(analysisConfigWithCategorization),
is(5L * 1024 * 1024));
}
public void testRoundUpToNextMb() {
@ -104,6 +107,55 @@ public class TransportEstimateModelMemoryActionTests extends ESTestCase {
equalTo(new ByteSizeValue(Long.MAX_VALUE / new ByteSizeValue(1, ByteSizeUnit.MB).getBytes() , ByteSizeUnit.MB)));
}
/**
 * Sanity checks for the cardinality reduction heuristic used by the model memory estimator.
 */
public void testReducedCardinality() {
    long cardinalityToReduce = randomIntBetween(1001, Integer.MAX_VALUE);
    long saneBucketSpan = randomFrom(1, 30, 60, 300, 600, 900, 1800, 3600, 10800, 21600, 43200, 86400);

    // A zero cardinality must reduce to zero. The production method asserts that the
    // partition field cardinality is strictly positive, and randomNonNegativeLong() may
    // return 0, so clamp it to at least 1 to avoid a rare spurious assertion failure
    // when running with -ea.
    assertThat(TransportEstimateModelMemoryAction.reducedCardinality(0, Math.max(1L, randomNonNegativeLong()), saneBucketSpan),
        closeTo(0.0, 1e-15));

    // A partition field cardinality of 1 means no reduction at all.
    assertThat(TransportEstimateModelMemoryAction.reducedCardinality(cardinalityToReduce, 1, saneBucketSpan),
        closeTo(cardinalityToReduce, 1e-6));

    // With a 15 minute bucket span the divisor is roughly the square root of the smaller cardinality.
    assertThat(TransportEstimateModelMemoryAction.reducedCardinality(cardinalityToReduce, 1000, 900),
        closeTo(cardinalityToReduce / Math.sqrt(1000), cardinalityToReduce / 20.0));

    // Any partition field cardinality greater than 1 must reduce the input by some amount.
    assertThat(TransportEstimateModelMemoryAction.reducedCardinality(
        cardinalityToReduce, randomIntBetween(2, Integer.MAX_VALUE), saneBucketSpan),
        lessThan((double) cardinalityToReduce));

    // For a very long bucket span the exponent caps at 1, i.e. full division by the smaller cardinality.
    assertThat(TransportEstimateModelMemoryAction.reducedCardinality(cardinalityToReduce, 1000, 10000000),
        closeTo(cardinalityToReduce / 1000.0, 1e-4));
}
/**
 * Verifies that the saturating addition helper is exact for small sums and
 * caps at {@code Long.MAX_VALUE} instead of overflowing.
 */
public void testAddNonNegativeLongsWithMaxValueCap() {
    // Small sums are computed exactly.
    assertThat(TransportEstimateModelMemoryAction.addNonNegativeLongsWithMaxValueCap(0, 0), equalTo(0L));
    assertThat(TransportEstimateModelMemoryAction.addNonNegativeLongsWithMaxValueCap(0, 1), equalTo(1L));
    assertThat(TransportEstimateModelMemoryAction.addNonNegativeLongsWithMaxValueCap(1, 0), equalTo(1L));
    assertThat(TransportEstimateModelMemoryAction.addNonNegativeLongsWithMaxValueCap(1, 1), equalTo(2L));

    // Sums that would overflow a signed 64 bit long are capped at Long.MAX_VALUE.
    assertThat(TransportEstimateModelMemoryAction.addNonNegativeLongsWithMaxValueCap(Long.MAX_VALUE, Long.MAX_VALUE),
        equalTo(Long.MAX_VALUE));
    long nearMaxA = Long.MAX_VALUE - randomIntBetween(1, Integer.MAX_VALUE);
    long nearMaxB = Long.MAX_VALUE - randomIntBetween(1, Integer.MAX_VALUE);
    assertThat(TransportEstimateModelMemoryAction.addNonNegativeLongsWithMaxValueCap(nearMaxA, nearMaxB),
        equalTo(Long.MAX_VALUE));
}
/**
 * Verifies that the saturating multiplication helper returns zero whenever either
 * factor is zero, is exact for small products, and caps at {@code Long.MAX_VALUE}.
 */
public void testMultiplyNonNegativeLongsWithMaxValueCap() {
    // Zero times anything is zero - this must short-circuit before the overflow check.
    assertThat(TransportEstimateModelMemoryAction.multiplyNonNegativeLongsWithMaxValueCap(0, 0), equalTo(0L));
    assertThat(TransportEstimateModelMemoryAction.multiplyNonNegativeLongsWithMaxValueCap(randomNonNegativeLong(), 0), equalTo(0L));
    assertThat(TransportEstimateModelMemoryAction.multiplyNonNegativeLongsWithMaxValueCap(0, randomNonNegativeLong()), equalTo(0L));

    // An exact small product.
    assertThat(TransportEstimateModelMemoryAction.multiplyNonNegativeLongsWithMaxValueCap(1, 1), equalTo(1L));

    // Products that would overflow are capped at Long.MAX_VALUE.
    assertThat(TransportEstimateModelMemoryAction.multiplyNonNegativeLongsWithMaxValueCap(Long.MAX_VALUE, Long.MAX_VALUE),
        equalTo(Long.MAX_VALUE));
    assertThat(TransportEstimateModelMemoryAction.multiplyNonNegativeLongsWithMaxValueCap(
        Long.MAX_VALUE, Math.max(1L, randomNonNegativeLong())),
        equalTo(Long.MAX_VALUE));
    assertThat(TransportEstimateModelMemoryAction.multiplyNonNegativeLongsWithMaxValueCap(
        Math.max(1L, randomNonNegativeLong()), Long.MAX_VALUE),
        equalTo(Long.MAX_VALUE));

    // Zero also wins against Long.MAX_VALUE in either argument position.
    assertThat(TransportEstimateModelMemoryAction.multiplyNonNegativeLongsWithMaxValueCap(0, Long.MAX_VALUE), equalTo(0L));
    assertThat(TransportEstimateModelMemoryAction.multiplyNonNegativeLongsWithMaxValueCap(Long.MAX_VALUE, 0), equalTo(0L));
}
public static Detector createDetector(String function, String fieldName, String byFieldName,
String overFieldName, String partitionFieldName) {

View File

@ -5,14 +5,14 @@
body: >
{
"analysis_config": {
"bucket_span": "1h",
"bucket_span": "15m",
"detectors": [{"function": "max", "field_name": "responsetime", "by_field_name": "airline"}]
},
"overall_cardinality": {
"airline": 50000
}
}
- match: { model_memory_estimate: "2094mb" }
- match: { model_memory_estimate: "1573mb" }
---
"Test by field also influencer":
@ -21,7 +21,7 @@
body: >
{
"analysis_config": {
"bucket_span": "1h",
"bucket_span": "15m",
"detectors": [{"function": "max", "field_name": "responsetime", "by_field_name": "airline"}],
"influencers": [ "airline" ]
},
@ -32,7 +32,7 @@
"airline": 500
}
}
- match: { model_memory_estimate: "2094mb" }
- match: { model_memory_estimate: "1573mb" }
---
"Test by field with independent influencer":
@ -41,7 +41,7 @@
body: >
{
"analysis_config": {
"bucket_span": "1h",
"bucket_span": "15m",
"detectors": [{"function": "max", "field_name": "responsetime", "by_field_name": "airline"}],
"influencers": [ "country" ]
},
@ -52,7 +52,7 @@
"country": 500
}
}
- match: { model_memory_estimate: "2099mb" }
- match: { model_memory_estimate: "1578mb" }
---
"Test over field":
@ -61,14 +61,14 @@
body: >
{
"analysis_config": {
"bucket_span": "1h",
"bucket_span": "15m",
"detectors": [{"function": "max", "field_name": "responsetime", "over_field_name": "airline"}]
},
"overall_cardinality": {
"airline": 50000
}
}
- match: { model_memory_estimate: "35mb" }
- match: { model_memory_estimate: "47mb" }
---
"Test over field also influencer":
@ -77,7 +77,7 @@
body: >
{
"analysis_config": {
"bucket_span": "1h",
"bucket_span": "15m",
"detectors": [{"function": "max", "field_name": "responsetime", "over_field_name": "airline"}],
"influencers": [ "airline" ]
},
@ -88,7 +88,7 @@
"airline": 500
}
}
- match: { model_memory_estimate: "35mb" }
- match: { model_memory_estimate: "47mb" }
---
"Test over field with independent influencer":
@ -97,7 +97,7 @@
body: >
{
"analysis_config": {
"bucket_span": "1h",
"bucket_span": "15m",
"detectors": [{"function": "max", "field_name": "responsetime", "over_field_name": "airline"}],
"influencers": [ "country" ]
},
@ -108,7 +108,7 @@
"country": 500
}
}
- match: { model_memory_estimate: "40mb" }
- match: { model_memory_estimate: "52mb" }
---
"Test partition field":
@ -117,14 +117,14 @@
body: >
{
"analysis_config": {
"bucket_span": "1h",
"bucket_span": "15m",
"detectors": [{"function": "max", "field_name": "responsetime", "partition_field_name": "airline"}]
},
"overall_cardinality": {
"airline": 50000
}
}
- match: { model_memory_estimate: "3135mb" }
- match: { model_memory_estimate: "2354mb" }
---
"Test partition field also influencer":
@ -133,7 +133,7 @@
body: >
{
"analysis_config": {
"bucket_span": "1h",
"bucket_span": "15m",
"detectors": [{"function": "max", "field_name": "responsetime", "partition_field_name": "airline"}],
"influencers": [ "airline" ]
},
@ -144,7 +144,7 @@
"airline": 500
}
}
- match: { model_memory_estimate: "3135mb" }
- match: { model_memory_estimate: "2354mb" }
---
"Test partition field with independent influencer":
@ -153,7 +153,7 @@
body: >
{
"analysis_config": {
"bucket_span": "1h",
"bucket_span": "15m",
"detectors": [{"function": "max", "field_name": "responsetime", "partition_field_name": "airline"}],
"influencers": [ "country" ]
},
@ -164,7 +164,7 @@
"country": 500
}
}
- match: { model_memory_estimate: "3140mb" }
- match: { model_memory_estimate: "2359mb" }
---
"Test by and partition field":
@ -173,7 +173,7 @@
body: >
{
"analysis_config": {
"bucket_span": "1h",
"bucket_span": "15m",
"detectors": [{"function": "max", "field_name": "responsetime", "by_field_name": "airline", "partition_field_name": "country"}]
},
"overall_cardinality": {
@ -181,7 +181,7 @@
"country": 600
}
}
- match: { model_memory_estimate: "100060mb" }
- match: { model_memory_estimate: "3189mb" }
---
"Test by and partition fields also influencers":
@ -190,7 +190,7 @@
body: >
{
"analysis_config": {
"bucket_span": "1h",
"bucket_span": "15m",
"detectors": [{"function": "max", "field_name": "responsetime", "by_field_name": "airline", "partition_field_name": "country"}],
"influencers": [ "airline", "country" ]
},
@ -203,7 +203,7 @@
"country": 40
}
}
- match: { model_memory_estimate: "100060mb" }
- match: { model_memory_estimate: "3189mb" }
---
"Test by and partition fields with independent influencer":
@ -212,7 +212,7 @@
body: >
{
"analysis_config": {
"bucket_span": "1h",
"bucket_span": "15m",
"detectors": [{"function": "max", "field_name": "responsetime", "by_field_name": "airline", "partition_field_name": "country"}],
"influencers": [ "src_ip" ]
},
@ -224,7 +224,7 @@
"src_ip": 500
}
}
- match: { model_memory_estimate: "100065mb" }
- match: { model_memory_estimate: "3194mb" }
---
"Test over and partition field":
@ -233,7 +233,7 @@
body: >
{
"analysis_config": {
"bucket_span": "1h",
"bucket_span": "15m",
"detectors": [{"function": "max", "field_name": "responsetime", "over_field_name": "airline", "partition_field_name": "country"}]
},
"overall_cardinality": {
@ -241,7 +241,7 @@
"country": 600
}
}
- match: { model_memory_estimate: "1220mb" }
- match: { model_memory_estimate: "113mb" }
---
"Test over and partition fields also influencers":
@ -250,7 +250,7 @@
body: >
{
"analysis_config": {
"bucket_span": "1h",
"bucket_span": "15m",
"detectors": [{"function": "max", "field_name": "responsetime", "over_field_name": "airline", "partition_field_name": "country"}],
"influencers": [ "airline", "country" ]
},
@ -263,7 +263,7 @@
"country": 40
}
}
- match: { model_memory_estimate: "1220mb" }
- match: { model_memory_estimate: "113mb" }
---
"Test over and partition fields with independent influencer":
@ -272,7 +272,7 @@
body: >
{
"analysis_config": {
"bucket_span": "1h",
"bucket_span": "15m",
"detectors": [{"function": "max", "field_name": "responsetime", "over_field_name": "airline", "partition_field_name": "country"}],
"influencers": [ "src_ip" ]
},
@ -284,5 +284,40 @@
"src_ip": 500
}
}
- match: { model_memory_estimate: "1225mb" }
- match: { model_memory_estimate: "118mb" }
---
# Regression test for the bug fixed in this commit: a required overall cardinality
# that the caller failed to supply used to produce a 500 response. It must now be
# rejected up front with a clear IllegalArgumentException-style validation error.
"Test missing overall cardinality":
- do:
    catch: /\[Overall\] cardinality estimate required for \[by_field_name\] \[airline\] but not supplied/
    ml.estimate_model_memory:
      body: >
        {
          "analysis_config": {
            "bucket_span": "15m",
            "detectors": [{"function": "max", "field_name": "responsetime", "by_field_name": "airline"}]
          },
          "overall_cardinality": {
            "wrong": 50000
          }
        }
---
# Same regression scenario for the per-bucket maximum cardinality: an influencer
# ("country") needs a max_bucket_cardinality entry, but only an unrelated key is
# supplied, so the request must fail with a validation error rather than a 500.
"Test missing max bucket cardinality":
- do:
    catch: /\[Bucket max\] cardinality estimate required for \[influencers\] \[country\] but not supplied/
    ml.estimate_model_memory:
      body: >
        {
          "analysis_config": {
            "bucket_span": "15m",
            "detectors": [{"function": "max", "field_name": "responsetime", "by_field_name": "airline"}],
            "influencers": [ "country" ]
          },
          "overall_cardinality": {
            "airline": 50000
          },
          "max_bucket_cardinality": {
            "wrong": 500
          }
        }