[ML] Fix bug, add tests, improve estimates for estimate_model_memory (#54508)

This PR:

1. Fixes the bug where a cardinality estimate of zero could cause
   a 500 status
2. Adds tests for that scenario and a few others
3. Adds sensible estimates for the cases that were previously TODO

Backport of #54462
This commit is contained in:
David Roberts 2020-03-31 17:59:38 +01:00 committed by GitHub
parent 0b25e3b66c
commit b8f06df53f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 210 additions and 81 deletions

View File

@ -96,6 +96,8 @@ integTest.runner {
'ml/data_frame_analytics_crud/Test put classification given num_top_classes is greater than 1k',
'ml/data_frame_analytics_crud/Test put classification given training_percent is less than one',
'ml/data_frame_analytics_crud/Test put classification given training_percent is greater than hundred',
'ml/estimate_model_memory/Test missing overall cardinality',
'ml/estimate_model_memory/Test missing max bucket cardinality',
'ml/evaluate_data_frame/Test given missing index',
'ml/evaluate_data_frame/Test given index does not exist',
'ml/evaluate_data_frame/Test given missing evaluation',

View File

@ -5,8 +5,6 @@
*/
package org.elasticsearch.xpack.ml.action;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.support.ActionFilters;
import org.elasticsearch.action.support.HandledTransportAction;
@ -37,8 +35,6 @@ import java.util.Set;
public class TransportEstimateModelMemoryAction
extends HandledTransportAction<EstimateModelMemoryAction.Request, EstimateModelMemoryAction.Response> {
private static final Logger logger = LogManager.getLogger(TransportEstimateModelMemoryAction.class);
static final ByteSizeValue BASIC_REQUIREMENT = new ByteSizeValue(10, ByteSizeUnit.MB);
static final long BYTES_PER_INFLUENCER_VALUE = new ByteSizeValue(10, ByteSizeUnit.KB).getBytes();
private static final long BYTES_IN_MB = new ByteSizeValue(1, ByteSizeUnit.MB).getBytes();
@ -67,16 +63,24 @@ public class TransportEstimateModelMemoryAction
}
static long calculateDetectorsRequirementBytes(AnalysisConfig analysisConfig, Map<String, Long> overallCardinality) {
return analysisConfig.getDetectors().stream().map(detector -> calculateDetectorRequirementBytes(detector, overallCardinality))
long bucketSpanSeconds = analysisConfig.getBucketSpan().getSeconds();
return analysisConfig.getDetectors().stream()
.map(detector -> calculateDetectorRequirementBytes(detector, bucketSpanSeconds, overallCardinality))
.reduce(0L, TransportEstimateModelMemoryAction::addNonNegativeLongsWithMaxValueCap);
}
static long calculateDetectorRequirementBytes(Detector detector, Map<String, Long> overallCardinality) {
@SuppressWarnings("fallthrough")
static long calculateDetectorRequirementBytes(Detector detector, long bucketSpanSeconds, Map<String, Long> overallCardinality) {
long answer = 0;
boolean addFieldValueWorkspace = false;
// These values for detectors assume splitting is via a partition field
switch (detector.getFunction()) {
case DISTINCT_COUNT:
case LOW_DISTINCT_COUNT:
case HIGH_DISTINCT_COUNT:
addFieldValueWorkspace = true;
case COUNT:
case LOW_COUNT:
case HIGH_COUNT:
@ -85,24 +89,14 @@ public class TransportEstimateModelMemoryAction
case HIGH_NON_ZERO_COUNT:
answer = new ByteSizeValue(32, ByteSizeUnit.KB).getBytes();
break;
case DISTINCT_COUNT:
case LOW_DISTINCT_COUNT:
case HIGH_DISTINCT_COUNT:
answer = 1; // TODO add realistic number
break;
case RARE:
case FREQ_RARE:
answer = 1; // TODO add realistic number
answer = new ByteSizeValue(2, ByteSizeUnit.KB).getBytes();
break;
case INFO_CONTENT:
case LOW_INFO_CONTENT:
case HIGH_INFO_CONTENT:
answer = 1; // TODO add realistic number
break;
case METRIC:
// metric analyses mean, min and max simultaneously, and uses about 2.5 times the memory of one of these
answer = new ByteSizeValue(160, ByteSizeUnit.KB).getBytes();
break;
addFieldValueWorkspace = true;
case MEAN:
case LOW_MEAN:
case HIGH_MEAN:
@ -117,48 +111,70 @@ public class TransportEstimateModelMemoryAction
case NON_NULL_SUM:
case LOW_NON_NULL_SUM:
case HIGH_NON_NULL_SUM:
case MEDIAN:
case LOW_MEDIAN:
case HIGH_MEDIAN:
case VARP:
case LOW_VARP:
case HIGH_VARP:
// 64 comes from https://github.com/elastic/kibana/issues/18722
answer = new ByteSizeValue(48, ByteSizeUnit.KB).getBytes();
break;
case METRIC:
// metric analyses mean, min and max simultaneously, and uses about 2.5 times the memory of one of these
answer = new ByteSizeValue(120, ByteSizeUnit.KB).getBytes();
break;
case MEDIAN:
case LOW_MEDIAN:
case HIGH_MEDIAN:
answer = new ByteSizeValue(64, ByteSizeUnit.KB).getBytes();
break;
case TIME_OF_DAY:
case TIME_OF_WEEK:
answer = 1; // TODO add realistic number
answer = new ByteSizeValue(10, ByteSizeUnit.KB).getBytes();
break;
case LAT_LONG:
answer = 1; // TODO add realistic number
answer = new ByteSizeValue(64, ByteSizeUnit.KB).getBytes();
break;
default:
assert false : "unhandled detector function: " + detector.getFunction().getFullName();
}
long partitionFieldCardinalityEstimate = 1;
String partitionFieldName = detector.getPartitionFieldName();
if (partitionFieldName != null) {
partitionFieldCardinalityEstimate = Math.max(1,
cardinalityEstimate(Detector.PARTITION_FIELD_NAME_FIELD.getPreferredName(), partitionFieldName, overallCardinality, true));
}
String byFieldName = detector.getByFieldName();
if (byFieldName != null) {
long cardinalityEstimate =
long byFieldCardinalityEstimate =
cardinalityEstimate(Detector.BY_FIELD_NAME_FIELD.getPreferredName(), byFieldName, overallCardinality, true);
// Assume the number of by field values in each partition reduces if the cardinality of both by and partition fields is high
// The memory cost of a by field is about 2/3rds that of a partition field
long multiplier = addNonNegativeLongsWithMaxValueCap(cardinalityEstimate, 2) / 3 * 2;
answer = multiplyNonNegativeLongsWithMaxValueCap(answer, multiplier);
double multiplier =
Math.ceil(reducedCardinality(byFieldCardinalityEstimate, partitionFieldCardinalityEstimate, bucketSpanSeconds) * 2.0 / 3.0);
answer = multiplyNonNegativeLongsWithMaxValueCap(answer, (long) multiplier);
}
String overFieldName = detector.getOverFieldName();
if (overFieldName != null) {
long cardinalityEstimate =
long overFieldCardinalityEstimate =
cardinalityEstimate(Detector.OVER_FIELD_NAME_FIELD.getPreferredName(), overFieldName, overallCardinality, true);
// Over fields don't multiply the whole estimate, just add a small amount (estimate 512 bytes) per value
answer = addNonNegativeLongsWithMaxValueCap(answer, multiplyNonNegativeLongsWithMaxValueCap(cardinalityEstimate, 512));
// Assume the number of over field values in each partition reduces if the cardinality of both over and partition fields is high
double multiplier =
Math.ceil(reducedCardinality(overFieldCardinalityEstimate, partitionFieldCardinalityEstimate, bucketSpanSeconds));
// Over fields don't multiply the whole estimate, just add a small amount (estimate 768 bytes) per value
answer = addNonNegativeLongsWithMaxValueCap(answer, multiplyNonNegativeLongsWithMaxValueCap(768, (long) multiplier));
}
String partitionFieldName = detector.getPartitionFieldName();
if (partitionFieldName != null) {
long multiplier =
cardinalityEstimate(Detector.PARTITION_FIELD_NAME_FIELD.getPreferredName(), partitionFieldName, overallCardinality, true);
answer = multiplyNonNegativeLongsWithMaxValueCap(answer, multiplier);
answer = multiplyNonNegativeLongsWithMaxValueCap(answer, partitionFieldCardinalityEstimate);
}
if (addFieldValueWorkspace) {
// The field value workspace should really be the maximum over all buckets of the
// length of all the distinct values of the function field concatenated in the bucket.
// However, that would be very expensive and complex for the caller to calculate so
// we just allow a fixed amount.
answer = addNonNegativeLongsWithMaxValueCap(answer, new ByteSizeValue(5, ByteSizeUnit.MB).getBytes());
}
return answer;
@ -180,11 +196,12 @@ public class TransportEstimateModelMemoryAction
static long calculateCategorizationRequirementBytes(AnalysisConfig analysisConfig) {
if (analysisConfig.getCategorizationFieldName() != null) {
return 1; // TODO add realistic number
} else {
if (analysisConfig.getCategorizationFieldName() == null) {
return 0;
}
// 5MB is a pretty conservative estimate of the memory requirement for categorization.
// Often it is considerably less, but it's very hard to predict from simple statistics.
return new ByteSizeValue(5, ByteSizeUnit.MB).getBytes();
}
static long cardinalityEstimate(String description, String fieldName, Map<String, Long> suppliedCardinailityEstimates,
@ -197,9 +214,8 @@ public class TransportEstimateModelMemoryAction
if (AnalysisConfig.ML_CATEGORY_FIELD.equals(fieldName)) {
return isOverall ? 500 : 50;
}
logger.warn("[{}] cardinality estimate required for [{}] [{}] but not supplied",
isOverall ? "Overall" : "Bucket max", description, fieldName);
return 0;
throw new IllegalArgumentException("[" + (isOverall ? "Overall" : "Bucket max") + "] cardinality estimate required for [" +
description + "] [" + fieldName + "] but not supplied");
}
static ByteSizeValue roundUpToNextMb(long bytes) {
@ -207,7 +223,28 @@ public class TransportEstimateModelMemoryAction
return new ByteSizeValue(addNonNegativeLongsWithMaxValueCap(bytes, BYTES_IN_MB - 1) / BYTES_IN_MB, ByteSizeUnit.MB);
}
private static long addNonNegativeLongsWithMaxValueCap(long a, long b) {
/**
* The idea here is to reduce a by or over field cardinality to reflect the likelihood that only a subset of by or
* over field values will exist in any partition in a given bucket. The principles are:
* 1. The greater the partition field cardinality, the greater the reduction
* 2. The shorter the bucket span, the greater the reduction
* A partition field cardinality of 1 means no reduction. (And remember usenull is effectively always true for partition
* fields, so there will be at least one partition even if the partition field doesn't exist in any input documents.)
* A bucket span of 15 minutes means the cardinality to be reduced is divided by approximately the square root of the
* smaller of the two cardinalities.
*/
static double reducedCardinality(long cardinalityToReduce, long partitionFieldCardinalityEstimate, long bucketSpanSeconds) {
    // Contract checks: cardinality may be zero (no values seen), but the partition
    // cardinality is always at least 1 and the bucket span is always positive.
    assert cardinalityToReduce >= 0 : "negative cardinality to reduce " + cardinalityToReduce;
    assert partitionFieldCardinalityEstimate > 0 : "non-positive partition field cardinality " + partitionFieldCardinalityEstimate;
    assert bucketSpanSeconds > 0 : "non-positive bucket span " + bucketSpanSeconds;
    // Nothing to reduce - avoids pointless pow() work on the zero case.
    if (cardinalityToReduce == 0) {
        return 0;
    }
    // The exponent grows with the log of the bucket span and is capped at 1.
    // At the cap the reduction is a full division by the divisor base below;
    // for a 15 minute span it works out close to a square root.
    final double exponent = Math.min(1.0, (Math.log10(bucketSpanSeconds) + 1.0) / 8.0);
    // Divide by (a power of) the smaller of the two cardinalities, so a partition
    // cardinality of 1 leaves the input unchanged (pow(1, x) == 1).
    final long divisorBase = Math.min(cardinalityToReduce, partitionFieldCardinalityEstimate);
    return cardinalityToReduce / Math.pow(divisorBase, exponent);
}
static long addNonNegativeLongsWithMaxValueCap(long a, long b) {
assert a >= 0;
assert b >= 0;
if (Long.MAX_VALUE - a - b < 0) {
@ -216,9 +253,12 @@ public class TransportEstimateModelMemoryAction
return a + b;
}
private static long multiplyNonNegativeLongsWithMaxValueCap(long a, long b) {
static long multiplyNonNegativeLongsWithMaxValueCap(long a, long b) {
assert a >= 0;
assert b >= 0;
if (a == 0 || b == 0) {
return 0;
}
if (Long.MAX_VALUE / a < b) {
return Long.MAX_VALUE;
}

View File

@ -18,6 +18,8 @@ import java.util.Map;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.closeTo;
import static org.hamcrest.Matchers.lessThan;
public class TransportEstimateModelMemoryActionTests extends ESTestCase {
@ -31,20 +33,20 @@ public class TransportEstimateModelMemoryActionTests extends ESTestCase {
String function = randomFrom("mean", "min", "max", "sum");
Detector noSplit = createDetector(function, "field", null, null, null);
assertThat(TransportEstimateModelMemoryAction.calculateDetectorRequirementBytes(noSplit,
overallCardinality), is(65536L));
assertThat(TransportEstimateModelMemoryAction.calculateDetectorRequirementBytes(noSplit, 900,
overallCardinality), is(49152L));
Detector withByField = createDetector(function, "field", "buy", null, null);
assertThat(TransportEstimateModelMemoryAction.calculateDetectorRequirementBytes(withByField,
overallCardinality), is(134 * 65536L));
assertThat(TransportEstimateModelMemoryAction.calculateDetectorRequirementBytes(withByField, 900,
overallCardinality), is(134 * 49152L));
Detector withPartitionField = createDetector(function, "field", null, null, "part");
assertThat(TransportEstimateModelMemoryAction.calculateDetectorRequirementBytes(withPartitionField,
overallCardinality), is(100 * 65536L));
assertThat(TransportEstimateModelMemoryAction.calculateDetectorRequirementBytes(withPartitionField, 900,
overallCardinality), is(100 * 49152L));
Detector withByAndPartitionFields = createDetector(function, "field", "buy", null, "part");
assertThat(TransportEstimateModelMemoryAction.calculateDetectorRequirementBytes(withByAndPartitionFields,
overallCardinality), is(134 * 100 * 65536L));
assertThat(TransportEstimateModelMemoryAction.calculateDetectorRequirementBytes(withByAndPartitionFields, 900,
overallCardinality), is((long) Math.ceil(200 / Math.sqrt(100) * 2 / 3) * 100 * 49152L));
}
public void testCalculateInfluencerRequirementBytes() {
@ -81,7 +83,8 @@ public class TransportEstimateModelMemoryActionTests extends ESTestCase {
assertThat(TransportEstimateModelMemoryAction.calculateCategorizationRequirementBytes(analysisConfigWithoutCategorization), is(0L));
AnalysisConfig analysisConfigWithCategorization = createCountAnalysisConfig(randomAlphaOfLength(10), null);
assertThat(TransportEstimateModelMemoryAction.calculateCategorizationRequirementBytes(analysisConfigWithCategorization), is(1L));
assertThat(TransportEstimateModelMemoryAction.calculateCategorizationRequirementBytes(analysisConfigWithCategorization),
is(5L * 1024 * 1024));
}
public void testRoundUpToNextMb() {
@ -104,6 +107,55 @@ public class TransportEstimateModelMemoryActionTests extends ESTestCase {
equalTo(new ByteSizeValue(Long.MAX_VALUE / new ByteSizeValue(1, ByteSizeUnit.MB).getBytes() , ByteSizeUnit.MB)));
}
/**
 * Sanity checks for the cardinality reduction heuristic used by the model memory estimator.
 */
public void testReducedCardinality() {
    long cardinalityToReduce = randomIntBetween(1001, Integer.MAX_VALUE);
    long saneBucketSpan = randomFrom(1, 30, 60, 300, 600, 900, 1800, 3600, 10800, 21600, 43200, 86400);

    // A zero cardinality must reduce to zero. The production method asserts that the
    // partition field cardinality is strictly positive, and randomNonNegativeLong() may
    // return 0, so clamp it to at least 1 to avoid a rare spurious assertion failure
    // when running with -ea.
    assertThat(TransportEstimateModelMemoryAction.reducedCardinality(0, Math.max(1L, randomNonNegativeLong()), saneBucketSpan),
        closeTo(0.0, 1e-15));

    // A partition field cardinality of 1 means no reduction at all.
    assertThat(TransportEstimateModelMemoryAction.reducedCardinality(cardinalityToReduce, 1, saneBucketSpan),
        closeTo(cardinalityToReduce, 1e-6));

    // With a 15 minute bucket span the divisor is roughly the square root of the smaller cardinality.
    assertThat(TransportEstimateModelMemoryAction.reducedCardinality(cardinalityToReduce, 1000, 900),
        closeTo(cardinalityToReduce / Math.sqrt(1000), cardinalityToReduce / 20.0));

    // Any partition field cardinality greater than 1 must reduce the input by some amount.
    assertThat(TransportEstimateModelMemoryAction.reducedCardinality(
        cardinalityToReduce, randomIntBetween(2, Integer.MAX_VALUE), saneBucketSpan),
        lessThan((double) cardinalityToReduce));

    // For a very long bucket span the exponent caps at 1, i.e. full division by the smaller cardinality.
    assertThat(TransportEstimateModelMemoryAction.reducedCardinality(cardinalityToReduce, 1000, 10000000),
        closeTo(cardinalityToReduce / 1000.0, 1e-4));
}
/**
 * Verifies that the saturating addition helper is exact for small sums and
 * caps at {@code Long.MAX_VALUE} instead of overflowing.
 */
public void testAddNonNegativeLongsWithMaxValueCap() {
    // Small sums are computed exactly.
    assertThat(TransportEstimateModelMemoryAction.addNonNegativeLongsWithMaxValueCap(0, 0), equalTo(0L));
    assertThat(TransportEstimateModelMemoryAction.addNonNegativeLongsWithMaxValueCap(0, 1), equalTo(1L));
    assertThat(TransportEstimateModelMemoryAction.addNonNegativeLongsWithMaxValueCap(1, 0), equalTo(1L));
    assertThat(TransportEstimateModelMemoryAction.addNonNegativeLongsWithMaxValueCap(1, 1), equalTo(2L));

    // Sums that would overflow a signed 64 bit long are capped at Long.MAX_VALUE.
    assertThat(TransportEstimateModelMemoryAction.addNonNegativeLongsWithMaxValueCap(Long.MAX_VALUE, Long.MAX_VALUE),
        equalTo(Long.MAX_VALUE));
    long nearMaxA = Long.MAX_VALUE - randomIntBetween(1, Integer.MAX_VALUE);
    long nearMaxB = Long.MAX_VALUE - randomIntBetween(1, Integer.MAX_VALUE);
    assertThat(TransportEstimateModelMemoryAction.addNonNegativeLongsWithMaxValueCap(nearMaxA, nearMaxB),
        equalTo(Long.MAX_VALUE));
}
/**
 * Verifies that the saturating multiplication helper returns zero whenever either
 * factor is zero, is exact for small products, and caps at {@code Long.MAX_VALUE}.
 */
public void testMultiplyNonNegativeLongsWithMaxValueCap() {
    // Zero times anything is zero - this must short-circuit before the overflow check.
    assertThat(TransportEstimateModelMemoryAction.multiplyNonNegativeLongsWithMaxValueCap(0, 0), equalTo(0L));
    assertThat(TransportEstimateModelMemoryAction.multiplyNonNegativeLongsWithMaxValueCap(randomNonNegativeLong(), 0), equalTo(0L));
    assertThat(TransportEstimateModelMemoryAction.multiplyNonNegativeLongsWithMaxValueCap(0, randomNonNegativeLong()), equalTo(0L));

    // An exact small product.
    assertThat(TransportEstimateModelMemoryAction.multiplyNonNegativeLongsWithMaxValueCap(1, 1), equalTo(1L));

    // Products that would overflow are capped at Long.MAX_VALUE.
    assertThat(TransportEstimateModelMemoryAction.multiplyNonNegativeLongsWithMaxValueCap(Long.MAX_VALUE, Long.MAX_VALUE),
        equalTo(Long.MAX_VALUE));
    assertThat(TransportEstimateModelMemoryAction.multiplyNonNegativeLongsWithMaxValueCap(
        Long.MAX_VALUE, Math.max(1L, randomNonNegativeLong())),
        equalTo(Long.MAX_VALUE));
    assertThat(TransportEstimateModelMemoryAction.multiplyNonNegativeLongsWithMaxValueCap(
        Math.max(1L, randomNonNegativeLong()), Long.MAX_VALUE),
        equalTo(Long.MAX_VALUE));

    // Zero also wins against Long.MAX_VALUE in either argument position.
    assertThat(TransportEstimateModelMemoryAction.multiplyNonNegativeLongsWithMaxValueCap(0, Long.MAX_VALUE), equalTo(0L));
    assertThat(TransportEstimateModelMemoryAction.multiplyNonNegativeLongsWithMaxValueCap(Long.MAX_VALUE, 0), equalTo(0L));
}
public static Detector createDetector(String function, String fieldName, String byFieldName,
String overFieldName, String partitionFieldName) {

View File

@ -5,14 +5,14 @@
body: >
{
"analysis_config": {
"bucket_span": "1h",
"bucket_span": "15m",
"detectors": [{"function": "max", "field_name": "responsetime", "by_field_name": "airline"}]
},
"overall_cardinality": {
"airline": 50000
}
}
- match: { model_memory_estimate: "2094mb" }
- match: { model_memory_estimate: "1573mb" }
---
"Test by field also influencer":
@ -21,7 +21,7 @@
body: >
{
"analysis_config": {
"bucket_span": "1h",
"bucket_span": "15m",
"detectors": [{"function": "max", "field_name": "responsetime", "by_field_name": "airline"}],
"influencers": [ "airline" ]
},
@ -32,7 +32,7 @@
"airline": 500
}
}
- match: { model_memory_estimate: "2094mb" }
- match: { model_memory_estimate: "1573mb" }
---
"Test by field with independent influencer":
@ -41,7 +41,7 @@
body: >
{
"analysis_config": {
"bucket_span": "1h",
"bucket_span": "15m",
"detectors": [{"function": "max", "field_name": "responsetime", "by_field_name": "airline"}],
"influencers": [ "country" ]
},
@ -52,7 +52,7 @@
"country": 500
}
}
- match: { model_memory_estimate: "2099mb" }
- match: { model_memory_estimate: "1578mb" }
---
"Test over field":
@ -61,14 +61,14 @@
body: >
{
"analysis_config": {
"bucket_span": "1h",
"bucket_span": "15m",
"detectors": [{"function": "max", "field_name": "responsetime", "over_field_name": "airline"}]
},
"overall_cardinality": {
"airline": 50000
}
}
- match: { model_memory_estimate: "35mb" }
- match: { model_memory_estimate: "47mb" }
---
"Test over field also influencer":
@ -77,7 +77,7 @@
body: >
{
"analysis_config": {
"bucket_span": "1h",
"bucket_span": "15m",
"detectors": [{"function": "max", "field_name": "responsetime", "over_field_name": "airline"}],
"influencers": [ "airline" ]
},
@ -88,7 +88,7 @@
"airline": 500
}
}
- match: { model_memory_estimate: "35mb" }
- match: { model_memory_estimate: "47mb" }
---
"Test over field with independent influencer":
@ -97,7 +97,7 @@
body: >
{
"analysis_config": {
"bucket_span": "1h",
"bucket_span": "15m",
"detectors": [{"function": "max", "field_name": "responsetime", "over_field_name": "airline"}],
"influencers": [ "country" ]
},
@ -108,7 +108,7 @@
"country": 500
}
}
- match: { model_memory_estimate: "40mb" }
- match: { model_memory_estimate: "52mb" }
---
"Test partition field":
@ -117,14 +117,14 @@
body: >
{
"analysis_config": {
"bucket_span": "1h",
"bucket_span": "15m",
"detectors": [{"function": "max", "field_name": "responsetime", "partition_field_name": "airline"}]
},
"overall_cardinality": {
"airline": 50000
}
}
- match: { model_memory_estimate: "3135mb" }
- match: { model_memory_estimate: "2354mb" }
---
"Test partition field also influencer":
@ -133,7 +133,7 @@
body: >
{
"analysis_config": {
"bucket_span": "1h",
"bucket_span": "15m",
"detectors": [{"function": "max", "field_name": "responsetime", "partition_field_name": "airline"}],
"influencers": [ "airline" ]
},
@ -144,7 +144,7 @@
"airline": 500
}
}
- match: { model_memory_estimate: "3135mb" }
- match: { model_memory_estimate: "2354mb" }
---
"Test partition field with independent influencer":
@ -153,7 +153,7 @@
body: >
{
"analysis_config": {
"bucket_span": "1h",
"bucket_span": "15m",
"detectors": [{"function": "max", "field_name": "responsetime", "partition_field_name": "airline"}],
"influencers": [ "country" ]
},
@ -164,7 +164,7 @@
"country": 500
}
}
- match: { model_memory_estimate: "3140mb" }
- match: { model_memory_estimate: "2359mb" }
---
"Test by and partition field":
@ -173,7 +173,7 @@
body: >
{
"analysis_config": {
"bucket_span": "1h",
"bucket_span": "15m",
"detectors": [{"function": "max", "field_name": "responsetime", "by_field_name": "airline", "partition_field_name": "country"}]
},
"overall_cardinality": {
@ -181,7 +181,7 @@
"country": 600
}
}
- match: { model_memory_estimate: "100060mb" }
- match: { model_memory_estimate: "3189mb" }
---
"Test by and partition fields also influencers":
@ -190,7 +190,7 @@
body: >
{
"analysis_config": {
"bucket_span": "1h",
"bucket_span": "15m",
"detectors": [{"function": "max", "field_name": "responsetime", "by_field_name": "airline", "partition_field_name": "country"}],
"influencers": [ "airline", "country" ]
},
@ -203,7 +203,7 @@
"country": 40
}
}
- match: { model_memory_estimate: "100060mb" }
- match: { model_memory_estimate: "3189mb" }
---
"Test by and partition fields with independent influencer":
@ -212,7 +212,7 @@
body: >
{
"analysis_config": {
"bucket_span": "1h",
"bucket_span": "15m",
"detectors": [{"function": "max", "field_name": "responsetime", "by_field_name": "airline", "partition_field_name": "country"}],
"influencers": [ "src_ip" ]
},
@ -224,7 +224,7 @@
"src_ip": 500
}
}
- match: { model_memory_estimate: "100065mb" }
- match: { model_memory_estimate: "3194mb" }
---
"Test over and partition field":
@ -233,7 +233,7 @@
body: >
{
"analysis_config": {
"bucket_span": "1h",
"bucket_span": "15m",
"detectors": [{"function": "max", "field_name": "responsetime", "over_field_name": "airline", "partition_field_name": "country"}]
},
"overall_cardinality": {
@ -241,7 +241,7 @@
"country": 600
}
}
- match: { model_memory_estimate: "1220mb" }
- match: { model_memory_estimate: "113mb" }
---
"Test over and partition fields also influencers":
@ -250,7 +250,7 @@
body: >
{
"analysis_config": {
"bucket_span": "1h",
"bucket_span": "15m",
"detectors": [{"function": "max", "field_name": "responsetime", "over_field_name": "airline", "partition_field_name": "country"}],
"influencers": [ "airline", "country" ]
},
@ -263,7 +263,7 @@
"country": 40
}
}
- match: { model_memory_estimate: "1220mb" }
- match: { model_memory_estimate: "113mb" }
---
"Test over and partition fields with independent influencer":
@ -272,7 +272,7 @@
body: >
{
"analysis_config": {
"bucket_span": "1h",
"bucket_span": "15m",
"detectors": [{"function": "max", "field_name": "responsetime", "over_field_name": "airline", "partition_field_name": "country"}],
"influencers": [ "src_ip" ]
},
@ -284,5 +284,40 @@
"src_ip": 500
}
}
- match: { model_memory_estimate: "1225mb" }
- match: { model_memory_estimate: "118mb" }
---
# Regression test for the bug fixed in this commit: a required overall cardinality
# that the caller failed to supply used to produce a 500 response. It must now be
# rejected up front with a clear IllegalArgumentException-style validation error.
"Test missing overall cardinality":
- do:
    catch: /\[Overall\] cardinality estimate required for \[by_field_name\] \[airline\] but not supplied/
    ml.estimate_model_memory:
      body: >
        {
          "analysis_config": {
            "bucket_span": "15m",
            "detectors": [{"function": "max", "field_name": "responsetime", "by_field_name": "airline"}]
          },
          "overall_cardinality": {
            "wrong": 50000
          }
        }
---
# Same regression scenario for the per-bucket maximum cardinality: an influencer
# ("country") needs a max_bucket_cardinality entry, but only an unrelated key is
# supplied, so the request must fail with a validation error rather than a 500.
"Test missing max bucket cardinality":
- do:
    catch: /\[Bucket max\] cardinality estimate required for \[influencers\] \[country\] but not supplied/
    ml.estimate_model_memory:
      body: >
        {
          "analysis_config": {
            "bucket_span": "15m",
            "detectors": [{"function": "max", "field_name": "responsetime", "by_field_name": "airline"}],
            "influencers": [ "country" ]
          },
          "overall_cardinality": {
            "airline": 50000
          },
          "max_bucket_cardinality": {
            "wrong": 500
          }
        }