[ML] Add new categorization stats to model_size_stats (#52009)

This change adds support for the following new model_size_stats
fields:

- categorized_doc_count
- total_category_count
- frequent_category_count
- rare_category_count
- dead_category_count
- categorization_status

Backport of #51879
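
For illustration only (not part of this commit): a minimal sketch that populates and reads the new fields through the builder setters and getters added below. The job id and all values are made up.

import org.elasticsearch.client.ml.job.process.ModelSizeStats;
import org.elasticsearch.client.ml.job.process.ModelSizeStats.CategorizationStatus;

public class CategorizationStatsExample {
    public static void main(String[] args) {
        // Build stats carrying the new categorization fields.
        ModelSizeStats stats = new ModelSizeStats.Builder("my-job")
            .setCategorizedDocCount(1000)
            .setTotalCategoryCount(15)
            .setFrequentCategoryCount(5)
            .setRareCategoryCount(2)
            .setDeadCategoryCount(1)
            .setCategorizationStatus(CategorizationStatus.WARN)
            .build();

        // Read them back through the new getters; the status enum
        // renders in lower case when printed or serialized.
        System.out.println(stats.getTotalCategoryCount());   // 15
        System.out.println(stats.getCategorizationStatus()); // warn
    }
}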
Author: David Roberts, 2020-02-10 09:10:50 +00:00 (committed by GitHub)
Parent: 9b7e688f5b
Commit: 1cefafdd14
11 changed files with 495 additions and 20 deletions


@ -33,7 +33,7 @@ import java.util.Locale;
import java.util.Objects;
/**
* Provide access to the C++ model memory usage numbers for the Java process.
* Provide access to the C++ model size stats for the Java process.
*/
public class ModelSizeStats implements ToXContentObject {
@ -54,6 +54,12 @@ public class ModelSizeStats implements ToXContentObject {
public static final ParseField TOTAL_PARTITION_FIELD_COUNT_FIELD = new ParseField("total_partition_field_count");
public static final ParseField BUCKET_ALLOCATION_FAILURES_COUNT_FIELD = new ParseField("bucket_allocation_failures_count");
public static final ParseField MEMORY_STATUS_FIELD = new ParseField("memory_status");
public static final ParseField CATEGORIZED_DOC_COUNT_FIELD = new ParseField("categorized_doc_count");
public static final ParseField TOTAL_CATEGORY_COUNT_FIELD = new ParseField("total_category_count");
public static final ParseField FREQUENT_CATEGORY_COUNT_FIELD = new ParseField("frequent_category_count");
public static final ParseField RARE_CATEGORY_COUNT_FIELD = new ParseField("rare_category_count");
public static final ParseField DEAD_CATEGORY_COUNT_FIELD = new ParseField("dead_category_count");
public static final ParseField CATEGORIZATION_STATUS_FIELD = new ParseField("categorization_status");
public static final ParseField LOG_TIME_FIELD = new ParseField("log_time");
public static final ParseField TIMESTAMP_FIELD = new ParseField("timestamp");
@ -69,6 +75,14 @@ public class ModelSizeStats implements ToXContentObject {
PARSER.declareLong(Builder::setTotalByFieldCount, TOTAL_BY_FIELD_COUNT_FIELD);
PARSER.declareLong(Builder::setTotalOverFieldCount, TOTAL_OVER_FIELD_COUNT_FIELD);
PARSER.declareLong(Builder::setTotalPartitionFieldCount, TOTAL_PARTITION_FIELD_COUNT_FIELD);
PARSER.declareField(Builder::setMemoryStatus, p -> MemoryStatus.fromString(p.text()), MEMORY_STATUS_FIELD, ValueType.STRING);
PARSER.declareLong(Builder::setCategorizedDocCount, CATEGORIZED_DOC_COUNT_FIELD);
PARSER.declareLong(Builder::setTotalCategoryCount, TOTAL_CATEGORY_COUNT_FIELD);
PARSER.declareLong(Builder::setFrequentCategoryCount, FREQUENT_CATEGORY_COUNT_FIELD);
PARSER.declareLong(Builder::setRareCategoryCount, RARE_CATEGORY_COUNT_FIELD);
PARSER.declareLong(Builder::setDeadCategoryCount, DEAD_CATEGORY_COUNT_FIELD);
PARSER.declareField(Builder::setCategorizationStatus,
p -> CategorizationStatus.fromString(p.text()), CATEGORIZATION_STATUS_FIELD, ValueType.STRING);
PARSER.declareField(Builder::setLogTime,
(p) -> TimeUtil.parseTimeField(p, LOG_TIME_FIELD.getPreferredName()),
LOG_TIME_FIELD,
@ -77,7 +91,6 @@ public class ModelSizeStats implements ToXContentObject {
(p) -> TimeUtil.parseTimeField(p, TIMESTAMP_FIELD.getPreferredName()),
TIMESTAMP_FIELD,
ValueType.VALUE);
PARSER.declareField(Builder::setMemoryStatus, p -> MemoryStatus.fromString(p.text()), MEMORY_STATUS_FIELD, ValueType.STRING);
}
/**
@ -99,6 +112,23 @@ public class ModelSizeStats implements ToXContentObject {
}
}
/**
* The status of categorization for a job. OK is the default; WARN
* means that an inappropriate number of categories is being found
*/
public enum CategorizationStatus {
OK, WARN;
public static CategorizationStatus fromString(String statusName) {
return valueOf(statusName.trim().toUpperCase(Locale.ROOT));
}
@Override
public String toString() {
return name().toLowerCase(Locale.ROOT);
}
}
private final String jobId;
private final long modelBytes;
private final Long modelBytesExceeded;
@ -108,12 +138,20 @@ public class ModelSizeStats implements ToXContentObject {
private final long totalPartitionFieldCount;
private final long bucketAllocationFailuresCount;
private final MemoryStatus memoryStatus;
private final long categorizedDocCount;
private final long totalCategoryCount;
private final long frequentCategoryCount;
private final long rareCategoryCount;
private final long deadCategoryCount;
private final CategorizationStatus categorizationStatus;
private final Date timestamp;
private final Date logTime;
private ModelSizeStats(String jobId, long modelBytes, Long modelBytesExceeded, Long modelBytesMemoryLimit, long totalByFieldCount,
long totalOverFieldCount, long totalPartitionFieldCount, long bucketAllocationFailuresCount,
MemoryStatus memoryStatus, Date timestamp, Date logTime) {
MemoryStatus memoryStatus, long categorizedDocCount, long totalCategoryCount, long frequentCategoryCount,
long rareCategoryCount, long deadCategoryCount, CategorizationStatus categorizationStatus,
Date timestamp, Date logTime) {
this.jobId = jobId;
this.modelBytes = modelBytes;
this.modelBytesExceeded = modelBytesExceeded;
@ -123,6 +161,12 @@ public class ModelSizeStats implements ToXContentObject {
this.totalPartitionFieldCount = totalPartitionFieldCount;
this.bucketAllocationFailuresCount = bucketAllocationFailuresCount;
this.memoryStatus = memoryStatus;
this.categorizedDocCount = categorizedDocCount;
this.totalCategoryCount = totalCategoryCount;
this.frequentCategoryCount = frequentCategoryCount;
this.rareCategoryCount = rareCategoryCount;
this.deadCategoryCount = deadCategoryCount;
this.categorizationStatus = categorizationStatus;
this.timestamp = timestamp;
this.logTime = logTime;
}
@ -145,6 +189,12 @@ public class ModelSizeStats implements ToXContentObject {
builder.field(TOTAL_PARTITION_FIELD_COUNT_FIELD.getPreferredName(), totalPartitionFieldCount);
builder.field(BUCKET_ALLOCATION_FAILURES_COUNT_FIELD.getPreferredName(), bucketAllocationFailuresCount);
builder.field(MEMORY_STATUS_FIELD.getPreferredName(), memoryStatus);
builder.field(CATEGORIZED_DOC_COUNT_FIELD.getPreferredName(), categorizedDocCount);
builder.field(TOTAL_CATEGORY_COUNT_FIELD.getPreferredName(), totalCategoryCount);
builder.field(FREQUENT_CATEGORY_COUNT_FIELD.getPreferredName(), frequentCategoryCount);
builder.field(RARE_CATEGORY_COUNT_FIELD.getPreferredName(), rareCategoryCount);
builder.field(DEAD_CATEGORY_COUNT_FIELD.getPreferredName(), deadCategoryCount);
builder.field(CATEGORIZATION_STATUS_FIELD.getPreferredName(), categorizationStatus);
builder.timeField(LOG_TIME_FIELD.getPreferredName(), LOG_TIME_FIELD.getPreferredName() + "_string", logTime.getTime());
if (timestamp != null) {
builder.timeField(TIMESTAMP_FIELD.getPreferredName(), TIMESTAMP_FIELD.getPreferredName() + "_string", timestamp.getTime());
@ -190,6 +240,30 @@ public class ModelSizeStats implements ToXContentObject {
return memoryStatus;
}
public long getCategorizedDocCount() {
return categorizedDocCount;
}
public long getTotalCategoryCount() {
return totalCategoryCount;
}
public long getFrequentCategoryCount() {
return frequentCategoryCount;
}
public long getRareCategoryCount() {
return rareCategoryCount;
}
public long getDeadCategoryCount() {
return deadCategoryCount;
}
public CategorizationStatus getCategorizationStatus() {
return categorizationStatus;
}
/**
* The timestamp of the last processed record when this instance was created.
*
@ -211,7 +285,8 @@ public class ModelSizeStats implements ToXContentObject {
@Override
public int hashCode() {
return Objects.hash(jobId, modelBytes, modelBytesExceeded, modelBytesMemoryLimit, totalByFieldCount, totalOverFieldCount,
totalPartitionFieldCount, this.bucketAllocationFailuresCount, memoryStatus, timestamp, logTime);
totalPartitionFieldCount, this.bucketAllocationFailuresCount, memoryStatus, categorizedDocCount, totalCategoryCount,
frequentCategoryCount, rareCategoryCount, deadCategoryCount, categorizationStatus, timestamp, logTime);
}
/**
@ -233,7 +308,14 @@ public class ModelSizeStats implements ToXContentObject {
&& Objects.equals(this.modelBytesMemoryLimit, that.modelBytesMemoryLimit) && this.totalByFieldCount == that.totalByFieldCount
&& this.totalOverFieldCount == that.totalOverFieldCount && this.totalPartitionFieldCount == that.totalPartitionFieldCount
&& this.bucketAllocationFailuresCount == that.bucketAllocationFailuresCount
&& Objects.equals(this.memoryStatus, that.memoryStatus) && Objects.equals(this.timestamp, that.timestamp)
&& Objects.equals(this.memoryStatus, that.memoryStatus)
&& this.categorizedDocCount == that.categorizedDocCount
&& this.totalCategoryCount == that.totalCategoryCount
&& this.frequentCategoryCount == that.frequentCategoryCount
&& this.rareCategoryCount == that.rareCategoryCount
&& this.deadCategoryCount == that.deadCategoryCount
&& Objects.equals(this.categorizationStatus, that.categorizationStatus)
&& Objects.equals(this.timestamp, that.timestamp)
&& Objects.equals(this.logTime, that.logTime)
&& Objects.equals(this.jobId, that.jobId);
}
@ -249,12 +331,19 @@ public class ModelSizeStats implements ToXContentObject {
private long totalPartitionFieldCount;
private long bucketAllocationFailuresCount;
private MemoryStatus memoryStatus;
private long categorizedDocCount;
private long totalCategoryCount;
private long frequentCategoryCount;
private long rareCategoryCount;
private long deadCategoryCount;
private CategorizationStatus categorizationStatus;
private Date timestamp;
private Date logTime;
public Builder(String jobId) {
this.jobId = jobId;
memoryStatus = MemoryStatus.OK;
categorizationStatus = CategorizationStatus.OK;
logTime = new Date();
}
@ -268,6 +357,12 @@ public class ModelSizeStats implements ToXContentObject {
this.totalPartitionFieldCount = modelSizeStats.totalPartitionFieldCount;
this.bucketAllocationFailuresCount = modelSizeStats.bucketAllocationFailuresCount;
this.memoryStatus = modelSizeStats.memoryStatus;
this.categorizedDocCount = modelSizeStats.categorizedDocCount;
this.totalCategoryCount = modelSizeStats.totalCategoryCount;
this.frequentCategoryCount = modelSizeStats.frequentCategoryCount;
this.rareCategoryCount = modelSizeStats.rareCategoryCount;
this.deadCategoryCount = modelSizeStats.deadCategoryCount;
this.categorizationStatus = modelSizeStats.categorizationStatus;
this.timestamp = modelSizeStats.timestamp;
this.logTime = modelSizeStats.logTime;
}
@ -313,6 +408,37 @@ public class ModelSizeStats implements ToXContentObject {
return this;
}
public Builder setCategorizedDocCount(long categorizedDocCount) {
this.categorizedDocCount = categorizedDocCount;
return this;
}
public Builder setTotalCategoryCount(long totalCategoryCount) {
this.totalCategoryCount = totalCategoryCount;
return this;
}
public Builder setFrequentCategoryCount(long frequentCategoryCount) {
this.frequentCategoryCount = frequentCategoryCount;
return this;
}
public Builder setRareCategoryCount(long rareCategoryCount) {
this.rareCategoryCount = rareCategoryCount;
return this;
}
public Builder setDeadCategoryCount(long deadCategoryCount) {
this.deadCategoryCount = deadCategoryCount;
return this;
}
public Builder setCategorizationStatus(CategorizationStatus categorizationStatus) {
Objects.requireNonNull(categorizationStatus, "[" + CATEGORIZATION_STATUS_FIELD.getPreferredName() + "] must not be null");
this.categorizationStatus = categorizationStatus;
return this;
}
public Builder setTimestamp(Date timestamp) {
this.timestamp = timestamp;
return this;
@ -325,7 +451,8 @@ public class ModelSizeStats implements ToXContentObject {
public ModelSizeStats build() {
return new ModelSizeStats(jobId, modelBytes, modelBytesExceeded, modelBytesMemoryLimit, totalByFieldCount, totalOverFieldCount,
totalPartitionFieldCount, bucketAllocationFailuresCount, memoryStatus, timestamp, logTime);
totalPartitionFieldCount, bucketAllocationFailuresCount, memoryStatus, categorizedDocCount, totalCategoryCount,
frequentCategoryCount, rareCategoryCount, deadCategoryCount, categorizationStatus, timestamp, logTime);
}
}
}


@ -24,6 +24,7 @@ import org.elasticsearch.test.AbstractXContentTestCase;
import java.util.Date;
import static org.elasticsearch.client.ml.job.process.ModelSizeStats.CategorizationStatus;
import static org.elasticsearch.client.ml.job.process.ModelSizeStats.MemoryStatus;
public class ModelSizeStatsTests extends AbstractXContentTestCase<ModelSizeStats> {
@ -38,6 +39,12 @@ public class ModelSizeStatsTests extends AbstractXContentTestCase<ModelSizeStats
assertEquals(0, stats.getTotalPartitionFieldCount());
assertEquals(0, stats.getBucketAllocationFailuresCount());
assertEquals(MemoryStatus.OK, stats.getMemoryStatus());
assertEquals(0, stats.getCategorizedDocCount());
assertEquals(0, stats.getTotalCategoryCount());
assertEquals(0, stats.getFrequentCategoryCount());
assertEquals(0, stats.getRareCategoryCount());
assertEquals(0, stats.getDeadCategoryCount());
assertEquals(CategorizationStatus.OK, stats.getCategorizationStatus());
}
public void testSetMemoryStatus_GivenNull() {
@ -84,15 +91,33 @@ public class ModelSizeStatsTests extends AbstractXContentTestCase<ModelSizeStats
if (randomBoolean()) {
stats.setTotalPartitionFieldCount(randomNonNegativeLong());
}
if (randomBoolean()) {
stats.setMemoryStatus(randomFrom(MemoryStatus.values()));
}
if (randomBoolean()) {
stats.setCategorizedDocCount(randomNonNegativeLong());
}
if (randomBoolean()) {
stats.setTotalCategoryCount(randomNonNegativeLong());
}
if (randomBoolean()) {
stats.setFrequentCategoryCount(randomNonNegativeLong());
}
if (randomBoolean()) {
stats.setRareCategoryCount(randomNonNegativeLong());
}
if (randomBoolean()) {
stats.setDeadCategoryCount(randomNonNegativeLong());
}
if (randomBoolean()) {
stats.setCategorizationStatus(randomFrom(CategorizationStatus.values()));
}
if (randomBoolean()) {
stats.setLogTime(new Date(TimeValue.parseTimeValue(randomTimeValue(), "test").millis()));
}
if (randomBoolean()) {
stats.setTimestamp(new Date(TimeValue.parseTimeValue(randomTimeValue(), "test").millis()));
}
if (randomBoolean()) {
stats.setMemoryStatus(randomFrom(MemoryStatus.values()));
}
return stats.build();
}


@ -202,6 +202,35 @@ model. It has the following properties:
processed due to insufficient model memory. This situation is also signified
by a `hard_limit: memory_status` property value.
`model_size_stats`.`categorized_doc_count`:::
(long) The number of documents that have had a field categorized.
`model_size_stats`.`categorization_status`:::
(string) The status of categorization for this job.
Contains one of the following values.
+
--
* `ok`: Categorization is performing acceptably well (or not being
used at all).
* `warn`: Categorization is detecting a distribution of categories
that suggests the input data is inappropriate for categorization.
Problems could be that there is only one category, more than 90% of
categories are rare, the number of categories is greater than 50% of
the number of categorized documents, there are no frequently
matched categories, or more than 50% of categories are dead.
--
`model_size_stats`.`dead_category_count`:::
(long) The number of categories created by categorization that will
never be assigned again because another category's definition
makes it a superset of the dead category. (Dead categories are a
side effect of the way categorization has no prior training.)
`model_size_stats`.`frequent_category_count`:::
(long) The number of categories that match more than 1% of categorized
documents.
`model_size_stats`.`job_id`:::
(string)
include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection]
@ -233,6 +262,9 @@ this value indicates the latest size.
`model_size_stats`.`model_bytes_memory_limit`:::
(long) The upper limit for memory usage, checked on increasing values.
`model_size_stats`.`rare_category_count`:::
(long) The number of categories that match just one categorized document.
`model_size_stats`.`result_type`:::
(string) For internal use. The type of result.
@ -240,6 +272,9 @@ this value indicates the latest size.
(long) The number of `by` field values that were analyzed by the models. This
value is cumulative for all detectors.
`model_size_stats`.`total_category_count`:::
(long) The number of categories created by categorization.
`model_size_stats`.`total_over_field_count`:::
(long) The number of `over` field values that were analyzed by the models. This
value is cumulative for all detectors.
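
The `warn` conditions listed under `categorization_status` above can be restated over these counts. The sketch below is illustrative only; the real status is computed by the C++ autodetect process, and the thresholds come from the documentation text rather than from code.

class CategorizationStatusSketch {
    // Rough Java restatement of the documented warn criteria.
    static boolean suggestsWarn(long categorizedDocCount, long totalCategoryCount,
                                long frequentCategoryCount, long rareCategoryCount,
                                long deadCategoryCount) {
        if (categorizedDocCount == 0 || totalCategoryCount == 0) {
            return false; // nothing has been categorized yet
        }
        return totalCategoryCount == 1                          // only one category
            || rareCategoryCount > 0.9 * totalCategoryCount     // more than 90% of categories are rare
            || totalCategoryCount > 0.5 * categorizedDocCount   // categories exceed 50% of categorized docs
            || frequentCategoryCount == 0                       // no frequently matched categories
            || deadCategoryCount > 0.5 * totalCategoryCount;    // more than 50% of categories are dead
    }
}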
@ -376,6 +411,12 @@ The API returns the following results:
"total_partition_field_count" : 2,
"bucket_allocation_failures_count" : 0,
"memory_status" : "ok",
"categorized_doc_count" : 0,
"total_category_count" : 0,
"frequent_category_count" : 0,
"rare_category_count" : 0,
"dead_category_count" : 0,
"categorization_status" : "ok",
"log_time" : 1576017596000,
"timestamp" : 1580410800000
},


@ -89,6 +89,35 @@ the snapshot was created for.
(long) The number of buckets for which entities were not processed due to
memory limit constraints.
`model_size_stats`.`categorized_doc_count`:::
(long) The number of documents that have had a field categorized.
`model_size_stats`.`categorization_status`:::
(string) The status of categorization for this job.
Contains one of the following values.
+
--
* `ok`: Categorization is performing acceptably well (or not being
used at all).
* `warn`: Categorization is detecting a distribution of categories
that suggests the input data is inappropriate for categorization.
Problems could be that there is only one category, more than 90% of
categories are rare, the number of categories is greater than 50% of
the number of categorized documents, there are no frequently
matched categories, or more than 50% of categories are dead.
--
`model_size_stats`.`dead_category_count`:::
(long) The number of categories created by categorization that will
never be assigned again because another category's definition
makes it a superset of the dead category. (Dead categories are a
side effect of the way categorization has no prior training.)
`model_size_stats`.`frequent_category_count`:::
(long) The number of categories that match more than 1% of categorized
documents.
`model_size_stats`.`job_id`:::
(string)
include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection]
@ -119,6 +148,9 @@ reclaim space.
`model_size_stats`.`model_bytes_memory_limit`:::
(long) The upper limit for memory usage, checked on increasing values.
`model_size_stats`.`rare_category_count`:::
(long) The number of categories that match just one categorized document.
`model_size_stats`.`result_type`:::
(string) Internal. This value is always set to `model_size_stats`.
@ -130,6 +162,9 @@ the bucket timestamp of the data.
(long) The number of _by_ field values analyzed. Note that these are counted
separately for each detector and partition.
`model_size_stats`.`total_category_count`:::
(long) The number of categories created by categorization.
`model_size_stats`.`total_over_field_count`:::
(long) The number of _over_ field values analyzed. Note that these are counted
separately for each detector and partition.
@ -187,6 +222,12 @@ In this example, the API provides a single result:
"total_partition_field_count" : 2,
"bucket_allocation_failures_count" : 0,
"memory_status" : "ok",
"categorized_doc_count" : 0,
"total_category_count" : 0,
"frequent_category_count" : 0,
"rare_category_count" : 0,
"dead_category_count" : 0,
"categorization_status" : "ok",
"log_time" : 1575402237000,
"timestamp" : 1576965600000
},


@ -89,6 +89,12 @@ When the operation is complete, you receive the following results:
"total_partition_field_count" : 2,
"bucket_allocation_failures_count" : 0,
"memory_status" : "ok",
"categorized_doc_count" : 0,
"total_category_count" : 0,
"frequent_category_count" : 0,
"rare_category_count" : 0,
"dead_category_count" : 0,
"categorization_status" : "ok",
"log_time" : 1575402237000,
"timestamp" : 1576965600000
},


@ -22,10 +22,9 @@ import java.io.IOException;
import java.util.Date;
import java.util.Locale;
import java.util.Objects;
import java.util.function.BiConsumer;
/**
* Provide access to the C++ model memory usage numbers for the Java process.
* Provide access to the C++ model size stats for the Java process.
*/
public class ModelSizeStats implements ToXContentObject, Writeable {
@ -46,6 +45,12 @@ public class ModelSizeStats implements ToXContentObject, Writeable {
public static final ParseField TOTAL_PARTITION_FIELD_COUNT_FIELD = new ParseField("total_partition_field_count");
public static final ParseField BUCKET_ALLOCATION_FAILURES_COUNT_FIELD = new ParseField("bucket_allocation_failures_count");
public static final ParseField MEMORY_STATUS_FIELD = new ParseField("memory_status");
public static final ParseField CATEGORIZED_DOC_COUNT_FIELD = new ParseField("categorized_doc_count");
public static final ParseField TOTAL_CATEGORY_COUNT_FIELD = new ParseField("total_category_count");
public static final ParseField FREQUENT_CATEGORY_COUNT_FIELD = new ParseField("frequent_category_count");
public static final ParseField RARE_CATEGORY_COUNT_FIELD = new ParseField("rare_category_count");
public static final ParseField DEAD_CATEGORY_COUNT_FIELD = new ParseField("dead_category_count");
public static final ParseField CATEGORIZATION_STATUS_FIELD = new ParseField("categorization_status");
public static final ParseField LOG_TIME_FIELD = new ParseField("log_time");
public static final ParseField TIMESTAMP_FIELD = new ParseField("timestamp");
@ -65,12 +70,18 @@ public class ModelSizeStats implements ToXContentObject, Writeable {
parser.declareLong(Builder::setTotalByFieldCount, TOTAL_BY_FIELD_COUNT_FIELD);
parser.declareLong(Builder::setTotalOverFieldCount, TOTAL_OVER_FIELD_COUNT_FIELD);
parser.declareLong(Builder::setTotalPartitionFieldCount, TOTAL_PARTITION_FIELD_COUNT_FIELD);
parser.declareField(Builder::setMemoryStatus, p -> MemoryStatus.fromString(p.text()), MEMORY_STATUS_FIELD, ValueType.STRING);
parser.declareLong(Builder::setCategorizedDocCount, CATEGORIZED_DOC_COUNT_FIELD);
parser.declareLong(Builder::setTotalCategoryCount, TOTAL_CATEGORY_COUNT_FIELD);
parser.declareLong(Builder::setFrequentCategoryCount, FREQUENT_CATEGORY_COUNT_FIELD);
parser.declareLong(Builder::setRareCategoryCount, RARE_CATEGORY_COUNT_FIELD);
parser.declareLong(Builder::setDeadCategoryCount, DEAD_CATEGORY_COUNT_FIELD);
parser.declareField(Builder::setCategorizationStatus,
p -> CategorizationStatus.fromString(p.text()), CATEGORIZATION_STATUS_FIELD, ValueType.STRING);
parser.declareField(Builder::setLogTime,
p -> TimeUtils.parseTimeField(p, LOG_TIME_FIELD.getPreferredName()), LOG_TIME_FIELD, ValueType.VALUE);
parser.declareField(Builder::setTimestamp,
p -> TimeUtils.parseTimeField(p, TIMESTAMP_FIELD.getPreferredName()), TIMESTAMP_FIELD, ValueType.VALUE);
BiConsumer<Builder, MemoryStatus> setMemoryStatus = Builder::setMemoryStatus;
parser.declareField(setMemoryStatus, p -> MemoryStatus.fromString(p.text()), MEMORY_STATUS_FIELD, ValueType.STRING);
return parser;
}
@ -103,6 +114,32 @@ public class ModelSizeStats implements ToXContentObject, Writeable {
}
}
/**
* The status of categorization for a job. OK is the default; WARN
* means that an inappropriate number of categories is being found
*/
public enum CategorizationStatus implements Writeable {
OK, WARN;
public static CategorizationStatus fromString(String statusName) {
return valueOf(statusName.trim().toUpperCase(Locale.ROOT));
}
public static CategorizationStatus readFromStream(StreamInput in) throws IOException {
return in.readEnum(CategorizationStatus.class);
}
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeEnum(this);
}
@Override
public String toString() {
return name().toLowerCase(Locale.ROOT);
}
}
private final String jobId;
private final long modelBytes;
private final Long modelBytesExceeded;
@ -112,12 +149,19 @@ public class ModelSizeStats implements ToXContentObject, Writeable {
private final long totalPartitionFieldCount;
private final long bucketAllocationFailuresCount;
private final MemoryStatus memoryStatus;
private final long categorizedDocCount;
private final long totalCategoryCount;
private final long frequentCategoryCount;
private final long rareCategoryCount;
private final long deadCategoryCount;
private final CategorizationStatus categorizationStatus;
private final Date timestamp;
private final Date logTime;
private ModelSizeStats(String jobId, long modelBytes, Long modelBytesExceeded, Long modelBytesMemoryLimit, long totalByFieldCount,
long totalOverFieldCount, long totalPartitionFieldCount, long bucketAllocationFailuresCount,
MemoryStatus memoryStatus,
MemoryStatus memoryStatus, long categorizedDocCount, long totalCategoryCount, long frequentCategoryCount,
long rareCategoryCount, long deadCategoryCount, CategorizationStatus categorizationStatus,
Date timestamp, Date logTime) {
this.jobId = jobId;
this.modelBytes = modelBytes;
@ -128,6 +172,12 @@ public class ModelSizeStats implements ToXContentObject, Writeable {
this.totalPartitionFieldCount = totalPartitionFieldCount;
this.bucketAllocationFailuresCount = bucketAllocationFailuresCount;
this.memoryStatus = memoryStatus;
this.categorizedDocCount = categorizedDocCount;
this.totalCategoryCount = totalCategoryCount;
this.frequentCategoryCount = frequentCategoryCount;
this.rareCategoryCount = rareCategoryCount;
this.deadCategoryCount = deadCategoryCount;
this.categorizationStatus = categorizationStatus;
this.timestamp = timestamp;
this.logTime = logTime;
}
@ -150,6 +200,21 @@ public class ModelSizeStats implements ToXContentObject, Writeable {
totalPartitionFieldCount = in.readVLong();
bucketAllocationFailuresCount = in.readVLong();
memoryStatus = MemoryStatus.readFromStream(in);
if (in.getVersion().onOrAfter(Version.V_7_7_0)) {
categorizedDocCount = in.readVLong();
totalCategoryCount = in.readVLong();
frequentCategoryCount = in.readVLong();
rareCategoryCount = in.readVLong();
deadCategoryCount = in.readVLong();
categorizationStatus = CategorizationStatus.readFromStream(in);
} else {
categorizedDocCount = 0;
totalCategoryCount = 0;
frequentCategoryCount = 0;
rareCategoryCount = 0;
deadCategoryCount = 0;
categorizationStatus = CategorizationStatus.OK;
}
logTime = new Date(in.readVLong());
timestamp = in.readBoolean() ? new Date(in.readVLong()) : null;
}
@ -177,6 +242,14 @@ public class ModelSizeStats implements ToXContentObject, Writeable {
out.writeVLong(totalPartitionFieldCount);
out.writeVLong(bucketAllocationFailuresCount);
memoryStatus.writeTo(out);
if (out.getVersion().onOrAfter(Version.V_7_7_0)) {
out.writeVLong(categorizedDocCount);
out.writeVLong(totalCategoryCount);
out.writeVLong(frequentCategoryCount);
out.writeVLong(rareCategoryCount);
out.writeVLong(deadCategoryCount);
categorizationStatus.writeTo(out);
}
out.writeVLong(logTime.getTime());
boolean hasTimestamp = timestamp != null;
out.writeBoolean(hasTimestamp);
@ -208,6 +281,12 @@ public class ModelSizeStats implements ToXContentObject, Writeable {
builder.field(TOTAL_PARTITION_FIELD_COUNT_FIELD.getPreferredName(), totalPartitionFieldCount);
builder.field(BUCKET_ALLOCATION_FAILURES_COUNT_FIELD.getPreferredName(), bucketAllocationFailuresCount);
builder.field(MEMORY_STATUS_FIELD.getPreferredName(), memoryStatus);
builder.field(CATEGORIZED_DOC_COUNT_FIELD.getPreferredName(), categorizedDocCount);
builder.field(TOTAL_CATEGORY_COUNT_FIELD.getPreferredName(), totalCategoryCount);
builder.field(FREQUENT_CATEGORY_COUNT_FIELD.getPreferredName(), frequentCategoryCount);
builder.field(RARE_CATEGORY_COUNT_FIELD.getPreferredName(), rareCategoryCount);
builder.field(DEAD_CATEGORY_COUNT_FIELD.getPreferredName(), deadCategoryCount);
builder.field(CATEGORIZATION_STATUS_FIELD.getPreferredName(), categorizationStatus);
builder.timeField(LOG_TIME_FIELD.getPreferredName(), LOG_TIME_FIELD.getPreferredName() + "_string", logTime.getTime());
if (timestamp != null) {
builder.timeField(TIMESTAMP_FIELD.getPreferredName(), TIMESTAMP_FIELD.getPreferredName() + "_string", timestamp.getTime());
@ -252,6 +331,30 @@ public class ModelSizeStats implements ToXContentObject, Writeable {
return memoryStatus;
}
public long getCategorizedDocCount() {
return categorizedDocCount;
}
public long getTotalCategoryCount() {
return totalCategoryCount;
}
public long getFrequentCategoryCount() {
return frequentCategoryCount;
}
public long getRareCategoryCount() {
return rareCategoryCount;
}
public long getDeadCategoryCount() {
return deadCategoryCount;
}
public CategorizationStatus getCategorizationStatus() {
return categorizationStatus;
}
/**
* The timestamp of the last processed record when this instance was created.
* @return The record time
@ -272,7 +375,8 @@ public class ModelSizeStats implements ToXContentObject, Writeable {
public int hashCode() {
// this.id excluded here as it is generated by the datastore
return Objects.hash(jobId, modelBytes, modelBytesExceeded, modelBytesMemoryLimit, totalByFieldCount, totalOverFieldCount,
totalPartitionFieldCount, this.bucketAllocationFailuresCount, memoryStatus, timestamp, logTime);
totalPartitionFieldCount, bucketAllocationFailuresCount, memoryStatus, categorizedDocCount, totalCategoryCount,
frequentCategoryCount, rareCategoryCount, deadCategoryCount, categorizationStatus, timestamp, logTime);
}
/**
@ -295,7 +399,14 @@ public class ModelSizeStats implements ToXContentObject, Writeable {
&& this.totalByFieldCount == that.totalByFieldCount
&& this.totalOverFieldCount == that.totalOverFieldCount && this.totalPartitionFieldCount == that.totalPartitionFieldCount
&& this.bucketAllocationFailuresCount == that.bucketAllocationFailuresCount
&& Objects.equals(this.memoryStatus, that.memoryStatus) && Objects.equals(this.timestamp, that.timestamp)
&& Objects.equals(this.memoryStatus, that.memoryStatus)
&& Objects.equals(this.categorizedDocCount, that.categorizedDocCount)
&& Objects.equals(this.totalCategoryCount, that.totalCategoryCount)
&& Objects.equals(this.frequentCategoryCount, that.frequentCategoryCount)
&& Objects.equals(this.rareCategoryCount, that.rareCategoryCount)
&& Objects.equals(this.deadCategoryCount, that.deadCategoryCount)
&& Objects.equals(this.categorizationStatus, that.categorizationStatus)
&& Objects.equals(this.timestamp, that.timestamp)
&& Objects.equals(this.logTime, that.logTime)
&& Objects.equals(this.jobId, that.jobId);
}
@ -311,12 +422,19 @@ public class ModelSizeStats implements ToXContentObject, Writeable {
private long totalPartitionFieldCount;
private long bucketAllocationFailuresCount;
private MemoryStatus memoryStatus;
private long categorizedDocCount;
private long totalCategoryCount;
private long frequentCategoryCount;
private long rareCategoryCount;
private long deadCategoryCount;
private CategorizationStatus categorizationStatus;
private Date timestamp;
private Date logTime;
public Builder(String jobId) {
this.jobId = jobId;
memoryStatus = MemoryStatus.OK;
categorizationStatus = CategorizationStatus.OK;
logTime = new Date();
}
@ -330,6 +448,12 @@ public class ModelSizeStats implements ToXContentObject, Writeable {
this.totalPartitionFieldCount = modelSizeStats.totalPartitionFieldCount;
this.bucketAllocationFailuresCount = modelSizeStats.bucketAllocationFailuresCount;
this.memoryStatus = modelSizeStats.memoryStatus;
this.categorizedDocCount = modelSizeStats.categorizedDocCount;
this.totalCategoryCount = modelSizeStats.totalCategoryCount;
this.frequentCategoryCount = modelSizeStats.frequentCategoryCount;
this.rareCategoryCount = modelSizeStats.rareCategoryCount;
this.deadCategoryCount = modelSizeStats.deadCategoryCount;
this.categorizationStatus = modelSizeStats.categorizationStatus;
this.timestamp = modelSizeStats.timestamp;
this.logTime = modelSizeStats.logTime;
}
@ -375,6 +499,37 @@ public class ModelSizeStats implements ToXContentObject, Writeable {
return this;
}
public Builder setCategorizedDocCount(long categorizedDocCount) {
this.categorizedDocCount = categorizedDocCount;
return this;
}
public Builder setTotalCategoryCount(long totalCategoryCount) {
this.totalCategoryCount = totalCategoryCount;
return this;
}
public Builder setFrequentCategoryCount(long frequentCategoryCount) {
this.frequentCategoryCount = frequentCategoryCount;
return this;
}
public Builder setRareCategoryCount(long rareCategoryCount) {
this.rareCategoryCount = rareCategoryCount;
return this;
}
public Builder setDeadCategoryCount(long deadCategoryCount) {
this.deadCategoryCount = deadCategoryCount;
return this;
}
public Builder setCategorizationStatus(CategorizationStatus categorizationStatus) {
Objects.requireNonNull(categorizationStatus, "[" + CATEGORIZATION_STATUS_FIELD.getPreferredName() + "] must not be null");
this.categorizationStatus = categorizationStatus;
return this;
}
public Builder setTimestamp(Date timestamp) {
this.timestamp = timestamp;
return this;
@ -387,7 +542,8 @@ public class ModelSizeStats implements ToXContentObject, Writeable {
public ModelSizeStats build() {
return new ModelSizeStats(jobId, modelBytes, modelBytesExceeded, modelBytesMemoryLimit, totalByFieldCount, totalOverFieldCount,
totalPartitionFieldCount, bucketAllocationFailuresCount, memoryStatus, timestamp, logTime);
totalPartitionFieldCount, bucketAllocationFailuresCount, memoryStatus, categorizedDocCount, totalCategoryCount,
frequentCategoryCount, rareCategoryCount, deadCategoryCount, categorizationStatus, timestamp, logTime);
}
}
}


@ -10,6 +10,7 @@ import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.json.JsonXContent;
import org.elasticsearch.test.AbstractSerializingTestCase;
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.ModelSizeStats.CategorizationStatus;
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.ModelSizeStats.MemoryStatus;
import java.io.IOException;
@ -22,13 +23,19 @@ public class ModelSizeStatsTests extends AbstractSerializingTestCase<ModelSizeSt
public void testDefaultConstructor() {
ModelSizeStats stats = new ModelSizeStats.Builder("foo").build();
assertEquals(0, stats.getModelBytes());
assertEquals(null, stats.getModelBytesExceeded());
assertEquals(null, stats.getModelBytesMemoryLimit());
assertNull(stats.getModelBytesExceeded());
assertNull(stats.getModelBytesMemoryLimit());
assertEquals(0, stats.getTotalByFieldCount());
assertEquals(0, stats.getTotalOverFieldCount());
assertEquals(0, stats.getTotalPartitionFieldCount());
assertEquals(0, stats.getBucketAllocationFailuresCount());
assertEquals(MemoryStatus.OK, stats.getMemoryStatus());
assertEquals(0, stats.getCategorizedDocCount());
assertEquals(0, stats.getTotalCategoryCount());
assertEquals(0, stats.getFrequentCategoryCount());
assertEquals(0, stats.getRareCategoryCount());
assertEquals(0, stats.getDeadCategoryCount());
assertEquals(CategorizationStatus.OK, stats.getCategorizationStatus());
}
public void testSetMemoryStatus_GivenNull() {
@ -84,6 +91,24 @@ public class ModelSizeStatsTests extends AbstractSerializingTestCase<ModelSizeSt
if (randomBoolean()) {
stats.setMemoryStatus(randomFrom(MemoryStatus.values()));
}
if (randomBoolean()) {
stats.setCategorizedDocCount(randomNonNegativeLong());
}
if (randomBoolean()) {
stats.setTotalCategoryCount(randomNonNegativeLong());
}
if (randomBoolean()) {
stats.setFrequentCategoryCount(randomNonNegativeLong());
}
if (randomBoolean()) {
stats.setRareCategoryCount(randomNonNegativeLong());
}
if (randomBoolean()) {
stats.setDeadCategoryCount(randomNonNegativeLong());
}
if (randomBoolean()) {
stats.setCategorizationStatus(randomFrom(CategorizationStatus.values()));
}
return stats.build();
}


@ -190,6 +190,31 @@ public class RestCatJobsAction extends AbstractCatAction {
TableColumnAttributeBuilder.builder("number of bucket allocation failures", false)
.setAliases("mbaf", "modelBucketAllocationFailures")
.build());
table.addCell("model.categorization_status",
TableColumnAttributeBuilder.builder("current categorization status", false)
.setAliases("mcs", "modelCategorizationStatus")
.setTextAlignment(TableColumnAttributeBuilder.TextAlign.RIGHT)
.build());
table.addCell("model.categorized_doc_count",
TableColumnAttributeBuilder.builder("count of categorized documents", false)
.setAliases("mcdc", "modelCategorizedDocCount")
.build());
table.addCell("model.total_category_count",
TableColumnAttributeBuilder.builder("count of categories", false)
.setAliases("mtcc", "modelTotalCategoryCount")
.build());
table.addCell("model.frequent_category_count",
TableColumnAttributeBuilder.builder("count of frequent categories", false)
.setAliases("mfcc", "modelFrequentCategoryCount")
.build());
table.addCell("model.rare_category_count",
TableColumnAttributeBuilder.builder("count of rare categories", false)
.setAliases("mrcc", "modelRareCategoryCount")
.build());
table.addCell("model.dead_category_count",
TableColumnAttributeBuilder.builder("count of dead categories", false)
.setAliases("mdcc", "modelDeadCategoryCount")
.build());
table.addCell("model.log_time",
TableColumnAttributeBuilder.builder("when the model stats were gathered", false)
.setAliases("mlt", "modelLogTime")
@ -325,7 +350,6 @@ public class RestCatJobsAction extends AbstractCatAction {
table.addCell(dataCounts.getLatestEmptyBucketTimeStamp());
table.addCell(dataCounts.getLatestSparseBucketTimeStamp());
ModelSizeStats modelSizeStats = job.getModelSizeStats();
table.addCell(modelSizeStats == null ? null : new ByteSizeValue(modelSizeStats.getModelBytes()));
table.addCell(modelSizeStats == null ? null : modelSizeStats.getMemoryStatus().toString());
@ -339,6 +363,12 @@ public class RestCatJobsAction extends AbstractCatAction {
table.addCell(modelSizeStats == null ? null : modelSizeStats.getTotalOverFieldCount());
table.addCell(modelSizeStats == null ? null : modelSizeStats.getTotalPartitionFieldCount());
table.addCell(modelSizeStats == null ? null : modelSizeStats.getBucketAllocationFailuresCount());
table.addCell(modelSizeStats == null ? null : modelSizeStats.getCategorizationStatus().toString());
table.addCell(modelSizeStats == null ? null : modelSizeStats.getCategorizedDocCount());
table.addCell(modelSizeStats == null ? null : modelSizeStats.getTotalCategoryCount());
table.addCell(modelSizeStats == null ? null : modelSizeStats.getFrequentCategoryCount());
table.addCell(modelSizeStats == null ? null : modelSizeStats.getRareCategoryCount());
table.addCell(modelSizeStats == null ? null : modelSizeStats.getDeadCategoryCount());
table.addCell(modelSizeStats == null ? null : modelSizeStats.getLogTime());
table.addCell(modelSizeStats == null ? null : modelSizeStats.getTimestamp());
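
With these columns registered, the new stats can be requested by name or by alias from the cat jobs API. An illustrative request follows; the endpoint path and the `id` column are assumptions, while the `model.*` column names and their aliases come from the cells added above.

GET _cat/ml/anomaly_detectors?v&h=id,model.categorization_status,model.categorized_doc_count,model.total_category_count,model.rare_category_count,model.dead_category_count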


@ -98,6 +98,12 @@ public class JobStatsMonitoringDocTests extends BaseMonitoringDocTestCase<JobSta
.setTotalPartitionFieldCount(103L)
.setBucketAllocationFailuresCount(104L)
.setMemoryStatus(ModelSizeStats.MemoryStatus.OK)
.setCategorizedDocCount(42)
.setTotalCategoryCount(8)
.setFrequentCategoryCount(4)
.setRareCategoryCount(2)
.setDeadCategoryCount(1)
.setCategorizationStatus(ModelSizeStats.CategorizationStatus.WARN)
.setTimestamp(date1)
.setLogTime(date2)
.build();
@ -156,6 +162,12 @@ public class JobStatsMonitoringDocTests extends BaseMonitoringDocTestCase<JobSta
+ "\"total_partition_field_count\":103,"
+ "\"bucket_allocation_failures_count\":104,"
+ "\"memory_status\":\"ok\","
+ "\"categorized_doc_count\":42,"
+ "\"total_category_count\":8,"
+ "\"frequent_category_count\":4,"
+ "\"rare_category_count\":2,"
+ "\"dead_category_count\":1,"
+ "\"categorization_status\":\"warn\","
+ "\"log_time\":1483315322002,"
+ "\"timestamp\":1483228861001"
+ "},"


@ -98,6 +98,12 @@ setup:
"total_partition_field_count" : 0,
"bucket_allocation_failures_count" : 0,
"memory_status" : "ok",
"categorized_doc_count" : 0,
"total_category_count" : 0,
"frequent_category_count" : 0,
"rare_category_count" : 0,
"dead_category_count" : 0,
"categorization_status" : "ok",
"log_time" : 1495808248662,
"timestamp" : 1495808248662
},


@ -292,6 +292,12 @@ setup:
total_partition_field_count : 0,
bucket_allocation_failures_count : 0,
memory_status : ok,
categorized_doc_count : 0,
total_category_count : 0,
frequent_category_count : 0,
rare_category_count : 0,
dead_category_count : 0,
categorization_status : ok,
log_time : 1495808248662
}
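
For reference, these fields surface to users through the anomaly detection job stats and model snapshot APIs documented above. A request such as the following (shown for illustration) returns a `model_size_stats` object containing the new counts and `categorization_status`:

GET _ml/anomaly_detectors/<job_id>/_stats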