diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/job/process/ModelSizeStats.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/job/process/ModelSizeStats.java index 5f7a1e2988560..822ac0208b0ef 100644 --- a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/job/process/ModelSizeStats.java +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/job/process/ModelSizeStats.java @@ -33,7 +33,7 @@ import java.util.Objects; /** - * Provide access to the C++ model memory usage numbers for the Java process. + * Provide access to the C++ model size stats for the Java process. */ public class ModelSizeStats implements ToXContentObject { @@ -54,6 +54,12 @@ public class ModelSizeStats implements ToXContentObject { public static final ParseField TOTAL_PARTITION_FIELD_COUNT_FIELD = new ParseField("total_partition_field_count"); public static final ParseField BUCKET_ALLOCATION_FAILURES_COUNT_FIELD = new ParseField("bucket_allocation_failures_count"); public static final ParseField MEMORY_STATUS_FIELD = new ParseField("memory_status"); + public static final ParseField CATEGORIZED_DOC_COUNT_FIELD = new ParseField("categorized_doc_count"); + public static final ParseField TOTAL_CATEGORY_COUNT_FIELD = new ParseField("total_category_count"); + public static final ParseField FREQUENT_CATEGORY_COUNT_FIELD = new ParseField("frequent_category_count"); + public static final ParseField RARE_CATEGORY_COUNT_FIELD = new ParseField("rare_category_count"); + public static final ParseField DEAD_CATEGORY_COUNT_FIELD = new ParseField("dead_category_count"); + public static final ParseField CATEGORIZATION_STATUS_FIELD = new ParseField("categorization_status"); public static final ParseField LOG_TIME_FIELD = new ParseField("log_time"); public static final ParseField TIMESTAMP_FIELD = new ParseField("timestamp"); @@ -69,6 +75,14 @@ public class ModelSizeStats implements ToXContentObject { PARSER.declareLong(Builder::setTotalByFieldCount, TOTAL_BY_FIELD_COUNT_FIELD); PARSER.declareLong(Builder::setTotalOverFieldCount, TOTAL_OVER_FIELD_COUNT_FIELD); PARSER.declareLong(Builder::setTotalPartitionFieldCount, TOTAL_PARTITION_FIELD_COUNT_FIELD); + PARSER.declareField(Builder::setMemoryStatus, p -> MemoryStatus.fromString(p.text()), MEMORY_STATUS_FIELD, ValueType.STRING); + PARSER.declareLong(Builder::setCategorizedDocCount, CATEGORIZED_DOC_COUNT_FIELD); + PARSER.declareLong(Builder::setTotalCategoryCount, TOTAL_CATEGORY_COUNT_FIELD); + PARSER.declareLong(Builder::setFrequentCategoryCount, FREQUENT_CATEGORY_COUNT_FIELD); + PARSER.declareLong(Builder::setRareCategoryCount, RARE_CATEGORY_COUNT_FIELD); + PARSER.declareLong(Builder::setDeadCategoryCount, DEAD_CATEGORY_COUNT_FIELD); + PARSER.declareField(Builder::setCategorizationStatus, + p -> CategorizationStatus.fromString(p.text()), CATEGORIZATION_STATUS_FIELD, ValueType.STRING); PARSER.declareField(Builder::setLogTime, (p) -> TimeUtil.parseTimeField(p, LOG_TIME_FIELD.getPreferredName()), LOG_TIME_FIELD, @@ -77,7 +91,6 @@ public class ModelSizeStats implements ToXContentObject { (p) -> TimeUtil.parseTimeField(p, TIMESTAMP_FIELD.getPreferredName()), TIMESTAMP_FIELD, ValueType.VALUE); - PARSER.declareField(Builder::setMemoryStatus, p -> MemoryStatus.fromString(p.text()), MEMORY_STATUS_FIELD, ValueType.STRING); } /** @@ -99,6 +112,23 @@ public String toString() { } } + /** + * The status of categorization for a job. 
OK is default, WARN + * means that inappropriate numbers of categories are being found + */ + public enum CategorizationStatus { + OK, WARN; + + public static CategorizationStatus fromString(String statusName) { + return valueOf(statusName.trim().toUpperCase(Locale.ROOT)); + } + + @Override + public String toString() { + return name().toLowerCase(Locale.ROOT); + } + } + private final String jobId; private final long modelBytes; private final Long modelBytesExceeded; @@ -108,12 +138,20 @@ public String toString() { private final long totalPartitionFieldCount; private final long bucketAllocationFailuresCount; private final MemoryStatus memoryStatus; + private final long categorizedDocCount; + private final long totalCategoryCount; + private final long frequentCategoryCount; + private final long rareCategoryCount; + private final long deadCategoryCount; + private final CategorizationStatus categorizationStatus; private final Date timestamp; private final Date logTime; private ModelSizeStats(String jobId, long modelBytes, Long modelBytesExceeded, Long modelBytesMemoryLimit, long totalByFieldCount, long totalOverFieldCount, long totalPartitionFieldCount, long bucketAllocationFailuresCount, - MemoryStatus memoryStatus, Date timestamp, Date logTime) { + MemoryStatus memoryStatus, long categorizedDocCount, long totalCategoryCount, long frequentCategoryCount, + long rareCategoryCount, long deadCategoryCount, CategorizationStatus categorizationStatus, + Date timestamp, Date logTime) { this.jobId = jobId; this.modelBytes = modelBytes; this.modelBytesExceeded = modelBytesExceeded; @@ -123,6 +161,12 @@ private ModelSizeStats(String jobId, long modelBytes, Long modelBytesExceeded, L this.totalPartitionFieldCount = totalPartitionFieldCount; this.bucketAllocationFailuresCount = bucketAllocationFailuresCount; this.memoryStatus = memoryStatus; + this.categorizedDocCount = categorizedDocCount; + this.totalCategoryCount = totalCategoryCount; + this.frequentCategoryCount = frequentCategoryCount; + this.rareCategoryCount = rareCategoryCount; + this.deadCategoryCount = deadCategoryCount; + this.categorizationStatus = categorizationStatus; this.timestamp = timestamp; this.logTime = logTime; } @@ -145,6 +189,12 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.field(TOTAL_PARTITION_FIELD_COUNT_FIELD.getPreferredName(), totalPartitionFieldCount); builder.field(BUCKET_ALLOCATION_FAILURES_COUNT_FIELD.getPreferredName(), bucketAllocationFailuresCount); builder.field(MEMORY_STATUS_FIELD.getPreferredName(), memoryStatus); + builder.field(CATEGORIZED_DOC_COUNT_FIELD.getPreferredName(), categorizedDocCount); + builder.field(TOTAL_CATEGORY_COUNT_FIELD.getPreferredName(), totalCategoryCount); + builder.field(FREQUENT_CATEGORY_COUNT_FIELD.getPreferredName(), frequentCategoryCount); + builder.field(RARE_CATEGORY_COUNT_FIELD.getPreferredName(), rareCategoryCount); + builder.field(DEAD_CATEGORY_COUNT_FIELD.getPreferredName(), deadCategoryCount); + builder.field(CATEGORIZATION_STATUS_FIELD.getPreferredName(), categorizationStatus); builder.timeField(LOG_TIME_FIELD.getPreferredName(), LOG_TIME_FIELD.getPreferredName() + "_string", logTime.getTime()); if (timestamp != null) { builder.timeField(TIMESTAMP_FIELD.getPreferredName(), TIMESTAMP_FIELD.getPreferredName() + "_string", timestamp.getTime()); @@ -190,6 +240,30 @@ public MemoryStatus getMemoryStatus() { return memoryStatus; } + public long getCategorizedDocCount() { + return categorizedDocCount; + } + + public long 
getTotalCategoryCount() { + return totalCategoryCount; + } + + public long getFrequentCategoryCount() { + return frequentCategoryCount; + } + + public long getRareCategoryCount() { + return rareCategoryCount; + } + + public long getDeadCategoryCount() { + return deadCategoryCount; + } + + public CategorizationStatus getCategorizationStatus() { + return categorizationStatus; + } + /** * The timestamp of the last processed record when this instance was created. * @@ -211,7 +285,8 @@ public Date getLogTime() { @Override public int hashCode() { return Objects.hash(jobId, modelBytes, modelBytesExceeded, modelBytesMemoryLimit, totalByFieldCount, totalOverFieldCount, - totalPartitionFieldCount, this.bucketAllocationFailuresCount, memoryStatus, timestamp, logTime); + totalPartitionFieldCount, this.bucketAllocationFailuresCount, memoryStatus, categorizedDocCount, totalCategoryCount, + frequentCategoryCount, rareCategoryCount, deadCategoryCount, categorizationStatus, timestamp, logTime); } /** @@ -233,7 +308,14 @@ public boolean equals(Object other) { && Objects.equals(this.modelBytesMemoryLimit, that.modelBytesMemoryLimit) && this.totalByFieldCount == that.totalByFieldCount && this.totalOverFieldCount == that.totalOverFieldCount && this.totalPartitionFieldCount == that.totalPartitionFieldCount && this.bucketAllocationFailuresCount == that.bucketAllocationFailuresCount - && Objects.equals(this.memoryStatus, that.memoryStatus) && Objects.equals(this.timestamp, that.timestamp) + && Objects.equals(this.memoryStatus, that.memoryStatus) + && this.categorizedDocCount == that.categorizedDocCount + && this.totalCategoryCount == that.totalCategoryCount + && this.frequentCategoryCount == that.frequentCategoryCount + && this.rareCategoryCount == that.rareCategoryCount + && this.deadCategoryCount == that.deadCategoryCount + && Objects.equals(this.categorizationStatus, that.categorizationStatus) + && Objects.equals(this.timestamp, that.timestamp) && Objects.equals(this.logTime, that.logTime) && Objects.equals(this.jobId, that.jobId); } @@ -249,12 +331,19 @@ public static class Builder { private long totalPartitionFieldCount; private long bucketAllocationFailuresCount; private MemoryStatus memoryStatus; + private long categorizedDocCount; + private long totalCategoryCount; + private long frequentCategoryCount; + private long rareCategoryCount; + private long deadCategoryCount; + private CategorizationStatus categorizationStatus; private Date timestamp; private Date logTime; public Builder(String jobId) { this.jobId = jobId; memoryStatus = MemoryStatus.OK; + categorizationStatus = CategorizationStatus.OK; logTime = new Date(); } @@ -268,6 +357,12 @@ public Builder(ModelSizeStats modelSizeStats) { this.totalPartitionFieldCount = modelSizeStats.totalPartitionFieldCount; this.bucketAllocationFailuresCount = modelSizeStats.bucketAllocationFailuresCount; this.memoryStatus = modelSizeStats.memoryStatus; + this.categorizedDocCount = modelSizeStats.categorizedDocCount; + this.totalCategoryCount = modelSizeStats.totalCategoryCount; + this.frequentCategoryCount = modelSizeStats.frequentCategoryCount; + this.rareCategoryCount = modelSizeStats.rareCategoryCount; + this.deadCategoryCount = modelSizeStats.deadCategoryCount; + this.categorizationStatus = modelSizeStats.categorizationStatus; this.timestamp = modelSizeStats.timestamp; this.logTime = modelSizeStats.logTime; } @@ -313,6 +408,37 @@ public Builder setMemoryStatus(MemoryStatus memoryStatus) { return this; } + public Builder setCategorizedDocCount(long 
categorizedDocCount) { + this.categorizedDocCount = categorizedDocCount; + return this; + } + + public Builder setTotalCategoryCount(long totalCategoryCount) { + this.totalCategoryCount = totalCategoryCount; + return this; + } + + public Builder setFrequentCategoryCount(long frequentCategoryCount) { + this.frequentCategoryCount = frequentCategoryCount; + return this; + } + + public Builder setRareCategoryCount(long rareCategoryCount) { + this.rareCategoryCount = rareCategoryCount; + return this; + } + + public Builder setDeadCategoryCount(long deadCategoryCount) { + this.deadCategoryCount = deadCategoryCount; + return this; + } + + public Builder setCategorizationStatus(CategorizationStatus categorizationStatus) { + Objects.requireNonNull(categorizationStatus, "[" + CATEGORIZATION_STATUS_FIELD.getPreferredName() + "] must not be null"); + this.categorizationStatus = categorizationStatus; + return this; + } + public Builder setTimestamp(Date timestamp) { this.timestamp = timestamp; return this; @@ -325,7 +451,8 @@ public Builder setLogTime(Date logTime) { public ModelSizeStats build() { return new ModelSizeStats(jobId, modelBytes, modelBytesExceeded, modelBytesMemoryLimit, totalByFieldCount, totalOverFieldCount, - totalPartitionFieldCount, bucketAllocationFailuresCount, memoryStatus, timestamp, logTime); + totalPartitionFieldCount, bucketAllocationFailuresCount, memoryStatus, categorizedDocCount, totalCategoryCount, + frequentCategoryCount, rareCategoryCount, deadCategoryCount, categorizationStatus, timestamp, logTime); } } } diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/job/process/ModelSizeStatsTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/job/process/ModelSizeStatsTests.java index 8c43feb545a26..6f4676a457044 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/job/process/ModelSizeStatsTests.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/job/process/ModelSizeStatsTests.java @@ -24,6 +24,7 @@ import java.util.Date; +import static org.elasticsearch.client.ml.job.process.ModelSizeStats.CategorizationStatus; import static org.elasticsearch.client.ml.job.process.ModelSizeStats.MemoryStatus; public class ModelSizeStatsTests extends AbstractXContentTestCase { @@ -38,6 +39,12 @@ public void testDefaultConstructor() { assertEquals(0, stats.getTotalPartitionFieldCount()); assertEquals(0, stats.getBucketAllocationFailuresCount()); assertEquals(MemoryStatus.OK, stats.getMemoryStatus()); + assertEquals(0, stats.getCategorizedDocCount()); + assertEquals(0, stats.getTotalCategoryCount()); + assertEquals(0, stats.getFrequentCategoryCount()); + assertEquals(0, stats.getRareCategoryCount()); + assertEquals(0, stats.getDeadCategoryCount()); + assertEquals(CategorizationStatus.OK, stats.getCategorizationStatus()); } public void testSetMemoryStatus_GivenNull() { @@ -85,13 +92,31 @@ public static ModelSizeStats createRandomized() { stats.setTotalPartitionFieldCount(randomNonNegativeLong()); } if (randomBoolean()) { - stats.setLogTime(new Date(TimeValue.parseTimeValue(randomTimeValue(), "test").millis())); + stats.setMemoryStatus(randomFrom(MemoryStatus.values())); } if (randomBoolean()) { - stats.setTimestamp(new Date(TimeValue.parseTimeValue(randomTimeValue(), "test").millis())); + stats.setCategorizedDocCount(randomNonNegativeLong()); } if (randomBoolean()) { - stats.setMemoryStatus(randomFrom(MemoryStatus.values())); + stats.setTotalCategoryCount(randomNonNegativeLong()); + } + if 
(randomBoolean()) { + stats.setFrequentCategoryCount(randomNonNegativeLong()); + } + if (randomBoolean()) { + stats.setRareCategoryCount(randomNonNegativeLong()); + } + if (randomBoolean()) { + stats.setDeadCategoryCount(randomNonNegativeLong()); + } + if (randomBoolean()) { + stats.setCategorizationStatus(randomFrom(CategorizationStatus.values())); + } + if (randomBoolean()) { + stats.setLogTime(new Date(TimeValue.parseTimeValue(randomTimeValue(), "test").millis())); + } + if (randomBoolean()) { + stats.setTimestamp(new Date(TimeValue.parseTimeValue(randomTimeValue(), "test").millis())); } return stats.build(); } diff --git a/docs/reference/ml/anomaly-detection/apis/get-job-stats.asciidoc b/docs/reference/ml/anomaly-detection/apis/get-job-stats.asciidoc index 22ff5372fc11d..0afed42ca98f1 100644 --- a/docs/reference/ml/anomaly-detection/apis/get-job-stats.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/get-job-stats.asciidoc @@ -195,6 +195,35 @@ model. It has the following properties: processed due to insufficient model memory. This situation is also signified by a `hard_limit: memory_status` property value. +`model_size_stats`.`categorized_doc_count`::: +(long) The number of documents that have had a field categorized. + +`model_size_stats`.`categorization_status`::: +(string) The status of categorization for this job. +Contains one of the following values. ++ +-- +* `ok`: Categorization is performing acceptably well (or not being +used at all). +* `warn`: Categorization is detecting a distribution of categories +that suggests the input data is inappropriate for categorization. +Problems could be that there is only one category, more than 90% of +categories are rare, the number of categories is greater than 50% of +the number of categorized documents, there are no frequently +matched categories, or more than 50% of categories are dead. + +-- + +`model_size_stats`.`dead_category_count`::: +(long) The number of categories created by categorization that will +never be assigned again because another category's definition +makes it a superset of the dead category. (Dead categories are a +side effect of the way categorization has no prior training.) + +`model_size_stats`.`frequent_category_count`::: +(long) The number of categories that match more than 1% of categorized +documents. + `model_size_stats`.`job_id`::: (string) include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection] @@ -226,6 +255,9 @@ this value indicates the latest size. `model_size_stats`.`model_bytes_memory_limit`::: (long) The upper limit for memory usage, checked on increasing values. +`model_size_stats`.`rare_category_count`::: +(long) The number of categories that match just one categorized document. + `model_size_stats`.`result_type`::: (string) For internal use. The type of result. @@ -233,6 +265,9 @@ this value indicates the latest size. (long) The number of `by` field values that were analyzed by the models. This value is cumulative for all detectors. +`model_size_stats`.`total_category_count`::: +(long) The number of categories created by categorization. + `model_size_stats`.`total_over_field_count`::: (long) The number of `over` field values that were analyzed by the models. This value is cumulative for all detectors. 
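Note: the counters documented above are also exposed programmatically through the getters added in this change. As a minimal sketch (assuming a client-side ModelSizeStats instance has already been retrieved, for example from a get job stats call of the high-level REST client; that retrieval is not shown here and is not part of this diff), the new fields can be read like this:

import org.elasticsearch.client.ml.job.process.ModelSizeStats;
import org.elasticsearch.client.ml.job.process.ModelSizeStats.CategorizationStatus;

public class CategorizationStatsPrinter {

    // Prints the categorization counters added to model_size_stats and flags the warn state.
    static void printCategorizationStats(ModelSizeStats stats) {
        System.out.println("categorized_doc_count:   " + stats.getCategorizedDocCount());
        System.out.println("total_category_count:    " + stats.getTotalCategoryCount());
        System.out.println("frequent_category_count: " + stats.getFrequentCategoryCount());
        System.out.println("rare_category_count:     " + stats.getRareCategoryCount());
        System.out.println("dead_category_count:     " + stats.getDeadCategoryCount());
        if (stats.getCategorizationStatus() == CategorizationStatus.WARN) {
            System.out.println("categorization_status is 'warn': the input data may be unsuitable for categorization");
        }
    }
}

Only getters introduced in this diff are used; the labels printed here match the JSON keys documented above.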
@@ -371,6 +406,12 @@ The API returns the following results: "total_partition_field_count" : 2, "bucket_allocation_failures_count" : 0, "memory_status" : "ok", + "categorized_doc_count" : 0, + "total_category_count" : 0, + "frequent_category_count" : 0, + "rare_category_count" : 0, + "dead_category_count" : 0, + "categorization_status" : "ok", "log_time" : 1576017596000, "timestamp" : 1580410800000 }, diff --git a/docs/reference/ml/anomaly-detection/apis/get-snapshot.asciidoc b/docs/reference/ml/anomaly-detection/apis/get-snapshot.asciidoc index 7a7fbfb5a4e75..afa79962327c7 100644 --- a/docs/reference/ml/anomaly-detection/apis/get-snapshot.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/get-snapshot.asciidoc @@ -89,6 +89,35 @@ properties: (long) The number of buckets for which entities were not processed due to memory limit constraints. +`model_size_stats`.`categorized_doc_count`::: +(long) The number of documents that have had a field categorized. + +`model_size_stats`.`categorization_status`::: +(string) The status of categorization for this job. +Contains one of the following values. ++ +-- +* `ok`: Categorization is performing acceptably well (or not being +used at all). +* `warn`: Categorization is detecting a distribution of categories +that suggests the input data is inappropriate for categorization. +Problems could be that there is only one category, more than 90% of +categories are rare, the number of categories is greater than 50% of +the number of categorized documents, there are no frequently +matched categories, or more than 50% of categories are dead. + +-- + +`model_size_stats`.`dead_category_count`::: +(long) The number of categories created by categorization that will +never be assigned again because another category's definition +makes it a superset of the dead category. (Dead categories are a +side effect of the way categorization has no prior training.) + +`model_size_stats`.`frequent_category_count`::: +(long) The number of categories that match more than 1% of categorized +documents. + `model_size_stats`.`job_id`::: (string) include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection] @@ -119,6 +148,9 @@ reclaim space. `model_size_stats`.`model_bytes_memory_limit`::: (long) The upper limit for memory usage, checked on increasing values. +`model_size_stats`.`rare_category_count`::: +(long) The number of categories that match just one categorized document. + `model_size_stats`.`result_type`::: (string) Internal. This value is always `model_size_stats`. @@ -130,6 +162,9 @@ bucket timestamp of the data. (long) The number of _by_ field values analyzed. Note that these are counted separately for each detector and partition. +`model_size_stats`.`total_category_count`::: +(long) The number of categories created by categorization. + `model_size_stats`.`total_over_field_count`::: (long) The number of _over_ field values analyzed. Note that these are counted separately for each detector and partition. 
@@ -187,6 +222,12 @@ In this example, the API provides a single result: "total_partition_field_count" : 2, "bucket_allocation_failures_count" : 0, "memory_status" : "ok", + "categorized_doc_count" : 0, + "total_category_count" : 0, + "frequent_category_count" : 0, + "rare_category_count" : 0, + "dead_category_count" : 0, + "categorization_status" : "ok", "log_time" : 1575402237000, "timestamp" : 1576965600000 }, diff --git a/docs/reference/ml/anomaly-detection/apis/revert-snapshot.asciidoc b/docs/reference/ml/anomaly-detection/apis/revert-snapshot.asciidoc index e119cf2b40a33..0f0f44cc76620 100644 --- a/docs/reference/ml/anomaly-detection/apis/revert-snapshot.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/revert-snapshot.asciidoc @@ -89,6 +89,12 @@ When the operation is complete, you receive the following results: "total_partition_field_count" : 2, "bucket_allocation_failures_count" : 0, "memory_status" : "ok", + "categorized_doc_count" : 0, + "total_category_count" : 0, + "frequent_category_count" : 0, + "rare_category_count" : 0, + "dead_category_count" : 0, + "categorization_status" : "ok", "log_time" : 1575402237000, "timestamp" : 1576965600000 }, diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelSizeStats.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelSizeStats.java index 2e78ab39fe6e3..44ca9ef365d6a 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelSizeStats.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelSizeStats.java @@ -22,10 +22,9 @@ import java.util.Date; import java.util.Locale; import java.util.Objects; -import java.util.function.BiConsumer; /** - * Provide access to the C++ model memory usage numbers for the Java process. + * Provide access to the C++ model size stats for the Java process. 
*/ public class ModelSizeStats implements ToXContentObject, Writeable { @@ -46,6 +45,12 @@ public class ModelSizeStats implements ToXContentObject, Writeable { public static final ParseField TOTAL_PARTITION_FIELD_COUNT_FIELD = new ParseField("total_partition_field_count"); public static final ParseField BUCKET_ALLOCATION_FAILURES_COUNT_FIELD = new ParseField("bucket_allocation_failures_count"); public static final ParseField MEMORY_STATUS_FIELD = new ParseField("memory_status"); + public static final ParseField CATEGORIZED_DOC_COUNT_FIELD = new ParseField("categorized_doc_count"); + public static final ParseField TOTAL_CATEGORY_COUNT_FIELD = new ParseField("total_category_count"); + public static final ParseField FREQUENT_CATEGORY_COUNT_FIELD = new ParseField("frequent_category_count"); + public static final ParseField RARE_CATEGORY_COUNT_FIELD = new ParseField("rare_category_count"); + public static final ParseField DEAD_CATEGORY_COUNT_FIELD = new ParseField("dead_category_count"); + public static final ParseField CATEGORIZATION_STATUS_FIELD = new ParseField("categorization_status"); public static final ParseField LOG_TIME_FIELD = new ParseField("log_time"); public static final ParseField TIMESTAMP_FIELD = new ParseField("timestamp"); @@ -65,12 +70,18 @@ private static ConstructingObjectParser createParser(boolean igno parser.declareLong(Builder::setTotalByFieldCount, TOTAL_BY_FIELD_COUNT_FIELD); parser.declareLong(Builder::setTotalOverFieldCount, TOTAL_OVER_FIELD_COUNT_FIELD); parser.declareLong(Builder::setTotalPartitionFieldCount, TOTAL_PARTITION_FIELD_COUNT_FIELD); + parser.declareField(Builder::setMemoryStatus, p -> MemoryStatus.fromString(p.text()), MEMORY_STATUS_FIELD, ValueType.STRING); + parser.declareLong(Builder::setCategorizedDocCount, CATEGORIZED_DOC_COUNT_FIELD); + parser.declareLong(Builder::setTotalCategoryCount, TOTAL_CATEGORY_COUNT_FIELD); + parser.declareLong(Builder::setFrequentCategoryCount, FREQUENT_CATEGORY_COUNT_FIELD); + parser.declareLong(Builder::setRareCategoryCount, RARE_CATEGORY_COUNT_FIELD); + parser.declareLong(Builder::setDeadCategoryCount, DEAD_CATEGORY_COUNT_FIELD); + parser.declareField(Builder::setCategorizationStatus, + p -> CategorizationStatus.fromString(p.text()), CATEGORIZATION_STATUS_FIELD, ValueType.STRING); parser.declareField(Builder::setLogTime, p -> TimeUtils.parseTimeField(p, LOG_TIME_FIELD.getPreferredName()), LOG_TIME_FIELD, ValueType.VALUE); parser.declareField(Builder::setTimestamp, p -> TimeUtils.parseTimeField(p, TIMESTAMP_FIELD.getPreferredName()), TIMESTAMP_FIELD, ValueType.VALUE); - BiConsumer setMemoryStatus = Builder::setMemoryStatus; - parser.declareField(setMemoryStatus, p -> MemoryStatus.fromString(p.text()), MEMORY_STATUS_FIELD, ValueType.STRING); return parser; } @@ -103,6 +114,32 @@ public String toString() { } } + /** + * The status of categorization for a job. 
OK is default, WARN + * means that inappropriate numbers of categories are being found + */ + public enum CategorizationStatus implements Writeable { + OK, WARN; + + public static CategorizationStatus fromString(String statusName) { + return valueOf(statusName.trim().toUpperCase(Locale.ROOT)); + } + + public static CategorizationStatus readFromStream(StreamInput in) throws IOException { + return in.readEnum(CategorizationStatus.class); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeEnum(this); + } + + @Override + public String toString() { + return name().toLowerCase(Locale.ROOT); + } + } + private final String jobId; private final long modelBytes; private final Long modelBytesExceeded; @@ -112,12 +149,19 @@ public String toString() { private final long totalPartitionFieldCount; private final long bucketAllocationFailuresCount; private final MemoryStatus memoryStatus; + private final long categorizedDocCount; + private final long totalCategoryCount; + private final long frequentCategoryCount; + private final long rareCategoryCount; + private final long deadCategoryCount; + private final CategorizationStatus categorizationStatus; private final Date timestamp; private final Date logTime; private ModelSizeStats(String jobId, long modelBytes, Long modelBytesExceeded, Long modelBytesMemoryLimit, long totalByFieldCount, long totalOverFieldCount, long totalPartitionFieldCount, long bucketAllocationFailuresCount, - MemoryStatus memoryStatus, + MemoryStatus memoryStatus, long categorizedDocCount, long totalCategoryCount, long frequentCategoryCount, + long rareCategoryCount, long deadCategoryCount, CategorizationStatus categorizationStatus, Date timestamp, Date logTime) { this.jobId = jobId; this.modelBytes = modelBytes; @@ -128,6 +172,12 @@ private ModelSizeStats(String jobId, long modelBytes, Long modelBytesExceeded, L this.totalPartitionFieldCount = totalPartitionFieldCount; this.bucketAllocationFailuresCount = bucketAllocationFailuresCount; this.memoryStatus = memoryStatus; + this.categorizedDocCount = categorizedDocCount; + this.totalCategoryCount = totalCategoryCount; + this.frequentCategoryCount = frequentCategoryCount; + this.rareCategoryCount = rareCategoryCount; + this.deadCategoryCount = deadCategoryCount; + this.categorizationStatus = categorizationStatus; this.timestamp = timestamp; this.logTime = logTime; } @@ -150,6 +200,22 @@ public ModelSizeStats(StreamInput in) throws IOException { totalPartitionFieldCount = in.readVLong(); bucketAllocationFailuresCount = in.readVLong(); memoryStatus = MemoryStatus.readFromStream(in); + // TODO change to 7.7.0 on backport + if (in.getVersion().onOrAfter(Version.V_8_0_0)) { + categorizedDocCount = in.readVLong(); + totalCategoryCount = in.readVLong(); + frequentCategoryCount = in.readVLong(); + rareCategoryCount = in.readVLong(); + deadCategoryCount = in.readVLong(); + categorizationStatus = CategorizationStatus.readFromStream(in); + } else { + categorizedDocCount = 0; + totalCategoryCount = 0; + frequentCategoryCount = 0; + rareCategoryCount = 0; + deadCategoryCount = 0; + categorizationStatus = CategorizationStatus.OK; + } logTime = new Date(in.readVLong()); timestamp = in.readBoolean() ? 
new Date(in.readVLong()) : null; } @@ -177,6 +243,15 @@ public void writeTo(StreamOutput out) throws IOException { out.writeVLong(totalPartitionFieldCount); out.writeVLong(bucketAllocationFailuresCount); memoryStatus.writeTo(out); + // TODO change to 7.7.0 on backport + if (out.getVersion().onOrAfter(Version.V_8_0_0)) { + out.writeVLong(categorizedDocCount); + out.writeVLong(totalCategoryCount); + out.writeVLong(frequentCategoryCount); + out.writeVLong(rareCategoryCount); + out.writeVLong(deadCategoryCount); + categorizationStatus.writeTo(out); + } out.writeVLong(logTime.getTime()); boolean hasTimestamp = timestamp != null; out.writeBoolean(hasTimestamp); @@ -208,6 +283,12 @@ public XContentBuilder doXContentBody(XContentBuilder builder) throws IOExceptio builder.field(TOTAL_PARTITION_FIELD_COUNT_FIELD.getPreferredName(), totalPartitionFieldCount); builder.field(BUCKET_ALLOCATION_FAILURES_COUNT_FIELD.getPreferredName(), bucketAllocationFailuresCount); builder.field(MEMORY_STATUS_FIELD.getPreferredName(), memoryStatus); + builder.field(CATEGORIZED_DOC_COUNT_FIELD.getPreferredName(), categorizedDocCount); + builder.field(TOTAL_CATEGORY_COUNT_FIELD.getPreferredName(), totalCategoryCount); + builder.field(FREQUENT_CATEGORY_COUNT_FIELD.getPreferredName(), frequentCategoryCount); + builder.field(RARE_CATEGORY_COUNT_FIELD.getPreferredName(), rareCategoryCount); + builder.field(DEAD_CATEGORY_COUNT_FIELD.getPreferredName(), deadCategoryCount); + builder.field(CATEGORIZATION_STATUS_FIELD.getPreferredName(), categorizationStatus); builder.timeField(LOG_TIME_FIELD.getPreferredName(), LOG_TIME_FIELD.getPreferredName() + "_string", logTime.getTime()); if (timestamp != null) { builder.timeField(TIMESTAMP_FIELD.getPreferredName(), TIMESTAMP_FIELD.getPreferredName() + "_string", timestamp.getTime()); @@ -252,6 +333,30 @@ public MemoryStatus getMemoryStatus() { return memoryStatus; } + public long getCategorizedDocCount() { + return categorizedDocCount; + } + + public long getTotalCategoryCount() { + return totalCategoryCount; + } + + public long getFrequentCategoryCount() { + return frequentCategoryCount; + } + + public long getRareCategoryCount() { + return rareCategoryCount; + } + + public long getDeadCategoryCount() { + return deadCategoryCount; + } + + public CategorizationStatus getCategorizationStatus() { + return categorizationStatus; + } + /** * The timestamp of the last processed record when this instance was created. 
* @return The record time @@ -272,7 +377,8 @@ public Date getLogTime() { public int hashCode() { // this.id excluded here as it is generated by the datastore return Objects.hash(jobId, modelBytes, modelBytesExceeded, modelBytesMemoryLimit, totalByFieldCount, totalOverFieldCount, - totalPartitionFieldCount, this.bucketAllocationFailuresCount, memoryStatus, timestamp, logTime); + totalPartitionFieldCount, bucketAllocationFailuresCount, memoryStatus, categorizedDocCount, totalCategoryCount, + frequentCategoryCount, rareCategoryCount, deadCategoryCount, categorizationStatus, timestamp, logTime); } /** @@ -295,7 +401,14 @@ public boolean equals(Object other) { && this.totalByFieldCount == that.totalByFieldCount && this.totalOverFieldCount == that.totalOverFieldCount && this.totalPartitionFieldCount == that.totalPartitionFieldCount && this.bucketAllocationFailuresCount == that.bucketAllocationFailuresCount - && Objects.equals(this.memoryStatus, that.memoryStatus) && Objects.equals(this.timestamp, that.timestamp) + && Objects.equals(this.memoryStatus, that.memoryStatus) + && Objects.equals(this.categorizedDocCount, that.categorizedDocCount) + && Objects.equals(this.totalCategoryCount, that.totalCategoryCount) + && Objects.equals(this.frequentCategoryCount, that.frequentCategoryCount) + && Objects.equals(this.rareCategoryCount, that.rareCategoryCount) + && Objects.equals(this.deadCategoryCount, that.deadCategoryCount) + && Objects.equals(this.categorizationStatus, that.categorizationStatus) + && Objects.equals(this.timestamp, that.timestamp) && Objects.equals(this.logTime, that.logTime) && Objects.equals(this.jobId, that.jobId); } @@ -311,12 +424,19 @@ public static class Builder { private long totalPartitionFieldCount; private long bucketAllocationFailuresCount; private MemoryStatus memoryStatus; + private long categorizedDocCount; + private long totalCategoryCount; + private long frequentCategoryCount; + private long rareCategoryCount; + private long deadCategoryCount; + private CategorizationStatus categorizationStatus; private Date timestamp; private Date logTime; public Builder(String jobId) { this.jobId = jobId; memoryStatus = MemoryStatus.OK; + categorizationStatus = CategorizationStatus.OK; logTime = new Date(); } @@ -330,6 +450,12 @@ public Builder(ModelSizeStats modelSizeStats) { this.totalPartitionFieldCount = modelSizeStats.totalPartitionFieldCount; this.bucketAllocationFailuresCount = modelSizeStats.bucketAllocationFailuresCount; this.memoryStatus = modelSizeStats.memoryStatus; + this.categorizedDocCount = modelSizeStats.categorizedDocCount; + this.totalCategoryCount = modelSizeStats.totalCategoryCount; + this.frequentCategoryCount = modelSizeStats.frequentCategoryCount; + this.rareCategoryCount = modelSizeStats.rareCategoryCount; + this.deadCategoryCount = modelSizeStats.deadCategoryCount; + this.categorizationStatus = modelSizeStats.categorizationStatus; this.timestamp = modelSizeStats.timestamp; this.logTime = modelSizeStats.logTime; } @@ -375,6 +501,37 @@ public Builder setMemoryStatus(MemoryStatus memoryStatus) { return this; } + public Builder setCategorizedDocCount(long categorizedDocCount) { + this.categorizedDocCount = categorizedDocCount; + return this; + } + + public Builder setTotalCategoryCount(long totalCategoryCount) { + this.totalCategoryCount = totalCategoryCount; + return this; + } + + public Builder setFrequentCategoryCount(long frequentCategoryCount) { + this.frequentCategoryCount = frequentCategoryCount; + return this; + } + + public Builder 
setRareCategoryCount(long rareCategoryCount) { + this.rareCategoryCount = rareCategoryCount; + return this; + } + + public Builder setDeadCategoryCount(long deadCategoryCount) { + this.deadCategoryCount = deadCategoryCount; + return this; + } + + public Builder setCategorizationStatus(CategorizationStatus categorizationStatus) { + Objects.requireNonNull(categorizationStatus, "[" + CATEGORIZATION_STATUS_FIELD.getPreferredName() + "] must not be null"); + this.categorizationStatus = categorizationStatus; + return this; + } + public Builder setTimestamp(Date timestamp) { this.timestamp = timestamp; return this; @@ -387,7 +544,8 @@ public Builder setLogTime(Date logTime) { public ModelSizeStats build() { return new ModelSizeStats(jobId, modelBytes, modelBytesExceeded, modelBytesMemoryLimit, totalByFieldCount, totalOverFieldCount, - totalPartitionFieldCount, bucketAllocationFailuresCount, memoryStatus, timestamp, logTime); + totalPartitionFieldCount, bucketAllocationFailuresCount, memoryStatus, categorizedDocCount, totalCategoryCount, + frequentCategoryCount, rareCategoryCount, deadCategoryCount, categorizationStatus, timestamp, logTime); } } } diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelSizeStatsTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelSizeStatsTests.java index 90e4bacc3f8b1..ba6854513e6b9 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelSizeStatsTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelSizeStatsTests.java @@ -10,6 +10,7 @@ import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.json.JsonXContent; import org.elasticsearch.test.AbstractSerializingTestCase; +import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.ModelSizeStats.CategorizationStatus; import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.ModelSizeStats.MemoryStatus; import java.io.IOException; @@ -22,13 +23,19 @@ public class ModelSizeStatsTests extends AbstractSerializingTestCase
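The documentation hunks above spell out the conditions under which the autodetect process reports categorization_status "warn". The authoritative logic lives in the C++ code and is not part of this diff; the sketch below is only a reading of that documentation prose expressed as Java, using the getters added to the x-pack ModelSizeStats, so the thresholds (90%, 50%) come from the documentation text rather than from the implementation:

import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.ModelSizeStats;

public final class CategorizationHealthCheck {

    private CategorizationHealthCheck() {}

    // Approximates the documented "warn" conditions: a single category, more than 90% of
    // categories rare, more categories than half the categorized documents, no frequent
    // categories, or more than half of the categories dead.
    public static boolean looksUnsuitableForCategorization(ModelSizeStats stats) {
        long categorizedDocs = stats.getCategorizedDocCount();
        long totalCategories = stats.getTotalCategoryCount();
        if (categorizedDocs == 0 || totalCategories == 0) {
            return false; // categorization is not being used at all
        }
        boolean singleCategory = totalCategories == 1;
        boolean mostlyRare = stats.getRareCategoryCount() > 0.9 * totalCategories;
        boolean tooManyCategories = totalCategories > 0.5 * categorizedDocs;
        boolean noFrequentCategories = stats.getFrequentCategoryCount() == 0;
        boolean mostlyDead = stats.getDeadCategoryCount() > 0.5 * totalCategories;
        return singleCategory || mostlyRare || tooManyCategories || noFrequentCategories || mostlyDead;
    }
}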