[ML] Read v5.4 datacounts (elastic/x-pack-elasticsearch#1565)

* Read v5.4 datacounts
* Rename method legacyDocumentId -> v54DocumentId

Original commit: elastic/x-pack-elasticsearch@7dd297c287
parent 8dc50990a3
commit b284fc3c91
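The heart of the change: documents written by v5.4 used a different _id scheme than 5.5+, so readers and deleters must handle both forms. A minimal standalone sketch of the two DataCounts id schemes follows; the DOCUMENT_SUFFIX value and the main harness are assumptions for illustration, only the "-data-counts" suffix appears verbatim in the diff below.

    // Sketch of the two DataCounts document-id schemes this commit reconciles.
    // NOTE: DOCUMENT_SUFFIX is an assumed value; it is not shown in this diff.
    public class DataCountsIdSketch {

        private static final String DOCUMENT_SUFFIX = "_data_counts"; // assumed current suffix

        // Current (5.5+) format, mirroring DataCounts.documentId
        static String documentId(String jobId) {
            return jobId + DOCUMENT_SUFFIX;
        }

        // Format used by v5.4, mirroring DataCounts.v54DocumentId
        static String v54DocumentId(String jobId) {
            return jobId + "-data-counts";
        }

        public static void main(String[] args) {
            String jobId = "job-stats-v54-bwc-test";
            // A BWC-aware reader asks for both ids and keeps whichever document exists.
            System.out.println(documentId(jobId));    // job-stats-v54-bwc-test_data_counts
            System.out.println(v54DocumentId(jobId)); // job-stats-v54-bwc-test-data-counts
        }
    }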
@@ -67,7 +67,7 @@ public class JobDataDeleter {
                     ElasticsearchMappings.DOC_TYPE, ModelSnapshot.documentId(modelSnapshot)));
             // TODO: remove in 7.0
             bulkRequestBuilder.add(client.prepareDelete(AnomalyDetectorsIndex.jobResultsAliasedName(modelSnapshot.getJobId()),
-                    ModelSnapshot.TYPE.getPreferredName(), ModelSnapshot.legacyDocumentId(modelSnapshot)));
+                    ModelSnapshot.TYPE.getPreferredName(), ModelSnapshot.v54DocumentId(modelSnapshot)));
         }
 
         bulkRequestBuilder.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE);
@@ -239,7 +239,8 @@ public class JobProvider {
     private SearchRequestBuilder createLatestDataCountsSearch(String indexName, String jobId) {
         return client.prepareSearch(indexName)
                 .setSize(1)
-                .setQuery(QueryBuilders.idsQuery().addIds(DataCounts.documentId(jobId)))
+                // look for both old and new formats
+                .setQuery(QueryBuilders.idsQuery().addIds(DataCounts.documentId(jobId), DataCounts.v54DocumentId(jobId)))
                 .addSort(SortBuilders.fieldSort(DataCounts.LATEST_RECORD_TIME.getPreferredName()).order(SortOrder.DESC));
     }
 
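Design note on the hunk above: the search keeps setSize(1) with a descending sort on latest_record_time, so when documents exist under both the old and the new id the one with the later record time is returned; presumably this lets fresh new-format counts shadow a stale v5.4 document.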
@@ -111,7 +111,7 @@ public class JobStorageDeletionTask extends Task {
         bulkRequestBuilder.add(client.prepareDelete(AnomalyDetectorsIndex.jobStateIndexName(), ElasticsearchMappings.DOC_TYPE,
                 Quantiles.documentId(jobId)));
         bulkRequestBuilder.add(client.prepareDelete(AnomalyDetectorsIndex.jobStateIndexName(), Quantiles.TYPE.getPreferredName(),
-                Quantiles.legacyDocumentId(jobId)));
+                Quantiles.v54DocumentId(jobId)));
         bulkRequestBuilder.execute(ActionListener.wrap(
                 response -> finishedHandler.onResponse(true),
                 e -> {
@@ -142,7 +142,7 @@ public class JobStorageDeletionTask extends Task {
                 CategorizerState.documentId(jobId, docNum)));
         // TODO: remove in 7.0
         bulkRequestBuilder.add(client.prepareDelete(AnomalyDetectorsIndex.jobStateIndexName(), CategorizerState.TYPE,
-                CategorizerState.legacyDocumentId(jobId, docNum)));
+                CategorizerState.v54DocumentId(jobId, docNum)));
         bulkRequestBuilder.execute(ActionListener.wrap(
                 response -> {
                     // If we successfully deleted either document try the next one; if not we're done
@@ -25,7 +25,7 @@ public class CategorizerState {
     /**
      * This is how the IDs were formed in v5.4
      */
-    public static final String legacyDocumentId(String jobId, int docNum) {
+    public static final String v54DocumentId(String jobId, int docNum) {
         return jobId + "#" + docNum;
     }
 
@@ -142,6 +142,10 @@ public class DataCounts extends ToXContentToBytes implements Writeable {
         return jobId + DOCUMENT_SUFFIX;
     }
 
+    public static String v54DocumentId(String jobId) {
+        return jobId + "-data-counts";
+    }
+
     private final String jobId;
     private long processedRecordCount;
     private long processedFieldCount;
@@ -277,7 +277,7 @@ public class ModelSnapshot extends ToXContentToBytes implements Writeable {
         List<String> stateDocumentIds = new ArrayList<>(snapshotDocCount);
         // The state documents count suffixes are 1-based
         for (int i = 1; i <= snapshotDocCount; i++) {
-            stateDocumentIds.add(ModelState.legacyDocumentId(jobId, snapshotId, i));
+            stateDocumentIds.add(ModelState.v54DocumentId(jobId, snapshotId, i));
         }
         return stateDocumentIds;
     }
@@ -293,8 +293,8 @@ public class ModelSnapshot extends ToXContentToBytes implements Writeable {
     /**
      * This is how the IDs were formed in v5.4
      */
-    public static String legacyDocumentId(ModelSnapshot snapshot) {
-        return legacyDocumentId(snapshot.getJobId(), snapshot.getSnapshotId());
+    public static String v54DocumentId(ModelSnapshot snapshot) {
+        return v54DocumentId(snapshot.getJobId(), snapshot.getSnapshotId());
     }
 
     public static String documentId(String jobId, String snapshotId) {
@@ -304,7 +304,7 @@ public class ModelSnapshot extends ToXContentToBytes implements Writeable {
     /**
      * This is how the IDs were formed in v5.4
      */
-    public static String legacyDocumentId(String jobId, String snapshotId) {
+    public static String v54DocumentId(String jobId, String snapshotId) {
         return jobId + "-" + snapshotId;
     }
 
@@ -25,7 +25,7 @@ public class ModelState {
     /**
      * This is how the IDs were formed in v5.4
      */
-    public static final String legacyDocumentId(String jobId, String snapshotId, int docNum) {
+    public static final String v54DocumentId(String jobId, String snapshotId, int docNum) {
         return jobId + "-" + snapshotId + "#" + docNum;
     }
 
@@ -51,7 +51,7 @@ public class Quantiles extends ToXContentToBytes implements Writeable {
     /**
      * This is how the IDs were formed in v5.4
      */
-    public static String legacyDocumentId(String jobId) {
+    public static String v54DocumentId(String jobId) {
         return jobId + "-" + TYPE;
     }
 
@@ -150,3 +150,77 @@ setup:
       catch: missing
       xpack.ml.get_job_stats:
         job_id: unknown-job
+
+---
+"Test reading v54 data counts and model size stats":
+
+  - do:
+      xpack.ml.put_job:
+        job_id: job-stats-v54-bwc-test
+        body: >
+          {
+            "analysis_config" : {
+                "bucket_span": "1h",
+                "detectors" :[{"function":"metric","field_name":"responsetime","by_field_name":"airline"}]
+            },
+            "data_description" : {
+            }
+          }
+
+  - do:
+      indices.refresh: {}
+
+  # This is testing that the documents with v5.4 IDs are fetched.
+  # Ideally we would use the v5.4 type but we can't put a mapping
+  # for another type into the single type indices. Type isn't used
+  # in the query so the test is valid
+  - do:
+      index:
+        index: .ml-anomalies-shared
+        type: doc
+        id: job-stats-v54-bwc-test-data-counts
+        body:
+          {
+            job_id : job-stats-v54-bwc-test,
+            processed_record_count : 10,
+            processed_field_count : 0,
+            input_bytes : 0,
+            input_field_count : 0,
+            invalid_date_count : 0,
+            missing_field_count : 0,
+            out_of_order_timestamp_count : 0,
+            empty_bucket_count : 0,
+            sparse_bucket_count : 0,
+            bucket_count : 0,
+            input_record_count : 0,
+            latest_record_timestamp: 2000000000000
+          }
+
+  - do:
+      index:
+        index: .ml-anomalies-shared
+        type: doc
+        id: job-stats-v54-bwc-test-model_size_stats
+        body:
+          {
+            job_id : job-stats-v54-bwc-test,
+            result_type : model_size_stats,
+            model_bytes : 0,
+            total_by_field_count : 101,
+            total_over_field_count : 0,
+            total_partition_field_count : 0,
+            bucket_allocation_failures_count : 0,
+            memory_status : ok,
+            log_time : 1495808248662
+          }
+
+  - do:
+      indices.refresh:
+        index: [.ml-anomalies-shared]
+
+  - do:
+      xpack.ml.get_job_stats:
+        job_id: job-stats-v54-bwc-test
+  - match: { jobs.0.job_id : job-stats-v54-bwc-test }
+  - match: { jobs.0.data_counts.processed_record_count: 10 }
+  - match: { jobs.0.model_size_stats.total_by_field_count: 101 }
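Note on the test: rather than running an actual v5.4 node, it indexes the stats documents directly under the v5.4 ids (job-stats-v54-bwc-test-data-counts and job-stats-v54-bwc-test-model_size_stats) and then asserts that get_job_stats still surfaces the processed record count and model size stats from those documents.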