From 7118ff797667594ba781526cc16d6eda371ead6d Mon Sep 17 00:00:00 2001 From: Dimitris Athanasiou Date: Thu, 17 Sep 2020 23:43:28 +0300 Subject: [PATCH] [7.x][ML] Remove model snapshot legacy doc ids (#62434) (#62569) Removes methods that were no longer used regarding version 5.4 doc ids of ModelState. Also adds clean up of 5.4 model state and quantile docs in the daily maintenance. Backport of #62434 --- .../autodetect/state/ModelSnapshot.java | 14 +------- .../process/autodetect/state/ModelState.java | 33 ++++++++++++------- .../process/autodetect/state/Quantiles.java | 7 +++- .../autodetect/state/ModelStateTests.java | 11 ++++++- .../autodetect/state/QuantilesTests.java | 7 ++++ .../xpack/ml/utils/MlStringsTests.java | 1 + 6 files changed, 47 insertions(+), 26 deletions(-) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelSnapshot.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelSnapshot.java index 13415d2bbd5..c230c9336f6 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelSnapshot.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelSnapshot.java @@ -21,8 +21,8 @@ import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.XContentType; -import org.elasticsearch.xpack.core.ml.job.config.Job; import org.elasticsearch.xpack.core.common.time.TimeUtils; +import org.elasticsearch.xpack.core.ml.job.config.Job; import java.io.IOException; import java.io.InputStream; @@ -287,18 +287,6 @@ public class ModelSnapshot implements ToXContentObject, Writeable { return stateDocumentIds; } - /** - * This is how the IDs were formed in v5.4 - */ - public List legacyStateDocumentIds() { - List stateDocumentIds = new ArrayList<>(snapshotDocCount); - // The state documents count suffices are 1-based - for (int i = 1; i <= snapshotDocCount; i++) { - stateDocumentIds.add(ModelState.v54DocumentId(jobId, snapshotId, i)); - } - return stateDocumentIds; - } - public static String documentIdPrefix(String jobId) { return jobId + "_" + TYPE + "_"; } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelState.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelState.java index fbec7bb6c72..30a8511e186 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelState.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelState.java @@ -6,27 +6,25 @@ package org.elasticsearch.xpack.core.ml.job.process.autodetect.state; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + /** * The model state does not need to be understood on the Java side. * The Java code only needs to know how to form the document IDs so that * it can retrieve and delete the correct documents. */ -public class ModelState { +public final class ModelState { /** * Legacy type, now used only as a discriminant in the document ID */ public static final String TYPE = "model_state"; - public static final String documentId(String jobId, String snapshotId, int docNum) { - return jobId + "_" + TYPE + "_" + snapshotId + "#" + docNum; - } + private static final Pattern V_5_4_DOC_ID_REGEX = Pattern.compile("(.*)-\\d{10}#\\d+"); - /** - * This is how the IDs were formed in v5.4 - */ - public static final String v54DocumentId(String jobId, String snapshotId, int docNum) { - return jobId + "-" + snapshotId + "#" + docNum; + public static String documentId(String jobId, String snapshotId, int docNum) { + return jobId + "_" + TYPE + "_" + snapshotId + "#" + docNum; } /** @@ -34,9 +32,22 @@ public class ModelState { * @param docId the state document id * @return the job id or {@code null} if the id is not valid */ - public static final String extractJobId(String docId) { + public static String extractJobId(String docId) { int suffixIndex = docId.lastIndexOf("_" + TYPE + "_"); - return suffixIndex <= 0 ? null : docId.substring(0, suffixIndex); + return suffixIndex <= 0 ? v54ExtractJobId(docId) : docId.substring(0, suffixIndex); + } + + /** + * On version 5.4 the state doc ids had a different pattern. + * The pattern started with the job id, followed by a hyphen, the snapshot id, + * and ended with hash and an integer. + */ + private static String v54ExtractJobId(String docId) { + Matcher matcher = V_5_4_DOC_ID_REGEX.matcher(docId); + if (matcher.matches()) { + return matcher.group(1); + } + return null; } private ModelState() { diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/Quantiles.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/Quantiles.java index 0b3ddcc7b51..c7d93fa6312 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/Quantiles.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/Quantiles.java @@ -65,8 +65,13 @@ public class Quantiles implements ToXContentObject, Writeable { * @param docId the quantiles document id * @return the job id or {@code null} if the id is not valid */ - public static final String extractJobId(String docId) { + public static String extractJobId(String docId) { int suffixIndex = docId.lastIndexOf("_" + TYPE); + return suffixIndex <= 0 ? v54extractJobId(docId) : docId.substring(0, suffixIndex); + } + + private static String v54extractJobId(String docId) { + int suffixIndex = docId.lastIndexOf("-" + TYPE); return suffixIndex <= 0 ? null : docId.substring(0, suffixIndex); } diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelStateTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelStateTests.java index 0e42a061119..b18b53e5724 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelStateTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelStateTests.java @@ -28,4 +28,13 @@ public class ModelStateTests extends ESTestCase { assertThat(ModelState.extractJobId("_model_state_3141341341"), is(nullValue())); assertThat(ModelState.extractJobId("foo_quantiles"), is(nullValue())); } -} \ No newline at end of file + + public void testExtractJobId_GivenV54DocId() { + assertThat(ModelState.extractJobId("test_job_id-1-0123456789#1"), equalTo("test_job_id-1")); + assertThat(ModelState.extractJobId("test_job_id-2-9876543210#42"), equalTo("test_job_id-2")); + assertThat(ModelState.extractJobId("test_job_id-0123456789-9876543210#42"), equalTo("test_job_id-0123456789")); + + // This one has one less digit for snapshot id + assertThat(ModelState.extractJobId("test_job_id-123456789#42"), is(nullValue())); + } +} diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/QuantilesTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/QuantilesTests.java index 146e3ed5bd5..b0eeeac7b1e 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/QuantilesTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/QuantilesTests.java @@ -28,6 +28,13 @@ public class QuantilesTests extends AbstractSerializingTestCase { assertThat(Quantiles.extractJobId("_quantiles_quantiles"), equalTo("_quantiles")); } + public void testExtractJobId_GivenV54DocId() { + assertThat(Quantiles.extractJobId("foo-quantiles"), equalTo("foo")); + assertThat(Quantiles.extractJobId("bar-quantiles"), equalTo("bar")); + assertThat(Quantiles.extractJobId("foo-bar-quantiles"), equalTo("foo-bar")); + assertThat(Quantiles.extractJobId("-quantiles-quantiles"), equalTo("-quantiles")); + } + public void testExtractJobId_GivenInvalidDocId() { assertThat(Quantiles.extractJobId(""), is(nullValue())); assertThat(Quantiles.extractJobId("foo"), is(nullValue())); diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/utils/MlStringsTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/utils/MlStringsTests.java index f4e638fc919..04396c284d0 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/utils/MlStringsTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/utils/MlStringsTests.java @@ -34,6 +34,7 @@ public class MlStringsTests extends ESTestCase { assertThat(MlStrings.isValidId("b.-_3"), is(true)); assertThat(MlStrings.isValidId("a-b.c_d"), is(true)); + assertThat(MlStrings.isValidId("1_-.a#"), is(false)); assertThat(MlStrings.isValidId("a1_-."), is(false)); assertThat(MlStrings.isValidId("-.a1_"), is(false)); assertThat(MlStrings.isValidId(".a1_-"), is(false));