[7.x][ML] Remove model snapshot legacy doc ids (#62434) (#62569)

Removes methods that were no longer used regarding version 5.4 doc ids of ModelState.

Also adds clean up of 5.4 model state and quantile docs in the daily maintenance.

Backport of #62434
This commit is contained in:
Dimitris Athanasiou 2020-09-17 23:43:28 +03:00 committed by GitHub
parent 0127b71901
commit 7118ff7976
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 47 additions and 26 deletions

View File

@ -21,8 +21,8 @@ import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.xpack.core.ml.job.config.Job;
import org.elasticsearch.xpack.core.common.time.TimeUtils;
import org.elasticsearch.xpack.core.ml.job.config.Job;
import java.io.IOException;
import java.io.InputStream;
@ -287,18 +287,6 @@ public class ModelSnapshot implements ToXContentObject, Writeable {
return stateDocumentIds;
}
/**
* This is how the IDs were formed in v5.4
*/
public List<String> legacyStateDocumentIds() {
List<String> stateDocumentIds = new ArrayList<>(snapshotDocCount);
// The state documents count suffices are 1-based
for (int i = 1; i <= snapshotDocCount; i++) {
stateDocumentIds.add(ModelState.v54DocumentId(jobId, snapshotId, i));
}
return stateDocumentIds;
}
public static String documentIdPrefix(String jobId) {
return jobId + "_" + TYPE + "_";
}

View File

@ -6,27 +6,25 @@
package org.elasticsearch.xpack.core.ml.job.process.autodetect.state;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* The model state does not need to be understood on the Java side.
* The Java code only needs to know how to form the document IDs so that
* it can retrieve and delete the correct documents.
*/
public class ModelState {
public final class ModelState {
/**
* Legacy type, now used only as a discriminant in the document ID
*/
public static final String TYPE = "model_state";
public static final String documentId(String jobId, String snapshotId, int docNum) {
return jobId + "_" + TYPE + "_" + snapshotId + "#" + docNum;
}
private static final Pattern V_5_4_DOC_ID_REGEX = Pattern.compile("(.*)-\\d{10}#\\d+");
/**
* This is how the IDs were formed in v5.4
*/
public static final String v54DocumentId(String jobId, String snapshotId, int docNum) {
return jobId + "-" + snapshotId + "#" + docNum;
public static String documentId(String jobId, String snapshotId, int docNum) {
return jobId + "_" + TYPE + "_" + snapshotId + "#" + docNum;
}
/**
@ -34,9 +32,22 @@ public class ModelState {
* @param docId the state document id
* @return the job id or {@code null} if the id is not valid
*/
public static final String extractJobId(String docId) {
public static String extractJobId(String docId) {
int suffixIndex = docId.lastIndexOf("_" + TYPE + "_");
return suffixIndex <= 0 ? null : docId.substring(0, suffixIndex);
return suffixIndex <= 0 ? v54ExtractJobId(docId) : docId.substring(0, suffixIndex);
}
/**
* On version 5.4 the state doc ids had a different pattern.
* The pattern started with the job id, followed by a hyphen, the snapshot id,
* and ended with hash and an integer.
*/
private static String v54ExtractJobId(String docId) {
Matcher matcher = V_5_4_DOC_ID_REGEX.matcher(docId);
if (matcher.matches()) {
return matcher.group(1);
}
return null;
}
private ModelState() {

View File

@ -65,8 +65,13 @@ public class Quantiles implements ToXContentObject, Writeable {
* @param docId the quantiles document id
* @return the job id or {@code null} if the id is not valid
*/
public static final String extractJobId(String docId) {
public static String extractJobId(String docId) {
int suffixIndex = docId.lastIndexOf("_" + TYPE);
return suffixIndex <= 0 ? v54extractJobId(docId) : docId.substring(0, suffixIndex);
}
private static String v54extractJobId(String docId) {
int suffixIndex = docId.lastIndexOf("-" + TYPE);
return suffixIndex <= 0 ? null : docId.substring(0, suffixIndex);
}

View File

@ -28,4 +28,13 @@ public class ModelStateTests extends ESTestCase {
assertThat(ModelState.extractJobId("_model_state_3141341341"), is(nullValue()));
assertThat(ModelState.extractJobId("foo_quantiles"), is(nullValue()));
}
public void testExtractJobId_GivenV54DocId() {
assertThat(ModelState.extractJobId("test_job_id-1-0123456789#1"), equalTo("test_job_id-1"));
assertThat(ModelState.extractJobId("test_job_id-2-9876543210#42"), equalTo("test_job_id-2"));
assertThat(ModelState.extractJobId("test_job_id-0123456789-9876543210#42"), equalTo("test_job_id-0123456789"));
// This one has one less digit for snapshot id
assertThat(ModelState.extractJobId("test_job_id-123456789#42"), is(nullValue()));
}
}

View File

@ -28,6 +28,13 @@ public class QuantilesTests extends AbstractSerializingTestCase<Quantiles> {
assertThat(Quantiles.extractJobId("_quantiles_quantiles"), equalTo("_quantiles"));
}
public void testExtractJobId_GivenV54DocId() {
assertThat(Quantiles.extractJobId("foo-quantiles"), equalTo("foo"));
assertThat(Quantiles.extractJobId("bar-quantiles"), equalTo("bar"));
assertThat(Quantiles.extractJobId("foo-bar-quantiles"), equalTo("foo-bar"));
assertThat(Quantiles.extractJobId("-quantiles-quantiles"), equalTo("-quantiles"));
}
public void testExtractJobId_GivenInvalidDocId() {
assertThat(Quantiles.extractJobId(""), is(nullValue()));
assertThat(Quantiles.extractJobId("foo"), is(nullValue()));

View File

@ -34,6 +34,7 @@ public class MlStringsTests extends ESTestCase {
assertThat(MlStrings.isValidId("b.-_3"), is(true));
assertThat(MlStrings.isValidId("a-b.c_d"), is(true));
assertThat(MlStrings.isValidId("1_-.a#"), is(false));
assertThat(MlStrings.isValidId("a1_-."), is(false));
assertThat(MlStrings.isValidId("-.a1_"), is(false));
assertThat(MlStrings.isValidId(".a1_-"), is(false));