[ML] Better error when persistent task assignment disabled (#52014)

Changes the misleading error message when attempting to open
a job while the "cluster.persistent_tasks.allocation.enable"
setting is set to "none" to a clearer message that names the
setting.

Closes #51956
This commit is contained in:
David Roberts 2020-02-11 15:22:21 +00:00
parent 87854573e4
commit 473468d763
4 changed files with 54 additions and 3 deletions

View File

@ -43,6 +43,7 @@ public class EnableAssignmentDecider {
// Setting registered under the key "cluster.persistent_tasks.allocation.enable".
// Its default is Allocation.ALL and values are parsed with Allocation::fromString.
public static final Setting<Allocation> CLUSTER_TASKS_ALLOCATION_ENABLE_SETTING =
new Setting<>("cluster.persistent_tasks.allocation.enable", Allocation.ALL.toString(), Allocation::fromString, Dynamic, NodeScope);
// Explanation text attached to the NO decision returned by canAssign().
// Public so that other classes can match on exactly the same text instead of duplicating the literal.
public static final String ALLOCATION_NONE_EXPLANATION = "no persistent task assignments are allowed due to cluster settings";
// Allocation mode that canAssign() compares against Allocation.NONE.
private volatile Allocation enableAssignment;
@ -64,7 +65,7 @@ public class EnableAssignmentDecider {
*/
public AssignmentDecision canAssign() {
// Assignment is refused only when enableAssignment is Allocation.NONE.
if (enableAssignment == Allocation.NONE) {
// NOTE(review): the two consecutive returns below look like the removed and the added line of this
// diff hunk shown together; presumably only the second one (using ALLOCATION_NONE_EXPLANATION)
// exists in the post-change source, since both carry the same explanation text.
return new AssignmentDecision(AssignmentDecision.Type.NO, "no persistent task assignments are allowed due to cluster settings");
return new AssignmentDecision(AssignmentDecision.Type.NO, ALLOCATION_NONE_EXPLANATION);
}
// Every other allocation mode lets the assignment proceed.
return AssignmentDecision.YES;
}

View File

@ -398,6 +398,42 @@ public class MlJobIT extends ESRestTestCase {
"avoid the clash by assigning a dedicated results index"));
}
/**
 * Checks that opening a job while "cluster.persistent_tasks.allocation.enable" is set to
 * "none" is rejected with HTTP 429 and an error message that names the setting.
 *
 * The transient cluster setting is switched back to "all" in a finally block so that a
 * failure in this test does not leave persistent task assignment disabled for other tests.
 */
public void testOpenJobFailsWhenPersistentTaskAssignmentDisabled() throws Exception {

    String jobId = "open-job-with-persistent-task-assignment-disabled";
    createFarequoteJob(jobId);

    // Disable persistent task assignment cluster-wide before trying to open the job
    Request disablePersistentTaskAssignmentRequest = new Request("PUT", "_cluster/settings");
    disablePersistentTaskAssignmentRequest.setJsonEntity("{\n" +
        "  \"transient\": {\n" +
        "    \"cluster.persistent_tasks.allocation.enable\": \"none\"\n" +
        "  }\n" +
        "}");
    Response disablePersistentTaskAssignmentResponse = client().performRequest(disablePersistentTaskAssignmentRequest);
    assertThat(entityAsMap(disablePersistentTaskAssignmentResponse), hasEntry("acknowledged", true));

    try {
        ResponseException exception = expectThrows(
            ResponseException.class,
            () -> client().performRequest(
                new Request("POST", MachineLearning.BASE_PATH + "anomaly_detectors/" + jobId + "/_open")));
        assertThat(exception.getResponse().getStatusLine().getStatusCode(), equalTo(429));
        assertThat(EntityUtils.toString(exception.getResponse().getEntity()),
            containsString("Cannot open jobs because persistent task assignment is disabled by the " +
                "[cluster.persistent_tasks.allocation.enable] setting"));
    } finally {
        // Try to revert the cluster setting change even if the test fails,
        // because otherwise this setting will cause many other tests to fail
        Request enablePersistentTaskAssignmentRequest = new Request("PUT", "_cluster/settings");
        enablePersistentTaskAssignmentRequest.setJsonEntity("{\n" +
            "  \"transient\": {\n" +
            "    \"cluster.persistent_tasks.allocation.enable\": \"all\"\n" +
            "  }\n" +
            "}");
        // Send the ENABLE request here: re-sending the disable request would leave assignment switched off
        Response enablePersistentTaskAssignmentResponse = client().performRequest(enablePersistentTaskAssignmentRequest);
        assertThat(entityAsMap(enablePersistentTaskAssignmentResponse), hasEntry("acknowledged", true));
    }
}
public void testDeleteJob() throws Exception {
String jobId = "delete-job-job";
String indexName = AnomalyDetectorsIndexFields.RESULTS_INDEX_PREFIX + AnomalyDetectorsIndexFields.RESULTS_INDEX_DEFAULT;

View File

@ -36,6 +36,7 @@ import org.elasticsearch.persistent.PersistentTaskState;
import org.elasticsearch.persistent.PersistentTasksCustomMetaData;
import org.elasticsearch.persistent.PersistentTasksExecutor;
import org.elasticsearch.persistent.PersistentTasksService;
import org.elasticsearch.persistent.decider.EnableAssignmentDecider;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.tasks.TaskId;
import org.elasticsearch.threadpool.ThreadPool;
@ -558,7 +559,13 @@ public class TransportOpenJobAction extends TransportMasterNodeAction<OpenJobAct
assignment.isAssigned() == false) {
OpenJobAction.JobParams params = (OpenJobAction.JobParams) persistentTask.getParams();
// Assignment has failed on the master node despite passing our "fast fail" validation
exception = makeNoSuitableNodesException(logger, params.getJobId(), assignment.getExplanation());
if (assignment.equals(AWAITING_UPGRADE)) {
exception = makeCurrentlyBeingUpgradedException(logger, params.getJobId(), assignment.getExplanation());
} else if (assignment.getExplanation().contains("[" + EnableAssignmentDecider.ALLOCATION_NONE_EXPLANATION + "]")) {
exception = makeAssignmentsNotAllowedException(logger, params.getJobId());
} else {
exception = makeNoSuitableNodesException(logger, params.getJobId(), assignment.getExplanation());
}
// The persistent task should be cancelled so that the observed outcome is the
// same as if the "fast fail" validation on the coordinating node had failed
shouldCancel = true;
@ -598,6 +605,13 @@ public class TransportOpenJobAction extends TransportMasterNodeAction<OpenJobAct
RestStatus.TOO_MANY_REQUESTS, detail);
}
/**
 * Builds the exception reported when a job cannot be opened because persistent task
 * assignment is disabled, and logs the same message at WARN level prefixed with the job ID.
 *
 * @param logger logger that receives the warning
 * @param jobId  ID of the job that could not be opened; used only in the log line
 * @return an {@link ElasticsearchStatusException} with status {@link RestStatus#TOO_MANY_REQUESTS}
 *         whose message names the responsible cluster setting
 */
static ElasticsearchException makeAssignmentsNotAllowedException(Logger logger, String jobId) {
    // Take the key from the setting definition so the message cannot drift from the real setting name
    final String settingKey = EnableAssignmentDecider.CLUSTER_TASKS_ALLOCATION_ENABLE_SETTING.getKey();
    final String message =
        "Cannot open jobs because persistent task assignment is disabled by the [" + settingKey + "] setting";
    logger.warn("[{}] {}", jobId, message);
    return new ElasticsearchStatusException(message, RestStatus.TOO_MANY_REQUESTS);
}
static ElasticsearchException makeCurrentlyBeingUpgradedException(Logger logger, String jobId, String explanation) {
String msg = "Cannot open jobs when upgrade mode is enabled";
logger.warn("[{}] {}", jobId, msg);

View File

@ -1425,7 +1425,7 @@
- match: { job_id: "persistent-task-allocation-allowed-test" }
- do:
catch: /no persistent task assignments are allowed due to cluster settings/
catch: /Cannot open jobs because persistent task assignment is disabled by the \[cluster.persistent_tasks.allocation.enable\] setting/
ml.open_job:
job_id: persistent-task-allocation-allowed-test