[ML] Stop using the management thread pool unnecessarily for ML actions (elastic/x-pack-elasticsearch#1213)
The management thread pool only has 5 threads and clogging it up makes monitoring think the cluster is dead.

relates elastic/x-pack-elasticsearch#1210

Original commit: elastic/x-pack-elasticsearch@f4ad7578d9
This commit is contained in:
parent 6b4db0fc36
commit f3f9cb6d74
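The failure mode the commit message describes is ordinary executor starvation: long-running ML work submitted to a small shared pool leaves no free threads for quick housekeeping calls, so the node looks unresponsive. The snippet below is not Elasticsearch code; it is a minimal, self-contained sketch using plain java.util.concurrent (the pool size matches the 5 threads mentioned above, but the task names and durations are invented) of why the slow ML operations in the diff get their own pools.

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

public class SmallPoolStarvationDemo {
    public static void main(String[] args) throws InterruptedException {
        // Stand-in for a small shared pool, e.g. a 5-thread management pool.
        ExecutorService sharedPool = Executors.newFixedThreadPool(5);

        // Five slow "ML-style" tasks occupy every thread for the full sleep.
        for (int i = 0; i < 5; i++) {
            final int task = i;
            sharedPool.execute(() -> {
                try {
                    TimeUnit.SECONDS.sleep(10); // e.g. restoring model state or closing a job
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                }
                System.out.println("slow task " + task + " done");
            });
        }

        // A quick monitoring-style call now sits in the queue for ~10 seconds
        // before it even starts; this is the "clogging" the commit removes.
        sharedPool.execute(() -> System.out.println("quick stats call finally ran"));

        sharedPool.shutdown();
        sharedPool.awaitTermination(1, TimeUnit.MINUTES);
    }
}

Giving the slow work a separate executor, as the diff below does with the ML utility pool, keeps the shared pool free for short calls.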
@@ -149,7 +149,7 @@ public class MachineLearning implements ActionPlugin {
     public static final String BASE_PATH = "/_xpack/ml/";
     public static final String DATAFEED_THREAD_POOL_NAME = NAME + "_datafeed";
     public static final String AUTODETECT_THREAD_POOL_NAME = NAME + "_autodetect";
-    public static final String NORMALIZER_THREAD_POOL_NAME = NAME + "_normalizer";
+    public static final String UTILITY_THREAD_POOL_NAME = NAME + "_utility";
 
     public static final Setting<Boolean> AUTODETECT_PROCESS =
             Setting.boolSetting("xpack.ml.autodetect_process", true, Property.NodeScope);
@@ -296,7 +296,7 @@ public class MachineLearning implements ActionPlugin {
                     executorService) -> new MultiplyingNormalizerProcess(settings, 1.0);
         }
         NormalizerFactory normalizerFactory = new NormalizerFactory(normalizerProcessFactory,
-                threadPool.executor(MachineLearning.NORMALIZER_THREAD_POOL_NAME));
+                threadPool.executor(MachineLearning.UTILITY_THREAD_POOL_NAME));
         AutodetectProcessManager autodetectProcessManager = new AutodetectProcessManager(settings, internalClient, threadPool,
                 jobManager, jobProvider, jobResultsPersister, jobDataCountsPersister, autodetectProcessFactory,
                 normalizerFactory, xContentRegistry, auditor);
@@ -437,15 +437,17 @@ public class MachineLearning implements ActionPlugin {
             return emptyList();
         }
         int maxNumberOfJobs = AutodetectProcessManager.MAX_RUNNING_JOBS_PER_NODE.get(settings);
-        // 4 threads: for cpp logging, result processing, state processing and
+        // 4 threads per job: for cpp logging, result processing, state processing and
         // AutodetectProcessManager worker thread:
         FixedExecutorBuilder autoDetect = new FixedExecutorBuilder(settings, AUTODETECT_THREAD_POOL_NAME,
                 maxNumberOfJobs * 4, 4, "xpack.ml.autodetect_thread_pool");
 
-        // 3 threads: normalization (cpp logging, result handling) and
-        // renormalization (ShortCircuitingRenormalizer):
-        FixedExecutorBuilder renormalizer = new FixedExecutorBuilder(settings, NORMALIZER_THREAD_POOL_NAME,
-                maxNumberOfJobs * 3, 200, "xpack.ml.normalizer_thread_pool");
+        // 4 threads per job: processing logging, result and state of the renormalization process.
+        // Renormalization does't run for the entire lifetime of a job, so additionally autodetect process
+        // based operation (open, close, flush, post data), datafeed based operations (start and stop)
+        // and deleting expired data use this threadpool too and queue up if all threads are busy.
+        FixedExecutorBuilder renormalizer = new FixedExecutorBuilder(settings, UTILITY_THREAD_POOL_NAME,
+                maxNumberOfJobs * 4, 500, "xpack.ml.utility_thread_pool");
 
         // TODO: if datafeed and non datafeed jobs are considered more equal and the datafeed and
         // autodetect process are created at the same time then these two different TPs can merge.
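The hunk above sizes the new utility pool at 4 threads per allowed job with a queue of 500, so autodetect, datafeed and expired-data work queues up rather than being rejected or spilling onto shared pools. As a rough sketch of the registration pattern it relies on — a hypothetical plugin class and pool name, with the per-node job limit hard-coded instead of read from MAX_RUNNING_JOBS_PER_NODE — a plugin contributes the pool from getExecutorBuilders and callers later fetch it with threadPool.executor(name):

import java.util.Arrays;
import java.util.List;

import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.threadpool.ExecutorBuilder;
import org.elasticsearch.threadpool.FixedExecutorBuilder;

// Illustrative plugin, not the real MachineLearning class.
public class MyMlLikePlugin extends Plugin {

    static final String UTILITY_POOL = "my_ml_utility"; // hypothetical pool name

    @Override
    public List<ExecutorBuilder<?>> getExecutorBuilders(Settings settings) {
        int maxJobsPerNode = 10; // assumption; the real code derives this from a setting
        // Fixed pool: maxJobsPerNode * 4 threads, queue of 500, settings prefix
        // "my_plugin.utility_thread_pool" (all names here are illustrative).
        FixedExecutorBuilder utility = new FixedExecutorBuilder(settings, UTILITY_POOL,
                maxJobsPerNode * 4, 500, "my_plugin.utility_thread_pool");
        return Arrays.asList(utility);
    }
}

Any component that later receives the node's ThreadPool can run work on this pool via threadPool.executor(UTILITY_POOL), which is how the actions below pick up MachineLearning.UTILITY_THREAD_POOL_NAME.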
@@ -362,7 +362,7 @@ public class CloseJobAction extends Action<CloseJobAction.Request, CloseJobActio
                 JobTaskStatus taskStatus = new JobTaskStatus(JobState.CLOSING, jobTask.getAllocationId());
                 jobTask.updatePersistentStatus(taskStatus, ActionListener.wrap(task -> {
                     // we need to fork because we are now on a network threadpool and closeJob method may take a while to complete:
-                    threadPool.executor(ThreadPool.Names.MANAGEMENT).execute(new AbstractRunnable() {
+                    threadPool.executor(MachineLearning.UTILITY_THREAD_POOL_NAME).execute(new AbstractRunnable() {
                         @Override
                         public void onFailure(Exception e) {
                             listener.onFailure(e);
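The CloseJobAction hunk keeps the rule stated in its comment — do not block a network (transport) thread with slow work — and only changes which pool receives the forked task. A generic sketch of that fork pattern, with a hypothetical class, pool name and listener type but the real AbstractRunnable and ThreadPool APIs, looks like this:

import org.elasticsearch.action.ActionListener;
import org.elasticsearch.common.util.concurrent.AbstractRunnable;
import org.elasticsearch.threadpool.ThreadPool;

// Illustrative helper, not part of the commit: fork slow work off the calling
// (possibly network) thread and report failures through the listener.
class ForkingExample {

    private final ThreadPool threadPool;

    ForkingExample(ThreadPool threadPool) {
        this.threadPool = threadPool;
    }

    void closeSlowly(String jobId, ActionListener<Boolean> listener) {
        threadPool.executor("my_ml_utility").execute(new AbstractRunnable() { // hypothetical pool name
            @Override
            public void onFailure(Exception e) {
                listener.onFailure(e); // surface errors instead of losing them on the worker thread
            }

            @Override
            protected void doRun() throws Exception {
                // potentially long-running work, e.g. waiting for the job to close
                listener.onResponse(true);
            }
        });
    }
}

The same pattern recurs below in DeleteExpiredDataAction and AutodetectProcessManager, once as a lambda and once as another AbstractRunnable.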
@@ -25,6 +25,7 @@ import org.elasticsearch.common.xcontent.ToXContentObject;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.threadpool.ThreadPool;
 import org.elasticsearch.transport.TransportService;
+import org.elasticsearch.xpack.ml.MachineLearning;
 import org.elasticsearch.xpack.ml.job.retention.ExpiredModelSnapshotsRemover;
 import org.elasticsearch.xpack.ml.job.retention.ExpiredResultsRemover;
 import org.elasticsearch.xpack.ml.notifications.Auditor;
@@ -133,7 +134,7 @@ public class DeleteExpiredDataAction extends Action<DeleteExpiredDataAction.Requ
         @Override
         protected void doExecute(Request request, ActionListener<Response> listener) {
             logger.info("Deleting expired data");
-            threadPool.executor(ThreadPool.Names.MANAGEMENT).execute(() -> deleteExpiredData(listener));
+            threadPool.executor(MachineLearning.UTILITY_THREAD_POOL_NAME).execute(() -> deleteExpiredData(listener));
         }
 
         private void deleteExpiredData(ActionListener<Response> listener) {
@@ -482,7 +482,7 @@ public class OpenJobAction extends Action<OpenJobAction.Request, OpenJobAction.R
 
         public OpenJobPersistentTasksExecutor(Settings settings, ClusterService clusterService,
                                               AutodetectProcessManager autodetectProcessManager) {
-            super(settings, TASK_NAME, ThreadPool.Names.MANAGEMENT);
+            super(settings, TASK_NAME, MachineLearning.UTILITY_THREAD_POOL_NAME);
             this.autodetectProcessManager = autodetectProcessManager;
             this.maxNumberOfOpenJobs = AutodetectProcessManager.MAX_RUNNING_JOBS_PER_NODE.get(settings);
             this.maxConcurrentJobAllocations = MachineLearning.CONCURRENT_JOB_ALLOCATIONS.get(settings);
@@ -47,6 +47,7 @@ import org.elasticsearch.tasks.TaskId;
 import org.elasticsearch.threadpool.ThreadPool;
 import org.elasticsearch.transport.TransportService;
 import org.elasticsearch.xpack.XPackPlugin;
+import org.elasticsearch.xpack.ml.MachineLearning;
 import org.elasticsearch.xpack.ml.MlMetadata;
 import org.elasticsearch.xpack.ml.datafeed.DatafeedConfig;
 import org.elasticsearch.xpack.ml.datafeed.DatafeedJobValidator;
@@ -505,7 +506,7 @@ public class StartDatafeedAction
         private final IndexNameExpressionResolver resolver;
 
         public StartDatafeedPersistentTasksExecutor(Settings settings, DatafeedManager datafeedManager) {
-            super(settings, TASK_NAME, ThreadPool.Names.MANAGEMENT);
+            super(settings, TASK_NAME, MachineLearning.UTILITY_THREAD_POOL_NAME);
             this.datafeedManager = datafeedManager;
             this.resolver = new IndexNameExpressionResolver(settings);
         }
@@ -39,6 +39,7 @@ import org.elasticsearch.discovery.MasterNotDiscoveredException;
 import org.elasticsearch.tasks.Task;
 import org.elasticsearch.threadpool.ThreadPool;
 import org.elasticsearch.transport.TransportService;
+import org.elasticsearch.xpack.ml.MachineLearning;
 import org.elasticsearch.xpack.ml.MlMetadata;
 import org.elasticsearch.xpack.ml.datafeed.DatafeedConfig;
 import org.elasticsearch.xpack.ml.datafeed.DatafeedState;
@@ -261,7 +262,7 @@ public class StopDatafeedAction
                               ActionFilters actionFilters, IndexNameExpressionResolver indexNameExpressionResolver,
                               ClusterService clusterService, PersistentTasksService persistentTasksService) {
             super(settings, StopDatafeedAction.NAME, threadPool, clusterService, transportService, actionFilters,
-                    indexNameExpressionResolver, Request::new, Response::new, ThreadPool.Names.MANAGEMENT);
+                    indexNameExpressionResolver, Request::new, Response::new, MachineLearning.UTILITY_THREAD_POOL_NAME);
             this.persistentTasksService = persistentTasksService;
         }
 
@@ -213,7 +213,7 @@ public class AutodetectProcessManager extends AbstractComponent {
         Job job = jobManager.getJobOrThrowIfUnknown(jobId);
         jobProvider.getAutodetectParams(job, params -> {
             // We need to fork, otherwise we restore model state from a network thread (several GET api calls):
-            threadPool.executor(ThreadPool.Names.MANAGEMENT).execute(new AbstractRunnable() {
+            threadPool.executor(MachineLearning.UTILITY_THREAD_POOL_NAME).execute(new AbstractRunnable() {
                 @Override
                 public void onFailure(Exception e) {
                     handler.accept(e);
@@ -272,7 +272,7 @@ public class AutodetectProcessManager extends AbstractComponent {
                 jobDataCountsPersister);
         ScoresUpdater scoresUpdater = new ScoresUpdater(job, jobProvider, new JobRenormalizedResultsPersister(settings, client),
                 normalizerFactory);
-        ExecutorService renormalizerExecutorService = threadPool.executor(MachineLearning.NORMALIZER_THREAD_POOL_NAME);
+        ExecutorService renormalizerExecutorService = threadPool.executor(MachineLearning.UTILITY_THREAD_POOL_NAME);
         Renormalizer renormalizer = new ShortCircuitingRenormalizer(jobId, scoresUpdater,
                 renormalizerExecutorService, job.getAnalysisConfig().getUsePerPartitionNormalization());
 