This implementation lazily (on the first forecast request) checks for
available disk space and creates a subfolder for storing data outside of
the Lucene indexes, but still within the ES data paths.

Details:
 - tmp storage is managed and does not allow allocation if disk space is
   below a threshold (currently 5GB)
 - tmp storage is supposed to be managed by the native component, but in
   case this fails, cleanup is provided:
    - on job close
    - on process crash
    - after a node crash, on restart
 - available space is re-checked on every forecast call (the native
   component has to check again before writing)

Note: the first path that has enough space is chosen on job open (job
close/reopen triggers a new search); the allocation rule is sketched below.
Hendrik Muhs 2018-05-18 14:04:09 +02:00
parent b5a793b569
commit 6c313a9871
10 changed files with 406 additions and 26 deletions
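For orientation, the allocation rule described above boils down to a first-fit
scan over the node's data paths. A minimal, self-contained Java sketch follows;
the class name, the main harness, and the paths are illustrative only, and the
committed implementation is NativeStorageProvider further down:

    import java.io.IOException;
    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.nio.file.Paths;
    import java.util.Arrays;
    import java.util.List;

    // Sketch of the first-fit allocation rule: pick the first data path whose
    // usable space covers the requested size plus the 5GB safety threshold.
    public class TmpStorageSketch {
        private static final long MIN_FREE_BYTES = 5L * 1024 * 1024 * 1024; // 5GB threshold

        static Path tryGetTmpStorage(List<Path> dataPaths, String jobId, long requestedBytes) throws IOException {
            for (Path dataPath : dataPaths) {
                long usable = Files.getFileStore(dataPath).getUsableSpace();
                if (usable < 0) {
                    usable = Long.MAX_VALUE; // JDK-8162520: negative means "huge" on some filesystems
                }
                if (usable >= requestedBytes + MIN_FREE_BYTES) {
                    Path tmp = dataPath.resolve("ml-local-data").resolve("tmp").resolve(jobId);
                    Files.createDirectories(tmp);
                    return tmp; // first path with enough space wins
                }
            }
            return null; // no path qualifies; the forecast proceeds without tmp storage
        }

        public static void main(String[] args) throws IOException {
            Path tmp = tryGetTmpStorage(Arrays.asList(Paths.get("/tmp")), "job-1", 500L * 1024 * 1024);
            System.out.println(tmp);
        }
    }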

(ml forecasting docs, asciidoc)

@@ -59,10 +59,7 @@ For more information about any of these functions, see <<ml-functions>>.
 * Forecasts run concurrently with real-time {ml} analysis. That is to say, {ml}
 analysis does not stop while forecasts are generated. Forecasts can have an
 impact on {ml} jobs, however, especially in terms of memory usage. For this
-reason, forecasts run only if the model memory status is acceptable and the
-snapshot models for the forecast do not require more than 20 MB. If these memory
-limits are reached, consider splitting the job into multiple smaller jobs and
-creating forecasts for these.
+reason, forecasts run only if the model memory status is acceptable.
 * The job must be open when you create a forecast. Otherwise, an error occurs.
 * If there is insufficient data to generate any meaningful predictions, an
 error occurs. In general, forecasts that are created early in the learning phase

MachineLearning.java

@@ -286,7 +286,8 @@ public class MachineLearning extends Plugin implements ActionPlugin, AnalysisPlu
             DataCountsReporter.ACCEPTABLE_PERCENTAGE_DATE_PARSE_ERRORS_SETTING,
             DataCountsReporter.ACCEPTABLE_PERCENTAGE_OUT_OF_ORDER_ERRORS_SETTING,
             AutodetectProcessManager.MAX_RUNNING_JOBS_PER_NODE,
-            AutodetectProcessManager.MAX_OPEN_JOBS_PER_NODE));
+            AutodetectProcessManager.MAX_OPEN_JOBS_PER_NODE,
+            AutodetectProcessManager.MIN_DISK_SPACE_OFF_HEAP));
     }

     public Settings additionalSettings() {
@@ -403,6 +404,9 @@ public class MachineLearning extends Plugin implements ActionPlugin, AnalysisPlu
         // This object's constructor attaches to the license state, so there's no need to retain another reference to it
         new InvalidLicenseEnforcer(settings, getLicenseState(), threadPool, datafeedManager, autodetectProcessManager);

+        // run node startup tasks
+        autodetectProcessManager.onNodeStartup();
+
         return Arrays.asList(
                 mlLifeCycleService,
                 jobProvider,

TransportForecastJobAction.java

@@ -15,6 +15,8 @@ import org.elasticsearch.cluster.service.ClusterService;
 import org.elasticsearch.common.inject.Inject;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.unit.ByteSizeUnit;
+import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.common.unit.TimeValue;
 import org.elasticsearch.threadpool.ThreadPool;
 import org.elasticsearch.transport.TransportService;
@@ -28,6 +30,7 @@ import org.elasticsearch.xpack.ml.job.process.autodetect.AutodetectProcessManage
 import org.elasticsearch.xpack.ml.job.process.autodetect.params.ForecastParams;

 import java.io.IOException;
+import java.nio.file.Path;
 import java.util.List;
 import java.util.function.Consumer;

@@ -36,6 +39,8 @@ import static org.elasticsearch.xpack.core.ml.action.ForecastJobAction.Request.D
 public class TransportForecastJobAction extends TransportJobTaskAction<ForecastJobAction.Request,
         ForecastJobAction.Response> {

+    private static final ByteSizeValue FORECAST_LOCAL_STORAGE_LIMIT = new ByteSizeValue(500, ByteSizeUnit.MB);
+
     private final JobProvider jobProvider;
     @Inject
     public TransportForecastJobAction(Settings settings, TransportService transportService, ThreadPool threadPool,
@@ -73,6 +78,13 @@ public class TransportForecastJobAction extends TransportJobTaskAction<ForecastJ
             paramsBuilder.expiresIn(request.getExpiresIn());
         }

+        // tmp storage might be null; we do not log here because it
+        // might not be required
+        Path tmpStorage = processManager.tryGetTmpStorage(task, FORECAST_LOCAL_STORAGE_LIMIT);
+        if (tmpStorage != null) {
+            paramsBuilder.tmpStorage(tmpStorage.toString());
+        }
+
         ForecastParams params = paramsBuilder.build();
         processManager.forecastJob(task, params, e -> {
             if (e == null) {

NativeStorageProvider.java (new file)

@@ -0,0 +1,123 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */
package org.elasticsearch.xpack.ml.job.process;

import org.apache.logging.log4j.Logger;
import org.elasticsearch.common.logging.Loggers;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.core.internal.io.IOUtils;
import org.elasticsearch.env.Environment;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

/**
 * Provides storage for native components.
 */
public class NativeStorageProvider {

    private static final Logger LOGGER = Loggers.getLogger(NativeStorageProvider.class);

    private static final String LOCAL_STORAGE_SUBFOLDER = "ml-local-data";
    private static final String LOCAL_STORAGE_TMP_FOLDER = "tmp";

    private final Environment environment;

    // do not allow any usage below this threshold
    private final ByteSizeValue minLocalStorageAvailable;

    public NativeStorageProvider(Environment environment, ByteSizeValue minDiskSpaceOffHeap) {
        this.environment = environment;
        this.minLocalStorageAvailable = minDiskSpaceOffHeap;
    }

    /**
     * Removes any temporary storage leftovers.
     *
     * Removes all temp files and folders that might remain after an
     * unclean node shutdown or broken clients.
     *
     * Do not call while there are running jobs.
     *
     * @throws IOException if cleanup fails
     */
    public void cleanupLocalTmpStorageInCaseOfUncleanShutdown() throws IOException {
        for (Path p : environment.dataFiles()) {
            IOUtils.rm(p.resolve(LOCAL_STORAGE_SUBFOLDER).resolve(LOCAL_STORAGE_TMP_FOLDER));
        }
    }

    /**
     * Tries to find local storage for storing temporary data.
     *
     * @param uniqueIdentifier An identifier to be used as a subfolder
     * @param requestedSize The maximum size required
     * @return Path for temporary storage if available, null otherwise
     */
    public Path tryGetLocalTmpStorage(String uniqueIdentifier, ByteSizeValue requestedSize) {
        for (Path path : environment.dataFiles()) {
            try {
                if (getUsableSpace(path) >= requestedSize.getBytes() + minLocalStorageAvailable.getBytes()) {
                    Path tmpDirectory = path.resolve(LOCAL_STORAGE_SUBFOLDER).resolve(LOCAL_STORAGE_TMP_FOLDER).resolve(uniqueIdentifier);
                    Files.createDirectories(tmpDirectory);
                    return tmpDirectory;
                }
            } catch (IOException e) {
                LOGGER.debug("Failed to obtain information about path [{}]: {}", path, e);
            }
        }
        LOGGER.debug("Failed to find native storage for [{}], returning null", uniqueIdentifier);
        return null;
    }

    public boolean localTmpStorageHasEnoughSpace(Path path, ByteSizeValue requestedSize) {
        Path realPath = path.toAbsolutePath();
        for (Path p : environment.dataFiles()) {
            try {
                if (realPath.startsWith(p.resolve(LOCAL_STORAGE_SUBFOLDER).resolve(LOCAL_STORAGE_TMP_FOLDER))) {
                    return getUsableSpace(p) >= requestedSize.getBytes() + minLocalStorageAvailable.getBytes();
                }
            } catch (IOException e) {
                LOGGER.debug("Failed to obtain information about path [{}]: {}", path, e);
            }
        }
        LOGGER.debug("Not enough space left for path [{}]", path);
        return false;
    }

    /**
     * Delete temporary storage that was previously allocated.
     *
     * @param path Path to temporary storage
     * @throws IOException if the path cannot be cleaned up
     */
    public void cleanupLocalTmpStorage(Path path) throws IOException {
        // do not allow breaking out of the provided tmp storage
        Path realPath = path.toAbsolutePath();
        for (Path p : environment.dataFiles()) {
            if (realPath.startsWith(p.resolve(LOCAL_STORAGE_SUBFOLDER).resolve(LOCAL_STORAGE_TMP_FOLDER))) {
                IOUtils.rm(path);
            }
        }
    }

    long getUsableSpace(Path path) throws IOException {
        long freeSpaceInBytes = Environment.getFileStore(path).getUsableSpace();

        /* See: https://bugs.openjdk.java.net/browse/JDK-8162520 */
        if (freeSpaceInBytes < 0) {
            freeSpaceInBytes = Long.MAX_VALUE;
        }
        return freeSpaceInBytes;
    }
}
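Taken together, this commit uses the class above in three places; a hedged
sketch of that lifecycle follows (the Environment would come from the node,
and the job id and sizes are illustrative):

    import java.io.IOException;
    import java.nio.file.Path;

    import org.elasticsearch.common.unit.ByteSizeUnit;
    import org.elasticsearch.common.unit.ByteSizeValue;
    import org.elasticsearch.env.Environment;
    import org.elasticsearch.xpack.ml.job.process.NativeStorageProvider;

    // Sketch of the lifecycle the commit wires up: startup cleanup,
    // per-forecast allocation, and cleanup on job close or crash.
    class NativeStorageLifecycleSketch {
        static void demo(Environment environment) throws IOException {
            NativeStorageProvider provider =
                    new NativeStorageProvider(environment, new ByteSizeValue(5, ByteSizeUnit.GB));

            // node startup: remove leftovers from an unclean shutdown
            provider.cleanupLocalTmpStorageInCaseOfUncleanShutdown();

            // forecast request: try to allocate tmp space for the job
            Path tmp = provider.tryGetLocalTmpStorage("my-job-id", new ByteSizeValue(500, ByteSizeUnit.MB));
            if (tmp != null) {
                // pass tmp.toString() to the native process via ForecastParams#tmpStorage;
                // the native side must re-check available space before writing

                // job close (or process crash): delete whatever was allocated
                provider.cleanupLocalTmpStorage(tmp);
            }
        }
    }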

AutodetectProcessManager.java

@@ -7,6 +7,7 @@ package org.elasticsearch.xpack.ml.job.process.autodetect;

 import org.elasticsearch.common.xcontent.XContentElasticsearchExtension;
 import org.elasticsearch.core.internal.io.IOUtils;
+import org.apache.logging.log4j.message.ParameterizedMessage;
 import org.elasticsearch.ElasticsearchStatusException;
 import org.elasticsearch.action.ActionListener;
 import org.elasticsearch.client.Client;
@@ -15,11 +16,12 @@ import org.elasticsearch.common.collect.Tuple;
 import org.elasticsearch.common.component.AbstractComponent;
 import org.elasticsearch.common.settings.Setting;
 import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.unit.ByteSizeUnit;
+import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.common.util.concurrent.AbstractRunnable;
 import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException;
 import org.elasticsearch.common.util.concurrent.ThreadContext;
 import org.elasticsearch.common.xcontent.NamedXContentRegistry;
-import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentType;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.analysis.AnalysisRegistry;
@@ -47,6 +49,7 @@ import org.elasticsearch.xpack.ml.job.persistence.JobRenormalizedResultsPersiste
 import org.elasticsearch.xpack.ml.job.persistence.JobResultsPersister;
 import org.elasticsearch.xpack.ml.job.persistence.StateStreamer;
 import org.elasticsearch.xpack.ml.job.process.DataCountsReporter;
+import org.elasticsearch.xpack.ml.job.process.NativeStorageProvider;
 import org.elasticsearch.xpack.ml.job.process.autodetect.output.AutoDetectResultProcessor;
 import org.elasticsearch.xpack.ml.job.process.autodetect.params.DataLoadParams;
 import org.elasticsearch.xpack.ml.job.process.autodetect.params.FlushJobParams;
@@ -59,6 +62,7 @@ import org.elasticsearch.xpack.ml.notifications.Auditor;

 import java.io.IOException;
 import java.io.InputStream;
+import java.nio.file.Path;
 import java.time.Duration;
 import java.time.ZonedDateTime;
 import java.util.Date;
@@ -96,6 +100,10 @@ public class AutodetectProcessManager extends AbstractComponent {
     public static final Setting<Integer> MAX_OPEN_JOBS_PER_NODE =
             Setting.intSetting("xpack.ml.max_open_jobs", MAX_RUNNING_JOBS_PER_NODE, 1, Property.NodeScope);

+    // Undocumented setting for integration test purposes
+    public static final Setting<ByteSizeValue> MIN_DISK_SPACE_OFF_HEAP =
+            Setting.byteSizeSetting("xpack.ml.min_disk_space_off_heap", new ByteSizeValue(5, ByteSizeUnit.GB), Property.NodeScope);
+
     private final Client client;
     private final Environment environment;
     private final ThreadPool threadPool;
@@ -107,8 +115,12 @@ public class AutodetectProcessManager extends AbstractComponent {
     private final JobResultsPersister jobResultsPersister;
     private final JobDataCountsPersister jobDataCountsPersister;

+    private NativeStorageProvider nativeStorageProvider;
+
     private final ConcurrentMap<Long, ProcessContext> processByAllocation = new ConcurrentHashMap<>();

+    // a map that manages the allocation of temporary space to jobs
+    private final ConcurrentMap<String, Path> nativeTmpStorage = new ConcurrentHashMap<>();
     private final int maxAllowedRunningJobs;

     private final NamedXContentRegistry xContentRegistry;
@@ -133,6 +145,15 @@ public class AutodetectProcessManager extends AbstractComponent {
         this.jobResultsPersister = jobResultsPersister;
         this.jobDataCountsPersister = jobDataCountsPersister;
         this.auditor = auditor;
+        this.nativeStorageProvider = new NativeStorageProvider(environment, MIN_DISK_SPACE_OFF_HEAP.get(settings));
+    }
+
+    public void onNodeStartup() {
+        try {
+            nativeStorageProvider.cleanupLocalTmpStorageInCaseOfUncleanShutdown();
+        } catch (Exception e) {
+            logger.warn("Failed to cleanup native storage from previous invocation", e);
+        }
     }

     public synchronized void closeAllJobsOnThisNode(String reason) throws IOException {
@@ -251,6 +272,28 @@ public class AutodetectProcessManager extends AbstractComponent {
         });
     }

+    /**
+     * Request temporary storage to be used for the job
+     *
+     * @param jobTask The job task
+     * @param requestedSize requested size
+     * @return a Path to local storage or null if storage is not available
+     */
+    public Path tryGetTmpStorage(JobTask jobTask, ByteSizeValue requestedSize) {
+        String jobId = jobTask.getJobId();
+        Path path = nativeTmpStorage.get(jobId);
+        if (path == null) {
+            path = nativeStorageProvider.tryGetLocalTmpStorage(jobId, requestedSize);
+            if (path != null) {
+                nativeTmpStorage.put(jobId, path);
+            }
+        } else if (!nativeStorageProvider.localTmpStorageHasEnoughSpace(path, requestedSize)) {
+            // the previous tmp location ran out of disk space; do not allow further usage
+            return null;
+        }
+        return path;
+    }
+
     /**
      * Do a forecast for the running job.
      *
@@ -258,10 +301,11 @@ public class AutodetectProcessManager extends AbstractComponent {
      * @param params Forecast parameters
      */
     public void forecastJob(JobTask jobTask, ForecastParams params, Consumer<Exception> handler) {
-        logger.debug("Forecasting job {}", jobTask.getJobId());
+        String jobId = jobTask.getJobId();
+        logger.debug("Forecasting job {}", jobId);
         AutodetectCommunicator communicator = getOpenAutodetectCommunicator(jobTask);
         if (communicator == null) {
-            String message = String.format(Locale.ROOT, "Cannot forecast because job [%s] is not open", jobTask.getJobId());
+            String message = String.format(Locale.ROOT, "Cannot forecast because job [%s] is not open", jobId);
             logger.debug(message);
             handler.accept(ExceptionsHelper.conflictStatusException(message));
             return;
@@ -271,7 +315,7 @@ public class AutodetectProcessManager extends AbstractComponent {
             if (e == null) {
                 handler.accept(null);
             } else {
-                String msg = String.format(Locale.ROOT, "[%s] exception while forecasting job", jobTask.getJobId());
+                String msg = String.format(Locale.ROOT, "[%s] exception while forecasting job", jobId);
                 logger.error(msg, e);
                 handler.accept(ExceptionsHelper.serverError(msg, e));
             }
@@ -477,6 +521,11 @@ public class AutodetectProcessManager extends AbstractComponent {
                 }
             }
             setJobState(jobTask, JobState.FAILED);
+            try {
+                removeTmpStorage(jobTask.getJobId());
+            } catch (IOException e) {
+                logger.error(new ParameterizedMessage("[{}] Failed to delete temporary files", jobTask.getJobId()), e);
+            }
         };
     }
@@ -535,6 +584,12 @@ public class AutodetectProcessManager extends AbstractComponent {
             // thread that gets into this method blocks until the first thread has finished closing the job
             processContext.unlock();
         }
+        // delete any tmp storage
+        try {
+            removeTmpStorage(jobId);
+        } catch (IOException e) {
+            logger.error(new ParameterizedMessage("[{}] Failed to delete temporary files", jobId), e);
+        }
     }

     int numberOfOpenJobs() {
@@ -613,6 +668,13 @@ public class AutodetectProcessManager extends AbstractComponent {
         return Optional.of(new Tuple<>(communicator.getDataCounts(), communicator.getModelSizeStats()));
     }

+    private void removeTmpStorage(String jobId) throws IOException {
+        Path path = nativeTmpStorage.get(jobId);
+        if (path != null) {
+            nativeStorageProvider.cleanupLocalTmpStorage(path);
+        }
+    }
+
     ExecutorService createAutodetectExecutorService(ExecutorService executorService) {
         AutodetectWorkerExecutorService autoDetectWorkerExecutor = new AutodetectWorkerExecutorService(threadPool.getThreadContext());
         executorService.submit(autoDetectWorkerExecutor::start);
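The map-based caching in tryGetTmpStorage above amounts to per-job memoization
with an invalidation guard: allocate once, reuse on later forecasts, refuse
further use if the chosen disk filled up. A standalone sketch of the same
pattern, with all names and placeholder bodies illustrative:

    import java.nio.file.Path;
    import java.nio.file.Paths;
    import java.util.concurrent.ConcurrentHashMap;
    import java.util.concurrent.ConcurrentMap;

    // Sketch only: mirrors the per-job tmp-path cache, not the committed code.
    class TmpPathCacheSketch {
        private final ConcurrentMap<String, Path> byJob = new ConcurrentHashMap<>();

        Path tryGet(String jobId, long requestedBytes) {
            Path path = byJob.get(jobId);
            if (path == null) {
                path = allocate(jobId, requestedBytes); // may return null if no disk qualifies
                if (path != null) {
                    byJob.put(jobId, path);
                }
            } else if (!hasEnoughSpace(path, requestedBytes)) {
                return null; // cached location ran out of space; do not allow further usage
            }
            return path;
        }

        private Path allocate(String jobId, long requestedBytes) {
            return Paths.get("/data", "ml-local-data", "tmp", jobId); // placeholder
        }

        private boolean hasEnoughSpace(Path path, long requestedBytes) {
            return true; // placeholder; the real check re-reads usable disk space
        }
    }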

ForecastParams.java

@@ -16,12 +16,14 @@ public class ForecastParams {
     private final long createTime;
     private final long duration;
     private final long expiresIn;
+    private final String tmpStorage;

-    private ForecastParams(String forecastId, long createTime, long duration, long expiresIn) {
+    private ForecastParams(String forecastId, long createTime, long duration, long expiresIn, String tmpStorage) {
         this.forecastId = forecastId;
         this.createTime = createTime;
         this.duration = duration;
         this.expiresIn = expiresIn;
+        this.tmpStorage = tmpStorage;
     }

     public String getForecastId() {
@@ -52,9 +54,18 @@ public class ForecastParams {
         return expiresIn;
     }

+    /**
+     * Temporary storage the forecast is allowed to use for persisting models.
+     *
+     * @return path to tmp storage
+     */
+    public String getTmpStorage() {
+        return tmpStorage;
+    }
+
     @Override
     public int hashCode() {
-        return Objects.hash(forecastId, createTime, duration, expiresIn);
+        return Objects.hash(forecastId, createTime, duration, expiresIn, tmpStorage);
     }

     @Override
@@ -69,7 +80,8 @@ public class ForecastParams {
         return Objects.equals(forecastId, other.forecastId)
                 && Objects.equals(createTime, other.createTime)
                 && Objects.equals(duration, other.duration)
-                && Objects.equals(expiresIn, other.expiresIn);
+                && Objects.equals(expiresIn, other.expiresIn)
+                && Objects.equals(tmpStorage, other.tmpStorage);
     }

     public static Builder builder() {
@@ -81,6 +93,7 @@ public class ForecastParams {
         private final long createTimeEpochSecs;
         private long durationSecs;
         private long expiresInSecs;
+        private String tmpStorage;

         private Builder() {
             forecastId = UUIDs.base64UUID();
@@ -101,8 +114,13 @@ public class ForecastParams {
             return this;
         }

+        public Builder tmpStorage(String tmpStorage) {
+            this.tmpStorage = tmpStorage;
+            return this;
+        }
+
         public ForecastParams build() {
-            return new ForecastParams(forecastId, createTimeEpochSecs, durationSecs, expiresInSecs);
+            return new ForecastParams(forecastId, createTimeEpochSecs, durationSecs, expiresInSecs, tmpStorage);
         }
     }
 }

ControlMsgToProcessWriter.java

@@ -164,6 +164,9 @@ public class ControlMsgToProcessWriter {
         if (params.getExpiresIn() != -1) {
             builder.field("expires_in", params.getExpiresIn());
         }
+        if (params.getTmpStorage() != null) {
+            builder.field("tmp_storage", params.getTmpStorage());
+        }
         builder.endObject();

         writeMessage(FORECAST_MESSAGE_CODE + Strings.toString(builder));
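For reference, a sketch of what the forecast control message body looks like
once tmp_storage is set. Only expires_in and tmp_storage are visible in the
hunk above; the other field names, the values, and the path are assumptions:

    import org.elasticsearch.common.Strings;
    import org.elasticsearch.common.xcontent.XContentBuilder;
    import org.elasticsearch.common.xcontent.XContentFactory;

    // Sketch: builds a JSON body similar to the one the writer sends to the
    // native process (the writer also prepends a message-code prefix).
    public class ForecastMessageSketch {
        public static void main(String[] args) throws Exception {
            XContentBuilder builder = XContentFactory.jsonBuilder();
            builder.startObject();
            builder.field("forecast_id", "kD7LZ...");          // assumed field
            builder.field("duration", 3600);                   // assumed field, seconds
            builder.field("expires_in", 1209600);              // shown in the hunk
            builder.field("tmp_storage", "/var/data/ml-local-data/tmp/my-job"); // shown in the hunk
            builder.endObject();
            System.out.println(Strings.toString(builder));
            // {"forecast_id":"kD7LZ...","duration":3600,"expires_in":1209600,
            //  "tmp_storage":"/var/data/ml-local-data/tmp/my-job"}
        }
    }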

NativeStorageProviderTests.java (new file)

@@ -0,0 +1,139 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */
package org.elasticsearch.xpack.ml.job.process;

import org.elasticsearch.common.io.PathUtils;
import org.elasticsearch.common.unit.ByteSizeUnit;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.env.Environment;
import org.elasticsearch.test.ESTestCase;
import org.junit.Assert;

import java.io.BufferedWriter;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.Map;

import static org.mockito.Mockito.any;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.when;

public class NativeStorageProviderTests extends ESTestCase {

    public void testTmpStorage() throws IOException {
        Map<Path, Long> storage = new HashMap<>();
        Path tmpDir = createTempDir();
        storage.put(tmpDir, new ByteSizeValue(6, ByteSizeUnit.GB).getBytes());

        NativeStorageProvider storageProvider = createNativeStorageProvider(storage);

        Assert.assertNotNull(
                storageProvider.tryGetLocalTmpStorage(randomAlphaOfLengthBetween(4, 10), new ByteSizeValue(100, ByteSizeUnit.BYTES)));
        Assert.assertNull(storageProvider.tryGetLocalTmpStorage(randomAlphaOfLengthBetween(4, 10),
                new ByteSizeValue(1024 * 1024 * 1024 + 1, ByteSizeUnit.BYTES)));

        String id = randomAlphaOfLengthBetween(4, 10);
        Path path = storageProvider.tryGetLocalTmpStorage(id, new ByteSizeValue(1, ByteSizeUnit.GB));
        Assert.assertNotNull(path);
        Assert.assertEquals(tmpDir.resolve("ml-local-data").resolve("tmp").resolve(id).toString(), path.toString());
    }

    public void testTmpStorageChooseDisk() throws IOException {
        Map<Path, Long> storage = new HashMap<>();
        Path tmpDir = createTempDir();

        // low disk space
        Path disk1 = tmpDir.resolve(randomAlphaOfLengthBetween(4, 10));
        storage.put(disk1, new ByteSizeValue(1, ByteSizeUnit.GB).getBytes());

        // sufficient disk space
        Path disk2 = tmpDir.resolve(randomAlphaOfLengthBetween(4, 10));
        storage.put(disk2, new ByteSizeValue(20, ByteSizeUnit.GB).getBytes());

        NativeStorageProvider storageProvider = createNativeStorageProvider(storage);

        String id = randomAlphaOfLengthBetween(4, 10);
        Path path = storageProvider.tryGetLocalTmpStorage(id, new ByteSizeValue(1, ByteSizeUnit.GB));
        Assert.assertNotNull(path);

        // should resolve to disk2 as disk1 is low on space
        Assert.assertEquals(disk2.resolve("ml-local-data").resolve("tmp").resolve(id).toString(), path.toString());
    }

    public void testTmpStorageCleanup() throws IOException {
        Map<Path, Long> storage = new HashMap<>();
        Path tmpDir = createTempDir();
        storage.put(tmpDir, new ByteSizeValue(6, ByteSizeUnit.GB).getBytes());

        NativeStorageProvider storageProvider = createNativeStorageProvider(storage);

        String id = randomAlphaOfLengthBetween(4, 10);
        Path path = storageProvider.tryGetLocalTmpStorage(id, new ByteSizeValue(1, ByteSizeUnit.KB));
        Assert.assertTrue(Files.exists(path));

        Path testFile = PathUtils.get(path.toString(), "testFile");
        BufferedWriter writer = Files.newBufferedWriter(testFile, StandardCharsets.UTF_8);
        writer.write("created by NativeStorageProviderTests::testTmpStorageDelete");
        writer.close();
        Assert.assertTrue(Files.exists(testFile));
        Assert.assertTrue(Files.isRegularFile(testFile));

        // the native component should clean up after itself, but assume it has crashed
        storageProvider.cleanupLocalTmpStorage(path);

        Assert.assertFalse(Files.exists(testFile));
        Assert.assertFalse(Files.exists(path));
    }

    public void testTmpStorageCleanupOnStart() throws IOException {
        Map<Path, Long> storage = new HashMap<>();
        Path tmpDir = createTempDir();
        storage.put(tmpDir, new ByteSizeValue(6, ByteSizeUnit.GB).getBytes());

        NativeStorageProvider storageProvider = createNativeStorageProvider(storage);

        String id = randomAlphaOfLengthBetween(4, 10);
        Path path = storageProvider.tryGetLocalTmpStorage(id, new ByteSizeValue(1, ByteSizeUnit.KB));
        Assert.assertTrue(Files.exists(path));

        Path testFile = PathUtils.get(path.toString(), "testFile");
        BufferedWriter writer = Files.newBufferedWriter(testFile, StandardCharsets.UTF_8);
        writer.write("created by NativeStorageProviderTests::testTmpStorageWipe");
        writer.close();
        Assert.assertTrue(Files.exists(testFile));
        Assert.assertTrue(Files.isRegularFile(testFile));

        // create a new storage provider to test the case of a crashed node
        storageProvider = createNativeStorageProvider(storage);
        storageProvider.cleanupLocalTmpStorageInCaseOfUncleanShutdown();
        Assert.assertFalse(Files.exists(testFile));
        Assert.assertFalse(Files.exists(path));
    }

    private NativeStorageProvider createNativeStorageProvider(Map<Path, Long> paths) throws IOException {
        Environment environment = mock(Environment.class);

        when(environment.dataFiles()).thenReturn(paths.keySet().toArray(new Path[paths.size()]));
        NativeStorageProvider storageProvider = spy(new NativeStorageProvider(environment, new ByteSizeValue(5, ByteSizeUnit.GB)));

        // report the stubbed free space for each path instead of hitting the real filesystem
        doAnswer(invocation -> paths.getOrDefault(invocation.getArguments()[0], 0L))
                .when(storageProvider).getUsableSpace(any(Path.class));

        return storageProvider;
    }
}

(ml native integration tests build.gradle)

@@ -61,6 +61,7 @@ integTestCluster {
     setting 'xpack.security.transport.ssl.verification_mode', 'certificate'
     setting 'xpack.security.audit.enabled', 'true'
     setting 'xpack.license.self_generated.type', 'trial'
+    setting 'xpack.ml.min_disk_space_off_heap', '200mb'

     keystoreSetting 'bootstrap.password', 'x-pack-test-password'
     keystoreSetting 'xpack.security.transport.ssl.keystore.secure_password', 'keypass'

ForecastIT.java

@@ -6,6 +6,7 @@
 package org.elasticsearch.xpack.ml.integration;

 import org.elasticsearch.ElasticsearchException;
+import org.elasticsearch.ElasticsearchStatusException;
 import org.elasticsearch.common.unit.TimeValue;
 import org.elasticsearch.xpack.core.ml.job.config.AnalysisConfig;
 import org.elasticsearch.xpack.core.ml.job.config.AnalysisLimits;
@@ -206,8 +207,7 @@ public class ForecastIT extends MlNativeAutodetectIntegTestCase {
         assertThat(e.getMessage(), equalTo("Cannot run forecast: Forecast cannot be executed as model memory status is not OK"));
     }

-    @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/pull/30399")
-    public void testMemoryLimit() throws Exception {
+    public void testOverflowToDisk() throws Exception {
         Detector.Builder detector = new Detector.Builder("mean", "value");
         detector.setByFieldName("clientIP");
@@ -216,7 +216,9 @@ public class ForecastIT extends MlNativeAutodetectIntegTestCase {
         analysisConfig.setBucketSpan(bucketSpan);
         DataDescription.Builder dataDescription = new DataDescription.Builder();
         dataDescription.setTimeFormat("epoch");

-        Job.Builder job = new Job.Builder("forecast-it-test-memory-limit");
+        Job.Builder job = new Job.Builder("forecast-it-test-overflow-to-disk");
+        AnalysisLimits limits = new AnalysisLimits(2048L, null);
+        job.setAnalysisLimits(limits);
         job.setAnalysisConfig(analysisConfig);
         job.setDataDescription(dataDescription);
@@ -224,28 +226,47 @@ public class ForecastIT extends MlNativeAutodetectIntegTestCase {

         putJob(job);
         openJob(job.getId());
         createDataWithLotsOfClientIps(bucketSpan, job);

-        ElasticsearchException e = expectThrows(ElasticsearchException.class,
-                () -> forecast(job.getId(), TimeValue.timeValueMinutes(120), null));
-        assertThat(e.getMessage(),
-                equalTo("Cannot run forecast: Forecast cannot be executed as forecast memory usage is predicted to exceed 20MB"));
+        try {
+            String forecastId = forecast(job.getId(), TimeValue.timeValueHours(1), null);
+            waitForecastToFinish(job.getId(), forecastId);
+        } catch (ElasticsearchStatusException e) {
+            if (e.getMessage().contains("disk space")) {
+                throw new ElasticsearchStatusException(
+                        "Test likely fails due to insufficient disk space on test machine, please free up space.", e.status(), e);
+            }
+            throw e;
+        }
+
+        closeJob(job.getId());
+
+        List<ForecastRequestStats> forecastStats = getForecastStats();
+        assertThat(forecastStats.size(), equalTo(1));
+        ForecastRequestStats forecastRequestStats = forecastStats.get(0);
+        List<Forecast> forecasts = getForecasts(job.getId(), forecastRequestStats);
+
+        assertThat(forecastRequestStats.getRecordCount(), equalTo(8000L));
+        assertThat(forecasts.size(), equalTo(8000));
     }

     private void createDataWithLotsOfClientIps(TimeValue bucketSpan, Job.Builder job) throws IOException {
         long now = Instant.now().getEpochSecond();
-        long timestamp = now - 50 * bucketSpan.seconds();
-        while (timestamp < now) {
-            for (int i = 1; i < 256; i++) {
+        long timestamp = now - 15 * bucketSpan.seconds();
+
+        for (int h = 0; h < 15; h++) {
+            for (int i = 1; i < 101; i++) {
                 List<String> data = new ArrayList<>();
-                for (int j = 1; j < 100; j++) {
+                for (int j = 1; j < 81; j++) {
                     Map<String, Object> record = new HashMap<>();
                     record.put("time", timestamp);
-                    record.put("value", 10.0);
+                    record.put("value", 10.0 + h);
                     record.put("clientIP", String.format(Locale.ROOT, "192.168.%d.%d", i, j));
                     data.add(createJsonRecord(record));
                 }
                 postData(job.getId(), data.stream().collect(Collectors.joining()));
-                timestamp += bucketSpan.seconds();
             }
+            timestamp += bucketSpan.seconds();
         }

         flushJob(job.getId(), false);
     }