From 5faa012fd6c1cd7b04fa6bfa77173ca5f87f29f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Przemys=C5=82aw=20Witek?= Date: Wed, 21 Aug 2019 14:27:36 +0200 Subject: [PATCH] [7.x] Add docs for HLRC for Estimate memory usage API (#45538) (#45783) --- .../MlClientDocumentationIT.java | 69 +++++++++++++++++++ .../ml/estimate-memory-usage.asciidoc | 35 ++++++++++ .../high-level/supported-apis.asciidoc | 2 + ...estimate-memory-usage-dfanalytics.asciidoc | 12 ++-- 4 files changed, 112 insertions(+), 6 deletions(-) create mode 100644 docs/java-rest/high-level/ml/estimate-memory-usage.asciidoc diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java index 434ba1a1b20..a1a003a7b0f 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java @@ -48,6 +48,7 @@ import org.elasticsearch.client.ml.DeleteForecastRequest; import org.elasticsearch.client.ml.DeleteJobRequest; import org.elasticsearch.client.ml.DeleteJobResponse; import org.elasticsearch.client.ml.DeleteModelSnapshotRequest; +import org.elasticsearch.client.ml.EstimateMemoryUsageResponse; import org.elasticsearch.client.ml.EvaluateDataFrameRequest; import org.elasticsearch.client.ml.EvaluateDataFrameResponse; import org.elasticsearch.client.ml.FindFileStructureRequest; @@ -194,11 +195,13 @@ import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; +import static org.hamcrest.Matchers.allOf; import static org.hamcrest.Matchers.closeTo; import static org.hamcrest.Matchers.containsInAnyOrder; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.hasSize; +import 
static org.hamcrest.Matchers.lessThan; import static org.hamcrest.core.Is.is; public class MlClientDocumentationIT extends ESRestHighLevelClientTestCase { @@ -3262,6 +3265,72 @@ public class MlClientDocumentationIT extends ESRestHighLevelClientTestCase { } } + public void testEstimateMemoryUsage() throws Exception { + createIndex("estimate-test-source-index"); + BulkRequest bulkRequest = + new BulkRequest("estimate-test-source-index") + .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); + for (int i = 0; i < 10; ++i) { + bulkRequest.add(new IndexRequest().source(XContentType.JSON, "timestamp", 123456789L, "total", 10L)); + } + RestHighLevelClient client = highLevelClient(); + client.bulk(bulkRequest, RequestOptions.DEFAULT); + { + // tag::estimate-memory-usage-request + DataFrameAnalyticsConfig config = DataFrameAnalyticsConfig.builder() + .setSource(DataFrameAnalyticsSource.builder().setIndex("estimate-test-source-index").build()) + .setAnalysis(OutlierDetection.createDefault()) + .build(); + PutDataFrameAnalyticsRequest request = new PutDataFrameAnalyticsRequest(config); // <1> + // end::estimate-memory-usage-request + + // tag::estimate-memory-usage-execute + EstimateMemoryUsageResponse response = client.machineLearning().estimateMemoryUsage(request, RequestOptions.DEFAULT); + // end::estimate-memory-usage-execute + + // tag::estimate-memory-usage-response + ByteSizeValue expectedMemoryWithoutDisk = response.getExpectedMemoryWithoutDisk(); // <1> + ByteSizeValue expectedMemoryWithDisk = response.getExpectedMemoryWithDisk(); // <2> + // end::estimate-memory-usage-response + + // We are pretty liberal here as this test does not aim at verifying concrete numbers but rather end-to-end user workflow. 
+ ByteSizeValue lowerBound = new ByteSizeValue(1, ByteSizeUnit.KB); + ByteSizeValue upperBound = new ByteSizeValue(1, ByteSizeUnit.GB); + assertThat(expectedMemoryWithoutDisk, allOf(greaterThan(lowerBound), lessThan(upperBound))); + assertThat(expectedMemoryWithDisk, allOf(greaterThan(lowerBound), lessThan(upperBound))); + } + { + DataFrameAnalyticsConfig config = DataFrameAnalyticsConfig.builder() + .setSource(DataFrameAnalyticsSource.builder().setIndex("estimate-test-source-index").build()) + .setAnalysis(OutlierDetection.createDefault()) + .build(); + PutDataFrameAnalyticsRequest request = new PutDataFrameAnalyticsRequest(config); + // tag::estimate-memory-usage-execute-listener + ActionListener<EstimateMemoryUsageResponse> listener = new ActionListener<EstimateMemoryUsageResponse>() { + @Override + public void onResponse(EstimateMemoryUsageResponse response) { + // <1> + } + + @Override + public void onFailure(Exception e) { + // <2> + } + }; + // end::estimate-memory-usage-execute-listener + + // Replace the empty listener by a blocking listener in test + final CountDownLatch latch = new CountDownLatch(1); + listener = new LatchedActionListener<>(listener, latch); + + // tag::estimate-memory-usage-execute-async + client.machineLearning().estimateMemoryUsageAsync(request, RequestOptions.DEFAULT, listener); // <1> + // end::estimate-memory-usage-execute-async + + assertTrue(latch.await(30L, TimeUnit.SECONDS)); + } + } + public void testCreateFilter() throws Exception { RestHighLevelClient client = highLevelClient(); { diff --git a/docs/java-rest/high-level/ml/estimate-memory-usage.asciidoc b/docs/java-rest/high-level/ml/estimate-memory-usage.asciidoc new file mode 100644 index 00000000000..659e7e11755 --- /dev/null +++ b/docs/java-rest/high-level/ml/estimate-memory-usage.asciidoc @@ -0,0 +1,35 @@ +-- +:api: estimate-memory-usage +:request: PutDataFrameAnalyticsRequest +:response: EstimateMemoryUsageResponse +-- +[id="{upid}-{api}"] +=== Estimate memory usage API + +The Estimate memory usage API is used to estimate 
memory usage of {dfanalytics}. +Estimation results can be used when deciding the appropriate value for the `model_memory_limit` setting later on. + +The API accepts an +{request}+ object and returns an +{response}+. + +[id="{upid}-{api}-request"] +==== Estimate memory usage Request + +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests-file}[{api}-request] +-------------------------------------------------- +<1> Constructing a new request containing a {dataframe-analytics-config} for which memory usage estimation should be performed + +include::../execution.asciidoc[] + +[id="{upid}-{api}-response"] +==== Response + +The returned +{response}+ contains the memory usage estimates. + +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests-file}[{api}-response] +-------------------------------------------------- +<1> Estimated memory usage under the assumption that the whole {dfanalytics} should happen in memory (i.e. without overflowing to disk). +<2> Estimated memory usage under the assumption that overflowing to disk is allowed during {dfanalytics}. 
\ No newline at end of file diff --git a/docs/java-rest/high-level/supported-apis.asciidoc b/docs/java-rest/high-level/supported-apis.asciidoc index aa3c9aff3d8..abc23bd3073 100644 --- a/docs/java-rest/high-level/supported-apis.asciidoc +++ b/docs/java-rest/high-level/supported-apis.asciidoc @@ -295,6 +295,7 @@ The Java High Level REST Client supports the following Machine Learning APIs: * <<{upid}-start-data-frame-analytics>> * <<{upid}-stop-data-frame-analytics>> * <<{upid}-evaluate-data-frame>> +* <<{upid}-estimate-memory-usage>> * <<{upid}-put-filter>> * <<{upid}-get-filters>> * <<{upid}-update-filter>> @@ -346,6 +347,7 @@ include::ml/delete-data-frame-analytics.asciidoc[] include::ml/start-data-frame-analytics.asciidoc[] include::ml/stop-data-frame-analytics.asciidoc[] include::ml/evaluate-data-frame.asciidoc[] +include::ml/estimate-memory-usage.asciidoc[] include::ml/put-filter.asciidoc[] include::ml/get-filters.asciidoc[] include::ml/update-filter.asciidoc[] diff --git a/docs/reference/ml/df-analytics/apis/estimate-memory-usage-dfanalytics.asciidoc b/docs/reference/ml/df-analytics/apis/estimate-memory-usage-dfanalytics.asciidoc index 9f1f77052d6..4393a3365fe 100644 --- a/docs/reference/ml/df-analytics/apis/estimate-memory-usage-dfanalytics.asciidoc +++ b/docs/reference/ml/df-analytics/apis/estimate-memory-usage-dfanalytics.asciidoc @@ -42,14 +42,14 @@ Serves as an advice on how to set `model_memory_limit` when creating {dfanalytic [[ml-estimate-memory-usage-dfanalytics-results]] ==== {api-response-body-title} -`expected_memory_usage_with_one_partition`:: +`expected_memory_without_disk`:: (string) Estimated memory usage under the assumption that the whole {dfanalytics} should happen in memory (i.e. without overflowing to disk). -`expected_memory_usage_with_max_partitions`:: +`expected_memory_with_disk`:: (string) Estimated memory usage under the assumption that overflowing to disk is allowed during {dfanalytics}. 
- `expected_memory_usage_with_max_partitions` is usually smaller than `expected_memory_usage_with_one_partition` - as using disk allows to limit the main memory needed to perform {dfanalytics}. + `expected_memory_with_disk` is usually smaller than `expected_memory_without_disk` as using disk makes it possible to + limit the main memory needed to perform {dfanalytics}. [[ml-estimate-memory-usage-dfanalytics-example]] ==== {api-examples-title} @@ -76,8 +76,8 @@ The API returns the following results: [source,js] ---- { - "expected_memory_usage_with_one_partition": "128MB", - "expected_memory_usage_with_max_partitions": "32MB" + "expected_memory_without_disk": "128MB", + "expected_memory_with_disk": "32MB" } ---- // TESTRESPONSE \ No newline at end of file