Throw an exception when the memory usage estimation endpoint encounters an empty data frame. (#49143) (#49164)
This commit is contained in:
parent
b9a571eb43
commit
150db2b544
|
@ -92,6 +92,7 @@ integTest.runner {
|
|||
'ml/data_frame_analytics_crud/Test put classification given num_top_classes is greater than 1k',
|
||||
'ml/data_frame_analytics_crud/Test put classification given training_percent is less than one',
|
||||
'ml/data_frame_analytics_crud/Test put classification given training_percent is greater than hundred',
|
||||
'ml/data_frame_analytics_memory_usage_estimation/Test memory usage estimation for empty data frame',
|
||||
'ml/evaluate_data_frame/Test given missing index',
|
||||
'ml/evaluate_data_frame/Test given index does not exist',
|
||||
'ml/evaluate_data_frame/Test given missing evaluation',
|
||||
|
|
|
@ -238,11 +238,13 @@ public class TransportStartDataFrameAnalyticsAction
|
|||
.collectDataSummaryAsync(ActionListener.wrap(
|
||||
dataSummary -> {
|
||||
if (dataSummary.rows == 0) {
|
||||
finalListener.onFailure(new ElasticsearchStatusException(
|
||||
"Unable to start {} as there are no analyzable data in source indices [{}].",
|
||||
RestStatus.BAD_REQUEST,
|
||||
id,
|
||||
Strings.arrayToCommaDelimitedString(startContext.config.getSource().getIndex())
|
||||
finalListener.onFailure(ExceptionsHelper.badRequestException(
|
||||
"Unable to start {} as no documents in the source indices [{}] contained all the fields "
|
||||
+ "selected for analysis. If you are relying on automatic field selection then there are "
|
||||
+ "currently mapped fields that do not exist in any indexed documents, and you will have "
|
||||
+ "to switch to explicit field selection and include only fields that exist in indexed "
|
||||
+ "documents.",
|
||||
id, Strings.arrayToCommaDelimitedString(startContext.config.getSource().getIndex())
|
||||
));
|
||||
} else {
|
||||
finalListener.onResponse(startContext);
|
||||
|
|
|
@ -9,6 +9,7 @@ import org.apache.logging.log4j.LogManager;
|
|||
import org.apache.logging.log4j.Logger;
|
||||
import org.apache.logging.log4j.message.ParameterizedMessage;
|
||||
import org.elasticsearch.action.ActionListener;
|
||||
import org.elasticsearch.common.Strings;
|
||||
import org.elasticsearch.common.unit.ByteSizeUnit;
|
||||
import org.elasticsearch.common.unit.ByteSizeValue;
|
||||
import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig;
|
||||
|
@ -57,10 +58,16 @@ public class MemoryUsageEstimationProcessManager {
|
|||
DataFrameDataExtractorFactory dataExtractorFactory) {
|
||||
DataFrameDataExtractor dataExtractor = dataExtractorFactory.newExtractor(false);
|
||||
DataFrameDataExtractor.DataSummary dataSummary = dataExtractor.collectDataSummary();
|
||||
Set<String> categoricalFields = dataExtractor.getCategoricalFields(config.getAnalysis());
|
||||
if (dataSummary.rows == 0) {
|
||||
return new MemoryUsageEstimationResult(ByteSizeValue.ZERO, ByteSizeValue.ZERO);
|
||||
throw ExceptionsHelper.badRequestException(
|
||||
"[{}] Unable to estimate memory usage as no documents in the source indices [{}] contained all the fields selected for "
|
||||
+ "analysis. If you are relying on automatic field selection then there are currently mapped fields that do not exist "
|
||||
+ "in any indexed documents, and you will have to switch to explicit field selection and include only fields that "
|
||||
+ "exist in indexed documents.",
|
||||
jobId,
|
||||
Strings.arrayToCommaDelimitedString(config.getSource().getIndex()));
|
||||
}
|
||||
Set<String> categoricalFields = dataExtractor.getCategoricalFields(config.getAnalysis());
|
||||
AnalyticsProcessConfig processConfig =
|
||||
new AnalyticsProcessConfig(
|
||||
jobId,
|
||||
|
|
|
@ -42,8 +42,6 @@ public class MemoryUsageEstimationProcessManagerTests extends ESTestCase {
|
|||
private static final String CONFIG_ID = "dummy";
|
||||
private static final int NUM_ROWS = 100;
|
||||
private static final int NUM_COLS = 4;
|
||||
private static final MemoryUsageEstimationResult PROCESS_RESULT_ZERO =
|
||||
new MemoryUsageEstimationResult(ByteSizeValue.ZERO, ByteSizeValue.ZERO);
|
||||
private static final MemoryUsageEstimationResult PROCESS_RESULT =
|
||||
new MemoryUsageEstimationResult(ByteSizeValue.parseBytesSizeValue("20kB", ""), ByteSizeValue.parseBytesSizeValue("10kB", ""));
|
||||
|
||||
|
@ -85,9 +83,11 @@ public class MemoryUsageEstimationProcessManagerTests extends ESTestCase {
|
|||
|
||||
processManager.runJobAsync(TASK_ID, dataFrameAnalyticsConfig, dataExtractorFactory, listener);
|
||||
|
||||
verify(listener).onResponse(resultCaptor.capture());
|
||||
MemoryUsageEstimationResult result = resultCaptor.getValue();
|
||||
assertThat(result, equalTo(PROCESS_RESULT_ZERO));
|
||||
verify(listener).onFailure(exceptionCaptor.capture());
|
||||
ElasticsearchException exception = (ElasticsearchException) exceptionCaptor.getValue();
|
||||
assertThat(exception.status(), equalTo(RestStatus.BAD_REQUEST));
|
||||
assertThat(exception.getMessage(), containsString(TASK_ID));
|
||||
assertThat(exception.getMessage(), containsString("Unable to estimate memory usage"));
|
||||
|
||||
verifyNoMoreInteractions(process, listener);
|
||||
}
|
||||
|
|
|
@ -14,12 +14,27 @@ setup:
|
|||
---
|
||||
"Test memory usage estimation for empty data frame":
|
||||
- do:
|
||||
catch: /Unable to estimate memory usage as no documents in the source indices \[index-source\] contained all the fields selected for analysis/
|
||||
ml.estimate_memory_usage:
|
||||
body:
|
||||
source: { index: "index-source" }
|
||||
analysis: { outlier_detection: {} }
|
||||
|
||||
- do:
|
||||
index:
|
||||
index: index-source
|
||||
refresh: true
|
||||
body: { x: 1 }
|
||||
- match: { result: "created" }
|
||||
|
||||
# Note that the value for "y" is missing and outlier detection analysis does not support missing values.
|
||||
# Hence, the data frame is still considered empty.
|
||||
- do:
|
||||
catch: /Unable to estimate memory usage as no documents in the source indices \[index-source\] contained all the fields selected for analysis/
|
||||
ml.estimate_memory_usage:
|
||||
body:
|
||||
source: { index: "index-source" }
|
||||
analysis: { outlier_detection: {} }
|
||||
- match: { expected_memory_without_disk: "0" }
|
||||
- match: { expected_memory_with_disk: "0" }
|
||||
|
||||
---
|
||||
"Test memory usage estimation for non-empty data frame":
|
||||
|
|
|
@ -86,7 +86,7 @@
|
|||
}
|
||||
|
||||
- do:
|
||||
catch: /Unable to start empty-with-compatible-fields as there are no analyzable data in source indices \[empty-index-with-compatible-fields\]/
|
||||
catch: /Unable to start empty-with-compatible-fields as no documents in the source indices \[empty-index-with-compatible-fields\] contained all the fields selected for analysis/
|
||||
ml.start_data_frame_analytics:
|
||||
id: "empty-with-compatible-fields"
|
||||
---
|
||||
|
|
Loading…
Reference in New Issue