[ML] Add default categorization analyzer definition to ML info (#49545)

The categorization job wizard in the ML UI will use this
information when showing the effect of the chosen categorization
analyzer on a sample of input.
This commit is contained in:
David Roberts 2019-11-25 13:20:12 +00:00
parent d21df9eba9
commit 62811c2272
5 changed files with 99 additions and 2 deletions

View File

@ -50,6 +50,56 @@ This is a possible response:
{
"defaults" : {
"anomaly_detectors" : {
"categorization_analyzer" : {
"tokenizer" : "ml_classic",
"filter" : [
{
"type" : "stop",
"stopwords" : [
"Monday",
"Tuesday",
"Wednesday",
"Thursday",
"Friday",
"Saturday",
"Sunday",
"Mon",
"Tue",
"Wed",
"Thu",
"Fri",
"Sat",
"Sun",
"January",
"February",
"March",
"April",
"May",
"June",
"July",
"August",
"September",
"October",
"November",
"December",
"Jan",
"Feb",
"Mar",
"Apr",
"May",
"Jun",
"Jul",
"Aug",
"Sep",
"Oct",
"Nov",
"Dec",
"GMT",
"UTC"
]
}
]
},
"model_memory_limit" : "1gb",
"categorization_examples_limit" : 4,
"model_snapshot_retention_days" : 1

View File

@ -6,12 +6,16 @@
package org.elasticsearch.xpack.core.ml.job.config;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
import org.elasticsearch.common.xcontent.ToXContentFragment;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.json.JsonXContent;
import org.elasticsearch.index.analysis.NameOrDefinition;
import org.elasticsearch.rest.action.admin.indices.RestAnalyzeAction;
@ -245,6 +249,18 @@ public class CategorizationAnalyzerConfig implements ToXContentFragment, Writeab
return builder;
}
/**
 * Get the categorization analyzer structured as a generic map.
 * This can be used to provide the same structure that the XContent serialization produces, but as a
 * Java map rather than text.
 * Since it is created by round-tripping through text it is not particularly efficient and is expected to be
 * used only rarely.
 *
 * @param xContentRegistry registry passed to the JSON parser that reads the serialized text back in
 * @return the result of parsing this object's string representation with {@code mapOrdered()}
 * @throws IOException if the parser cannot be created, the text cannot be parsed, or closing the parser fails
 */
public Map<String, Object> asMap(NamedXContentRegistry xContentRegistry) throws IOException {
    String strRep = Strings.toString(this);
    // The parser holds resources and is closeable; try-with-resources guarantees it is released
    // whether parsing succeeds or throws.
    try (XContentParser parser =
             JsonXContent.jsonXContent.createParser(xContentRegistry, LoggingDeprecationHandler.INSTANCE, strRep)) {
        return parser.mapOrdered();
    }
}
@Override
public boolean equals(Object o) {
if (this == o) return true;

View File

@ -12,6 +12,7 @@ import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.unit.ByteSizeUnit;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
import org.elasticsearch.env.Environment;
import org.elasticsearch.tasks.Task;
import org.elasticsearch.transport.TransportService;
@ -20,6 +21,7 @@ import org.elasticsearch.xpack.core.ml.MlMetadata;
import org.elasticsearch.xpack.core.ml.action.MlInfoAction;
import org.elasticsearch.xpack.core.ml.datafeed.DatafeedConfig;
import org.elasticsearch.xpack.core.ml.job.config.AnalysisLimits;
import org.elasticsearch.xpack.core.ml.job.config.CategorizationAnalyzerConfig;
import org.elasticsearch.xpack.core.ml.job.config.Job;
import org.elasticsearch.xpack.ml.process.NativeController;
import org.elasticsearch.xpack.ml.process.NativeControllerHolder;
@ -33,13 +35,15 @@ import java.util.concurrent.TimeoutException;
public class TransportMlInfoAction extends HandledTransportAction<MlInfoAction.Request, MlInfoAction.Response> {
private final ClusterService clusterService;
private final NamedXContentRegistry xContentRegistry;
private final Map<String, Object> nativeCodeInfo;
@Inject
public TransportMlInfoAction(TransportService transportService, ActionFilters actionFilters,
ClusterService clusterService, Environment env) {
public TransportMlInfoAction(TransportService transportService, ActionFilters actionFilters, ClusterService clusterService,
NamedXContentRegistry xContentRegistry, Environment env) {
super(MlInfoAction.NAME, transportService, actionFilters, MlInfoAction.Request::new);
this.clusterService = clusterService;
this.xContentRegistry = xContentRegistry;
try {
NativeController nativeController = NativeControllerHolder.getNativeController(clusterService.getNodeName(), env);
@ -85,6 +89,13 @@ public class TransportMlInfoAction extends HandledTransportAction<MlInfoAction.R
defaults.put(AnalysisLimits.MODEL_MEMORY_LIMIT.getPreferredName(), defaultModelMemoryLimit());
defaults.put(AnalysisLimits.CATEGORIZATION_EXAMPLES_LIMIT.getPreferredName(), AnalysisLimits.DEFAULT_CATEGORIZATION_EXAMPLES_LIMIT);
defaults.put(Job.MODEL_SNAPSHOT_RETENTION_DAYS.getPreferredName(), Job.DEFAULT_MODEL_SNAPSHOT_RETENTION_DAYS);
try {
defaults.put(CategorizationAnalyzerConfig.CATEGORIZATION_ANALYZER.getPreferredName(),
CategorizationAnalyzerConfig.buildDefaultCategorizationAnalyzer(Collections.emptyList())
.asMap(xContentRegistry).get(CategorizationAnalyzerConfig.CATEGORIZATION_ANALYZER.getPreferredName()));
} catch (IOException e) {
logger.error("failed to convert default categorization analyzer to map", e);
}
return defaults;
}

View File

@ -6,14 +6,20 @@
package org.elasticsearch.xpack.ml.job.config;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.test.AbstractSerializingTestCase;
import org.elasticsearch.xpack.core.ml.job.config.CategorizationAnalyzerConfig;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.not;
import static org.hamcrest.Matchers.nullValue;
public class CategorizationAnalyzerConfigTests extends AbstractSerializingTestCase<CategorizationAnalyzerConfig> {
@Override
@ -64,6 +70,17 @@ public class CategorizationAnalyzerConfigTests extends AbstractSerializingTestCa
return builder;
}
/**
 * Converts the default categorization analyzer to a map and checks that the analyzer section is present
 * and that its tokenizer entry is {@code ml_classic}.
 */
public void testAsMap() throws IOException {
    String analyzerKey = CategorizationAnalyzerConfig.CATEGORIZATION_ANALYZER.getPreferredName();
    String tokenizerKey = CategorizationAnalyzerConfig.TOKENIZER.getPreferredName();

    Map<String, Object> root = CategorizationAnalyzerConfig
        .buildDefaultCategorizationAnalyzer(Collections.emptyList())
        .asMap(NamedXContentRegistry.EMPTY);

    // The analyzer definition is nested one level down, under the analyzer field name.
    @SuppressWarnings("unchecked")
    Map<String, Object> analyzerSection = (Map<String, Object>) root.get(analyzerKey);
    assertThat(analyzerSection, not(nullValue()));

    String tokenizerName = (String) analyzerSection.get(tokenizerKey);
    assertThat(tokenizerName, is("ml_classic"));
}
@Override
protected Writeable.Reader<CategorizationAnalyzerConfig> instanceReader() {
return CategorizationAnalyzerConfig::new;

View File

@ -10,6 +10,7 @@ teardown:
"Test ml info":
- do:
ml.info: {}
- match: { defaults.anomaly_detectors.categorization_analyzer.tokenizer: "ml_classic" }
- match: { defaults.anomaly_detectors.model_memory_limit: "1gb" }
- match: { defaults.anomaly_detectors.categorization_examples_limit: 4 }
- match: { defaults.anomaly_detectors.model_snapshot_retention_days: 1 }
@ -25,6 +26,7 @@ teardown:
- do:
ml.info: {}
- match: { defaults.anomaly_detectors.categorization_analyzer.tokenizer: "ml_classic" }
- match: { defaults.anomaly_detectors.model_memory_limit: "512mb" }
- match: { defaults.anomaly_detectors.categorization_examples_limit: 4 }
- match: { defaults.anomaly_detectors.model_snapshot_retention_days: 1 }
@ -40,6 +42,7 @@ teardown:
- do:
ml.info: {}
- match: { defaults.anomaly_detectors.categorization_analyzer.tokenizer: "ml_classic" }
- match: { defaults.anomaly_detectors.model_memory_limit: "1gb" }
- match: { defaults.anomaly_detectors.categorization_examples_limit: 4 }
- match: { defaults.anomaly_detectors.model_snapshot_retention_days: 1 }