[ML] Add default categorization analyzer definition to ML info (#49545)
The categorization job wizard in the ML UI will use this information when showing the effect of the chosen categorization analyzer on a sample of input.
This commit is contained in:
parent
d21df9eba9
commit
62811c2272
|
@ -50,6 +50,56 @@ This is a possible response:
|
||||||
{
|
{
|
||||||
"defaults" : {
|
"defaults" : {
|
||||||
"anomaly_detectors" : {
|
"anomaly_detectors" : {
|
||||||
|
"categorization_analyzer" : {
|
||||||
|
"tokenizer" : "ml_classic",
|
||||||
|
"filter" : [
|
||||||
|
{
|
||||||
|
"type" : "stop",
|
||||||
|
"stopwords" : [
|
||||||
|
"Monday",
|
||||||
|
"Tuesday",
|
||||||
|
"Wednesday",
|
||||||
|
"Thursday",
|
||||||
|
"Friday",
|
||||||
|
"Saturday",
|
||||||
|
"Sunday",
|
||||||
|
"Mon",
|
||||||
|
"Tue",
|
||||||
|
"Wed",
|
||||||
|
"Thu",
|
||||||
|
"Fri",
|
||||||
|
"Sat",
|
||||||
|
"Sun",
|
||||||
|
"January",
|
||||||
|
"February",
|
||||||
|
"March",
|
||||||
|
"April",
|
||||||
|
"May",
|
||||||
|
"June",
|
||||||
|
"July",
|
||||||
|
"August",
|
||||||
|
"September",
|
||||||
|
"October",
|
||||||
|
"November",
|
||||||
|
"December",
|
||||||
|
"Jan",
|
||||||
|
"Feb",
|
||||||
|
"Mar",
|
||||||
|
"Apr",
|
||||||
|
"May",
|
||||||
|
"Jun",
|
||||||
|
"Jul",
|
||||||
|
"Aug",
|
||||||
|
"Sep",
|
||||||
|
"Oct",
|
||||||
|
"Nov",
|
||||||
|
"Dec",
|
||||||
|
"GMT",
|
||||||
|
"UTC"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
"model_memory_limit" : "1gb",
|
"model_memory_limit" : "1gb",
|
||||||
"categorization_examples_limit" : 4,
|
"categorization_examples_limit" : 4,
|
||||||
"model_snapshot_retention_days" : 1
|
"model_snapshot_retention_days" : 1
|
||||||
|
|
|
@ -6,12 +6,16 @@
|
||||||
package org.elasticsearch.xpack.core.ml.job.config;
|
package org.elasticsearch.xpack.core.ml.job.config;
|
||||||
|
|
||||||
import org.elasticsearch.common.ParseField;
|
import org.elasticsearch.common.ParseField;
|
||||||
|
import org.elasticsearch.common.Strings;
|
||||||
import org.elasticsearch.common.io.stream.StreamInput;
|
import org.elasticsearch.common.io.stream.StreamInput;
|
||||||
import org.elasticsearch.common.io.stream.StreamOutput;
|
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||||
import org.elasticsearch.common.io.stream.Writeable;
|
import org.elasticsearch.common.io.stream.Writeable;
|
||||||
|
import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
|
||||||
|
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
|
||||||
import org.elasticsearch.common.xcontent.ToXContentFragment;
|
import org.elasticsearch.common.xcontent.ToXContentFragment;
|
||||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||||
import org.elasticsearch.common.xcontent.XContentParser;
|
import org.elasticsearch.common.xcontent.XContentParser;
|
||||||
|
import org.elasticsearch.common.xcontent.json.JsonXContent;
|
||||||
import org.elasticsearch.index.analysis.NameOrDefinition;
|
import org.elasticsearch.index.analysis.NameOrDefinition;
|
||||||
import org.elasticsearch.rest.action.admin.indices.RestAnalyzeAction;
|
import org.elasticsearch.rest.action.admin.indices.RestAnalyzeAction;
|
||||||
|
|
||||||
|
@ -245,6 +249,18 @@ public class CategorizationAnalyzerConfig implements ToXContentFragment, Writeab
|
||||||
return builder;
|
return builder;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the categorization analyzer structured as a generic map.
|
||||||
|
* This can be used to provide the structure that the XContent serialization but as a Java map rather than text.
|
||||||
|
* Since it is created by round-tripping through text it is not particularly efficient and is expected to be
|
||||||
|
* used only rarely.
|
||||||
|
*/
|
||||||
|
public Map<String, Object> asMap(NamedXContentRegistry xContentRegistry) throws IOException {
|
||||||
|
String strRep = Strings.toString(this);
|
||||||
|
XContentParser parser = JsonXContent.jsonXContent.createParser(xContentRegistry, LoggingDeprecationHandler.INSTANCE, strRep);
|
||||||
|
return parser.mapOrdered();
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean equals(Object o) {
|
public boolean equals(Object o) {
|
||||||
if (this == o) return true;
|
if (this == o) return true;
|
||||||
|
|
|
@ -12,6 +12,7 @@ import org.elasticsearch.cluster.service.ClusterService;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.unit.ByteSizeUnit;
|
import org.elasticsearch.common.unit.ByteSizeUnit;
|
||||||
import org.elasticsearch.common.unit.ByteSizeValue;
|
import org.elasticsearch.common.unit.ByteSizeValue;
|
||||||
|
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
|
||||||
import org.elasticsearch.env.Environment;
|
import org.elasticsearch.env.Environment;
|
||||||
import org.elasticsearch.tasks.Task;
|
import org.elasticsearch.tasks.Task;
|
||||||
import org.elasticsearch.transport.TransportService;
|
import org.elasticsearch.transport.TransportService;
|
||||||
|
@ -20,6 +21,7 @@ import org.elasticsearch.xpack.core.ml.MlMetadata;
|
||||||
import org.elasticsearch.xpack.core.ml.action.MlInfoAction;
|
import org.elasticsearch.xpack.core.ml.action.MlInfoAction;
|
||||||
import org.elasticsearch.xpack.core.ml.datafeed.DatafeedConfig;
|
import org.elasticsearch.xpack.core.ml.datafeed.DatafeedConfig;
|
||||||
import org.elasticsearch.xpack.core.ml.job.config.AnalysisLimits;
|
import org.elasticsearch.xpack.core.ml.job.config.AnalysisLimits;
|
||||||
|
import org.elasticsearch.xpack.core.ml.job.config.CategorizationAnalyzerConfig;
|
||||||
import org.elasticsearch.xpack.core.ml.job.config.Job;
|
import org.elasticsearch.xpack.core.ml.job.config.Job;
|
||||||
import org.elasticsearch.xpack.ml.process.NativeController;
|
import org.elasticsearch.xpack.ml.process.NativeController;
|
||||||
import org.elasticsearch.xpack.ml.process.NativeControllerHolder;
|
import org.elasticsearch.xpack.ml.process.NativeControllerHolder;
|
||||||
|
@ -33,13 +35,15 @@ import java.util.concurrent.TimeoutException;
|
||||||
public class TransportMlInfoAction extends HandledTransportAction<MlInfoAction.Request, MlInfoAction.Response> {
|
public class TransportMlInfoAction extends HandledTransportAction<MlInfoAction.Request, MlInfoAction.Response> {
|
||||||
|
|
||||||
private final ClusterService clusterService;
|
private final ClusterService clusterService;
|
||||||
|
private final NamedXContentRegistry xContentRegistry;
|
||||||
private final Map<String, Object> nativeCodeInfo;
|
private final Map<String, Object> nativeCodeInfo;
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public TransportMlInfoAction(TransportService transportService, ActionFilters actionFilters,
|
public TransportMlInfoAction(TransportService transportService, ActionFilters actionFilters, ClusterService clusterService,
|
||||||
ClusterService clusterService, Environment env) {
|
NamedXContentRegistry xContentRegistry, Environment env) {
|
||||||
super(MlInfoAction.NAME, transportService, actionFilters, MlInfoAction.Request::new);
|
super(MlInfoAction.NAME, transportService, actionFilters, MlInfoAction.Request::new);
|
||||||
this.clusterService = clusterService;
|
this.clusterService = clusterService;
|
||||||
|
this.xContentRegistry = xContentRegistry;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
NativeController nativeController = NativeControllerHolder.getNativeController(clusterService.getNodeName(), env);
|
NativeController nativeController = NativeControllerHolder.getNativeController(clusterService.getNodeName(), env);
|
||||||
|
@ -85,6 +89,13 @@ public class TransportMlInfoAction extends HandledTransportAction<MlInfoAction.R
|
||||||
defaults.put(AnalysisLimits.MODEL_MEMORY_LIMIT.getPreferredName(), defaultModelMemoryLimit());
|
defaults.put(AnalysisLimits.MODEL_MEMORY_LIMIT.getPreferredName(), defaultModelMemoryLimit());
|
||||||
defaults.put(AnalysisLimits.CATEGORIZATION_EXAMPLES_LIMIT.getPreferredName(), AnalysisLimits.DEFAULT_CATEGORIZATION_EXAMPLES_LIMIT);
|
defaults.put(AnalysisLimits.CATEGORIZATION_EXAMPLES_LIMIT.getPreferredName(), AnalysisLimits.DEFAULT_CATEGORIZATION_EXAMPLES_LIMIT);
|
||||||
defaults.put(Job.MODEL_SNAPSHOT_RETENTION_DAYS.getPreferredName(), Job.DEFAULT_MODEL_SNAPSHOT_RETENTION_DAYS);
|
defaults.put(Job.MODEL_SNAPSHOT_RETENTION_DAYS.getPreferredName(), Job.DEFAULT_MODEL_SNAPSHOT_RETENTION_DAYS);
|
||||||
|
try {
|
||||||
|
defaults.put(CategorizationAnalyzerConfig.CATEGORIZATION_ANALYZER.getPreferredName(),
|
||||||
|
CategorizationAnalyzerConfig.buildDefaultCategorizationAnalyzer(Collections.emptyList())
|
||||||
|
.asMap(xContentRegistry).get(CategorizationAnalyzerConfig.CATEGORIZATION_ANALYZER.getPreferredName()));
|
||||||
|
} catch (IOException e) {
|
||||||
|
logger.error("failed to convert default categorization analyzer to map", e);
|
||||||
|
}
|
||||||
return defaults;
|
return defaults;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -6,14 +6,20 @@
|
||||||
package org.elasticsearch.xpack.ml.job.config;
|
package org.elasticsearch.xpack.ml.job.config;
|
||||||
|
|
||||||
import org.elasticsearch.common.io.stream.Writeable;
|
import org.elasticsearch.common.io.stream.Writeable;
|
||||||
|
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
|
||||||
import org.elasticsearch.common.xcontent.XContentParser;
|
import org.elasticsearch.common.xcontent.XContentParser;
|
||||||
import org.elasticsearch.test.AbstractSerializingTestCase;
|
import org.elasticsearch.test.AbstractSerializingTestCase;
|
||||||
import org.elasticsearch.xpack.core.ml.job.config.CategorizationAnalyzerConfig;
|
import org.elasticsearch.xpack.core.ml.job.config.CategorizationAnalyzerConfig;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Collections;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
import static org.hamcrest.Matchers.is;
|
||||||
|
import static org.hamcrest.Matchers.not;
|
||||||
|
import static org.hamcrest.Matchers.nullValue;
|
||||||
|
|
||||||
public class CategorizationAnalyzerConfigTests extends AbstractSerializingTestCase<CategorizationAnalyzerConfig> {
|
public class CategorizationAnalyzerConfigTests extends AbstractSerializingTestCase<CategorizationAnalyzerConfig> {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -64,6 +70,17 @@ public class CategorizationAnalyzerConfigTests extends AbstractSerializingTestCa
|
||||||
return builder;
|
return builder;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testAsMap() throws IOException {
|
||||||
|
Map<String, Object> map = CategorizationAnalyzerConfig.buildDefaultCategorizationAnalyzer(Collections.emptyList())
|
||||||
|
.asMap(NamedXContentRegistry.EMPTY);
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
Map<String, Object> firstLevel =
|
||||||
|
(Map<String, Object>) map.get(CategorizationAnalyzerConfig.CATEGORIZATION_ANALYZER.getPreferredName());
|
||||||
|
assertThat(firstLevel, not(nullValue()));
|
||||||
|
String tokenizer = (String) firstLevel.get(CategorizationAnalyzerConfig.TOKENIZER.getPreferredName());
|
||||||
|
assertThat(tokenizer, is("ml_classic"));
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Writeable.Reader<CategorizationAnalyzerConfig> instanceReader() {
|
protected Writeable.Reader<CategorizationAnalyzerConfig> instanceReader() {
|
||||||
return CategorizationAnalyzerConfig::new;
|
return CategorizationAnalyzerConfig::new;
|
||||||
|
|
|
@ -10,6 +10,7 @@ teardown:
|
||||||
"Test ml info":
|
"Test ml info":
|
||||||
- do:
|
- do:
|
||||||
ml.info: {}
|
ml.info: {}
|
||||||
|
- match: { defaults.anomaly_detectors.categorization_analyzer.tokenizer: "ml_classic" }
|
||||||
- match: { defaults.anomaly_detectors.model_memory_limit: "1gb" }
|
- match: { defaults.anomaly_detectors.model_memory_limit: "1gb" }
|
||||||
- match: { defaults.anomaly_detectors.categorization_examples_limit: 4 }
|
- match: { defaults.anomaly_detectors.categorization_examples_limit: 4 }
|
||||||
- match: { defaults.anomaly_detectors.model_snapshot_retention_days: 1 }
|
- match: { defaults.anomaly_detectors.model_snapshot_retention_days: 1 }
|
||||||
|
@ -25,6 +26,7 @@ teardown:
|
||||||
|
|
||||||
- do:
|
- do:
|
||||||
ml.info: {}
|
ml.info: {}
|
||||||
|
- match: { defaults.anomaly_detectors.categorization_analyzer.tokenizer: "ml_classic" }
|
||||||
- match: { defaults.anomaly_detectors.model_memory_limit: "512mb" }
|
- match: { defaults.anomaly_detectors.model_memory_limit: "512mb" }
|
||||||
- match: { defaults.anomaly_detectors.categorization_examples_limit: 4 }
|
- match: { defaults.anomaly_detectors.categorization_examples_limit: 4 }
|
||||||
- match: { defaults.anomaly_detectors.model_snapshot_retention_days: 1 }
|
- match: { defaults.anomaly_detectors.model_snapshot_retention_days: 1 }
|
||||||
|
@ -40,6 +42,7 @@ teardown:
|
||||||
|
|
||||||
- do:
|
- do:
|
||||||
ml.info: {}
|
ml.info: {}
|
||||||
|
- match: { defaults.anomaly_detectors.categorization_analyzer.tokenizer: "ml_classic" }
|
||||||
- match: { defaults.anomaly_detectors.model_memory_limit: "1gb" }
|
- match: { defaults.anomaly_detectors.model_memory_limit: "1gb" }
|
||||||
- match: { defaults.anomaly_detectors.categorization_examples_limit: 4 }
|
- match: { defaults.anomaly_detectors.categorization_examples_limit: 4 }
|
||||||
- match: { defaults.anomaly_detectors.model_snapshot_retention_days: 1 }
|
- match: { defaults.anomaly_detectors.model_snapshot_retention_days: 1 }
|
||||||
|
|
Loading…
Reference in New Issue