[ML] Add default categorization analyzer definition to ML info (#49545)
The categorization job wizard in the ML UI will use this information when showing the effect of the chosen categorization analyzer on a sample of input.
This commit is contained in:
parent
d21df9eba9
commit
62811c2272
|
@ -50,6 +50,56 @@ This is a possible response:
|
|||
{
|
||||
"defaults" : {
|
||||
"anomaly_detectors" : {
|
||||
"categorization_analyzer" : {
|
||||
"tokenizer" : "ml_classic",
|
||||
"filter" : [
|
||||
{
|
||||
"type" : "stop",
|
||||
"stopwords" : [
|
||||
"Monday",
|
||||
"Tuesday",
|
||||
"Wednesday",
|
||||
"Thursday",
|
||||
"Friday",
|
||||
"Saturday",
|
||||
"Sunday",
|
||||
"Mon",
|
||||
"Tue",
|
||||
"Wed",
|
||||
"Thu",
|
||||
"Fri",
|
||||
"Sat",
|
||||
"Sun",
|
||||
"January",
|
||||
"February",
|
||||
"March",
|
||||
"April",
|
||||
"May",
|
||||
"June",
|
||||
"July",
|
||||
"August",
|
||||
"September",
|
||||
"October",
|
||||
"November",
|
||||
"December",
|
||||
"Jan",
|
||||
"Feb",
|
||||
"Mar",
|
||||
"Apr",
|
||||
"May",
|
||||
"Jun",
|
||||
"Jul",
|
||||
"Aug",
|
||||
"Sep",
|
||||
"Oct",
|
||||
"Nov",
|
||||
"Dec",
|
||||
"GMT",
|
||||
"UTC"
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"model_memory_limit" : "1gb",
|
||||
"categorization_examples_limit" : 4,
|
||||
"model_snapshot_retention_days" : 1
|
||||
|
|
|
@ -6,12 +6,16 @@
|
|||
package org.elasticsearch.xpack.core.ml.job.config;
|
||||
|
||||
import org.elasticsearch.common.ParseField;
|
||||
import org.elasticsearch.common.Strings;
|
||||
import org.elasticsearch.common.io.stream.StreamInput;
|
||||
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||
import org.elasticsearch.common.io.stream.Writeable;
|
||||
import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
|
||||
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
|
||||
import org.elasticsearch.common.xcontent.ToXContentFragment;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.common.xcontent.XContentParser;
|
||||
import org.elasticsearch.common.xcontent.json.JsonXContent;
|
||||
import org.elasticsearch.index.analysis.NameOrDefinition;
|
||||
import org.elasticsearch.rest.action.admin.indices.RestAnalyzeAction;
|
||||
|
||||
|
@ -245,6 +249,18 @@ public class CategorizationAnalyzerConfig implements ToXContentFragment, Writeab
|
|||
return builder;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the categorization analyzer structured as a generic map.
|
||||
* This can be used to provide the structure that the XContent serialization but as a Java map rather than text.
|
||||
* Since it is created by round-tripping through text it is not particularly efficient and is expected to be
|
||||
* used only rarely.
|
||||
*/
|
||||
public Map<String, Object> asMap(NamedXContentRegistry xContentRegistry) throws IOException {
|
||||
String strRep = Strings.toString(this);
|
||||
XContentParser parser = JsonXContent.jsonXContent.createParser(xContentRegistry, LoggingDeprecationHandler.INSTANCE, strRep);
|
||||
return parser.mapOrdered();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
|
|
|
@ -12,6 +12,7 @@ import org.elasticsearch.cluster.service.ClusterService;
|
|||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.unit.ByteSizeUnit;
|
||||
import org.elasticsearch.common.unit.ByteSizeValue;
|
||||
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
|
||||
import org.elasticsearch.env.Environment;
|
||||
import org.elasticsearch.tasks.Task;
|
||||
import org.elasticsearch.transport.TransportService;
|
||||
|
@ -20,6 +21,7 @@ import org.elasticsearch.xpack.core.ml.MlMetadata;
|
|||
import org.elasticsearch.xpack.core.ml.action.MlInfoAction;
|
||||
import org.elasticsearch.xpack.core.ml.datafeed.DatafeedConfig;
|
||||
import org.elasticsearch.xpack.core.ml.job.config.AnalysisLimits;
|
||||
import org.elasticsearch.xpack.core.ml.job.config.CategorizationAnalyzerConfig;
|
||||
import org.elasticsearch.xpack.core.ml.job.config.Job;
|
||||
import org.elasticsearch.xpack.ml.process.NativeController;
|
||||
import org.elasticsearch.xpack.ml.process.NativeControllerHolder;
|
||||
|
@ -33,13 +35,15 @@ import java.util.concurrent.TimeoutException;
|
|||
public class TransportMlInfoAction extends HandledTransportAction<MlInfoAction.Request, MlInfoAction.Response> {
|
||||
|
||||
private final ClusterService clusterService;
|
||||
private final NamedXContentRegistry xContentRegistry;
|
||||
private final Map<String, Object> nativeCodeInfo;
|
||||
|
||||
@Inject
|
||||
public TransportMlInfoAction(TransportService transportService, ActionFilters actionFilters,
|
||||
ClusterService clusterService, Environment env) {
|
||||
public TransportMlInfoAction(TransportService transportService, ActionFilters actionFilters, ClusterService clusterService,
|
||||
NamedXContentRegistry xContentRegistry, Environment env) {
|
||||
super(MlInfoAction.NAME, transportService, actionFilters, MlInfoAction.Request::new);
|
||||
this.clusterService = clusterService;
|
||||
this.xContentRegistry = xContentRegistry;
|
||||
|
||||
try {
|
||||
NativeController nativeController = NativeControllerHolder.getNativeController(clusterService.getNodeName(), env);
|
||||
|
@ -85,6 +89,13 @@ public class TransportMlInfoAction extends HandledTransportAction<MlInfoAction.R
|
|||
defaults.put(AnalysisLimits.MODEL_MEMORY_LIMIT.getPreferredName(), defaultModelMemoryLimit());
|
||||
defaults.put(AnalysisLimits.CATEGORIZATION_EXAMPLES_LIMIT.getPreferredName(), AnalysisLimits.DEFAULT_CATEGORIZATION_EXAMPLES_LIMIT);
|
||||
defaults.put(Job.MODEL_SNAPSHOT_RETENTION_DAYS.getPreferredName(), Job.DEFAULT_MODEL_SNAPSHOT_RETENTION_DAYS);
|
||||
try {
|
||||
defaults.put(CategorizationAnalyzerConfig.CATEGORIZATION_ANALYZER.getPreferredName(),
|
||||
CategorizationAnalyzerConfig.buildDefaultCategorizationAnalyzer(Collections.emptyList())
|
||||
.asMap(xContentRegistry).get(CategorizationAnalyzerConfig.CATEGORIZATION_ANALYZER.getPreferredName()));
|
||||
} catch (IOException e) {
|
||||
logger.error("failed to convert default categorization analyzer to map", e);
|
||||
}
|
||||
return defaults;
|
||||
}
|
||||
|
||||
|
|
|
@ -6,14 +6,20 @@
|
|||
package org.elasticsearch.xpack.ml.job.config;
|
||||
|
||||
import org.elasticsearch.common.io.stream.Writeable;
|
||||
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
|
||||
import org.elasticsearch.common.xcontent.XContentParser;
|
||||
import org.elasticsearch.test.AbstractSerializingTestCase;
|
||||
import org.elasticsearch.xpack.core.ml.job.config.CategorizationAnalyzerConfig;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.hamcrest.Matchers.is;
|
||||
import static org.hamcrest.Matchers.not;
|
||||
import static org.hamcrest.Matchers.nullValue;
|
||||
|
||||
public class CategorizationAnalyzerConfigTests extends AbstractSerializingTestCase<CategorizationAnalyzerConfig> {
|
||||
|
||||
@Override
|
||||
|
@ -64,6 +70,17 @@ public class CategorizationAnalyzerConfigTests extends AbstractSerializingTestCa
|
|||
return builder;
|
||||
}
|
||||
|
||||
public void testAsMap() throws IOException {
|
||||
Map<String, Object> map = CategorizationAnalyzerConfig.buildDefaultCategorizationAnalyzer(Collections.emptyList())
|
||||
.asMap(NamedXContentRegistry.EMPTY);
|
||||
@SuppressWarnings("unchecked")
|
||||
Map<String, Object> firstLevel =
|
||||
(Map<String, Object>) map.get(CategorizationAnalyzerConfig.CATEGORIZATION_ANALYZER.getPreferredName());
|
||||
assertThat(firstLevel, not(nullValue()));
|
||||
String tokenizer = (String) firstLevel.get(CategorizationAnalyzerConfig.TOKENIZER.getPreferredName());
|
||||
assertThat(tokenizer, is("ml_classic"));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Writeable.Reader<CategorizationAnalyzerConfig> instanceReader() {
|
||||
return CategorizationAnalyzerConfig::new;
|
||||
|
|
|
@ -10,6 +10,7 @@ teardown:
|
|||
"Test ml info":
|
||||
- do:
|
||||
ml.info: {}
|
||||
- match: { defaults.anomaly_detectors.categorization_analyzer.tokenizer: "ml_classic" }
|
||||
- match: { defaults.anomaly_detectors.model_memory_limit: "1gb" }
|
||||
- match: { defaults.anomaly_detectors.categorization_examples_limit: 4 }
|
||||
- match: { defaults.anomaly_detectors.model_snapshot_retention_days: 1 }
|
||||
|
@ -25,6 +26,7 @@ teardown:
|
|||
|
||||
- do:
|
||||
ml.info: {}
|
||||
- match: { defaults.anomaly_detectors.categorization_analyzer.tokenizer: "ml_classic" }
|
||||
- match: { defaults.anomaly_detectors.model_memory_limit: "512mb" }
|
||||
- match: { defaults.anomaly_detectors.categorization_examples_limit: 4 }
|
||||
- match: { defaults.anomaly_detectors.model_snapshot_retention_days: 1 }
|
||||
|
@ -40,6 +42,7 @@ teardown:
|
|||
|
||||
- do:
|
||||
ml.info: {}
|
||||
- match: { defaults.anomaly_detectors.categorization_analyzer.tokenizer: "ml_classic" }
|
||||
- match: { defaults.anomaly_detectors.model_memory_limit: "1gb" }
|
||||
- match: { defaults.anomaly_detectors.categorization_examples_limit: 4 }
|
||||
- match: { defaults.anomaly_detectors.model_snapshot_retention_days: 1 }
|
||||
|
|
Loading…
Reference in New Issue