[ML] Add some ML config classes to protocol library (#32502)
This commit adds four ML config classes to the X-Pack protocol library used by the high level REST client. (Other commits will add the remaining config classes, plus results and stats classes.) These classes: - Are immutable - Have little/no validation of field values beyond null checks - Are convertible to and from X-Content, but NOT wire transportable - Have lenient parsers to maximize compatibility across versions - Have the same class names, member names and getter/setter names as the corresponding classes in X-Pack core to ease migration for transport client users - Don't reproduce all the methods that do calculations or transformations that the corresponding classes in X-Pack core have
This commit is contained in:
parent
1ee6393117
commit
bc274b2ff2
|
@ -0,0 +1,139 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elasticsearch under one or more contributor
|
||||||
|
* license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright
|
||||||
|
* ownership. Elasticsearch licenses this file to you under
|
||||||
|
* the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
* not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
package org.elasticsearch.protocol.xpack.ml.job.config;
|
||||||
|
|
||||||
|
import org.elasticsearch.common.Nullable;
|
||||||
|
import org.elasticsearch.common.ParseField;
|
||||||
|
import org.elasticsearch.common.unit.ByteSizeValue;
|
||||||
|
import org.elasticsearch.common.xcontent.ConstructingObjectParser;
|
||||||
|
import org.elasticsearch.common.xcontent.ObjectParser;
|
||||||
|
import org.elasticsearch.common.xcontent.ToXContentObject;
|
||||||
|
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||||
|
import org.elasticsearch.common.xcontent.XContentParser;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Analysis limits for autodetect. In particular,
|
||||||
|
* this is a collection of parameters that allow limiting
|
||||||
|
* the resources used by the job.
|
||||||
|
*/
|
||||||
|
public class AnalysisLimits implements ToXContentObject {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Serialisation field names
|
||||||
|
*/
|
||||||
|
public static final ParseField MODEL_MEMORY_LIMIT = new ParseField("model_memory_limit");
|
||||||
|
public static final ParseField CATEGORIZATION_EXAMPLES_LIMIT = new ParseField("categorization_examples_limit");
|
||||||
|
|
||||||
|
public static final ConstructingObjectParser<AnalysisLimits, Void> PARSER =
|
||||||
|
new ConstructingObjectParser<>("analysis_limits", true, a -> new AnalysisLimits((Long) a[0], (Long) a[1]));
|
||||||
|
|
||||||
|
static {
|
||||||
|
PARSER.declareField(ConstructingObjectParser.optionalConstructorArg(), p -> {
|
||||||
|
if (p.currentToken() == XContentParser.Token.VALUE_STRING) {
|
||||||
|
return ByteSizeValue.parseBytesSizeValue(p.text(), MODEL_MEMORY_LIMIT.getPreferredName()).getMb();
|
||||||
|
} else if (p.currentToken() == XContentParser.Token.VALUE_NUMBER) {
|
||||||
|
return p.longValue();
|
||||||
|
}
|
||||||
|
throw new IllegalArgumentException("Unsupported token [" + p.currentToken() + "]");
|
||||||
|
}, MODEL_MEMORY_LIMIT, ObjectParser.ValueType.VALUE);
|
||||||
|
PARSER.declareLong(ConstructingObjectParser.optionalConstructorArg(), CATEGORIZATION_EXAMPLES_LIMIT);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The model memory limit in MiBs.
|
||||||
|
* It is initialised to <code>null</code>, which implies that the server-side default will be used.
|
||||||
|
*/
|
||||||
|
private final Long modelMemoryLimit;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* It is initialised to <code>null</code>.
|
||||||
|
* A value of <code>null</code> will result in the server-side default being used.
|
||||||
|
*/
|
||||||
|
private final Long categorizationExamplesLimit;
|
||||||
|
|
||||||
|
public AnalysisLimits(Long categorizationExamplesLimit) {
|
||||||
|
this(null, categorizationExamplesLimit);
|
||||||
|
}
|
||||||
|
|
||||||
|
public AnalysisLimits(Long modelMemoryLimit, Long categorizationExamplesLimit) {
|
||||||
|
this.modelMemoryLimit = modelMemoryLimit;
|
||||||
|
this.categorizationExamplesLimit = categorizationExamplesLimit;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Maximum size of the model in MB before the anomaly detector
|
||||||
|
* will drop new samples to prevent the model using any more
|
||||||
|
* memory.
|
||||||
|
*
|
||||||
|
* @return The set memory limit or <code>null</code> if not set
|
||||||
|
*/
|
||||||
|
@Nullable
|
||||||
|
public Long getModelMemoryLimit() {
|
||||||
|
return modelMemoryLimit;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the limit to the number of examples that are stored per category
|
||||||
|
*
|
||||||
|
* @return the limit or <code>null</code> if not set
|
||||||
|
*/
|
||||||
|
@Nullable
|
||||||
|
public Long getCategorizationExamplesLimit() {
|
||||||
|
return categorizationExamplesLimit;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
|
||||||
|
builder.startObject();
|
||||||
|
if (modelMemoryLimit != null) {
|
||||||
|
builder.field(MODEL_MEMORY_LIMIT.getPreferredName(), modelMemoryLimit + "mb");
|
||||||
|
}
|
||||||
|
if (categorizationExamplesLimit != null) {
|
||||||
|
builder.field(CATEGORIZATION_EXAMPLES_LIMIT.getPreferredName(), categorizationExamplesLimit);
|
||||||
|
}
|
||||||
|
builder.endObject();
|
||||||
|
return builder;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Overridden equality test
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object other) {
|
||||||
|
if (this == other) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (other instanceof AnalysisLimits == false) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
AnalysisLimits that = (AnalysisLimits) other;
|
||||||
|
return Objects.equals(this.modelMemoryLimit, that.modelMemoryLimit) &&
|
||||||
|
Objects.equals(this.categorizationExamplesLimit, that.categorizationExamplesLimit);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
return Objects.hash(modelMemoryLimit, categorizationExamplesLimit);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,339 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elasticsearch under one or more contributor
|
||||||
|
* license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright
|
||||||
|
* ownership. Elasticsearch licenses this file to you under
|
||||||
|
* the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
* not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
package org.elasticsearch.protocol.xpack.ml.job.config;
|
||||||
|
|
||||||
|
import org.elasticsearch.common.ParseField;
|
||||||
|
import org.elasticsearch.common.Strings;
|
||||||
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
import org.elasticsearch.common.xcontent.ToXContentFragment;
|
||||||
|
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||||
|
import org.elasticsearch.common.xcontent.XContentFactory;
|
||||||
|
import org.elasticsearch.common.xcontent.XContentParser;
|
||||||
|
import org.elasticsearch.common.xcontent.XContentType;
|
||||||
|
import org.elasticsearch.rest.action.admin.indices.RestAnalyzeAction;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Configuration for the categorization analyzer.
|
||||||
|
*
|
||||||
|
* The syntax is a subset of what can be supplied to the {@linkplain RestAnalyzeAction <code>_analyze</code> endpoint}.
|
||||||
|
* To summarize, the first option is to specify the name of an out-of-the-box analyzer:
|
||||||
|
* <code>
|
||||||
|
* "categorization_analyzer" : "standard"
|
||||||
|
* </code>
|
||||||
|
*
|
||||||
|
* The second option is to specify a custom analyzer by combining the <code>char_filters</code>, <code>tokenizer</code>
|
||||||
|
* and <code>token_filters</code> fields. In turn, each of these can be specified as the name of an out-of-the-box
|
||||||
|
* one or as an object defining a custom one. For example:
|
||||||
|
* <code>
|
||||||
|
* "char_filters" : [
|
||||||
|
* "html_strip",
|
||||||
|
* { "type" : "pattern_replace", "pattern": "SQL: .*" }
|
||||||
|
* ],
|
||||||
|
* "tokenizer" : "thai",
|
||||||
|
* "token_filters" : [
|
||||||
|
* "lowercase",
|
||||||
|
* { "type" : "pattern_replace", "pattern": "^[0-9].*" }
|
||||||
|
* ]
|
||||||
|
* </code>
|
||||||
|
*/
|
||||||
|
public class CategorizationAnalyzerConfig implements ToXContentFragment {

    public static final ParseField CATEGORIZATION_ANALYZER = new ParseField("categorization_analyzer");
    // Reuse the _analyze endpoint's field names so the accepted syntax matches that API
    private static final ParseField TOKENIZER = RestAnalyzeAction.Fields.TOKENIZER;
    private static final ParseField TOKEN_FILTERS = RestAnalyzeAction.Fields.TOKEN_FILTERS;
    private static final ParseField CHAR_FILTERS = RestAnalyzeAction.Fields.CHAR_FILTERS;

    /**
     * This method is only used in the unit tests - in production code this config is always parsed as a fragment.
     * It expects a wrapper object containing a single field whose value is the config fragment.
     */
    static CategorizationAnalyzerConfig buildFromXContentObject(XContentParser parser) throws IOException {

        if (parser.nextToken() != XContentParser.Token.START_OBJECT) {
            throw new IllegalArgumentException("Expected start object but got [" + parser.currentToken() + "]");
        }
        if (parser.nextToken() != XContentParser.Token.FIELD_NAME) {
            throw new IllegalArgumentException("Expected field name but got [" + parser.currentToken() + "]");
        }
        // Advance onto the field's value, parse it, then consume the wrapper's END_OBJECT
        parser.nextToken();
        CategorizationAnalyzerConfig categorizationAnalyzerConfig = buildFromXContentFragment(parser);
        parser.nextToken();
        return categorizationAnalyzerConfig;
    }

    /**
     * Parse a <code>categorization_analyzer</code> configuration. A custom parser is needed due to the
     * complexity of the format, with many elements able to be specified as either the name of a built-in
     * element or an object containing a custom definition.
     *
     * The parser must be positioned on the value: either a string (built-in analyzer name) or the start
     * of an object (custom analyzer). Unrecognised fields inside the object are skipped, keeping the
     * parser lenient across versions.
     */
    static CategorizationAnalyzerConfig buildFromXContentFragment(XContentParser parser) throws IOException {

        CategorizationAnalyzerConfig.Builder builder = new CategorizationAnalyzerConfig.Builder();

        XContentParser.Token token = parser.currentToken();
        if (token == XContentParser.Token.VALUE_STRING) {
            // Simple form: the name of an out-of-the-box analyzer
            builder.setAnalyzer(parser.text());
        } else if (token != XContentParser.Token.START_OBJECT) {
            throw new IllegalArgumentException("[" + CATEGORIZATION_ANALYZER + "] should be analyzer's name or settings [" + token + "]");
        } else {
            // Custom form: an object combining char_filters, tokenizer and token_filters
            String currentFieldName = null;
            while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
                if (token == XContentParser.Token.FIELD_NAME) {
                    currentFieldName = parser.currentName();
                } else if (CHAR_FILTERS.match(currentFieldName, parser.getDeprecationHandler())
                        && token == XContentParser.Token.START_ARRAY) {
                    // Each array element is either a char_filter name or a custom definition object
                    while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
                        if (token == XContentParser.Token.VALUE_STRING) {
                            builder.addCharFilter(parser.text());
                        } else if (token == XContentParser.Token.START_OBJECT) {
                            builder.addCharFilter(parser.map());
                        } else {
                            throw new IllegalArgumentException("[" + currentFieldName + "] in [" + CATEGORIZATION_ANALYZER +
                                    "] array element should contain char_filter's name or settings [" + token + "]");
                        }
                    }
                } else if (TOKENIZER.match(currentFieldName, parser.getDeprecationHandler())) {
                    // The tokenizer is a single element: name or custom definition object
                    if (token == XContentParser.Token.VALUE_STRING) {
                        builder.setTokenizer(parser.text());
                    } else if (token == XContentParser.Token.START_OBJECT) {
                        builder.setTokenizer(parser.map());
                    } else {
                        throw new IllegalArgumentException("[" + currentFieldName + "] in [" + CATEGORIZATION_ANALYZER +
                                "] should be tokenizer's name or settings [" + token + "]");
                    }
                } else if (TOKEN_FILTERS.match(currentFieldName, parser.getDeprecationHandler())
                        && token == XContentParser.Token.START_ARRAY) {
                    // Each array element is either a token_filter name or a custom definition object
                    while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
                        if (token == XContentParser.Token.VALUE_STRING) {
                            builder.addTokenFilter(parser.text());
                        } else if (token == XContentParser.Token.START_OBJECT) {
                            builder.addTokenFilter(parser.map());
                        } else {
                            throw new IllegalArgumentException("[" + currentFieldName + "] in [" + CATEGORIZATION_ANALYZER +
                                    "] array element should contain token_filter's name or settings [" + token + "]");
                        }
                    }
                }
                // NOTE(review): any other field is silently ignored — appears intentional
                // (the commit describes these parsers as lenient) — confirm against X-Pack core.
            }
        }

        return builder.build();
    }

    /**
     * Simple store of either a name of a built-in analyzer element or a custom definition.
     */
    public static final class NameOrDefinition implements ToXContentFragment {

        // Exactly one of these two members is not null
        public final String name;
        public final Settings definition;

        NameOrDefinition(String name) {
            this.name = Objects.requireNonNull(name);
            this.definition = null;
        }

        NameOrDefinition(ParseField field, Map<String, Object> definition) {
            this.name = null;
            Objects.requireNonNull(definition);
            try {
                // Round-trip the map through a JSON builder to convert it into a Settings object
                XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON);
                builder.map(definition);
                this.definition = Settings.builder().loadFromSource(Strings.toString(builder), builder.contentType()).build();
            } catch (IOException e) {
                throw new IllegalArgumentException("Failed to parse [" + definition + "] in [" + field.getPreferredName() + "]", e);
            }
        }

        @Override
        public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
            // A bare name renders as a string value; a custom definition renders as an object
            if (definition == null) {
                builder.value(name);
            } else {
                builder.startObject();
                definition.toXContent(builder, params);
                builder.endObject();
            }
            return builder;
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) return true;
            if (o == null || getClass() != o.getClass()) return false;
            NameOrDefinition that = (NameOrDefinition) o;
            return Objects.equals(name, that.name) &&
                    Objects.equals(definition, that.definition);
        }

        @Override
        public int hashCode() {
            return Objects.hash(name, definition);
        }

        @Override
        public String toString() {
            if (definition == null) {
                return name;
            } else {
                return definition.toDelimitedString(';');
            }
        }
    }

    // Either analyzer is non-null (built-in analyzer name) or the custom parts below are used
    private final String analyzer;
    private final List<NameOrDefinition> charFilters;
    private final NameOrDefinition tokenizer;
    private final List<NameOrDefinition> tokenFilters;

    private CategorizationAnalyzerConfig(String analyzer, List<NameOrDefinition> charFilters, NameOrDefinition tokenizer,
                                         List<NameOrDefinition> tokenFilters) {
        this.analyzer = analyzer;
        // Lists are wrapped unmodifiable to keep the config immutable
        this.charFilters = Collections.unmodifiableList(charFilters);
        this.tokenizer = tokenizer;
        this.tokenFilters = Collections.unmodifiableList(tokenFilters);
    }

    public String getAnalyzer() {
        return analyzer;
    }

    public List<NameOrDefinition> getCharFilters() {
        return charFilters;
    }

    public NameOrDefinition getTokenizer() {
        return tokenizer;
    }

    public List<NameOrDefinition> getTokenFilters() {
        return tokenFilters;
    }

    @Override
    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
        // Written as a fragment: either "categorization_analyzer": "<name>" or an object of parts
        if (analyzer != null) {
            builder.field(CATEGORIZATION_ANALYZER.getPreferredName(), analyzer);
        } else {
            builder.startObject(CATEGORIZATION_ANALYZER.getPreferredName());
            if (charFilters.isEmpty() == false) {
                builder.startArray(CHAR_FILTERS.getPreferredName());
                for (NameOrDefinition charFilter : charFilters) {
                    charFilter.toXContent(builder, params);
                }
                builder.endArray();
            }
            if (tokenizer != null) {
                builder.field(TOKENIZER.getPreferredName(), tokenizer);
            }
            if (tokenFilters.isEmpty() == false) {
                builder.startArray(TOKEN_FILTERS.getPreferredName());
                for (NameOrDefinition tokenFilter : tokenFilters) {
                    tokenFilter.toXContent(builder, params);
                }
                builder.endArray();
            }
            builder.endObject();
        }
        return builder;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;
        CategorizationAnalyzerConfig that = (CategorizationAnalyzerConfig) o;
        return Objects.equals(analyzer, that.analyzer) &&
                Objects.equals(charFilters, that.charFilters) &&
                Objects.equals(tokenizer, that.tokenizer) &&
                Objects.equals(tokenFilters, that.tokenFilters);
    }

    @Override
    public int hashCode() {
        return Objects.hash(analyzer, charFilters, tokenizer, tokenFilters);
    }

    /**
     * Mutable builder for {@link CategorizationAnalyzerConfig}. No validation is performed;
     * setting both an analyzer name and custom parts is not prevented here.
     */
    public static class Builder {

        private String analyzer;
        private List<NameOrDefinition> charFilters = new ArrayList<>();
        private NameOrDefinition tokenizer;
        private List<NameOrDefinition> tokenFilters = new ArrayList<>();

        public Builder() {
        }

        // Copy constructor; the list contents (immutable NameOrDefinition) are shared, the lists are copied
        public Builder(CategorizationAnalyzerConfig categorizationAnalyzerConfig) {
            this.analyzer = categorizationAnalyzerConfig.analyzer;
            this.charFilters = new ArrayList<>(categorizationAnalyzerConfig.charFilters);
            this.tokenizer = categorizationAnalyzerConfig.tokenizer;
            this.tokenFilters = new ArrayList<>(categorizationAnalyzerConfig.tokenFilters);
        }

        public Builder setAnalyzer(String analyzer) {
            this.analyzer = analyzer;
            return this;
        }

        public Builder addCharFilter(String charFilter) {
            this.charFilters.add(new NameOrDefinition(charFilter));
            return this;
        }

        public Builder addCharFilter(Map<String, Object> charFilter) {
            this.charFilters.add(new NameOrDefinition(CHAR_FILTERS, charFilter));
            return this;
        }

        public Builder setTokenizer(String tokenizer) {
            this.tokenizer = new NameOrDefinition(tokenizer);
            return this;
        }

        public Builder setTokenizer(Map<String, Object> tokenizer) {
            this.tokenizer = new NameOrDefinition(TOKENIZER, tokenizer);
            return this;
        }

        public Builder addTokenFilter(String tokenFilter) {
            this.tokenFilters.add(new NameOrDefinition(tokenFilter));
            return this;
        }

        public Builder addTokenFilter(Map<String, Object> tokenFilter) {
            this.tokenFilters.add(new NameOrDefinition(TOKEN_FILTERS, tokenFilter));
            return this;
        }

        /**
         * Create a config
         */
        public CategorizationAnalyzerConfig build() {
            return new CategorizationAnalyzerConfig(analyzer, charFilters, tokenizer, tokenFilters);
        }
    }
}
|
|
@ -0,0 +1,282 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elasticsearch under one or more contributor
|
||||||
|
* license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright
|
||||||
|
* ownership. Elasticsearch licenses this file to you under
|
||||||
|
* the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
* not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
package org.elasticsearch.protocol.xpack.ml.job.config;
|
||||||
|
|
||||||
|
import org.elasticsearch.common.ParseField;
|
||||||
|
import org.elasticsearch.common.xcontent.ObjectParser;
|
||||||
|
import org.elasticsearch.common.xcontent.ObjectParser.ValueType;
|
||||||
|
import org.elasticsearch.common.xcontent.ToXContentObject;
|
||||||
|
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||||
|
import org.elasticsearch.common.xcontent.XContentParser;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Locale;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Describes the format of the data used in the job and how it should
|
||||||
|
* be interpreted by the ML job.
|
||||||
|
* <p>
|
||||||
|
* {@link #getTimeField()} is the name of the field containing the timestamp and
|
||||||
|
* {@link #getTimeFormat()} is the format code for the date string in as described by
|
||||||
|
* {@link java.time.format.DateTimeFormatter}.
|
||||||
|
*/
|
||||||
|
public class DataDescription implements ToXContentObject {
|
||||||
|
/**
|
||||||
|
* Enum of the acceptable data formats.
|
||||||
|
*/
|
||||||
|
public enum DataFormat {
|
||||||
|
XCONTENT,
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This is deprecated
|
||||||
|
*/
|
||||||
|
DELIMITED;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Case-insensitive from string method.
|
||||||
|
* Works with either XCONTENT, XContent, etc.
|
||||||
|
*
|
||||||
|
* @param value String representation
|
||||||
|
* @return The data format
|
||||||
|
*/
|
||||||
|
public static DataFormat forString(String value) {
|
||||||
|
return DataFormat.valueOf(value.toUpperCase(Locale.ROOT));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return name().toLowerCase(Locale.ROOT);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static final ParseField DATA_DESCRIPTION_FIELD = new ParseField("data_description");
|
||||||
|
private static final ParseField FORMAT_FIELD = new ParseField("format");
|
||||||
|
private static final ParseField TIME_FIELD_NAME_FIELD = new ParseField("time_field");
|
||||||
|
private static final ParseField TIME_FORMAT_FIELD = new ParseField("time_format");
|
||||||
|
private static final ParseField FIELD_DELIMITER_FIELD = new ParseField("field_delimiter");
|
||||||
|
private static final ParseField QUOTE_CHARACTER_FIELD = new ParseField("quote_character");
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Special time format string for epoch times (seconds)
|
||||||
|
*/
|
||||||
|
public static final String EPOCH = "epoch";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Special time format string for epoch times (milli-seconds)
|
||||||
|
*/
|
||||||
|
public static final String EPOCH_MS = "epoch_ms";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* By default autodetect expects the timestamp in a field with this name
|
||||||
|
*/
|
||||||
|
public static final String DEFAULT_TIME_FIELD = "time";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The default field delimiter expected by the native autodetect
|
||||||
|
* program.
|
||||||
|
*/
|
||||||
|
public static final char DEFAULT_DELIMITER = '\t';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The default quote character used to escape text in
|
||||||
|
* delimited data formats
|
||||||
|
*/
|
||||||
|
public static final char DEFAULT_QUOTE_CHAR = '"';
|
||||||
|
|
||||||
|
private final DataFormat dataFormat;
|
||||||
|
private final String timeFieldName;
|
||||||
|
private final String timeFormat;
|
||||||
|
private final Character fieldDelimiter;
|
||||||
|
private final Character quoteCharacter;
|
||||||
|
|
||||||
|
public static final ObjectParser<Builder, Void> PARSER =
|
||||||
|
new ObjectParser<>(DATA_DESCRIPTION_FIELD.getPreferredName(), true, Builder::new);
|
||||||
|
|
||||||
|
static {
|
||||||
|
PARSER.declareString(Builder::setFormat, FORMAT_FIELD);
|
||||||
|
PARSER.declareString(Builder::setTimeField, TIME_FIELD_NAME_FIELD);
|
||||||
|
PARSER.declareString(Builder::setTimeFormat, TIME_FORMAT_FIELD);
|
||||||
|
PARSER.declareField(Builder::setFieldDelimiter, DataDescription::extractChar, FIELD_DELIMITER_FIELD, ValueType.STRING);
|
||||||
|
PARSER.declareField(Builder::setQuoteCharacter, DataDescription::extractChar, QUOTE_CHARACTER_FIELD, ValueType.STRING);
|
||||||
|
}
|
||||||
|
|
||||||
|
public DataDescription(DataFormat dataFormat, String timeFieldName, String timeFormat, Character fieldDelimiter,
|
||||||
|
Character quoteCharacter) {
|
||||||
|
this.dataFormat = dataFormat;
|
||||||
|
this.timeFieldName = timeFieldName;
|
||||||
|
this.timeFormat = timeFormat;
|
||||||
|
this.fieldDelimiter = fieldDelimiter;
|
||||||
|
this.quoteCharacter = quoteCharacter;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
|
||||||
|
builder.startObject();
|
||||||
|
if (dataFormat != DataFormat.XCONTENT) {
|
||||||
|
builder.field(FORMAT_FIELD.getPreferredName(), dataFormat);
|
||||||
|
}
|
||||||
|
builder.field(TIME_FIELD_NAME_FIELD.getPreferredName(), timeFieldName);
|
||||||
|
builder.field(TIME_FORMAT_FIELD.getPreferredName(), timeFormat);
|
||||||
|
if (fieldDelimiter != null) {
|
||||||
|
builder.field(FIELD_DELIMITER_FIELD.getPreferredName(), String.valueOf(fieldDelimiter));
|
||||||
|
}
|
||||||
|
if (quoteCharacter != null) {
|
||||||
|
builder.field(QUOTE_CHARACTER_FIELD.getPreferredName(), String.valueOf(quoteCharacter));
|
||||||
|
}
|
||||||
|
builder.endObject();
|
||||||
|
return builder;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The format of the data to be processed.
|
||||||
|
* Defaults to {@link DataDescription.DataFormat#XCONTENT}
|
||||||
|
*
|
||||||
|
* @return The data format
|
||||||
|
*/
|
||||||
|
public DataFormat getFormat() {
|
||||||
|
return dataFormat;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The name of the field containing the timestamp
|
||||||
|
*
|
||||||
|
* @return A String if set or <code>null</code>
|
||||||
|
*/
|
||||||
|
public String getTimeField() {
|
||||||
|
return timeFieldName;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Either {@value #EPOCH}, {@value #EPOCH_MS} or a SimpleDateTime format string.
|
||||||
|
* If not set (is <code>null</code> or an empty string) or set to
|
||||||
|
* {@value #EPOCH_MS} (the default) then the date is assumed to be in
|
||||||
|
* milliseconds from the epoch.
|
||||||
|
*
|
||||||
|
* @return A String if set or <code>null</code>
|
||||||
|
*/
|
||||||
|
public String getTimeFormat() {
|
||||||
|
return timeFormat;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If the data is in a delimited format with a header e.g. csv or tsv
|
||||||
|
* this is the delimiter character used. This is only applicable if
|
||||||
|
* {@linkplain #getFormat()} is {@link DataDescription.DataFormat#DELIMITED}.
|
||||||
|
* The default value for delimited format is {@value #DEFAULT_DELIMITER}.
|
||||||
|
*
|
||||||
|
* @return A char
|
||||||
|
*/
|
||||||
|
public Character getFieldDelimiter() {
|
||||||
|
return fieldDelimiter;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The quote character used in delimited formats.
|
||||||
|
* The default value for delimited format is {@value #DEFAULT_QUOTE_CHAR}.
|
||||||
|
*
|
||||||
|
* @return The delimited format quote character
|
||||||
|
*/
|
||||||
|
public Character getQuoteCharacter() {
|
||||||
|
return quoteCharacter;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Character extractChar(XContentParser parser) throws IOException {
|
||||||
|
if (parser.currentToken() == XContentParser.Token.VALUE_STRING) {
|
||||||
|
String charStr = parser.text();
|
||||||
|
if (charStr.length() != 1) {
|
||||||
|
throw new IllegalArgumentException("String must be a single character, found [" + charStr + "]");
|
||||||
|
}
|
||||||
|
return charStr.charAt(0);
|
||||||
|
}
|
||||||
|
throw new IllegalArgumentException("Unsupported token [" + parser.currentToken() + "]");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Overridden equality test
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object other) {
|
||||||
|
if (this == other) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (other instanceof DataDescription == false) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
DataDescription that = (DataDescription) other;
|
||||||
|
|
||||||
|
return this.dataFormat == that.dataFormat &&
|
||||||
|
Objects.equals(this.quoteCharacter, that.quoteCharacter) &&
|
||||||
|
Objects.equals(this.timeFieldName, that.timeFieldName) &&
|
||||||
|
Objects.equals(this.timeFormat, that.timeFormat) &&
|
||||||
|
Objects.equals(this.fieldDelimiter, that.fieldDelimiter);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
public int hashCode() {
    // Hashes exactly the fields compared in equals(), keeping the two contracts consistent.
    return Objects.hash(dataFormat, quoteCharacter, timeFieldName, timeFormat, fieldDelimiter);
}
|
||||||
|
|
||||||
|
public static class Builder {
|
||||||
|
|
||||||
|
private DataFormat dataFormat = DataFormat.XCONTENT;
|
||||||
|
private String timeFieldName = DEFAULT_TIME_FIELD;
|
||||||
|
private String timeFormat = EPOCH_MS;
|
||||||
|
private Character fieldDelimiter;
|
||||||
|
private Character quoteCharacter;
|
||||||
|
|
||||||
|
public void setFormat(DataFormat format) {
|
||||||
|
dataFormat = Objects.requireNonNull(format);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void setFormat(String format) {
|
||||||
|
setFormat(DataFormat.forString(format));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setTimeField(String fieldName) {
|
||||||
|
timeFieldName = Objects.requireNonNull(fieldName);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setTimeFormat(String format) {
|
||||||
|
timeFormat = Objects.requireNonNull(format);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setFieldDelimiter(Character delimiter) {
|
||||||
|
fieldDelimiter = delimiter;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setQuoteCharacter(Character value) {
|
||||||
|
quoteCharacter = value;
|
||||||
|
}
|
||||||
|
|
||||||
|
public DataDescription build() {
|
||||||
|
if (dataFormat == DataFormat.DELIMITED) {
|
||||||
|
if (fieldDelimiter == null) {
|
||||||
|
fieldDelimiter = DEFAULT_DELIMITER;
|
||||||
|
}
|
||||||
|
if (quoteCharacter == null) {
|
||||||
|
quoteCharacter = DEFAULT_QUOTE_CHAR;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return new DataDescription(dataFormat, timeFieldName, timeFormat, fieldDelimiter, quoteCharacter);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@@ -0,0 +1,88 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elasticsearch under one or more contributor
|
||||||
|
* license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright
|
||||||
|
* ownership. Elasticsearch licenses this file to you under
|
||||||
|
* the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
* not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
package org.elasticsearch.protocol.xpack.ml.job.config;
|
||||||
|
|
||||||
|
import org.elasticsearch.common.ParseField;
|
||||||
|
import org.elasticsearch.common.xcontent.ConstructingObjectParser;
|
||||||
|
import org.elasticsearch.common.xcontent.ToXContentObject;
|
||||||
|
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
|
public class ModelPlotConfig implements ToXContentObject {
|
||||||
|
|
||||||
|
private static final ParseField TYPE_FIELD = new ParseField("model_plot_config");
|
||||||
|
private static final ParseField ENABLED_FIELD = new ParseField("enabled");
|
||||||
|
public static final ParseField TERMS_FIELD = new ParseField("terms");
|
||||||
|
|
||||||
|
public static final ConstructingObjectParser<ModelPlotConfig, Void> PARSER =
|
||||||
|
new ConstructingObjectParser<>(TYPE_FIELD.getPreferredName(), true, a -> new ModelPlotConfig((boolean) a[0], (String) a[1]));
|
||||||
|
|
||||||
|
static {
|
||||||
|
PARSER.declareBoolean(ConstructingObjectParser.constructorArg(), ENABLED_FIELD);
|
||||||
|
PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), TERMS_FIELD);
|
||||||
|
}
|
||||||
|
|
||||||
|
private final boolean enabled;
|
||||||
|
private final String terms;
|
||||||
|
|
||||||
|
public ModelPlotConfig(boolean enabled, String terms) {
|
||||||
|
this.enabled = enabled;
|
||||||
|
this.terms = terms;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
|
||||||
|
builder.startObject();
|
||||||
|
builder.field(ENABLED_FIELD.getPreferredName(), enabled);
|
||||||
|
if (terms != null) {
|
||||||
|
builder.field(TERMS_FIELD.getPreferredName(), terms);
|
||||||
|
}
|
||||||
|
builder.endObject();
|
||||||
|
return builder;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isEnabled() {
|
||||||
|
return enabled;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getTerms() {
|
||||||
|
return this.terms;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object other) {
|
||||||
|
if (this == other) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (other instanceof ModelPlotConfig == false) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
ModelPlotConfig that = (ModelPlotConfig) other;
|
||||||
|
return this.enabled == that.enabled && Objects.equals(this.terms, that.terms);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
return Objects.hash(enabled, terms);
|
||||||
|
}
|
||||||
|
}
|
|
@@ -0,0 +1,105 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elasticsearch under one or more contributor
|
||||||
|
* license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright
|
||||||
|
* ownership. Elasticsearch licenses this file to you under
|
||||||
|
* the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
* not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
package org.elasticsearch.protocol.xpack.ml.job.config;
|
||||||
|
|
||||||
|
import org.elasticsearch.common.xcontent.DeprecationHandler;
|
||||||
|
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
|
||||||
|
import org.elasticsearch.common.xcontent.XContentFactory;
|
||||||
|
import org.elasticsearch.common.xcontent.XContentParser;
|
||||||
|
import org.elasticsearch.common.xcontent.XContentType;
|
||||||
|
import org.elasticsearch.test.AbstractXContentTestCase;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import static org.hamcrest.Matchers.equalTo;
|
||||||
|
|
||||||
|
public class AnalysisLimitsTests extends AbstractXContentTestCase<AnalysisLimits> {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected AnalysisLimits createTestInstance() {
|
||||||
|
return createRandomized();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static AnalysisLimits createRandomized() {
|
||||||
|
return new AnalysisLimits(randomBoolean() ? (long) randomIntBetween(1, 1000000) : null,
|
||||||
|
randomBoolean() ? randomNonNegativeLong() : null);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected AnalysisLimits doParseInstance(XContentParser parser) {
|
||||||
|
return AnalysisLimits.PARSER.apply(parser, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testParseModelMemoryLimitGivenPositiveNumber() throws IOException {
|
||||||
|
String json = "{\"model_memory_limit\": 2048}";
|
||||||
|
XContentParser parser = XContentFactory.xContent(XContentType.JSON)
|
||||||
|
.createParser(NamedXContentRegistry.EMPTY, DeprecationHandler.THROW_UNSUPPORTED_OPERATION, json);
|
||||||
|
|
||||||
|
AnalysisLimits limits = AnalysisLimits.PARSER.apply(parser, null);
|
||||||
|
|
||||||
|
assertThat(limits.getModelMemoryLimit(), equalTo(2048L));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testParseModelMemoryLimitGivenStringMultipleOfMBs() throws IOException {
|
||||||
|
String json = "{\"model_memory_limit\":\"4g\"}";
|
||||||
|
XContentParser parser = XContentFactory.xContent(XContentType.JSON)
|
||||||
|
.createParser(NamedXContentRegistry.EMPTY, DeprecationHandler.THROW_UNSUPPORTED_OPERATION, json);
|
||||||
|
|
||||||
|
AnalysisLimits limits = AnalysisLimits.PARSER.apply(parser, null);
|
||||||
|
|
||||||
|
assertThat(limits.getModelMemoryLimit(), equalTo(4096L));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testEquals_GivenEqual() {
|
||||||
|
AnalysisLimits analysisLimits1 = new AnalysisLimits(10L, 20L);
|
||||||
|
AnalysisLimits analysisLimits2 = new AnalysisLimits(10L, 20L);
|
||||||
|
|
||||||
|
assertTrue(analysisLimits1.equals(analysisLimits1));
|
||||||
|
assertTrue(analysisLimits1.equals(analysisLimits2));
|
||||||
|
assertTrue(analysisLimits2.equals(analysisLimits1));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testEquals_GivenDifferentModelMemoryLimit() {
|
||||||
|
AnalysisLimits analysisLimits1 = new AnalysisLimits(10L, 20L);
|
||||||
|
AnalysisLimits analysisLimits2 = new AnalysisLimits(11L, 20L);
|
||||||
|
|
||||||
|
assertFalse(analysisLimits1.equals(analysisLimits2));
|
||||||
|
assertFalse(analysisLimits2.equals(analysisLimits1));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testEquals_GivenDifferentCategorizationExamplesLimit() {
|
||||||
|
AnalysisLimits analysisLimits1 = new AnalysisLimits(10L, 20L);
|
||||||
|
AnalysisLimits analysisLimits2 = new AnalysisLimits(10L, 21L);
|
||||||
|
|
||||||
|
assertFalse(analysisLimits1.equals(analysisLimits2));
|
||||||
|
assertFalse(analysisLimits2.equals(analysisLimits1));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testHashCode_GivenEqual() {
|
||||||
|
AnalysisLimits analysisLimits1 = new AnalysisLimits(5555L, 3L);
|
||||||
|
AnalysisLimits analysisLimits2 = new AnalysisLimits(5555L, 3L);
|
||||||
|
|
||||||
|
assertEquals(analysisLimits1.hashCode(), analysisLimits2.hashCode());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected boolean supportsUnknownFields() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
|
@@ -0,0 +1,87 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elasticsearch under one or more contributor
|
||||||
|
* license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright
|
||||||
|
* ownership. Elasticsearch licenses this file to you under
|
||||||
|
* the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
* not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
package org.elasticsearch.protocol.xpack.ml.job.config;
|
||||||
|
|
||||||
|
import org.elasticsearch.common.xcontent.XContentParser;
|
||||||
|
import org.elasticsearch.test.AbstractXContentTestCase;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
public class CategorizationAnalyzerConfigTests extends AbstractXContentTestCase<CategorizationAnalyzerConfig> {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected CategorizationAnalyzerConfig createTestInstance() {
|
||||||
|
return createRandomized().build();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static CategorizationAnalyzerConfig.Builder createRandomized() {
|
||||||
|
CategorizationAnalyzerConfig.Builder builder = new CategorizationAnalyzerConfig.Builder();
|
||||||
|
if (rarely()) {
|
||||||
|
builder.setAnalyzer(randomAlphaOfLength(10));
|
||||||
|
} else {
|
||||||
|
if (randomBoolean()) {
|
||||||
|
for (String pattern : generateRandomStringArray(3, 40, false)) {
|
||||||
|
if (rarely()) {
|
||||||
|
builder.addCharFilter(randomAlphaOfLength(10));
|
||||||
|
} else {
|
||||||
|
Map<String, Object> charFilter = new HashMap<>();
|
||||||
|
charFilter.put("type", "pattern_replace");
|
||||||
|
charFilter.put("pattern", pattern);
|
||||||
|
builder.addCharFilter(charFilter);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (rarely()) {
|
||||||
|
builder.setTokenizer(randomAlphaOfLength(10));
|
||||||
|
} else {
|
||||||
|
Map<String, Object> tokenizer = new HashMap<>();
|
||||||
|
tokenizer.put("type", "pattern");
|
||||||
|
tokenizer.put("pattern", randomAlphaOfLength(10));
|
||||||
|
builder.setTokenizer(tokenizer);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (randomBoolean()) {
|
||||||
|
for (String pattern : generateRandomStringArray(4, 40, false)) {
|
||||||
|
if (rarely()) {
|
||||||
|
builder.addTokenFilter(randomAlphaOfLength(10));
|
||||||
|
} else {
|
||||||
|
Map<String, Object> tokenFilter = new HashMap<>();
|
||||||
|
tokenFilter.put("type", "pattern_replace");
|
||||||
|
tokenFilter.put("pattern", pattern);
|
||||||
|
builder.addTokenFilter(tokenFilter);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return builder;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected CategorizationAnalyzerConfig doParseInstance(XContentParser parser) throws IOException {
|
||||||
|
return CategorizationAnalyzerConfig.buildFromXContentObject(parser);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected boolean supportsUnknownFields() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
|
@@ -0,0 +1,185 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elasticsearch under one or more contributor
|
||||||
|
* license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright
|
||||||
|
* ownership. Elasticsearch licenses this file to you under
|
||||||
|
* the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
* not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
package org.elasticsearch.protocol.xpack.ml.job.config;
|
||||||
|
|
||||||
|
import org.elasticsearch.common.xcontent.XContentParser;
|
||||||
|
import org.elasticsearch.protocol.xpack.ml.job.config.DataDescription.DataFormat;
|
||||||
|
import org.elasticsearch.test.AbstractXContentTestCase;
|
||||||
|
|
||||||
|
import static org.hamcrest.Matchers.equalTo;
|
||||||
|
import static org.hamcrest.Matchers.nullValue;
|
||||||
|
import static org.hamcrest.core.Is.is;
|
||||||
|
|
||||||
|
public class DataDescriptionTests extends AbstractXContentTestCase<DataDescription> {
|
||||||
|
|
||||||
|
public void testDefault() {
|
||||||
|
DataDescription dataDescription = new DataDescription.Builder().build();
|
||||||
|
assertThat(dataDescription.getFormat(), equalTo(DataFormat.XCONTENT));
|
||||||
|
assertThat(dataDescription.getTimeField(), equalTo("time"));
|
||||||
|
assertThat(dataDescription.getTimeFormat(), equalTo("epoch_ms"));
|
||||||
|
assertThat(dataDescription.getFieldDelimiter(), is(nullValue()));
|
||||||
|
assertThat(dataDescription.getQuoteCharacter(), is(nullValue()));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testDefaultDelimited() {
|
||||||
|
DataDescription.Builder dataDescriptionBuilder = new DataDescription.Builder();
|
||||||
|
dataDescriptionBuilder.setFormat(DataFormat.DELIMITED);
|
||||||
|
DataDescription dataDescription = dataDescriptionBuilder.build();
|
||||||
|
|
||||||
|
assertThat(dataDescription.getFormat(), equalTo(DataFormat.DELIMITED));
|
||||||
|
assertThat(dataDescription.getTimeField(), equalTo("time"));
|
||||||
|
assertThat(dataDescription.getTimeFormat(), equalTo("epoch_ms"));
|
||||||
|
assertThat(dataDescription.getFieldDelimiter(), is('\t'));
|
||||||
|
assertThat(dataDescription.getQuoteCharacter(), is('"'));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testEquals_GivenDifferentDateFormat() {
|
||||||
|
DataDescription.Builder description1 = new DataDescription.Builder();
|
||||||
|
description1.setFormat(DataFormat.XCONTENT);
|
||||||
|
description1.setQuoteCharacter('"');
|
||||||
|
description1.setTimeField("timestamp");
|
||||||
|
description1.setTimeFormat("epoch");
|
||||||
|
description1.setFieldDelimiter(',');
|
||||||
|
|
||||||
|
DataDescription.Builder description2 = new DataDescription.Builder();
|
||||||
|
description2.setFormat(DataFormat.DELIMITED);
|
||||||
|
description2.setQuoteCharacter('"');
|
||||||
|
description2.setTimeField("timestamp");
|
||||||
|
description2.setTimeFormat("epoch");
|
||||||
|
description2.setFieldDelimiter(',');
|
||||||
|
|
||||||
|
assertFalse(description1.build().equals(description2.build()));
|
||||||
|
assertFalse(description2.build().equals(description1.build()));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testEquals_GivenDifferentQuoteCharacter() {
|
||||||
|
DataDescription.Builder description1 = new DataDescription.Builder();
|
||||||
|
description1.setFormat(DataFormat.XCONTENT);
|
||||||
|
description1.setQuoteCharacter('"');
|
||||||
|
description1.setTimeField("timestamp");
|
||||||
|
description1.setTimeFormat("epoch");
|
||||||
|
description1.setFieldDelimiter(',');
|
||||||
|
|
||||||
|
DataDescription.Builder description2 = new DataDescription.Builder();
|
||||||
|
description2.setFormat(DataFormat.XCONTENT);
|
||||||
|
description2.setQuoteCharacter('\'');
|
||||||
|
description2.setTimeField("timestamp");
|
||||||
|
description2.setTimeFormat("epoch");
|
||||||
|
description2.setFieldDelimiter(',');
|
||||||
|
|
||||||
|
assertFalse(description1.build().equals(description2.build()));
|
||||||
|
assertFalse(description2.build().equals(description1.build()));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testEquals_GivenDifferentTimeField() {
|
||||||
|
DataDescription.Builder description1 = new DataDescription.Builder();
|
||||||
|
description1.setFormat(DataFormat.XCONTENT);
|
||||||
|
description1.setQuoteCharacter('"');
|
||||||
|
description1.setTimeField("timestamp");
|
||||||
|
description1.setTimeFormat("epoch");
|
||||||
|
description1.setFieldDelimiter(',');
|
||||||
|
|
||||||
|
DataDescription.Builder description2 = new DataDescription.Builder();
|
||||||
|
description2.setFormat(DataFormat.XCONTENT);
|
||||||
|
description2.setQuoteCharacter('"');
|
||||||
|
description2.setTimeField("time");
|
||||||
|
description2.setTimeFormat("epoch");
|
||||||
|
description2.setFieldDelimiter(',');
|
||||||
|
|
||||||
|
assertFalse(description1.build().equals(description2.build()));
|
||||||
|
assertFalse(description2.build().equals(description1.build()));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testEquals_GivenDifferentTimeFormat() {
|
||||||
|
DataDescription.Builder description1 = new DataDescription.Builder();
|
||||||
|
description1.setFormat(DataFormat.XCONTENT);
|
||||||
|
description1.setQuoteCharacter('"');
|
||||||
|
description1.setTimeField("timestamp");
|
||||||
|
description1.setTimeFormat("epoch");
|
||||||
|
description1.setFieldDelimiter(',');
|
||||||
|
|
||||||
|
DataDescription.Builder description2 = new DataDescription.Builder();
|
||||||
|
description2.setFormat(DataFormat.XCONTENT);
|
||||||
|
description2.setQuoteCharacter('"');
|
||||||
|
description2.setTimeField("timestamp");
|
||||||
|
description2.setTimeFormat("epoch_ms");
|
||||||
|
description2.setFieldDelimiter(',');
|
||||||
|
|
||||||
|
assertFalse(description1.build().equals(description2.build()));
|
||||||
|
assertFalse(description2.build().equals(description1.build()));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testEquals_GivenDifferentFieldDelimiter() {
|
||||||
|
DataDescription.Builder description1 = new DataDescription.Builder();
|
||||||
|
description1.setFormat(DataFormat.XCONTENT);
|
||||||
|
description1.setQuoteCharacter('"');
|
||||||
|
description1.setTimeField("timestamp");
|
||||||
|
description1.setTimeFormat("epoch");
|
||||||
|
description1.setFieldDelimiter(',');
|
||||||
|
|
||||||
|
DataDescription.Builder description2 = new DataDescription.Builder();
|
||||||
|
description2.setFormat(DataFormat.XCONTENT);
|
||||||
|
description2.setQuoteCharacter('"');
|
||||||
|
description2.setTimeField("timestamp");
|
||||||
|
description2.setTimeFormat("epoch");
|
||||||
|
description2.setFieldDelimiter(';');
|
||||||
|
|
||||||
|
assertFalse(description1.build().equals(description2.build()));
|
||||||
|
assertFalse(description2.build().equals(description1.build()));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected DataDescription createTestInstance() {
|
||||||
|
DataDescription.Builder dataDescription = new DataDescription.Builder();
|
||||||
|
if (randomBoolean()) {
|
||||||
|
dataDescription.setFormat(randomFrom(DataFormat.values()));
|
||||||
|
}
|
||||||
|
if (randomBoolean()) {
|
||||||
|
dataDescription.setTimeField(randomAlphaOfLengthBetween(1, 20));
|
||||||
|
}
|
||||||
|
if (randomBoolean()) {
|
||||||
|
String format;
|
||||||
|
if (randomBoolean()) {
|
||||||
|
format = DataDescription.EPOCH;
|
||||||
|
} else if (randomBoolean()) {
|
||||||
|
format = DataDescription.EPOCH_MS;
|
||||||
|
} else {
|
||||||
|
format = "yyyy-MM-dd HH:mm:ss.SSS";
|
||||||
|
}
|
||||||
|
dataDescription.setTimeFormat(format);
|
||||||
|
}
|
||||||
|
if (randomBoolean()) {
|
||||||
|
dataDescription.setFieldDelimiter(randomAlphaOfLength(1).charAt(0));
|
||||||
|
}
|
||||||
|
if (randomBoolean()) {
|
||||||
|
dataDescription.setQuoteCharacter(randomAlphaOfLength(1).charAt(0));
|
||||||
|
}
|
||||||
|
return dataDescription.build();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected DataDescription doParseInstance(XContentParser parser) {
|
||||||
|
return DataDescription.PARSER.apply(parser, null).build();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected boolean supportsUnknownFields() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
|
@@ -0,0 +1,40 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elasticsearch under one or more contributor
|
||||||
|
* license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright
|
||||||
|
* ownership. Elasticsearch licenses this file to you under
|
||||||
|
* the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
* not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
package org.elasticsearch.protocol.xpack.ml.job.config;
|
||||||
|
|
||||||
|
import org.elasticsearch.common.xcontent.XContentParser;
|
||||||
|
import org.elasticsearch.test.AbstractXContentTestCase;
|
||||||
|
|
||||||
|
public class ModelPlotConfigTests extends AbstractXContentTestCase<ModelPlotConfig> {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected ModelPlotConfig createTestInstance() {
|
||||||
|
return new ModelPlotConfig(randomBoolean(), randomAlphaOfLengthBetween(1, 30));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected ModelPlotConfig doParseInstance(XContentParser parser) {
|
||||||
|
return ModelPlotConfig.PARSER.apply(parser, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected boolean supportsUnknownFields() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue