[ML] Add some ML config classes to protocol library (#32502)

This commit adds four ML config classes to the X-Pack protocol
library used by the high level REST client.

(Other commits will add the remaining config classes, plus results
and stats classes.)

These classes:

- Are immutable
- Have little/no validation of field values beyond null checks
- Are convertible to and from X-Content, but NOT wire transportable
- Have lenient parsers to maximize compatibility across versions
- Have the same class names, member names and getter/setter names
  as the corresponding classes in X-Pack core to ease migration
  for transport client users
- Don't reproduce all the methods that do calculations or
  transformations that the corresponding classes in X-Pack core
  have
This commit is contained in:
David Roberts 2018-08-03 13:21:08 +01:00 committed by GitHub
parent 1ee6393117
commit bc274b2ff2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 1265 additions and 0 deletions

View File

@ -0,0 +1,139 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.protocol.xpack.ml.job.config;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.xcontent.ConstructingObjectParser;
import org.elasticsearch.common.xcontent.ObjectParser;
import org.elasticsearch.common.xcontent.ToXContentObject;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import java.io.IOException;
import java.util.Objects;
/**
 * Analysis limits for autodetect. In particular,
 * this is a collection of parameters that allow limiting
 * the resources used by the job.
 */
public class AnalysisLimits implements ToXContentObject {

    /**
     * Serialisation field names
     */
    public static final ParseField MODEL_MEMORY_LIMIT = new ParseField("model_memory_limit");
    public static final ParseField CATEGORIZATION_EXAMPLES_LIMIT = new ParseField("categorization_examples_limit");

    // Lenient parser so that unknown fields from newer/older versions are tolerated
    public static final ConstructingObjectParser<AnalysisLimits, Void> PARSER =
            new ConstructingObjectParser<>("analysis_limits", true, a -> new AnalysisLimits((Long) a[0], (Long) a[1]));

    static {
        // The memory limit may arrive either as a byte-size string (e.g. "4g"),
        // which is converted to MiB, or as a plain number that is taken to be MiB already
        PARSER.declareField(ConstructingObjectParser.optionalConstructorArg(), parser -> {
            XContentParser.Token token = parser.currentToken();
            if (token == XContentParser.Token.VALUE_STRING) {
                return ByteSizeValue.parseBytesSizeValue(parser.text(), MODEL_MEMORY_LIMIT.getPreferredName()).getMb();
            }
            if (token == XContentParser.Token.VALUE_NUMBER) {
                return parser.longValue();
            }
            throw new IllegalArgumentException("Unsupported token [" + token + "]");
        }, MODEL_MEMORY_LIMIT, ObjectParser.ValueType.VALUE);
        PARSER.declareLong(ConstructingObjectParser.optionalConstructorArg(), CATEGORIZATION_EXAMPLES_LIMIT);
    }

    /**
     * The model memory limit in MiBs.
     * A value of <code>null</code> implies that the server-side default will be used.
     */
    private final Long modelMemoryLimit;

    /**
     * The limit on the number of categorization examples kept per category.
     * A value of <code>null</code> will result in the server-side default being used.
     */
    private final Long categorizationExamplesLimit;

    public AnalysisLimits(Long categorizationExamplesLimit) {
        this(null, categorizationExamplesLimit);
    }

    public AnalysisLimits(Long modelMemoryLimit, Long categorizationExamplesLimit) {
        this.modelMemoryLimit = modelMemoryLimit;
        this.categorizationExamplesLimit = categorizationExamplesLimit;
    }

    /**
     * Maximum size of the model in MB before the anomaly detector
     * will drop new samples to prevent the model using any more
     * memory.
     *
     * @return The set memory limit or <code>null</code> if not set
     */
    @Nullable
    public Long getModelMemoryLimit() {
        return modelMemoryLimit;
    }

    /**
     * Gets the limit to the number of examples that are stored per category
     *
     * @return the limit or <code>null</code> if not set
     */
    @Nullable
    public Long getCategorizationExamplesLimit() {
        return categorizationExamplesLimit;
    }

    @Override
    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
        builder.startObject();
        if (modelMemoryLimit != null) {
            // Written with an explicit "mb" suffix so the value round-trips through the byte-size string parser
            builder.field(MODEL_MEMORY_LIMIT.getPreferredName(), modelMemoryLimit + "mb");
        }
        if (categorizationExamplesLimit != null) {
            builder.field(CATEGORIZATION_EXAMPLES_LIMIT.getPreferredName(), categorizationExamplesLimit);
        }
        return builder.endObject();
    }

    /**
     * Overridden equality test
     */
    @Override
    public boolean equals(Object other) {
        if (this == other) {
            return true;
        }
        if ((other instanceof AnalysisLimits) == false) {
            return false;
        }
        AnalysisLimits that = (AnalysisLimits) other;
        return Objects.equals(modelMemoryLimit, that.modelMemoryLimit)
                && Objects.equals(categorizationExamplesLimit, that.categorizationExamplesLimit);
    }

    @Override
    public int hashCode() {
        return Objects.hash(modelMemoryLimit, categorizationExamplesLimit);
    }
}

View File

@ -0,0 +1,339 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.protocol.xpack.ml.job.config;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.ToXContentFragment;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.rest.action.admin.indices.RestAnalyzeAction;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Objects;
/**
 * Configuration for the categorization analyzer.
 *
 * The syntax is a subset of what can be supplied to the {@linkplain RestAnalyzeAction <code>_analyze</code> endpoint}.
 * To summarize, the first option is to specify the name of an out-of-the-box analyzer:
 * <code>
 * "categorization_analyzer" : "standard"
 * </code>
 *
 * The second option is to specify a custom analyzer by combining the <code>char_filters</code>, <code>tokenizer</code>
 * and <code>token_filters</code> fields. In turn, each of these can be specified as the name of an out-of-the-box
 * one or as an object defining a custom one. For example:
 * <code>
 * "char_filters" : [
 * "html_strip",
 * { "type" : "pattern_replace", "pattern": "SQL: .*" }
 * ],
 * "tokenizer" : "thai",
 * "token_filters" : [
 * "lowercase",
 * { "type" : "pattern_replace", "pattern": "^[0-9].*" }
 * ]
 * </code>
 */
public class CategorizationAnalyzerConfig implements ToXContentFragment {

    public static final ParseField CATEGORIZATION_ANALYZER = new ParseField("categorization_analyzer");
    // Reuse the _analyze endpoint's field names so the two syntaxes stay aligned
    private static final ParseField TOKENIZER = RestAnalyzeAction.Fields.TOKENIZER;
    private static final ParseField TOKEN_FILTERS = RestAnalyzeAction.Fields.TOKEN_FILTERS;
    private static final ParseField CHAR_FILTERS = RestAnalyzeAction.Fields.CHAR_FILTERS;

    /**
     * This method is only used in the unit tests - in production code this config is always parsed as a fragment.
     */
    static CategorizationAnalyzerConfig buildFromXContentObject(XContentParser parser) throws IOException {
        // Unwrap the enclosing object: { "categorization_analyzer" : <fragment> }
        if (parser.nextToken() != XContentParser.Token.START_OBJECT) {
            throw new IllegalArgumentException("Expected start object but got [" + parser.currentToken() + "]");
        }
        if (parser.nextToken() != XContentParser.Token.FIELD_NAME) {
            throw new IllegalArgumentException("Expected field name but got [" + parser.currentToken() + "]");
        }
        // Advance onto the first token of the fragment before delegating
        parser.nextToken();
        CategorizationAnalyzerConfig categorizationAnalyzerConfig = buildFromXContentFragment(parser);
        // Consume the wrapper object's END_OBJECT
        parser.nextToken();
        return categorizationAnalyzerConfig;
    }

    /**
     * Parse a <code>categorization_analyzer</code> configuration. A custom parser is needed due to the
     * complexity of the format, with many elements able to be specified as either the name of a built-in
     * element or an object containing a custom definition.
     */
    static CategorizationAnalyzerConfig buildFromXContentFragment(XContentParser parser) throws IOException {
        CategorizationAnalyzerConfig.Builder builder = new CategorizationAnalyzerConfig.Builder();

        XContentParser.Token token = parser.currentToken();
        if (token == XContentParser.Token.VALUE_STRING) {
            // Simple form: a bare string naming a built-in analyzer
            builder.setAnalyzer(parser.text());
        } else if (token != XContentParser.Token.START_OBJECT) {
            throw new IllegalArgumentException("[" + CATEGORIZATION_ANALYZER + "] should be analyzer's name or settings [" + token + "]");
        } else {
            // Custom form: an object holding char_filter/tokenizer/filter sub-sections.
            // Walk the object token by token, remembering the last field name seen so
            // the value tokens that follow can be routed to the right builder method.
            String currentFieldName = null;
            while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
                if (token == XContentParser.Token.FIELD_NAME) {
                    currentFieldName = parser.currentName();
                } else if (CHAR_FILTERS.match(currentFieldName, parser.getDeprecationHandler())
                        && token == XContentParser.Token.START_ARRAY) {
                    // Each array element is either a name (string) or a custom definition (object)
                    while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
                        if (token == XContentParser.Token.VALUE_STRING) {
                            builder.addCharFilter(parser.text());
                        } else if (token == XContentParser.Token.START_OBJECT) {
                            builder.addCharFilter(parser.map());
                        } else {
                            throw new IllegalArgumentException("[" + currentFieldName + "] in [" + CATEGORIZATION_ANALYZER +
                                    "] array element should contain char_filter's name or settings [" + token + "]");
                        }
                    }
                } else if (TOKENIZER.match(currentFieldName, parser.getDeprecationHandler())) {
                    // Single tokenizer: name (string) or custom definition (object)
                    if (token == XContentParser.Token.VALUE_STRING) {
                        builder.setTokenizer(parser.text());
                    } else if (token == XContentParser.Token.START_OBJECT) {
                        builder.setTokenizer(parser.map());
                    } else {
                        throw new IllegalArgumentException("[" + currentFieldName + "] in [" + CATEGORIZATION_ANALYZER +
                                "] should be tokenizer's name or settings [" + token + "]");
                    }
                } else if (TOKEN_FILTERS.match(currentFieldName, parser.getDeprecationHandler())
                        && token == XContentParser.Token.START_ARRAY) {
                    // Each array element is either a name (string) or a custom definition (object)
                    while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
                        if (token == XContentParser.Token.VALUE_STRING) {
                            builder.addTokenFilter(parser.text());
                        } else if (token == XContentParser.Token.START_OBJECT) {
                            builder.addTokenFilter(parser.map());
                        } else {
                            throw new IllegalArgumentException("[" + currentFieldName + "] in [" + CATEGORIZATION_ANALYZER +
                                    "] array element should contain token_filter's name or settings [" + token + "]");
                        }
                    }
                }
                // NOTE(review): unrecognised fields are silently skipped here — presumably
                // intentional, to keep this parser lenient across versions; confirm.
            }
        }

        return builder.build();
    }

    /**
     * Simple store of either a name of a built-in analyzer element or a custom definition.
     */
    public static final class NameOrDefinition implements ToXContentFragment {

        // Exactly one of these two members is not null
        public final String name;
        public final Settings definition;

        NameOrDefinition(String name) {
            this.name = Objects.requireNonNull(name);
            this.definition = null;
        }

        NameOrDefinition(ParseField field, Map<String, Object> definition) {
            this.name = null;
            Objects.requireNonNull(definition);
            try {
                // Serialise the map to JSON and reload it as a Settings object
                XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON);
                builder.map(definition);
                this.definition = Settings.builder().loadFromSource(Strings.toString(builder), builder.contentType()).build();
            } catch (IOException e) {
                throw new IllegalArgumentException("Failed to parse [" + definition + "] in [" + field.getPreferredName() + "]", e);
            }
        }

        @Override
        public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
            // Mirror the input format: a bare string for a name, an object for a definition
            if (definition == null) {
                builder.value(name);
            } else {
                builder.startObject();
                definition.toXContent(builder, params);
                builder.endObject();
            }
            return builder;
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) return true;
            if (o == null || getClass() != o.getClass()) return false;
            NameOrDefinition that = (NameOrDefinition) o;
            return Objects.equals(name, that.name) &&
                    Objects.equals(definition, that.definition);
        }

        @Override
        public int hashCode() {
            return Objects.hash(name, definition);
        }

        @Override
        public String toString() {
            if (definition == null) {
                return name;
            } else {
                return definition.toDelimitedString(';');
            }
        }
    }

    // Either analyzer is set, or some combination of the other three fields is (see Builder)
    private final String analyzer;
    private final List<NameOrDefinition> charFilters;
    private final NameOrDefinition tokenizer;
    private final List<NameOrDefinition> tokenFilters;

    private CategorizationAnalyzerConfig(String analyzer, List<NameOrDefinition> charFilters, NameOrDefinition tokenizer,
                                         List<NameOrDefinition> tokenFilters) {
        this.analyzer = analyzer;
        this.charFilters = Collections.unmodifiableList(charFilters);
        this.tokenizer = tokenizer;
        this.tokenFilters = Collections.unmodifiableList(tokenFilters);
    }

    public String getAnalyzer() {
        return analyzer;
    }

    public List<NameOrDefinition> getCharFilters() {
        return charFilters;
    }

    public NameOrDefinition getTokenizer() {
        return tokenizer;
    }

    public List<NameOrDefinition> getTokenFilters() {
        return tokenFilters;
    }

    @Override
    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
        // Named analyzer serialises as a simple field; a custom analyzer as a nested object
        if (analyzer != null) {
            builder.field(CATEGORIZATION_ANALYZER.getPreferredName(), analyzer);
        } else {
            builder.startObject(CATEGORIZATION_ANALYZER.getPreferredName());
            if (charFilters.isEmpty() == false) {
                builder.startArray(CHAR_FILTERS.getPreferredName());
                for (NameOrDefinition charFilter : charFilters) {
                    charFilter.toXContent(builder, params);
                }
                builder.endArray();
            }
            if (tokenizer != null) {
                builder.field(TOKENIZER.getPreferredName(), tokenizer);
            }
            if (tokenFilters.isEmpty() == false) {
                builder.startArray(TOKEN_FILTERS.getPreferredName());
                for (NameOrDefinition tokenFilter : tokenFilters) {
                    tokenFilter.toXContent(builder, params);
                }
                builder.endArray();
            }
            builder.endObject();
        }
        return builder;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;
        CategorizationAnalyzerConfig that = (CategorizationAnalyzerConfig) o;
        return Objects.equals(analyzer, that.analyzer) &&
                Objects.equals(charFilters, that.charFilters) &&
                Objects.equals(tokenizer, that.tokenizer) &&
                Objects.equals(tokenFilters, that.tokenFilters);
    }

    @Override
    public int hashCode() {
        return Objects.hash(analyzer, charFilters, tokenizer, tokenFilters);
    }

    /**
     * Mutable builder for {@link CategorizationAnalyzerConfig}. Callers either set a named
     * analyzer or accumulate char filters / tokenizer / token filters for a custom one.
     */
    public static class Builder {

        private String analyzer;
        private List<NameOrDefinition> charFilters = new ArrayList<>();
        private NameOrDefinition tokenizer;
        private List<NameOrDefinition> tokenFilters = new ArrayList<>();

        public Builder() {
        }

        // Copy constructor; the lists are copied so the new builder is independent
        public Builder(CategorizationAnalyzerConfig categorizationAnalyzerConfig) {
            this.analyzer = categorizationAnalyzerConfig.analyzer;
            this.charFilters = new ArrayList<>(categorizationAnalyzerConfig.charFilters);
            this.tokenizer = categorizationAnalyzerConfig.tokenizer;
            this.tokenFilters = new ArrayList<>(categorizationAnalyzerConfig.tokenFilters);
        }

        public Builder setAnalyzer(String analyzer) {
            this.analyzer = analyzer;
            return this;
        }

        public Builder addCharFilter(String charFilter) {
            this.charFilters.add(new NameOrDefinition(charFilter));
            return this;
        }

        public Builder addCharFilter(Map<String, Object> charFilter) {
            this.charFilters.add(new NameOrDefinition(CHAR_FILTERS, charFilter));
            return this;
        }

        public Builder setTokenizer(String tokenizer) {
            this.tokenizer = new NameOrDefinition(tokenizer);
            return this;
        }

        public Builder setTokenizer(Map<String, Object> tokenizer) {
            this.tokenizer = new NameOrDefinition(TOKENIZER, tokenizer);
            return this;
        }

        public Builder addTokenFilter(String tokenFilter) {
            this.tokenFilters.add(new NameOrDefinition(tokenFilter));
            return this;
        }

        public Builder addTokenFilter(Map<String, Object> tokenFilter) {
            this.tokenFilters.add(new NameOrDefinition(TOKEN_FILTERS, tokenFilter));
            return this;
        }

        /**
         * Create a config
         */
        public CategorizationAnalyzerConfig build() {
            return new CategorizationAnalyzerConfig(analyzer, charFilters, tokenizer, tokenFilters);
        }
    }
}

View File

@ -0,0 +1,282 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.protocol.xpack.ml.job.config;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.xcontent.ObjectParser;
import org.elasticsearch.common.xcontent.ObjectParser.ValueType;
import org.elasticsearch.common.xcontent.ToXContentObject;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import java.io.IOException;
import java.util.Locale;
import java.util.Objects;
/**
 * Describes the format of the data used in the job and how it should
 * be interpreted by the ML job.
 * <p>
 * {@link #getTimeField()} is the name of the field containing the timestamp and
 * {@link #getTimeFormat()} is the format code for the date string in as described by
 * {@link java.time.format.DateTimeFormatter}.
 */
public class DataDescription implements ToXContentObject {
    /**
     * Enum of the acceptable data formats.
     */
    public enum DataFormat {
        XCONTENT,
        /**
         * This is deprecated
         */
        DELIMITED;

        /**
         * Case-insensitive from string method.
         * Works with either XCONTENT, XContent, etc.
         *
         * @param value String representation
         * @return The data format
         */
        public static DataFormat forString(String value) {
            return DataFormat.valueOf(value.toUpperCase(Locale.ROOT));
        }

        @Override
        public String toString() {
            // Serialised in lower case to match the REST API convention
            return name().toLowerCase(Locale.ROOT);
        }
    }

    private static final ParseField DATA_DESCRIPTION_FIELD = new ParseField("data_description");
    private static final ParseField FORMAT_FIELD = new ParseField("format");
    private static final ParseField TIME_FIELD_NAME_FIELD = new ParseField("time_field");
    private static final ParseField TIME_FORMAT_FIELD = new ParseField("time_format");
    private static final ParseField FIELD_DELIMITER_FIELD = new ParseField("field_delimiter");
    private static final ParseField QUOTE_CHARACTER_FIELD = new ParseField("quote_character");

    /**
     * Special time format string for epoch times (seconds)
     */
    public static final String EPOCH = "epoch";

    /**
     * Special time format string for epoch times (milli-seconds)
     */
    public static final String EPOCH_MS = "epoch_ms";

    /**
     * By default autodetect expects the timestamp in a field with this name
     */
    public static final String DEFAULT_TIME_FIELD = "time";

    /**
     * The default field delimiter expected by the native autodetect
     * program.
     */
    public static final char DEFAULT_DELIMITER = '\t';

    /**
     * The default quote character used to escape text in
     * delimited data formats
     */
    public static final char DEFAULT_QUOTE_CHAR = '"';

    private final DataFormat dataFormat;
    private final String timeFieldName;
    private final String timeFormat;
    private final Character fieldDelimiter;
    private final Character quoteCharacter;

    // Lenient parser (ignores unknown fields) that populates a Builder
    public static final ObjectParser<Builder, Void> PARSER =
            new ObjectParser<>(DATA_DESCRIPTION_FIELD.getPreferredName(), true, Builder::new);

    static {
        PARSER.declareString(Builder::setFormat, FORMAT_FIELD);
        PARSER.declareString(Builder::setTimeField, TIME_FIELD_NAME_FIELD);
        PARSER.declareString(Builder::setTimeFormat, TIME_FORMAT_FIELD);
        // The delimiter and quote character arrive as single-character strings
        PARSER.declareField(Builder::setFieldDelimiter, DataDescription::extractChar, FIELD_DELIMITER_FIELD, ValueType.STRING);
        PARSER.declareField(Builder::setQuoteCharacter, DataDescription::extractChar, QUOTE_CHARACTER_FIELD, ValueType.STRING);
    }

    public DataDescription(DataFormat dataFormat, String timeFieldName, String timeFormat, Character fieldDelimiter,
                           Character quoteCharacter) {
        this.dataFormat = dataFormat;
        this.timeFieldName = timeFieldName;
        this.timeFormat = timeFormat;
        this.fieldDelimiter = fieldDelimiter;
        this.quoteCharacter = quoteCharacter;
    }

    @Override
    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
        builder.startObject();
        // XCONTENT is the default, so only serialise the format when it differs
        if (dataFormat != DataFormat.XCONTENT) {
            builder.field(FORMAT_FIELD.getPreferredName(), dataFormat);
        }
        builder.field(TIME_FIELD_NAME_FIELD.getPreferredName(), timeFieldName);
        builder.field(TIME_FORMAT_FIELD.getPreferredName(), timeFormat);
        // Characters are serialised as single-character strings, matching what extractChar parses
        if (fieldDelimiter != null) {
            builder.field(FIELD_DELIMITER_FIELD.getPreferredName(), String.valueOf(fieldDelimiter));
        }
        if (quoteCharacter != null) {
            builder.field(QUOTE_CHARACTER_FIELD.getPreferredName(), String.valueOf(quoteCharacter));
        }
        builder.endObject();
        return builder;
    }

    /**
     * The format of the data to be processed.
     * Defaults to {@link DataDescription.DataFormat#XCONTENT}
     *
     * @return The data format
     */
    public DataFormat getFormat() {
        return dataFormat;
    }

    /**
     * The name of the field containing the timestamp
     *
     * @return A String if set or <code>null</code>
     */
    public String getTimeField() {
        return timeFieldName;
    }

    /**
     * Either {@value #EPOCH}, {@value #EPOCH_MS} or a
     * {@link java.time.format.DateTimeFormatter} format string.
     * If not set (is <code>null</code> or an empty string) or set to
     * {@value #EPOCH_MS} (the default) then the date is assumed to be in
     * milliseconds from the epoch.
     *
     * @return A String if set or <code>null</code>
     */
    public String getTimeFormat() {
        return timeFormat;
    }

    /**
     * If the data is in a delimited format with a header e.g. csv or tsv
     * this is the delimiter character used. This is only applicable if
     * {@linkplain #getFormat()} is {@link DataDescription.DataFormat#DELIMITED}.
     * The default value for delimited format is {@value #DEFAULT_DELIMITER}.
     *
     * @return A char
     */
    public Character getFieldDelimiter() {
        return fieldDelimiter;
    }

    /**
     * The quote character used in delimited formats.
     * The default value for delimited format is {@value #DEFAULT_QUOTE_CHAR}.
     *
     * @return The delimited format quote character
     */
    public Character getQuoteCharacter() {
        return quoteCharacter;
    }

    /**
     * Reads a single character from a string-valued token; rejects anything
     * that is not exactly one character long.
     */
    private static Character extractChar(XContentParser parser) throws IOException {
        if (parser.currentToken() == XContentParser.Token.VALUE_STRING) {
            String charStr = parser.text();
            if (charStr.length() != 1) {
                throw new IllegalArgumentException("String must be a single character, found [" + charStr + "]");
            }
            return charStr.charAt(0);
        }
        throw new IllegalArgumentException("Unsupported token [" + parser.currentToken() + "]");
    }

    /**
     * Overridden equality test
     */
    @Override
    public boolean equals(Object other) {
        if (this == other) {
            return true;
        }

        if (other instanceof DataDescription == false) {
            return false;
        }

        DataDescription that = (DataDescription) other;

        return this.dataFormat == that.dataFormat &&
                Objects.equals(this.quoteCharacter, that.quoteCharacter) &&
                Objects.equals(this.timeFieldName, that.timeFieldName) &&
                Objects.equals(this.timeFormat, that.timeFormat) &&
                Objects.equals(this.fieldDelimiter, that.fieldDelimiter);
    }

    @Override
    public int hashCode() {
        return Objects.hash(dataFormat, quoteCharacter, timeFieldName, timeFormat, fieldDelimiter);
    }

    /**
     * Mutable builder for {@link DataDescription}, pre-populated with the
     * documented defaults (XCONTENT format, "time" field, epoch_ms format).
     */
    public static class Builder {
        private DataFormat dataFormat = DataFormat.XCONTENT;
        private String timeFieldName = DEFAULT_TIME_FIELD;
        private String timeFormat = EPOCH_MS;
        private Character fieldDelimiter;
        private Character quoteCharacter;

        public void setFormat(DataFormat format) {
            dataFormat = Objects.requireNonNull(format);
        }

        // String overload used by PARSER; accepts any case via DataFormat.forString
        private void setFormat(String format) {
            setFormat(DataFormat.forString(format));
        }

        public void setTimeField(String fieldName) {
            timeFieldName = Objects.requireNonNull(fieldName);
        }

        public void setTimeFormat(String format) {
            timeFormat = Objects.requireNonNull(format);
        }

        public void setFieldDelimiter(Character delimiter) {
            fieldDelimiter = delimiter;
        }

        public void setQuoteCharacter(Character value) {
            quoteCharacter = value;
        }

        public DataDescription build() {
            // Delimiter/quote defaults only apply to the (deprecated) DELIMITED format;
            // for XCONTENT they remain null and are omitted from the serialised form
            if (dataFormat == DataFormat.DELIMITED) {
                if (fieldDelimiter == null) {
                    fieldDelimiter = DEFAULT_DELIMITER;
                }
                if (quoteCharacter == null) {
                    quoteCharacter = DEFAULT_QUOTE_CHAR;
                }
            }
            return new DataDescription(dataFormat, timeFieldName, timeFormat, fieldDelimiter, quoteCharacter);
        }
    }
}

View File

@ -0,0 +1,88 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.protocol.xpack.ml.job.config;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.xcontent.ConstructingObjectParser;
import org.elasticsearch.common.xcontent.ToXContentObject;
import org.elasticsearch.common.xcontent.XContentBuilder;
import java.io.IOException;
import java.util.Objects;
/**
 * Configuration for ML model plot output. Holds an enabled flag and an
 * optional <code>terms</code> string that is only serialised when set.
 */
public class ModelPlotConfig implements ToXContentObject {

    private static final ParseField TYPE_FIELD = new ParseField("model_plot_config");
    private static final ParseField ENABLED_FIELD = new ParseField("enabled");
    public static final ParseField TERMS_FIELD = new ParseField("terms");

    // Lenient parser so that unknown fields from newer/older versions are tolerated
    public static final ConstructingObjectParser<ModelPlotConfig, Void> PARSER =
            new ConstructingObjectParser<>(TYPE_FIELD.getPreferredName(), true, a -> new ModelPlotConfig((boolean) a[0], (String) a[1]));

    static {
        PARSER.declareBoolean(ConstructingObjectParser.constructorArg(), ENABLED_FIELD);
        PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), TERMS_FIELD);
    }

    private final boolean enabled;
    // May be null, in which case it is omitted from the serialised form
    private final String terms;

    public ModelPlotConfig(boolean enabled, String terms) {
        this.enabled = enabled;
        this.terms = terms;
    }

    public boolean isEnabled() {
        return enabled;
    }

    public String getTerms() {
        return terms;
    }

    @Override
    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
        builder.startObject();
        builder.field(ENABLED_FIELD.getPreferredName(), enabled);
        if (terms != null) {
            builder.field(TERMS_FIELD.getPreferredName(), terms);
        }
        return builder.endObject();
    }

    @Override
    public boolean equals(Object other) {
        if (this == other) {
            return true;
        }
        if ((other instanceof ModelPlotConfig) == false) {
            return false;
        }
        ModelPlotConfig that = (ModelPlotConfig) other;
        return enabled == that.enabled && Objects.equals(terms, that.terms);
    }

    @Override
    public int hashCode() {
        return Objects.hash(enabled, terms);
    }
}

View File

@ -0,0 +1,105 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.protocol.xpack.ml.job.config;
import org.elasticsearch.common.xcontent.DeprecationHandler;
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.test.AbstractXContentTestCase;
import java.io.IOException;
import static org.hamcrest.Matchers.equalTo;
public class AnalysisLimitsTests extends AbstractXContentTestCase<AnalysisLimits> {
@Override
protected AnalysisLimits createTestInstance() {
return createRandomized();
}
public static AnalysisLimits createRandomized() {
return new AnalysisLimits(randomBoolean() ? (long) randomIntBetween(1, 1000000) : null,
randomBoolean() ? randomNonNegativeLong() : null);
}
@Override
protected AnalysisLimits doParseInstance(XContentParser parser) {
return AnalysisLimits.PARSER.apply(parser, null);
}
public void testParseModelMemoryLimitGivenPositiveNumber() throws IOException {
String json = "{\"model_memory_limit\": 2048}";
XContentParser parser = XContentFactory.xContent(XContentType.JSON)
.createParser(NamedXContentRegistry.EMPTY, DeprecationHandler.THROW_UNSUPPORTED_OPERATION, json);
AnalysisLimits limits = AnalysisLimits.PARSER.apply(parser, null);
assertThat(limits.getModelMemoryLimit(), equalTo(2048L));
}
public void testParseModelMemoryLimitGivenStringMultipleOfMBs() throws IOException {
String json = "{\"model_memory_limit\":\"4g\"}";
XContentParser parser = XContentFactory.xContent(XContentType.JSON)
.createParser(NamedXContentRegistry.EMPTY, DeprecationHandler.THROW_UNSUPPORTED_OPERATION, json);
AnalysisLimits limits = AnalysisLimits.PARSER.apply(parser, null);
assertThat(limits.getModelMemoryLimit(), equalTo(4096L));
}
public void testEquals_GivenEqual() {
AnalysisLimits analysisLimits1 = new AnalysisLimits(10L, 20L);
AnalysisLimits analysisLimits2 = new AnalysisLimits(10L, 20L);
assertTrue(analysisLimits1.equals(analysisLimits1));
assertTrue(analysisLimits1.equals(analysisLimits2));
assertTrue(analysisLimits2.equals(analysisLimits1));
}
public void testEquals_GivenDifferentModelMemoryLimit() {
AnalysisLimits analysisLimits1 = new AnalysisLimits(10L, 20L);
AnalysisLimits analysisLimits2 = new AnalysisLimits(11L, 20L);
assertFalse(analysisLimits1.equals(analysisLimits2));
assertFalse(analysisLimits2.equals(analysisLimits1));
}
public void testEquals_GivenDifferentCategorizationExamplesLimit() {
AnalysisLimits analysisLimits1 = new AnalysisLimits(10L, 20L);
AnalysisLimits analysisLimits2 = new AnalysisLimits(10L, 21L);
assertFalse(analysisLimits1.equals(analysisLimits2));
assertFalse(analysisLimits2.equals(analysisLimits1));
}
public void testHashCode_GivenEqual() {
AnalysisLimits analysisLimits1 = new AnalysisLimits(5555L, 3L);
AnalysisLimits analysisLimits2 = new AnalysisLimits(5555L, 3L);
assertEquals(analysisLimits1.hashCode(), analysisLimits2.hashCode());
}
@Override
protected boolean supportsUnknownFields() {
    // Parsing is lenient, so the round-trip test framework may inject unknown
    // fields and parsing must still succeed.
    return true;
}
}

// ===========================================================================
// New file (diff hunk @@ -0,0 +1,87 @@): CategorizationAnalyzerConfigTests.java
// ===========================================================================
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.protocol.xpack.ml.job.config;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.test.AbstractXContentTestCase;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
public class CategorizationAnalyzerConfigTests extends AbstractXContentTestCase<CategorizationAnalyzerConfig> {

    @Override
    protected CategorizationAnalyzerConfig createTestInstance() {
        return createRandomized().build();
    }

    /**
     * Creates a randomized builder: rarely just a named analyzer, otherwise a
     * custom definition assembled from optional char filters, a tokenizer and
     * optional token filters (each either a name or a pattern-based map).
     */
    public static CategorizationAnalyzerConfig.Builder createRandomized() {
        CategorizationAnalyzerConfig.Builder configBuilder = new CategorizationAnalyzerConfig.Builder();
        if (rarely()) {
            // Simple case: refer to an analyzer by name only.
            configBuilder.setAnalyzer(randomAlphaOfLength(10));
            return configBuilder;
        }
        // Custom analyzer: optionally add some char filters.
        if (randomBoolean()) {
            for (String regex : generateRandomStringArray(3, 40, false)) {
                if (rarely()) {
                    configBuilder.addCharFilter(randomAlphaOfLength(10));
                } else {
                    Map<String, Object> charFilterDef = new HashMap<>();
                    charFilterDef.put("type", "pattern_replace");
                    charFilterDef.put("pattern", regex);
                    configBuilder.addCharFilter(charFilterDef);
                }
            }
        }
        // Always set a tokenizer, usually a pattern-based one.
        if (rarely()) {
            configBuilder.setTokenizer(randomAlphaOfLength(10));
        } else {
            Map<String, Object> tokenizerDef = new HashMap<>();
            tokenizerDef.put("type", "pattern");
            tokenizerDef.put("pattern", randomAlphaOfLength(10));
            configBuilder.setTokenizer(tokenizerDef);
        }
        // Optionally add some token filters.
        if (randomBoolean()) {
            for (String regex : generateRandomStringArray(4, 40, false)) {
                if (rarely()) {
                    configBuilder.addTokenFilter(randomAlphaOfLength(10));
                } else {
                    Map<String, Object> tokenFilterDef = new HashMap<>();
                    tokenFilterDef.put("type", "pattern_replace");
                    tokenFilterDef.put("pattern", regex);
                    configBuilder.addTokenFilter(tokenFilterDef);
                }
            }
        }
        return configBuilder;
    }

    @Override
    protected CategorizationAnalyzerConfig doParseInstance(XContentParser parser) throws IOException {
        return CategorizationAnalyzerConfig.buildFromXContentObject(parser);
    }

    @Override
    protected boolean supportsUnknownFields() {
        // Strict round-trip only: randomly inserted fields would presumably end
        // up inside the free-form analyzer maps and break equality checking.
        return false;
    }
}

// ===========================================================================
// New file (diff hunk @@ -0,0 +1,185 @@): DataDescriptionTests.java
// ===========================================================================
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.protocol.xpack.ml.job.config;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.protocol.xpack.ml.job.config.DataDescription.DataFormat;
import org.elasticsearch.test.AbstractXContentTestCase;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.nullValue;
import static org.hamcrest.core.Is.is;
public class DataDescriptionTests extends AbstractXContentTestCase<DataDescription> {
public void testDefault() {
DataDescription dataDescription = new DataDescription.Builder().build();
assertThat(dataDescription.getFormat(), equalTo(DataFormat.XCONTENT));
assertThat(dataDescription.getTimeField(), equalTo("time"));
assertThat(dataDescription.getTimeFormat(), equalTo("epoch_ms"));
assertThat(dataDescription.getFieldDelimiter(), is(nullValue()));
assertThat(dataDescription.getQuoteCharacter(), is(nullValue()));
}
public void testDefaultDelimited() {
DataDescription.Builder dataDescriptionBuilder = new DataDescription.Builder();
dataDescriptionBuilder.setFormat(DataFormat.DELIMITED);
DataDescription dataDescription = dataDescriptionBuilder.build();
assertThat(dataDescription.getFormat(), equalTo(DataFormat.DELIMITED));
assertThat(dataDescription.getTimeField(), equalTo("time"));
assertThat(dataDescription.getTimeFormat(), equalTo("epoch_ms"));
assertThat(dataDescription.getFieldDelimiter(), is('\t'));
assertThat(dataDescription.getQuoteCharacter(), is('"'));
}
public void testEquals_GivenDifferentDateFormat() {
DataDescription.Builder description1 = new DataDescription.Builder();
description1.setFormat(DataFormat.XCONTENT);
description1.setQuoteCharacter('"');
description1.setTimeField("timestamp");
description1.setTimeFormat("epoch");
description1.setFieldDelimiter(',');
DataDescription.Builder description2 = new DataDescription.Builder();
description2.setFormat(DataFormat.DELIMITED);
description2.setQuoteCharacter('"');
description2.setTimeField("timestamp");
description2.setTimeFormat("epoch");
description2.setFieldDelimiter(',');
assertFalse(description1.build().equals(description2.build()));
assertFalse(description2.build().equals(description1.build()));
}
public void testEquals_GivenDifferentQuoteCharacter() {
DataDescription.Builder description1 = new DataDescription.Builder();
description1.setFormat(DataFormat.XCONTENT);
description1.setQuoteCharacter('"');
description1.setTimeField("timestamp");
description1.setTimeFormat("epoch");
description1.setFieldDelimiter(',');
DataDescription.Builder description2 = new DataDescription.Builder();
description2.setFormat(DataFormat.XCONTENT);
description2.setQuoteCharacter('\'');
description2.setTimeField("timestamp");
description2.setTimeFormat("epoch");
description2.setFieldDelimiter(',');
assertFalse(description1.build().equals(description2.build()));
assertFalse(description2.build().equals(description1.build()));
}
public void testEquals_GivenDifferentTimeField() {
DataDescription.Builder description1 = new DataDescription.Builder();
description1.setFormat(DataFormat.XCONTENT);
description1.setQuoteCharacter('"');
description1.setTimeField("timestamp");
description1.setTimeFormat("epoch");
description1.setFieldDelimiter(',');
DataDescription.Builder description2 = new DataDescription.Builder();
description2.setFormat(DataFormat.XCONTENT);
description2.setQuoteCharacter('"');
description2.setTimeField("time");
description2.setTimeFormat("epoch");
description2.setFieldDelimiter(',');
assertFalse(description1.build().equals(description2.build()));
assertFalse(description2.build().equals(description1.build()));
}
public void testEquals_GivenDifferentTimeFormat() {
DataDescription.Builder description1 = new DataDescription.Builder();
description1.setFormat(DataFormat.XCONTENT);
description1.setQuoteCharacter('"');
description1.setTimeField("timestamp");
description1.setTimeFormat("epoch");
description1.setFieldDelimiter(',');
DataDescription.Builder description2 = new DataDescription.Builder();
description2.setFormat(DataFormat.XCONTENT);
description2.setQuoteCharacter('"');
description2.setTimeField("timestamp");
description2.setTimeFormat("epoch_ms");
description2.setFieldDelimiter(',');
assertFalse(description1.build().equals(description2.build()));
assertFalse(description2.build().equals(description1.build()));
}
public void testEquals_GivenDifferentFieldDelimiter() {
DataDescription.Builder description1 = new DataDescription.Builder();
description1.setFormat(DataFormat.XCONTENT);
description1.setQuoteCharacter('"');
description1.setTimeField("timestamp");
description1.setTimeFormat("epoch");
description1.setFieldDelimiter(',');
DataDescription.Builder description2 = new DataDescription.Builder();
description2.setFormat(DataFormat.XCONTENT);
description2.setQuoteCharacter('"');
description2.setTimeField("timestamp");
description2.setTimeFormat("epoch");
description2.setFieldDelimiter(';');
assertFalse(description1.build().equals(description2.build()));
assertFalse(description2.build().equals(description1.build()));
}
@Override
protected DataDescription createTestInstance() {
DataDescription.Builder dataDescription = new DataDescription.Builder();
if (randomBoolean()) {
dataDescription.setFormat(randomFrom(DataFormat.values()));
}
if (randomBoolean()) {
dataDescription.setTimeField(randomAlphaOfLengthBetween(1, 20));
}
if (randomBoolean()) {
String format;
if (randomBoolean()) {
format = DataDescription.EPOCH;
} else if (randomBoolean()) {
format = DataDescription.EPOCH_MS;
} else {
format = "yyyy-MM-dd HH:mm:ss.SSS";
}
dataDescription.setTimeFormat(format);
}
if (randomBoolean()) {
dataDescription.setFieldDelimiter(randomAlphaOfLength(1).charAt(0));
}
if (randomBoolean()) {
dataDescription.setQuoteCharacter(randomAlphaOfLength(1).charAt(0));
}
return dataDescription.build();
}
@Override
protected DataDescription doParseInstance(XContentParser parser) {
return DataDescription.PARSER.apply(parser, null).build();
}
@Override
protected boolean supportsUnknownFields() {
return true;
}
}

// ===========================================================================
// New file (diff hunk @@ -0,0 +1,40 @@): ModelPlotConfigTests.java
// ===========================================================================
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.protocol.xpack.ml.job.config;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.test.AbstractXContentTestCase;
public class ModelPlotConfigTests extends AbstractXContentTestCase<ModelPlotConfig> {

    @Override
    protected ModelPlotConfig createTestInstance() {
        // Randomize both constructor arguments for round-trip coverage.
        boolean enabled = randomBoolean();
        String randomString = randomAlphaOfLengthBetween(1, 30);
        return new ModelPlotConfig(enabled, randomString);
    }

    @Override
    protected ModelPlotConfig doParseInstance(XContentParser parser) {
        return ModelPlotConfig.PARSER.apply(parser, null);
    }

    @Override
    protected boolean supportsUnknownFields() {
        // Parsing is lenient, so injected unknown fields must be tolerated.
        return true;
    }
}