Removing transforms and the SINGLE_LINE input format (elastic/elasticsearch#790)

Most transforms will be replaced with Painless scripts.

The exception is the DateTransform, whose functionality is now simplified
to what existed before the other transforms were added.

The SINGLE_LINE format relied on transforms to extract fields, so it has also
been removed. This is reasonable, as it strays into Logstash territory.

Relates elastic/elasticsearch#630

Closes elastic/elasticsearch#39

Original commit: elastic/x-pack-elasticsearch@a593d3e0ad
This commit is contained in:
David Roberts 2017-01-25 15:51:50 +00:00 committed by GitHub
parent 99c9d3733f
commit 4b366f8ef6
77 changed files with 265 additions and 6045 deletions

View File

@ -63,8 +63,6 @@ import org.elasticsearch.xpack.ml.action.UpdateDatafeedStatusAction;
import org.elasticsearch.xpack.ml.action.UpdateJobStatusAction;
import org.elasticsearch.xpack.ml.action.UpdateModelSnapshotAction;
import org.elasticsearch.xpack.ml.action.ValidateDetectorAction;
import org.elasticsearch.xpack.ml.action.ValidateTransformAction;
import org.elasticsearch.xpack.ml.action.ValidateTransformsAction;
import org.elasticsearch.xpack.ml.datafeed.DatafeedJobRunner;
import org.elasticsearch.xpack.ml.job.JobManager;
import org.elasticsearch.xpack.ml.job.metadata.MlInitializationService;
@ -112,8 +110,6 @@ import org.elasticsearch.xpack.ml.rest.results.RestGetCategoriesAction;
import org.elasticsearch.xpack.ml.rest.results.RestGetInfluencersAction;
import org.elasticsearch.xpack.ml.rest.results.RestGetRecordsAction;
import org.elasticsearch.xpack.ml.rest.validate.RestValidateDetectorAction;
import org.elasticsearch.xpack.ml.rest.validate.RestValidateTransformAction;
import org.elasticsearch.xpack.ml.rest.validate.RestValidateTransformsAction;
import org.elasticsearch.xpack.ml.utils.NamedPipeHelper;
import java.io.IOException;
@ -255,8 +251,6 @@ public class MlPlugin extends Plugin implements ActionPlugin {
new RestCloseJobAction(settings, restController),
new RestFlushJobAction(settings, restController),
new RestValidateDetectorAction(settings, restController),
new RestValidateTransformAction(settings, restController),
new RestValidateTransformsAction(settings, restController),
new RestGetCategoriesAction(settings, restController),
new RestGetModelSnapshotsAction(settings, restController),
new RestRevertModelSnapshotAction(settings, restController),
@ -295,8 +289,6 @@ public class MlPlugin extends Plugin implements ActionPlugin {
new ActionHandler<>(CloseJobAction.INSTANCE, CloseJobAction.TransportAction.class),
new ActionHandler<>(FlushJobAction.INSTANCE, FlushJobAction.TransportAction.class),
new ActionHandler<>(ValidateDetectorAction.INSTANCE, ValidateDetectorAction.TransportAction.class),
new ActionHandler<>(ValidateTransformAction.INSTANCE, ValidateTransformAction.TransportAction.class),
new ActionHandler<>(ValidateTransformsAction.INSTANCE, ValidateTransformsAction.TransportAction.class),
new ActionHandler<>(GetCategoriesAction.INSTANCE, GetCategoriesAction.TransportAction.class),
new ActionHandler<>(GetModelSnapshotsAction.INSTANCE, GetModelSnapshotsAction.TransportAction.class),
new ActionHandler<>(RevertModelSnapshotAction.INSTANCE, RevertModelSnapshotAction.TransportAction.class),

View File

@ -1,164 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.action;
import org.elasticsearch.action.Action;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.ActionRequest;
import org.elasticsearch.action.ActionRequestBuilder;
import org.elasticsearch.action.ActionRequestValidationException;
import org.elasticsearch.action.support.ActionFilters;
import org.elasticsearch.action.support.HandledTransportAction;
import org.elasticsearch.action.support.master.AcknowledgedResponse;
import org.elasticsearch.client.ElasticsearchClient;
import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.transport.TransportService;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
import org.elasticsearch.xpack.ml.job.config.transform.verification.TransformConfigVerifier;
import java.io.IOException;
import java.util.Objects;
public class ValidateTransformAction
extends Action<ValidateTransformAction.Request, ValidateTransformAction.Response, ValidateTransformAction.RequestBuilder> {
public static final ValidateTransformAction INSTANCE = new ValidateTransformAction();
public static final String NAME = "cluster:admin/ml/validate/transform";
protected ValidateTransformAction() {
super(NAME);
}
@Override
public RequestBuilder newRequestBuilder(ElasticsearchClient client) {
return new RequestBuilder(client, INSTANCE);
}
@Override
public Response newResponse() {
return new Response();
}
public static class RequestBuilder extends ActionRequestBuilder<Request, Response, RequestBuilder> {
protected RequestBuilder(ElasticsearchClient client, ValidateTransformAction action) {
super(client, action, new Request());
}
}
public static class Request extends ActionRequest implements ToXContent {
private TransformConfig transform;
public static Request parseRequest(XContentParser parser) {
TransformConfig transform = TransformConfig.PARSER.apply(parser, null);
return new Request(transform);
}
Request() {
this.transform = null;
}
public Request(TransformConfig transform) {
this.transform = transform;
}
public TransformConfig getTransform() {
return transform;
}
@Override
public ActionRequestValidationException validate() {
return null;
}
@Override
public void writeTo(StreamOutput out) throws IOException {
super.writeTo(out);
transform.writeTo(out);
}
@Override
public void readFrom(StreamInput in) throws IOException {
super.readFrom(in);
transform = new TransformConfig(in);
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
transform.toXContent(builder, params);
return builder;
}
@Override
public int hashCode() {
return Objects.hash(transform);
}
@Override
public boolean equals(Object obj) {
if (obj == null) {
return false;
}
if (getClass() != obj.getClass()) {
return false;
}
Request other = (Request) obj;
return Objects.equals(transform, other.transform);
}
}
public static class Response extends AcknowledgedResponse {
public Response() {
super();
}
public Response(boolean acknowledged) {
super(acknowledged);
}
@Override
public void readFrom(StreamInput in) throws IOException {
super.readFrom(in);
readAcknowledged(in);
}
@Override
public void writeTo(StreamOutput out) throws IOException {
super.writeTo(out);
writeAcknowledged(out);
}
}
public static class TransportAction extends HandledTransportAction<Request, Response> {
@Inject
public TransportAction(Settings settings, TransportService transportService, ClusterService clusterService, ThreadPool threadPool,
ActionFilters actionFilters, IndexNameExpressionResolver indexNameExpressionResolver) {
super(settings, ValidateTransformAction.NAME, threadPool, transportService, actionFilters, indexNameExpressionResolver,
Request::new);
}
@Override
protected void doExecute(Request request, ActionListener<Response> listener) {
TransformConfigVerifier.verify(request.getTransform());
listener.onResponse(new Response(true));
}
}
}

View File

@ -1,173 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.action;
import org.elasticsearch.action.Action;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.ActionRequest;
import org.elasticsearch.action.ActionRequestBuilder;
import org.elasticsearch.action.ActionRequestValidationException;
import org.elasticsearch.action.support.ActionFilters;
import org.elasticsearch.action.support.HandledTransportAction;
import org.elasticsearch.action.support.master.AcknowledgedResponse;
import org.elasticsearch.client.ElasticsearchClient;
import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.ConstructingObjectParser;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.transport.TransportService;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
import org.elasticsearch.xpack.ml.job.config.transform.verification.TransformConfigsVerifier;
import java.io.IOException;
import java.util.List;
import java.util.Objects;
/**
 * Action that validates a list of {@link TransformConfig}s by passing the
 * whole list to {@link TransformConfigsVerifier}. A successful validation is
 * reported as an acknowledged response.
 */
public class ValidateTransformsAction
        extends Action<ValidateTransformsAction.Request, ValidateTransformsAction.Response, ValidateTransformsAction.RequestBuilder> {

    public static final ValidateTransformsAction INSTANCE = new ValidateTransformsAction();
    public static final String NAME = "cluster:admin/ml/validate/transforms";

    protected ValidateTransformsAction() {
        super(NAME);
    }

    @Override
    public RequestBuilder newRequestBuilder(ElasticsearchClient client) {
        return new RequestBuilder(client, INSTANCE);
    }

    @Override
    public Response newResponse() {
        return new Response();
    }

    /** Builder that starts from an empty {@link Request}. */
    public static class RequestBuilder extends ActionRequestBuilder<Request, Response, RequestBuilder> {

        protected RequestBuilder(ElasticsearchClient client, ValidateTransformsAction action) {
            super(client, action, new Request());
        }
    }

    /** Request carrying the list of transform configurations to validate. */
    public static class Request extends ActionRequest implements ToXContent {

        public static final ParseField TRANSFORMS = new ParseField("transforms");

        @SuppressWarnings("unchecked")
        public static final ConstructingObjectParser<Request, Void> PARSER = new ConstructingObjectParser<>(NAME,
                args -> new Request((List<TransformConfig>) args[0]));

        static {
            PARSER.declareObjectArray(ConstructingObjectParser.constructorArg(), TransformConfig.PARSER, TRANSFORMS);
        }

        private List<TransformConfig> transforms;

        /** Creates an empty request; the list is filled in by {@link #readFrom(StreamInput)}. */
        Request() {
            this.transforms = null;
        }

        public Request(List<TransformConfig> transforms) {
            this.transforms = transforms;
        }

        public List<TransformConfig> getTransforms() {
            return transforms;
        }

        /** No request-level validation is performed; always returns {@code null}. */
        @Override
        public ActionRequestValidationException validate() {
            return null;
        }

        @Override
        public void readFrom(StreamInput in) throws IOException {
            super.readFrom(in);
            transforms = in.readList(TransformConfig::new);
        }

        @Override
        public void writeTo(StreamOutput out) throws IOException {
            super.writeTo(out);
            out.writeList(transforms);
        }

        /** Writes an object with a single array field holding the transforms. */
        @Override
        public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
            Object[] transformArray = transforms.toArray(new Object[transforms.size()]);
            builder.startObject();
            builder.array(TRANSFORMS.getPreferredName(), transformArray);
            builder.endObject();
            return builder;
        }

        @Override
        public boolean equals(Object obj) {
            if (obj == null || getClass() != obj.getClass()) {
                return false;
            }
            Request that = (Request) obj;
            return Objects.equals(transforms, that.transforms);
        }

        @Override
        public int hashCode() {
            return Objects.hash(transforms);
        }
    }

    /** Plain acknowledged/not-acknowledged response. */
    public static class Response extends AcknowledgedResponse {

        public Response() {
            super();
        }

        public Response(boolean acknowledged) {
            super(acknowledged);
        }

        @Override
        public void writeTo(StreamOutput out) throws IOException {
            super.writeTo(out);
            writeAcknowledged(out);
        }

        @Override
        public void readFrom(StreamInput in) throws IOException {
            super.readFrom(in);
            readAcknowledged(in);
        }
    }

    /** Transport handler: verifies the transforms, then acknowledges. */
    public static class TransportAction extends HandledTransportAction<Request, Response> {

        @Inject
        public TransportAction(Settings settings, TransportService transportService, ClusterService clusterService, ThreadPool threadPool,
                ActionFilters actionFilters, IndexNameExpressionResolver indexNameExpressionResolver) {
            super(settings, ValidateTransformsAction.NAME, threadPool, transportService, actionFilters, indexNameExpressionResolver,
                    Request::new);
        }

        @Override
        protected void doExecute(Request request, ActionListener<Response> listener) {
            // The listener is only reached when verify() returns normally.
            TransformConfigsVerifier.verify(request.getTransforms());
            listener.onResponse(new Response(true));
        }
    }
}

View File

@ -41,8 +41,7 @@ public class DataDescription extends ToXContentToBytes implements Writeable {
*/
public enum DataFormat implements Writeable {
JSON("json"),
DELIMITED("delimited"),
SINGLE_LINE("single_line");
DELIMITED("delimited");
/**
* Delimited used to be called delineated. We keep supporting that for backwards

View File

@ -18,9 +18,6 @@ import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser.Token;
import org.elasticsearch.xpack.ml.job.messages.Messages;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfigs;
import org.elasticsearch.xpack.ml.job.config.transform.verification.TransformConfigsVerifier;
import org.elasticsearch.xpack.ml.utils.MlStrings;
import org.elasticsearch.xpack.ml.utils.time.TimeUtils;
@ -64,7 +61,6 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContent
public static final ParseField MODEL_SNAPSHOT_RETENTION_DAYS = new ParseField("model_snapshot_retention_days");
public static final ParseField RESULTS_RETENTION_DAYS = new ParseField("results_retention_days");
public static final ParseField TIMEOUT = new ParseField("timeout");
public static final ParseField TRANSFORMS = new ParseField("transforms");
public static final ParseField MODEL_SNAPSHOT_ID = new ParseField("model_snapshot_id");
public static final ParseField INDEX_NAME = new ParseField("index_name");
@ -107,7 +103,6 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContent
PARSER.declareObject(Builder::setAnalysisConfig, AnalysisConfig.PARSER, ANALYSIS_CONFIG);
PARSER.declareObject(Builder::setAnalysisLimits, AnalysisLimits.PARSER, ANALYSIS_LIMITS);
PARSER.declareObject(Builder::setDataDescription, DataDescription.PARSER, DATA_DESCRIPTION);
PARSER.declareObjectArray(Builder::setTransforms, TransformConfig.PARSER, TRANSFORMS);
PARSER.declareObject(Builder::setModelDebugConfig, ModelDebugConfig.PARSER, MODEL_DEBUG_CONFIG);
PARSER.declareField(Builder::setIgnoreDowntime, (p, c) -> IgnoreDowntime.fromString(p.text()), IGNORE_DOWNTIME, ValueType.STRING);
PARSER.declareLong(Builder::setTimeout, TIMEOUT);
@ -130,7 +125,6 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContent
private final AnalysisConfig analysisConfig;
private final AnalysisLimits analysisLimits;
private final DataDescription dataDescription;
private final List<TransformConfig> transforms;
private final ModelDebugConfig modelDebugConfig;
private final IgnoreDowntime ignoreDowntime;
private final Long renormalizationWindowDays;
@ -143,7 +137,7 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContent
public Job(String jobId, String description, Date createTime, Date finishedTime, Date lastDataTime, long timeout,
AnalysisConfig analysisConfig, AnalysisLimits analysisLimits, DataDescription dataDescription,
List<TransformConfig> transforms, ModelDebugConfig modelDebugConfig, IgnoreDowntime ignoreDowntime,
ModelDebugConfig modelDebugConfig, IgnoreDowntime ignoreDowntime,
Long renormalizationWindowDays, Long backgroundPersistInterval, Long modelSnapshotRetentionDays, Long resultsRetentionDays,
Map<String, Object> customSettings, String modelSnapshotId, String indexName) {
this.jobId = jobId;
@ -155,7 +149,6 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContent
this.analysisConfig = analysisConfig;
this.analysisLimits = analysisLimits;
this.dataDescription = dataDescription;
this.transforms = transforms;
this.modelDebugConfig = modelDebugConfig;
this.ignoreDowntime = ignoreDowntime;
this.renormalizationWindowDays = renormalizationWindowDays;
@ -177,7 +170,6 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContent
analysisConfig = new AnalysisConfig(in);
analysisLimits = in.readOptionalWriteable(AnalysisLimits::new);
dataDescription = in.readOptionalWriteable(DataDescription::new);
transforms = in.readList(TransformConfig::new);
modelDebugConfig = in.readOptionalWriteable(ModelDebugConfig::new);
ignoreDowntime = in.readOptionalWriteable(IgnoreDowntime::fromStream);
renormalizationWindowDays = in.readOptionalLong();
@ -302,10 +294,6 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContent
return dataDescription;
}
/**
 * Returns the transform configurations held by this job, exactly as stored
 * (no copy is made).
 *
 * @return the stored list of transforms
 */
public List<TransformConfig> getTransforms() {
return transforms;
}
/**
* The duration of the renormalization window in days
*
@ -342,7 +330,7 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContent
/**
* Get a list of all input data fields mentioned in the job configuration,
* namely analysis fields, time field and transform input fields.
* namely analysis fields and the time field.
*
* @return the list of fields - never <code>null</code>
*/
@ -354,16 +342,6 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContent
allFields.addAll(analysisConfig.analysisFields());
}
// transform input fields
if (transforms != null) {
for (TransformConfig tc : transforms) {
List<String> inputFields = tc.getInputs();
if (inputFields != null) {
allFields.addAll(inputFields);
}
}
}
// time field
if (dataDescription != null) {
String timeField = dataDescription.getTimeField();
@ -399,7 +377,6 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContent
analysisConfig.writeTo(out);
out.writeOptionalWriteable(analysisLimits);
out.writeOptionalWriteable(dataDescription);
out.writeList(transforms);
out.writeOptionalWriteable(modelDebugConfig);
out.writeOptionalWriteable(ignoreDowntime);
out.writeOptionalLong(renormalizationWindowDays);
@ -439,9 +416,6 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContent
if (dataDescription != null) {
builder.field(DATA_DESCRIPTION.getPreferredName(), dataDescription, params);
}
if (transforms != null) {
builder.field(TRANSFORMS.getPreferredName(), transforms);
}
if (modelDebugConfig != null) {
builder.field(MODEL_DEBUG_CONFIG.getPreferredName(), modelDebugConfig, params);
}
@ -488,7 +462,7 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContent
&& (this.timeout == that.timeout)
&& Objects.equals(this.analysisConfig, that.analysisConfig)
&& Objects.equals(this.analysisLimits, that.analysisLimits) && Objects.equals(this.dataDescription, that.dataDescription)
&& Objects.equals(this.modelDebugConfig, that.modelDebugConfig) && Objects.equals(this.transforms, that.transforms)
&& Objects.equals(this.modelDebugConfig, that.modelDebugConfig)
&& Objects.equals(this.ignoreDowntime, that.ignoreDowntime)
&& Objects.equals(this.renormalizationWindowDays, that.renormalizationWindowDays)
&& Objects.equals(this.backgroundPersistInterval, that.backgroundPersistInterval)
@ -502,7 +476,7 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContent
@Override
public int hashCode() {
return Objects.hash(jobId, description, createTime, finishedTime, lastDataTime, timeout, analysisConfig,
analysisLimits, dataDescription, modelDebugConfig, transforms, renormalizationWindowDays,
analysisLimits, dataDescription, modelDebugConfig, renormalizationWindowDays,
backgroundPersistInterval, modelSnapshotRetentionDays, resultsRetentionDays, ignoreDowntime, customSettings,
modelSnapshotId, indexName);
}
@ -533,7 +507,6 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContent
private AnalysisConfig analysisConfig;
private AnalysisLimits analysisLimits;
private List<TransformConfig> transforms = new ArrayList<>();
private DataDescription dataDescription;
private Date createTime;
private Date finishedTime;
@ -560,7 +533,6 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContent
this.id = job.getId();
this.description = job.getDescription();
this.analysisConfig = job.getAnalysisConfig();
this.transforms = job.getTransforms();
this.dataDescription = job.getDataDescription();
this.createTime = job.getCreateTime();
this.finishedTime = job.getFinishedTime();
@ -628,10 +600,6 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContent
this.lastDataTime = lastDataTime;
}
/**
 * Replaces the builder's transform list with the given list. The reference
 * is stored as-is; no copy is made.
 *
 * @param transforms the transforms to use for the job being built
 */
public void setTransforms(List<TransformConfig> transforms) {
this.transforms = transforms;
}
/**
 * Builds the supplied data description builder immediately and stores the
 * resulting description on this job builder.
 *
 * @param description builder for the data description; it is dereferenced,
 *                    so it must not be <code>null</code>
 */
public void setDataDescription(DataDescription.Builder description) {
dataDescription = description.build();
}
@ -677,19 +645,6 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContent
throw new IllegalArgumentException(Messages.getMessage(Messages.JOB_CONFIG_MISSING_ANALYSISCONFIG));
}
if (transforms != null && transforms.isEmpty() == false) {
TransformConfigsVerifier.verify(transforms);
checkTransformOutputIsUsed();
} else {
if (dataDescription != null && dataDescription.getFormat() == DataDescription.DataFormat.SINGLE_LINE) {
String msg = Messages.getMessage(
Messages.JOB_CONFIG_DATAFORMAT_REQUIRES_TRANSFORM,
DataDescription.DataFormat.SINGLE_LINE);
throw new IllegalArgumentException(msg);
}
}
checkValueNotLessThan(0, "timeout", timeout);
checkValueNotLessThan(0, "renormalizationWindowDays", renormalizationWindowDays);
checkValueNotLessThan(MIN_BACKGROUND_PERSIST_INTERVAL, "backgroundPersistInterval", backgroundPersistInterval);
@ -732,7 +687,7 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContent
return new Job(
id, description, createTime, finishedTime, lastDataTime, timeout, analysisConfig, analysisLimits,
dataDescription, transforms, modelDebugConfig, ignoreDowntime, renormalizationWindowDays,
dataDescription, modelDebugConfig, ignoreDowntime, renormalizationWindowDays,
backgroundPersistInterval, modelSnapshotRetentionDays, resultsRetentionDays, customSettings, modelSnapshotId,
indexName
);
@ -743,41 +698,5 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContent
throw new IllegalArgumentException(Messages.getMessage(Messages.JOB_CONFIG_FIELD_VALUE_TOO_LOW, name, minVal, value));
}
}
/**
 * Checks that every transform produces something that is consumed: an
 * output must be the time field, an analysis field, or an input to
 * another transform. Transform types with no default outputs are exempt.
 * A transform may also not write to the summary count field.
 *
 * @return always <code>false</code>; problems are reported by exception
 * @throws IllegalArgumentException if a transform outputs the summary count
 *         field, or if none of a transform's outputs is used
 */
private boolean checkTransformOutputIsUsed() {
    // Everything that consumes a field: transform inputs, analysis fields, the time field.
    Set<String> consumedFields = new TransformConfigs(transforms).inputFieldNames();
    consumedFields.addAll(analysisConfig.analysisFields());

    String summaryCountFieldName = analysisConfig.getSummaryCountFieldName();
    boolean summarised = Strings.isNullOrEmpty(summaryCountFieldName) == false;
    if (summarised) {
        consumedFields.remove(summaryCountFieldName);
    }

    String timeField;
    if (dataDescription == null) {
        timeField = DataDescription.DEFAULT_TIME_FIELD;
    } else {
        timeField = dataDescription.getTimeField();
    }
    consumedFields.add(timeField);

    for (TransformConfig transform : transforms) {
        // if the type has no default outputs it doesn't need an output
        boolean outputConsumed = transform.type().defaultOutputNames().isEmpty()
                || transform.getOutputs().stream().anyMatch(consumedFields::contains);

        if (summarised && transform.getOutputs().contains(summaryCountFieldName)) {
            throw new IllegalArgumentException(
                    Messages.getMessage(Messages.JOB_CONFIG_TRANSFORM_DUPLICATED_OUTPUT_NAME, transform.type().prettyName()));
        }

        if (outputConsumed == false) {
            throw new IllegalArgumentException(
                    Messages.getMessage(Messages.JOB_CONFIG_TRANSFORM_OUTPUTS_UNUSED, transform.type().prettyName()));
        }
    }

    return false;
}
}
}

View File

@ -1,105 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.job.config.transform;
import java.util.Objects;
/**
 * An immutable range of <code>int</code> values. Each end of the range is
 * either open (the end value itself is excluded) or closed (the end value is
 * included).
 *
 * A lower value of {@link Integer#MIN_VALUE} or an upper value of
 * {@link Integer#MAX_VALUE} is reported as "no bound" by
 * {@link #hasLowerBound()} / {@link #hasUpperBound()} and is rendered as an
 * infinity symbol by {@link #toString()}.
 */
public class IntRange {

    /** Whether an end value is excluded from (OPEN) or included in (CLOSED) the range. */
    public enum BoundType {
        OPEN, CLOSED
    }

    /** One end of a range: the end value plus whether that value is itself included. */
    public static class Bound {
        private final int value;
        private final BoundType boundType;

        /**
         * @param value     the end value
         * @param boundType whether the value is included; must not be <code>null</code>
         * @throws NullPointerException if <code>boundType</code> is <code>null</code>
         */
        public Bound(int value, BoundType boundType) {
            this.value = value;
            this.boundType = Objects.requireNonNull(boundType);
        }
    }

    private static final String PLUS_INFINITY = "+\u221E";
    private static final String MINUS_INFINITY = "-\u221E";
    private static final char LEFT_BRACKET = '(';
    private static final char RIGHT_BRACKET = ')';
    private static final char LEFT_SQUARE_BRACKET = '[';
    private static final char RIGHT_SQUARE_BRACKET = ']';
    private static final char BOUNDS_SEPARATOR = '\u2025';

    private final Bound lower;
    private final Bound upper;

    private IntRange(Bound lower, Bound upper) {
        this.lower = Objects.requireNonNull(lower);
        this.upper = Objects.requireNonNull(upper);
    }

    /**
     * Tests whether the given value lies inside this range.
     *
     * @param value the value to test
     * @return <code>true</code> if the value satisfies both the lower and the upper bound
     */
    public boolean contains(int value) {
        // Compare against the bounds directly. Deriving an inclusive limit with
        // "bound + 1" / "bound - 1" wraps around when an open bound sits at
        // Integer.MAX_VALUE / Integer.MIN_VALUE, making an empty range match.
        boolean satisfiesLower = lower.boundType == BoundType.CLOSED ? value >= lower.value : value > lower.value;
        boolean satisfiesUpper = upper.boundType == BoundType.CLOSED ? value <= upper.value : value < upper.value;
        return satisfiesLower && satisfiesUpper;
    }

    /**
     * @return <code>false</code> only when the lower value is {@link Integer#MIN_VALUE}
     */
    public boolean hasLowerBound() {
        return lower.value != Integer.MIN_VALUE;
    }

    /**
     * @return <code>false</code> only when the upper value is {@link Integer#MAX_VALUE}
     */
    public boolean hasUpperBound() {
        return upper.value != Integer.MAX_VALUE;
    }

    /**
     * @return the lower end value, regardless of whether it is open or closed
     */
    public int lower() {
        return lower.value;
    }

    /**
     * @return the upper end value, regardless of whether it is open or closed
     */
    public int upper() {
        return upper.value;
    }

    /**
     * Renders the range in interval notation: square brackets for closed
     * ends, round brackets for open or unbounded ends, and an infinity
     * symbol in place of a missing bound.
     */
    @Override
    public String toString() {
        StringBuilder builder = new StringBuilder();
        builder.append(hasLowerBound() && lower.boundType == BoundType.CLOSED ? LEFT_SQUARE_BRACKET : LEFT_BRACKET);
        if (hasLowerBound()) {
            builder.append(lower.value);
        } else {
            builder.append(MINUS_INFINITY);
        }
        builder.append(BOUNDS_SEPARATOR);
        if (hasUpperBound()) {
            builder.append(upper.value);
        } else {
            builder.append(PLUS_INFINITY);
        }
        builder.append(hasUpperBound() && upper.boundType == BoundType.CLOSED ? RIGHT_SQUARE_BRACKET : RIGHT_BRACKET);
        return builder.toString();
    }

    /** Range containing exactly one value. */
    public static IntRange singleton(int value) {
        return closed(value, value);
    }

    /** Range including both end values. */
    public static IntRange closed(int lower, int upper) {
        return new IntRange(closedBound(lower), closedBound(upper));
    }

    /** Range excluding both end values. */
    public static IntRange open(int lower, int upper) {
        return new IntRange(openBound(lower), openBound(upper));
    }

    /** Range excluding the lower end value and including the upper one. */
    public static IntRange openClosed(int lower, int upper) {
        return new IntRange(openBound(lower), closedBound(upper));
    }

    /** Range including the lower end value and excluding the upper one. */
    public static IntRange closedOpen(int lower, int upper) {
        return new IntRange(closedBound(lower), openBound(upper));
    }

    /** Range from the given value (inclusive) up to and including {@link Integer#MAX_VALUE}. */
    public static IntRange atLeast(int lower) {
        return closed(lower, Integer.MAX_VALUE);
    }

    private static Bound openBound(int value) {
        return new Bound(value, BoundType.OPEN);
    }

    private static Bound closedBound(int value) {
        return new Bound(value, BoundType.CLOSED);
    }
}

View File

@ -1,190 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.job.config.transform;
import org.elasticsearch.action.support.ToXContentToBytes;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.xcontent.ConstructingObjectParser;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.xpack.ml.job.config.Condition;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
/**
* Represents an API data transform
*/
// NORELEASE: to be replaced by ingest (https://github.com/elastic/prelert-legacy/issues/39)
public class TransformConfig extends ToXContentToBytes implements Writeable {
// Serialisation strings
public static final ParseField TYPE = new ParseField("transform");
public static final ParseField TRANSFORM = new ParseField("transform");
public static final ParseField CONDITION = new ParseField("condition");
public static final ParseField ARGUMENTS = new ParseField("arguments");
public static final ParseField INPUTS = new ParseField("inputs");
public static final ParseField OUTPUTS = new ParseField("outputs");
public static final ConstructingObjectParser<TransformConfig, Void> PARSER = new ConstructingObjectParser<>(
TYPE.getPreferredName(), objects -> new TransformConfig((String) objects[0]));
static {
PARSER.declareString(ConstructingObjectParser.constructorArg(), TYPE);
PARSER.declareStringArray(TransformConfig::setInputs, INPUTS);
PARSER.declareStringArray(TransformConfig::setArguments, ARGUMENTS);
PARSER.declareStringArray(TransformConfig::setOutputs, OUTPUTS);
PARSER.declareObject(TransformConfig::setCondition, Condition.PARSER, CONDITION);
}
private List<String> inputs;
private String type;
private List<String> arguments;
private List<String> outputs;
private Condition condition;
// lazily initialized:
private transient TransformType lazyType;
public TransformConfig(String type) {
this.type = type;
lazyType = TransformType.fromString(type);
try {
outputs = lazyType.defaultOutputNames();
} catch (IllegalArgumentException e) {
outputs = Collections.emptyList();
}
arguments = Collections.emptyList();
}
@SuppressWarnings("unchecked")
public TransformConfig(StreamInput in) throws IOException {
this(in.readString());
inputs = (List<String>) in.readGenericValue();
arguments = (List<String>) in.readGenericValue();
outputs = (List<String>) in.readGenericValue();
if (in.readBoolean()) {
condition = new Condition(in);
}
}
public List<String> getInputs() {
return inputs;
}
public void setInputs(List<String> fields) {
inputs = fields;
}
/**
* Transform type see {@linkplain TransformType.Names}
*/
public String getTransform() {
return type;
}
public List<String> getArguments() {
return arguments;
}
public void setArguments(List<String> args) {
arguments = args;
}
public List<String> getOutputs() {
return outputs;
}
public void setOutputs(List<String> outputs) {
this.outputs = outputs;
}
/**
* The condition object which may or may not be defined for this
* transform
*
* @return May be <code>null</code>
*/
public Condition getCondition() {
return condition;
}
public void setCondition(Condition condition) {
this.condition = condition;
}
/**
* This field shouldn't be serialised as its created dynamically
* Type may be null when the class is constructed.
*/
public TransformType type() {
return lazyType;
}
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeString(type);
out.writeGenericValue(inputs);
out.writeGenericValue(arguments);
out.writeGenericValue(outputs);
if (condition != null) {
out.writeBoolean(true);
condition.writeTo(out);
} else {
out.writeBoolean(false);
}
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
builder.field(TYPE.getPreferredName(), type);
if (inputs != null) {
builder.field(INPUTS.getPreferredName(), inputs);
}
if (arguments != null) {
builder.field(ARGUMENTS.getPreferredName(), arguments);
}
if (outputs != null) {
builder.field(OUTPUTS.getPreferredName(), outputs);
}
if (condition != null) {
builder.field(CONDITION.getPreferredName(), condition);
}
builder.endObject();
return builder;
}
/**
 * Hash over the same five fields that {@link #equals(Object)} compares.
 */
@Override
public int hashCode() {
    return Objects.hash(this.inputs, this.type, this.outputs, this.arguments, this.condition);
}
/**
 * Two configs are equal when they are of the same class and their type string,
 * inputs, outputs, arguments and condition are all equal.
 */
@Override
public boolean equals(Object obj) {
    if (this == obj) {
        return true;
    }
    if (obj == null || getClass() != obj.getClass()) {
        return false;
    }

    TransformConfig that = (TransformConfig) obj;
    if (Objects.equals(type, that.type) == false) {
        return false;
    }
    if (Objects.equals(inputs, that.inputs) == false) {
        return false;
    }
    if (Objects.equals(outputs, that.outputs) == false) {
        return false;
    }
    if (Objects.equals(arguments, that.arguments) == false) {
        return false;
    }
    return Objects.equals(condition, that.condition);
}
}

View File

@ -1,107 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.job.config.transform;
import org.elasticsearch.action.support.ToXContentToBytes;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.xcontent.ConstructingObjectParser;
import org.elasticsearch.common.xcontent.XContentBuilder;
import java.io.IOException;
import java.util.HashSet;
import java.util.List;
import java.util.Objects;
import java.util.Set;
/**
 * Wrapper around a list of {@link TransformConfig}s. It can be parsed from and
 * rendered as an object with a single <code>transforms</code> array, written to
 * and read from a stream, and queried for the field names the transforms
 * consume and produce.
 */
public class TransformConfigs extends ToXContentToBytes implements Writeable {

    public static final ParseField TRANSFORMS = new ParseField("transforms");

    @SuppressWarnings("unchecked")
    public static final ConstructingObjectParser<TransformConfigs, Void> PARSER = new ConstructingObjectParser<>(
            TRANSFORMS.getPreferredName(), a -> new TransformConfigs((List<TransformConfig>) a[0]));

    static {
        PARSER.declareObjectArray(ConstructingObjectParser.constructorArg(), TransformConfig.PARSER, TRANSFORMS);
    }

    private final List<TransformConfig> transforms;

    /**
     * @param transforms the transforms to wrap; must not be <code>null</code>
     * @throws NullPointerException if <code>transforms</code> is <code>null</code>
     */
    public TransformConfigs(List<TransformConfig> transforms) {
        this.transforms = Objects.requireNonNull(transforms);
    }

    /**
     * Reads the list of transforms from a stream, the inverse of {@link #writeTo(StreamOutput)}.
     */
    public TransformConfigs(StreamInput in) throws IOException {
        transforms = in.readList(TransformConfig::new);
    }

    @Override
    public void writeTo(StreamOutput out) throws IOException {
        out.writeList(transforms);
    }

    @Override
    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
        builder.startObject();
        builder.field(TRANSFORMS.getPreferredName(), transforms);
        builder.endObject();
        return builder;
    }

    /**
     * @return the wrapped list itself, not a copy
     */
    public List<TransformConfig> getTransforms() {
        return transforms;
    }

    /**
     * Set of all the field names that are required as inputs to transforms.
     * Transforms whose input list is <code>null</code> contribute nothing.
     *
     * @return a new mutable set; never <code>null</code>
     */
    public Set<String> inputFieldNames() {
        Set<String> fields = new HashSet<>();
        for (TransformConfig t : transforms) {
            List<String> inputs = t.getInputs();
            // A transform's inputs may be unset; addAll(null) would throw
            if (inputs != null) {
                fields.addAll(inputs);
            }
        }
        return fields;
    }

    /**
     * Set of all the field names that are outputted (i.e. created) by
     * transforms. Transforms whose output list is <code>null</code>
     * contribute nothing.
     *
     * @return a new mutable set; never <code>null</code>
     */
    public Set<String> outputFieldNames() {
        Set<String> fields = new HashSet<>();
        for (TransformConfig t : transforms) {
            List<String> outputs = t.getOutputs();
            // A transform's outputs may be unset; addAll(null) would throw
            if (outputs != null) {
                fields.addAll(outputs);
            }
        }
        return fields;
    }

    @Override
    public int hashCode() {
        return Objects.hash(transforms);
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null) {
            return false;
        }

        if (getClass() != obj.getClass()) {
            return false;
        }

        TransformConfigs other = (TransformConfigs) obj;
        return Objects.equals(transforms, other.transforms);
    }
}

View File

@ -1,156 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.job.config.transform;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import java.io.IOException;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.List;
import java.util.Set;
/**
 * Enum type representing the different transform functions
 * with functions for converting between the enum and its
 * pretty name i.e. human readable string.
 * <p>
 * Each constant also carries the permitted number of inputs, arguments
 * and outputs, the default output field names, and whether the transform
 * requires a condition.
 */
public enum TransformType implements ToXContent, Writeable {
    // Name, arity, arguments, outputs, default output names, has condition
    DOMAIN_SPLIT(Names.DOMAIN_SPLIT_NAME, IntRange.singleton(1), IntRange.singleton(0),
            IntRange.closed(1, 2), Arrays.asList("subDomain", "hrd")),
    CONCAT(Names.CONCAT_NAME, IntRange.atLeast(2), IntRange.closed(0, 1), IntRange.singleton(1),
            Arrays.asList("concat")),
    REGEX_EXTRACT(Names.EXTRACT_NAME, IntRange.singleton(1), IntRange.singleton(1), IntRange.atLeast(1),
            Arrays.asList("extract"), false),
    REGEX_SPLIT(Names.SPLIT_NAME, IntRange.singleton(1), IntRange.singleton(1), IntRange.atLeast(1),
            Arrays.asList("split"), false),
    EXCLUDE(Names.EXCLUDE_NAME, IntRange.atLeast(1), IntRange.singleton(0), IntRange.singleton(0),
            Arrays.asList(), true),
    LOWERCASE(Names.LOWERCASE_NAME, IntRange.singleton(1), IntRange.singleton(0), IntRange.singleton(1),
            Arrays.asList("lowercase")),
    UPPERCASE(Names.UPPERCASE_NAME, IntRange.singleton(1), IntRange.singleton(0), IntRange.singleton(1),
            Arrays.asList("uppercase")),
    TRIM(Names.TRIM_NAME, IntRange.singleton(1), IntRange.singleton(0), IntRange.singleton(1),
            Arrays.asList("trim"));

    /**
     * Transform names.
     *
     * Enums cannot use static fields in their constructors as the
     * enum values are initialised before the statics.
     * Having the static fields in a nested class means they are created
     * when required.
     *
     * The class is a static nested class: it only holds constants and
     * needs no reference to an enclosing enum value.
     */
    public static final class Names {
        public static final String DOMAIN_SPLIT_NAME = "domain_split";
        public static final String CONCAT_NAME = "concat";
        public static final String EXTRACT_NAME = "extract";
        public static final String SPLIT_NAME = "split";
        public static final String EXCLUDE_NAME = "exclude";
        public static final String LOWERCASE_NAME = "lowercase";
        public static final String UPPERCASE_NAME = "uppercase";
        public static final String TRIM_NAME = "trim";

        private Names() {
            // Constants holder; not instantiable
        }
    }

    private final IntRange arityRange;
    private final IntRange argumentsRange;
    private final IntRange outputsRange;
    private final String prettyName;
    private final List<String> defaultOutputNames;
    private final boolean hasCondition;

    /**
     * Creates a type that does not require a condition.
     */
    TransformType(String prettyName, IntRange arityIntRange,
            IntRange argumentsIntRange, IntRange outputsIntRange,
            List<String> defaultOutputNames) {
        this(prettyName, arityIntRange, argumentsIntRange, outputsIntRange, defaultOutputNames, false);
    }

    TransformType(String prettyName, IntRange arityIntRange,
            IntRange argumentsIntRange, IntRange outputsIntRange,
            List<String> defaultOutputNames, boolean hasCondition) {
        this.arityRange = arityIntRange;
        this.argumentsRange = argumentsIntRange;
        this.outputsRange = outputsIntRange;
        this.prettyName = prettyName;
        this.defaultOutputNames = defaultOutputNames;
        this.hasCondition = hasCondition;
    }

    /**
     * The count IntRange of inputs the transform expects.
     */
    public IntRange arityRange() {
        return this.arityRange;
    }

    /**
     * The count IntRange of arguments the transform expects.
     */
    public IntRange argumentsRange() {
        return this.argumentsRange;
    }

    /**
     * The count IntRange of outputs the transform expects.
     */
    public IntRange outputsRange() {
        return this.outputsRange;
    }

    /**
     * The human readable name, one of the constants in {@link Names}.
     */
    public String prettyName() {
        return prettyName;
    }

    /**
     * The output field names used by default for this type.
     * The list held by the enum constant is returned directly, not a copy.
     */
    public List<String> defaultOutputNames() {
        return defaultOutputNames;
    }

    /**
     * Whether a transform of this type must be configured with a condition.
     */
    public boolean hasCondition() {
        return hasCondition;
    }

    @Override
    public String toString() {
        return prettyName();
    }

    /**
     * Writes the constant's ordinal, so the wire form depends on the
     * declaration order of the constants above.
     */
    @Override
    public void writeTo(StreamOutput out) throws IOException {
        out.writeVInt(ordinal());
    }

    @Override
    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
        builder.value(prettyName);
        return builder;
    }

    /**
     * Get the enum for the given pretty name.
     * The static function valueOf() cannot be overridden so use
     * this method instead when converting from the pretty name
     * to enum.
     *
     * @param prettyName the human readable name; matched exactly (case sensitive)
     * @return the matching type
     * @throws IllegalArgumentException if no type has that pretty name
     */
    public static TransformType fromString(String prettyName) throws IllegalArgumentException {
        Set<TransformType> all = EnumSet.allOf(TransformType.class);

        for (TransformType type : all) {
            if (type.prettyName().equals(prettyName)) {
                return type;
            }
        }

        throw new IllegalArgumentException("Unknown [transformType]: [" + prettyName + "]");
    }
}

View File

@ -1,15 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.job.config.transform.verification;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
/**
 * Checks a single argument of a transform configuration.
 * {@code TransformConfigVerifier} invokes {@link #verify(String, TransformConfig)}
 * once for every argument of a transform.
 */
@FunctionalInterface
public interface ArgumentVerifier {
/**
 * Verifies one argument; returns normally when the argument is acceptable.
 *
 * @param argument the argument value to check
 * @param tc the transform configuration the argument belongs to
 * @throws ElasticsearchParseException declared for invalid arguments.
 *         NOTE(review): the regex verifiers in this package throw
 *         {@link IllegalArgumentException} instead - confirm which one callers expect
 */
void verify(String argument, TransformConfig tc) throws ElasticsearchParseException;
}

View File

@ -1,30 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.job.config.transform.verification;
import org.elasticsearch.xpack.ml.job.messages.Messages;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
import java.util.List;
import java.util.regex.Pattern;
/**
 * Verifies the regular expression argument of an extract transform:
 * the pattern must be valid and must have exactly as many capturing
 * groups as the transform has outputs.
 */
public class RegexExtractVerifier implements ArgumentVerifier {

    /**
     * @param arg the regular expression to check
     * @param tc the transform the expression is an argument of
     * @throws IllegalArgumentException if the pattern is invalid or its group
     *         count differs from the number of outputs (an absent output list counts as 0)
     */
    @Override
    public void verify(String arg, TransformConfig tc) {
        // Rejects syntactically invalid patterns with the standard message first
        new RegexPatternVerifier().verify(arg, tc);

        int capturingGroups = Pattern.compile(arg).matcher("").groupCount();

        List<String> configuredOutputs = tc.getOutputs();
        int expectedGroups = 0;
        if (configuredOutputs != null) {
            expectedGroups = configuredOutputs.size();
        }

        if (capturingGroups == expectedGroups) {
            return;
        }

        throw new IllegalArgumentException(
                Messages.getMessage(Messages.JOB_CONFIG_TRANSFORM_EXTRACT_GROUPS_SHOULD_MATCH_OUTPUT_COUNT,
                        tc.getTransform(), expectedGroups, arg, capturingGroups));
    }
}

View File

@ -1,25 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.job.config.transform.verification;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.xpack.ml.job.messages.Messages;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
/**
 * Verifies that a transform argument is a syntactically valid regular expression.
 */
public class RegexPatternVerifier implements ArgumentVerifier {

    /**
     * @param arg the regular expression to check
     * @param tc the transform the expression is an argument of; only used for the error message
     * @throws IllegalArgumentException if <code>arg</code> cannot be compiled as a pattern;
     *         the underlying {@link PatternSyntaxException} is attached as the cause
     */
    @Override
    public void verify(String arg, TransformConfig tc) throws ElasticsearchParseException {
        try {
            Pattern.compile(arg);
        } catch (PatternSyntaxException e) {
            String msg = Messages.getMessage(Messages.JOB_CONFIG_TRANSFORM_INVALID_ARGUMENT, tc.getTransform(), arg);
            // Keep the cause so the position and description of the syntax error are not lost
            throw new IllegalArgumentException(msg, e);
        }
    }
}

View File

@ -1,149 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.job.config.transform.verification;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.xpack.ml.job.messages.Messages;
import org.elasticsearch.xpack.ml.job.config.transform.IntRange;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
import org.elasticsearch.xpack.ml.job.config.transform.TransformType;
import java.util.List;
/**
 * Static checks for a single {@link TransformConfig}.
 */
public final class TransformConfigVerifier {
    private TransformConfigVerifier() {
        // Hide default constructor
    }

    /**
     * Checks the transform configuration is valid
     * <ol>
     * <li>If the transform type requires a condition check one is present</li>
     * <li>Checks there are the correct number of inputs for a given transform
     * type and that those inputs are not empty strings</li>
     * <li>Check the number of arguments is correct for the transform type and
     * verify the argument (i.e. it is a valid regex)</li>
     * <li>Check there is a valid number of outputs for the transform type and
     * those outputs are not empty strings</li>
     * </ol>
     *
     * @param tc the configuration to check
     * @return <code>true</code> whenever the method returns normally
     * @throws ElasticsearchParseException if the transform type is unknown or unset
     * @throws IllegalArgumentException if any of the checks listed above fails
     */
    public static boolean verify(TransformConfig tc) throws ElasticsearchParseException {
        TransformType type;
        try {
            type = tc.type();
        } catch (IllegalArgumentException e) {
            throw new ElasticsearchParseException(Messages.getMessage(Messages.JOB_CONFIG_TRANSFORM_UNKNOWN_TYPE, tc.getTransform()));
        }
        if (type == null) {
            // The resolved type may be absent; report it like an unknown type rather than
            // failing with a NullPointerException in the checks below
            throw new ElasticsearchParseException(Messages.getMessage(Messages.JOB_CONFIG_TRANSFORM_UNKNOWN_TYPE, tc.getTransform()));
        }

        checkCondition(tc, type);
        checkInputs(tc, type);
        checkArguments(tc, type);
        checkOutputs(tc, type);

        return true;
    }

    /** Fails if the type requires a condition and the config has none. */
    private static void checkCondition(TransformConfig tc, TransformType type) {
        if (type.hasCondition() && tc.getCondition() == null) {
            throw new IllegalArgumentException(
                    Messages.getMessage(Messages.JOB_CONFIG_TRANSFORM_CONDITION_REQUIRED, type.prettyName()));
        }
    }

    /** Checks the number of inputs and that none of them is blank. */
    private static void checkInputs(TransformConfig tc, TransformType type) {
        List<String> inputs = tc.getInputs();
        checkValidInputCount(tc, type, inputs);
        checkInputsAreNonEmptyStrings(tc, inputs);
    }

    /** Fails if the number of inputs (0 for a null list) is outside the type's arity range. */
    private static void checkValidInputCount(TransformConfig tc, TransformType type, List<String> inputs) {
        int inputsSize = (inputs == null) ? 0 : inputs.size();
        if (!type.arityRange().contains(inputsSize)) {
            String msg = Messages.getMessage(Messages.JOB_CONFIG_TRANSFORM_INVALID_INPUT_COUNT,
                    tc.getTransform(), rangeAsString(type.arityRange()), inputsSize);
            throw new IllegalArgumentException(msg);
        }
    }

    /** Fails if any input name is missing or blank. */
    private static void checkInputsAreNonEmptyStrings(TransformConfig tc, List<String> inputs) {
        if (containsEmptyString(inputs)) {
            String msg = Messages.getMessage(Messages.JOB_CONFIG_TRANSFORM_INPUTS_CONTAIN_EMPTY_STRING, tc.getTransform());
            throw new IllegalArgumentException(msg);
        }
    }

    /**
     * Whether the list holds an entry that is <code>null</code> or only whitespace.
     * A <code>null</code> list has no entries, consistent with the count checks
     * that treat it as size 0 (e.g. a transform type that allows zero outputs).
     */
    private static boolean containsEmptyString(List<String> strings) {
        if (strings == null) {
            return false;
        }
        return strings.stream().anyMatch(s -> s == null || s.trim().isEmpty());
    }

    /** Checks the number of arguments and then each argument's value. */
    private static void checkArguments(TransformConfig tc, TransformType type) {
        checkArgumentsCountValid(tc, type);
        checkArgumentsValid(tc, type);
    }

    /** Fails if the number of arguments (0 for a null list) is outside the type's range. */
    private static void checkArgumentsCountValid(TransformConfig tc, TransformType type) {
        List<String> arguments = tc.getArguments();
        int argumentsSize = (arguments == null) ? 0 : arguments.size();
        if (!type.argumentsRange().contains(argumentsSize)) {
            String msg = Messages.getMessage(Messages.JOB_CONFIG_TRANSFORM_INVALID_ARGUMENT_COUNT,
                    tc.getTransform(), rangeAsString(type.argumentsRange()), argumentsSize);
            throw new IllegalArgumentException(msg);
        }
    }

    /** Runs the type specific verifier over every argument. */
    private static void checkArgumentsValid(TransformConfig tc, TransformType type) {
        List<String> arguments = tc.getArguments();
        if (arguments != null) {
            ArgumentVerifier av = argumentVerifierForType(type);
            for (String argument : arguments) {
                av.verify(argument, tc);
            }
        }
    }

    /** Only the regex based transforms have arguments that need checking; others accept anything. */
    private static ArgumentVerifier argumentVerifierForType(TransformType type) {
        switch (type) {
        case REGEX_EXTRACT:
            return new RegexExtractVerifier();
        case REGEX_SPLIT:
            return new RegexPatternVerifier();
        default:
            return (argument, config) -> {};
        }
    }

    /** Checks the number of outputs and that none of them is blank. */
    private static void checkOutputs(TransformConfig tc, TransformType type) {
        List<String> outputs = tc.getOutputs();
        checkValidOutputCount(tc, type, outputs);
        checkOutputsAreNonEmptyStrings(tc, outputs);
    }

    /** Fails if the number of outputs (0 for a null list) is outside the type's range. */
    private static void checkValidOutputCount(TransformConfig tc, TransformType type, List<String> outputs) {
        int outputsSize = (outputs == null) ? 0 : outputs.size();
        if (!type.outputsRange().contains(outputsSize)) {
            String msg = Messages.getMessage(Messages.JOB_CONFIG_TRANSFORM_INVALID_OUTPUT_COUNT,
                    tc.getTransform(), rangeAsString(type.outputsRange()), outputsSize);
            throw new IllegalArgumentException(msg);
        }
    }

    /** Fails if any output name is missing or blank. */
    private static void checkOutputsAreNonEmptyStrings(TransformConfig tc, List<String> outputs) {
        if (containsEmptyString(outputs)) {
            String msg = Messages.getMessage(
                    Messages.JOB_CONFIG_TRANSFORM_OUTPUTS_CONTAIN_EMPTY_STRING, tc.getTransform());
            throw new IllegalArgumentException(msg);
        }
    }

    /** A range with equal bounds is shown as that single number, otherwise as the range's own string form. */
    private static String rangeAsString(IntRange range) {
        if (range.hasLowerBound() && range.hasUpperBound() && range.lower() == range.upper()) {
            return String.valueOf(range.lower());
        }
        return range.toString();
    }
}

View File

@ -1,120 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.job.config.transform.verification;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.xpack.ml.job.messages.Messages;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/**
 * Static checks that apply to a list of transforms as a whole.
 */
public class TransformConfigsVerifier {
    private TransformConfigsVerifier() {
        // Static utility class; not instantiable
    }

    /**
     * Checks the transform configurations are valid
     * <ol>
     * <li>Call {@linkplain TransformConfigVerifier#verify(TransformConfig)} on each transform</li>
     * <li>Check all the transform output field names are unique</li>
     * <li>Check there are no circular dependencies in the transforms</li>
     * </ol>
     *
     * @param transforms the transforms to check
     * @return <code>true</code> whenever the method returns normally
     * @throws ElasticsearchParseException if an individual transform has an unknown type
     * @throws IllegalArgumentException if an individual transform is invalid, an output
     *         name is used more than once or the transforms depend on each other in a cycle
     */
    public static boolean verify(List<TransformConfig> transforms) throws ElasticsearchParseException {
        for (TransformConfig tr : transforms) {
            TransformConfigVerifier.verify(tr);
        }

        String duplicatedName = outputNamesAreUnique(transforms);
        if (duplicatedName != null) {
            String msg = Messages.getMessage(Messages.JOB_CONFIG_TRANSFORM_OUTPUT_NAME_USED_MORE_THAN_ONCE, duplicatedName);
            throw new IllegalArgumentException(msg);
        }

        // Check for circular dependencies
        int index = checkForCircularDependencies(transforms);
        if (index >= 0) {
            TransformConfig tc = transforms.get(index);
            String msg = Messages.getMessage(Messages.JOB_CONFIG_TRANSFORM_CIRCULAR_DEPENDENCY, tc.type(), tc.getInputs());
            throw new IllegalArgumentException(msg);
        }

        return true;
    }

    /**
     * return null if all transform output names are
     * unique or the first duplicate name if there are
     * duplications. Transforms without an output list
     * are skipped.
     */
    private static String outputNamesAreUnique(List<TransformConfig> transforms) {
        Set<String> fields = new HashSet<>();
        for (TransformConfig t : transforms) {
            List<String> outputs = t.getOutputs();
            if (outputs == null) {
                continue;
            }
            for (String output : outputs) {
                // add() returns false when the name has been seen before
                if (fields.add(output) == false) {
                    return output;
                }
            }
        }

        return null;
    }

    /**
     * Find circular dependencies in the list of transforms.
     * This might be because a transform's input is its output
     * or because of a transitive dependency.
     *
     * If there is a circular dependency the index of the transform
     * in the <code>transforms</code> list at the start of the chain
     * is returned else -1
     *
     * @return -1 if no circular dependencies else the index of the
     * transform at the start of the circular chain
     */
    public static int checkForCircularDependencies(List<TransformConfig> transforms) {
        for (int i = 0; i < transforms.size(); i++) {
            Set<Integer> chain = new HashSet<>();
            chain.add(Integer.valueOf(i));

            TransformConfig tc = transforms.get(i);
            if (checkCircularDependenciesRecursive(tc, transforms, chain) == false) {
                return i;
            }
        }

        return -1;
    }

    /**
     * Follows the inputs of <code>transform</code> back to the transforms that
     * produce them. <code>chain</code> holds the indexes of the transforms on the
     * dependency path currently being followed; meeting one of them again means
     * the path loops back on itself.
     *
     * An index is removed from the chain once its branch has been explored, so a
     * transform that is reached twice along different, non-looping paths (two
     * inputs produced by the same transform, or a diamond shaped dependency) is
     * not mistaken for a cycle.
     *
     * @return <code>false</code> if a circular dependency was found, else <code>true</code>
     */
    private static boolean checkCircularDependenciesRecursive(TransformConfig transform, List<TransformConfig> transforms,
            Set<Integer> chain) {
        List<String> inputs = transform.getInputs();
        if (inputs == null || inputs.isEmpty()) {
            return true;
        }

        for (int i = 0; i < transforms.size(); i++) {
            TransformConfig producer = transforms.get(i);
            List<String> outputs = producer.getOutputs();
            if (outputs == null || inputs.stream().noneMatch(outputs::contains)) {
                // transform does not depend on this one
                continue;
            }

            Integer index = Integer.valueOf(i);
            if (chain.add(index) == false) {
                // Already on the current path: the dependencies form a loop
                return false;
            }
            boolean acyclic = checkCircularDependenciesRecursive(producer, transforms, chain);
            chain.remove(index);
            if (acyclic == false) {
                return false;
            }
        }

        return true;
    }
}

View File

@ -76,7 +76,6 @@ public final class Messages {
public static final String JOB_CONFIG_CONDITION_INVALID_VALUE_NUMBER = "job.config.condition.invalid.value.numeric";
public static final String JOB_CONFIG_CONDITION_INVALID_VALUE_REGEX = "job.config.condition.invalid.value.regex";
public static final String JOB_CONFIG_CONDITION_UNKNOWN_OPERATOR = "job.config.condition.unknown.operator";
public static final String JOB_CONFIG_DATAFORMAT_REQUIRES_TRANSFORM = "job.config.dataformat.requires.transform";
public static final String JOB_CONFIG_DETECTION_RULE_CONDITION_CATEGORICAL_INVALID_OPTION = "job.config.detectionrule.condition."
+ "categorical.invalid.option";
public static final String JOB_CONFIG_DETECTION_RULE_CONDITION_CATEGORICAL_MISSING_OPTION = "job.config.detectionrule.condition."
@ -158,21 +157,6 @@ public final class Messages {
public static final String JOB_CONFIG_UPDATE_DATAFEED_CONFIG_PARSE_ERROR = "job.config.update.datafeed.config.parse.error";
public static final String JOB_CONFIG_UPDATE_DATAFEED_CONFIG_CANNOT_BE_NULL = "job.config.update.datafeed.config.cannot.be.null";
public static final String JOB_CONFIG_TRANSFORM_CIRCULAR_DEPENDENCY = "job.config.transform.circular.dependency";
public static final String JOB_CONFIG_TRANSFORM_CONDITION_REQUIRED = "job.config.transform.condition.required";
public static final String JOB_CONFIG_TRANSFORM_DUPLICATED_OUTPUT_NAME = "job.config.transform.duplicated.output.name";
public static final String JOB_CONFIG_TRANSFORM_EXTRACT_GROUPS_SHOULD_MATCH_OUTPUT_COUNT = "job.config.transform.extract.groups.should."
+ "match.output.count";
public static final String JOB_CONFIG_TRANSFORM_INPUTS_CONTAIN_EMPTY_STRING = "job.config.transform.inputs.contain.empty.string";
public static final String JOB_CONFIG_TRANSFORM_INVALID_ARGUMENT = "job.config.transform.invalid.argument";
public static final String JOB_CONFIG_TRANSFORM_INVALID_ARGUMENT_COUNT = "job.config.transform.invalid.argument.count";
public static final String JOB_CONFIG_TRANSFORM_INVALID_INPUT_COUNT = "job.config.transform.invalid.input.count";
public static final String JOB_CONFIG_TRANSFORM_INVALID_OUTPUT_COUNT = "job.config.transform.invalid.output.count";
public static final String JOB_CONFIG_TRANSFORM_OUTPUTS_CONTAIN_EMPTY_STRING = "job.config.transform.outputs.contain.empty.string";
public static final String JOB_CONFIG_TRANSFORM_OUTPUTS_UNUSED = "job.config.transform.outputs.unused";
public static final String JOB_CONFIG_TRANSFORM_OUTPUT_NAME_USED_MORE_THAN_ONCE = "job.config.transform.output.name.used.more.than"
+ ".once";
public static final String JOB_CONFIG_TRANSFORM_UNKNOWN_TYPE = "job.config.transform.unknown.type";
public static final String JOB_CONFIG_UNKNOWN_FUNCTION = "job.config.unknown.function";
public static final String JOB_INDEX_ALREADY_EXISTS = "job.index.already.exists";
@ -207,9 +191,6 @@ public final class Messages {
public static final String JSON_DETECTOR_CONFIG_MAPPING = "json.detector.config.mapping.error";
public static final String JSON_DETECTOR_CONFIG_PARSE = "json.detector.config.parse.error";
public static final String JSON_TRANSFORM_CONFIG_MAPPING = "json.transform.config.mapping.error";
public static final String JSON_TRANSFORM_CONFIG_PARSE = "json.transform.config.parse.error";
public static final String REST_ACTION_NOT_ALLOWED_FOR_DATAFEED_JOB = "rest.action.not.allowed.for.datafeed.job";
public static final String REST_INVALID_DATETIME_PARAMS = "rest.invalid.datetime.params";

View File

@ -24,7 +24,6 @@ import org.elasticsearch.xpack.ml.job.process.autodetect.params.InterimResultsPa
import org.elasticsearch.xpack.ml.job.process.autodetect.writer.DataToProcessWriter;
import org.elasticsearch.xpack.ml.job.process.autodetect.writer.DataToProcessWriterFactory;
import org.elasticsearch.xpack.ml.job.process.CountingInputStream;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfigs;
import org.elasticsearch.xpack.ml.utils.ExceptionsHelper;
import java.io.Closeable;
@ -77,7 +76,7 @@ public class AutodetectCommunicator implements Closeable {
private DataToProcessWriter createProcessWriter(Optional<DataDescription> dataDescription) {
return DataToProcessWriterFactory.create(true, autodetectProcess, dataDescription.orElse(job.getDataDescription()),
job.getAnalysisConfig(), new TransformConfigs(job.getTransforms()) , dataCountsReporter, LOGGER);
job.getAnalysisConfig(), dataCountsReporter);
}
public DataCounts writeToJob(InputStream inputStream, DataLoadParams params) throws IOException {

View File

@ -10,17 +10,6 @@ import org.elasticsearch.xpack.ml.job.config.AnalysisConfig;
import org.elasticsearch.xpack.ml.job.config.DataDescription;
import org.elasticsearch.xpack.ml.job.process.autodetect.AutodetectProcess;
import org.elasticsearch.xpack.ml.job.process.DataCountsReporter;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfigs;
import org.elasticsearch.xpack.ml.transforms.DependencySorter;
import org.elasticsearch.xpack.ml.transforms.Transform;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformIndex;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformResult;
import org.elasticsearch.xpack.ml.transforms.TransformException;
import org.elasticsearch.xpack.ml.transforms.TransformFactory;
import org.elasticsearch.xpack.ml.transforms.date.DateFormatTransform;
import org.elasticsearch.xpack.ml.transforms.date.DateTransform;
import org.elasticsearch.xpack.ml.transforms.date.DoubleDateTransform;
import java.io.IOException;
import java.util.ArrayList;
@ -39,46 +28,36 @@ import java.util.Set;
public abstract class AbstractDataToProcessWriter implements DataToProcessWriter {
protected static final int TIME_FIELD_OUT_INDEX = 0;
private static final int MS_IN_SECOND = 1000;
private static final int TIME_FIELD_OUT_INDEX = 0;
private static final long MS_IN_SECOND = 1000;
protected final boolean includeControlField;
private final boolean includeControlField;
protected final AutodetectProcess autodetectProcess;
protected final DataDescription dataDescription;
protected final AnalysisConfig analysisConfig;
protected final DataCountsReporter dataCountsReporter;
protected final Logger logger;
protected final TransformConfigs transformConfigs;
protected List<Transform> dateInputTransforms;
protected DateTransform dateTransform;
protected List<Transform> postDateTransforms;
private final Logger logger;
private final DateTransformer dateTransformer;
protected Map<String, Integer> inFieldIndexes;
protected List<InputOutputMap> inputOutputMap;
private String[] scratchArea;
private String[][] readWriteArea;
// epoch in seconds
private long latestEpochMs;
private long latestEpochMsThisUpload;
protected AbstractDataToProcessWriter(boolean includeControlField, AutodetectProcess autodetectProcess,
DataDescription dataDescription, AnalysisConfig analysisConfig,
TransformConfigs transformConfigs, DataCountsReporter dataCountsReporter, Logger logger) {
DataCountsReporter dataCountsReporter, Logger logger) {
this.includeControlField = includeControlField;
this.autodetectProcess = Objects.requireNonNull(autodetectProcess);
this.dataDescription = Objects.requireNonNull(dataDescription);
this.analysisConfig = Objects.requireNonNull(analysisConfig);
this.dataCountsReporter = Objects.requireNonNull(dataCountsReporter);
this.logger = Objects.requireNonNull(logger);
this.transformConfigs = Objects.requireNonNull(transformConfigs);
postDateTransforms = new ArrayList<>();
dateInputTransforms = new ArrayList<>();
Date date = dataCountsReporter.getLatestRecordTime();
latestEpochMsThisUpload = 0;
latestEpochMs = 0;
@ -86,69 +65,33 @@ public abstract class AbstractDataToProcessWriter implements DataToProcessWriter
latestEpochMs = date.getTime();
}
readWriteArea = new String[3][];
boolean isDateFormatString = dataDescription.isTransformTime() && !dataDescription.isEpochMs();
if (isDateFormatString) {
dateTransformer = new DateFormatDateTransformer(dataDescription.getTimeFormat());
} else {
dateTransformer = new DoubleDateTransformer(dataDescription.isEpochMs());
}
}
/**
* Create the transforms. This must be called before
* {@linkplain DataToProcessWriter#write(java.io.InputStream)}
* even if no transforms are configured as it creates the
* date transform and sets up the field mappings.<br>
* Set up the field index mappings.
* This must be called before {@linkplain DataToProcessWriter#write(java.io.InputStream)}.
* <p>
* Finds the required input indexes in the <code>header</code>
* and sets the mappings for the transforms so they know where
* to read their inputs and write outputs.
* <p>
* Transforms can be chained so some write their outputs to
* a scratch area which is input to another transform
* and sets the mappings to the corresponding output indexes.
*/
public void buildTransforms(String[] header) throws IOException {
void buildFieldIndexMapping(String[] header) throws IOException {
Collection<String> inputFields = inputFields();
inFieldIndexes = inputFieldIndexes(header, inputFields);
checkForMissingFields(inputFields, inFieldIndexes, header);
Map<String, Integer> outFieldIndexes = outputFieldIndexes();
inputOutputMap = createInputOutputMap(inFieldIndexes);
dataCountsReporter.setAnalysedFieldsPerRecord(analysisConfig.analysisFields().size());
Map<String, Integer> scratchAreaIndexes = scratchAreaIndexes(inputFields, outputFields(),
dataDescription.getTimeField());
scratchArea = new String[scratchAreaIndexes.size()];
readWriteArea[TransformFactory.SCRATCH_ARRAY_INDEX] = scratchArea;
buildDateTransform(scratchAreaIndexes, outFieldIndexes);
List<TransformConfig> dateInputTransforms = DependencySorter.findDependencies(
dataDescription.getTimeField(), transformConfigs.getTransforms());
TransformFactory transformFactory = new TransformFactory();
for (TransformConfig config : dateInputTransforms) {
Transform tr = transformFactory.create(config, inFieldIndexes, scratchAreaIndexes,
outFieldIndexes, logger);
this.dateInputTransforms.add(tr);
}
// get the transforms that don't input into the date
List<TransformConfig> postDateTransforms = new ArrayList<>();
for (TransformConfig tc : transformConfigs.getTransforms()) {
if (dateInputTransforms.contains(tc) == false) {
postDateTransforms.add(tc);
}
}
postDateTransforms = DependencySorter.sortByDependency(postDateTransforms);
for (TransformConfig config : postDateTransforms) {
Transform tr = transformFactory.create(config, inFieldIndexes, scratchAreaIndexes,
outFieldIndexes, logger);
this.postDateTransforms.add(tr);
}
}
/**
* Write the header.
* The header is created from the list of analysis input fields,
* the time field and the control field
* The header is created from the list of analysis input fields, the time field and the control field.
*/
@Override
public void writeHeader() throws IOException {
@ -168,39 +111,6 @@ public abstract class AbstractDataToProcessWriter implements DataToProcessWriter
autodetectProcess.writeRecord(record);
}
protected void buildDateTransform(Map<String, Integer> scratchAreaIndexes, Map<String, Integer> outFieldIndexes) {
List<TransformIndex> readIndexes = new ArrayList<>();
Integer index = inFieldIndexes.get(dataDescription.getTimeField());
if (index != null) {
readIndexes.add(new TransformIndex(TransformFactory.INPUT_ARRAY_INDEX, index));
} else {
index = outFieldIndexes.get(dataDescription.getTimeField());
if (index != null) {
// date field could also be an output field
readIndexes.add(new TransformIndex(TransformFactory.OUTPUT_ARRAY_INDEX, index));
} else if (scratchAreaIndexes.containsKey(dataDescription.getTimeField())) {
index = scratchAreaIndexes.get(dataDescription.getTimeField());
readIndexes.add(new TransformIndex(TransformFactory.SCRATCH_ARRAY_INDEX, index));
} else {
throw new IllegalStateException(
String.format(Locale.ROOT, "Transform input date field '%s' not found",
dataDescription.getTimeField()));
}
}
List<TransformIndex> writeIndexes = new ArrayList<>();
writeIndexes.add(new TransformIndex(TransformFactory.OUTPUT_ARRAY_INDEX,
outFieldIndexes.get(dataDescription.getTimeField())));
boolean isDateFormatString = dataDescription.isTransformTime() && !dataDescription.isEpochMs();
if (isDateFormatString) {
dateTransform = new DateFormatTransform(dataDescription.getTimeFormat(), readIndexes, writeIndexes, logger);
} else {
dateTransform = new DoubleDateTransform(dataDescription.isEpochMs(), readIndexes, writeIndexes, logger);
}
}
/**
* Transform the input data and write to length encoded writer.<br>
* <p>
@ -210,33 +120,21 @@ public abstract class AbstractDataToProcessWriter implements DataToProcessWriter
* First all the transforms whose outputs the Date transform relies
* on are executed then the date transform then the remaining transforms.
*
* @param input The record the transforms should read their input from. The contents should
* align with the header parameter passed to {@linkplain #buildTransforms(String[])}
* @param output The record that will be written to the length encoded writer.
* @param record The record that will be written to the length encoded writer after the time has been transformed.
* This should be the same size as the number of output (analysis fields) i.e.
* the size of the map returned by {@linkplain #outputFieldIndexes()}
* @param numberOfFieldsRead The total number read not just those included in the analysis
*/
protected boolean applyTransformsAndWrite(String[] input, String[] output, long numberOfFieldsRead)
throws IOException {
readWriteArea[TransformFactory.INPUT_ARRAY_INDEX] = input;
readWriteArea[TransformFactory.OUTPUT_ARRAY_INDEX] = output;
Arrays.fill(readWriteArea[TransformFactory.SCRATCH_ARRAY_INDEX], "");
if (!applyTransforms(dateInputTransforms, numberOfFieldsRead)) {
return false;
}
protected boolean transformTimeAndWrite(String[] record, long numberOfFieldsRead) throws IOException {
long epochMs;
try {
dateTransform.transform(readWriteArea);
} catch (TransformException e) {
epochMs = dateTransformer.transform(record[TIME_FIELD_OUT_INDEX]);
} catch (CannotParseTimestampException e) {
dataCountsReporter.reportDateParseError(numberOfFieldsRead);
logger.error(e.getMessage());
return false;
}
long epochMs = dateTransform.epochMs();
// Records have epoch seconds timestamp so compare for out of order in seconds
if (epochMs / MS_IN_SECOND < latestEpochMs / MS_IN_SECOND - analysisConfig.getLatency()) {
// out of order
@ -250,38 +148,17 @@ public abstract class AbstractDataToProcessWriter implements DataToProcessWriter
return false;
}
// Now do the rest of the transforms
if (!applyTransforms(postDateTransforms, numberOfFieldsRead)) {
return false;
}
record[TIME_FIELD_OUT_INDEX] = Long.toString(epochMs / MS_IN_SECOND);
latestEpochMs = Math.max(latestEpochMs, epochMs);
latestEpochMsThisUpload = latestEpochMs;
autodetectProcess.writeRecord(output);
autodetectProcess.writeRecord(record);
dataCountsReporter.reportRecordWritten(numberOfFieldsRead, latestEpochMs);
return true;
}
/**
* If false then the transform is excluded
*/
private boolean applyTransforms(List<Transform> transforms, long inputFieldCount) {
for (Transform tr : transforms) {
try {
TransformResult result = tr.transform(readWriteArea);
if (result == TransformResult.EXCLUDE) {
return false;
}
} catch (TransformException e) {
logger.warn(e);
}
}
return true;
}
@Override
public void flush() throws IOException {
autodetectProcess.flushStream();
@ -289,16 +166,11 @@ public abstract class AbstractDataToProcessWriter implements DataToProcessWriter
/**
* Get all the expected input fields i.e. all the fields we
* must see in the csv header.
* = transform input fields + analysis fields that aren't a transform output
* + the date field - the transform output field names
* must see in the csv header
*/
public final Collection<String> inputFields() {
final Collection<String> inputFields() {
Set<String> requiredFields = new HashSet<>(analysisConfig.analysisFields());
requiredFields.add(dataDescription.getTimeField());
requiredFields.addAll(transformConfigs.inputFieldNames());
requiredFields.removeAll(transformConfigs.outputFieldNames()); // inputs not in a transform
return requiredFields;
}
@ -321,21 +193,10 @@ public abstract class AbstractDataToProcessWriter implements DataToProcessWriter
return fieldIndexes;
}
public Map<String, Integer> getInputFieldIndexes() {
Map<String, Integer> getInputFieldIndexes() {
return inFieldIndexes;
}
/**
* The output fields are the time field and all the fields
* configured for analysis
*/
public final Collection<String> outputFields() {
List<String> outputFields = new ArrayList<>(analysisConfig.analysisFields());
outputFields.add(dataDescription.getTimeField());
return outputFields;
}
/**
* Create indexes of the output fields.
* This is the time field and all the fields configured for analysis
@ -368,7 +229,7 @@ public abstract class AbstractDataToProcessWriter implements DataToProcessWriter
* The number of fields used in the analysis field,
* the time field and (sometimes) the control field
*/
public int outputFieldCount() {
protected int outputFieldCount() {
return analysisConfig.analysisFields().size() + (includeControlField ? 2 : 1);
}
@ -376,63 +237,28 @@ public abstract class AbstractDataToProcessWriter implements DataToProcessWriter
return outputFieldIndexes();
}
/**
* Find all the scratch area fields. These are those that are input to a
* transform but are not written to the output or read from input. i.e. for
* the case where a transforms output is used exclusively by another
* transform
* Create a map of input index to output index. This does not include the time or control fields.
*
* @param inputFields
* Fields we expect in the header
* @param outputFields
* Fields that are written to the analytics
* @param dateTimeField date field
* @param inFieldIndexes Map of field name to index in the input array
*/
protected final Map<String, Integer> scratchAreaIndexes(Collection<String> inputFields, Collection<String> outputFields,
String dateTimeField) {
Set<String> requiredFields = new HashSet<>(transformConfigs.outputFieldNames());
boolean dateTimeFieldIsTransformOutput = requiredFields.contains(dateTimeField);
requiredFields.addAll(transformConfigs.inputFieldNames());
requiredFields.removeAll(inputFields);
requiredFields.removeAll(outputFields);
// date time is a output of a transform AND the input to the date time transform
// so add it back into the scratch area
if (dateTimeFieldIsTransformOutput) {
requiredFields.add(dateTimeField);
}
int index = 0;
Map<String, Integer> result = new HashMap<String, Integer>();
for (String field : requiredFields) {
result.put(field, new Integer(index++));
}
return result;
}
/**
* For inputs that aren't transformed create a map of input index
* to output index. This does not include the time or control fields
*
* @param inFieldIndexes Map of field name -&gt; index in the input array
*/
protected final List<InputOutputMap> createInputOutputMap(Map<String, Integer> inFieldIndexes) {
// where no transform
private List<InputOutputMap> createInputOutputMap(Map<String, Integer> inFieldIndexes) {
List<InputOutputMap> inputOutputMap = new ArrayList<>();
int outIndex = TIME_FIELD_OUT_INDEX + 1;
int outIndex = TIME_FIELD_OUT_INDEX;
Integer inIndex = inFieldIndexes.get(dataDescription.getTimeField());
if (inIndex == null) {
throw new IllegalStateException(
String.format(Locale.ROOT, "Input time field '%s' not found", dataDescription.getTimeField()));
}
inputOutputMap.add(new InputOutputMap(inIndex, outIndex));
for (String field : analysisConfig.analysisFields()) {
Integer inIndex = inFieldIndexes.get(field);
++outIndex;
inIndex = inFieldIndexes.get(field);
if (inIndex != null) {
inputOutputMap.add(new InputOutputMap(inIndex, outIndex));
}
++outIndex;
}
return inputOutputMap;
@ -442,7 +268,6 @@ public abstract class AbstractDataToProcessWriter implements DataToProcessWriter
return inputOutputMap;
}
/**
* Check that all the fields are present in the header.
* Either return true or throw a MissingFieldException
@ -453,7 +278,6 @@ public abstract class AbstractDataToProcessWriter implements DataToProcessWriter
protected abstract boolean checkForMissingFields(Collection<String> inputFields, Map<String, Integer> inputFieldIndexes,
String[] header);
/**
* Input and output array indexes map
*/
@ -466,6 +290,4 @@ public abstract class AbstractDataToProcessWriter implements DataToProcessWriter
outputIndex = out;
}
}
}

View File

@ -3,11 +3,11 @@
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms;
package org.elasticsearch.xpack.ml.job.process.autodetect.writer;
public abstract class TransformException extends Exception {
public class CannotParseTimestampException extends Exception {
public TransformException(String message) {
super(message);
public CannotParseTimestampException(String message, Throwable cause) {
super(message, cause);
}
}

View File

@ -6,12 +6,12 @@
package org.elasticsearch.xpack.ml.job.process.autodetect.writer;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.common.logging.Loggers;
import org.elasticsearch.xpack.ml.job.config.AnalysisConfig;
import org.elasticsearch.xpack.ml.job.process.DataCountsReporter;
import org.elasticsearch.xpack.ml.job.process.autodetect.state.DataCounts;
import org.elasticsearch.xpack.ml.job.config.DataDescription;
import org.elasticsearch.xpack.ml.job.process.autodetect.AutodetectProcess;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfigs;
import org.supercsv.io.CsvListReader;
import org.supercsv.prefs.CsvPreference;
@ -36,6 +36,9 @@ import java.util.Map;
* line.
*/
class CsvDataToProcessWriter extends AbstractDataToProcessWriter {
private static final Logger LOGGER = Loggers.getLogger(CsvDataToProcessWriter.class);
/**
* Maximum number of lines allowed within a single CSV record.
* <p>
@ -51,13 +54,13 @@ class CsvDataToProcessWriter extends AbstractDataToProcessWriter {
public CsvDataToProcessWriter(boolean includeControlField, AutodetectProcess autodetectProcess,
DataDescription dataDescription, AnalysisConfig analysisConfig,
TransformConfigs transforms, DataCountsReporter dataCountsReporter, Logger logger) {
super(includeControlField, autodetectProcess, dataDescription, analysisConfig, transforms, dataCountsReporter, logger);
DataCountsReporter dataCountsReporter) {
super(includeControlField, autodetectProcess, dataDescription, analysisConfig, dataCountsReporter, LOGGER);
}
/**
* Read the csv inputIndex, transform to length encoded values and pipe to
* the OutputStream. If any of the expected fields in the transform inputs,
* the OutputStream. If any of the expected fields in the
* analysis inputIndex or if the expected time field is missing from the CSV
* header an exception is thrown
*/
@ -74,15 +77,14 @@ class CsvDataToProcessWriter extends AbstractDataToProcessWriter {
try (CsvListReader csvReader = new CsvListReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8), csvPref)) {
String[] header = csvReader.getHeader(true);
if (header == null) { // null if EoF
return dataCountsReporter.incrementalStats();
}
long inputFieldCount = Math.max(header.length - 1, 0); // time field doesn't count
buildTransforms(header);
buildFieldIndexMapping(header);
//backing array for the inputIndex
// backing array for the inputIndex
String[] inputRecord = new String[header.length];
int maxIndex = 0;
@ -98,7 +100,7 @@ class CsvDataToProcessWriter extends AbstractDataToProcessWriter {
Arrays.fill(record, "");
if (maxIndex >= line.size()) {
logger.warn("Not enough fields in csv record, expected at least " + maxIndex + ". " + line);
LOGGER.warn("Not enough fields in csv record, expected at least " + maxIndex + ". " + line);
for (InputOutputMap inOut : inputOutputMap) {
if (inOut.inputIndex >= line.size()) {
@ -117,7 +119,7 @@ class CsvDataToProcessWriter extends AbstractDataToProcessWriter {
}
fillRecordFromLine(line, inputRecord);
applyTransformsAndWrite(inputRecord, record, inputFieldCount);
transformTimeAndWrite(record, inputFieldCount);
}
// This function can throw
@ -148,7 +150,7 @@ class CsvDataToProcessWriter extends AbstractDataToProcessWriter {
String msg = String.format(Locale.ROOT, "Field configured for analysis '%s' is not in the CSV header '%s'",
field, Arrays.toString(header));
logger.error(msg);
LOGGER.error(msg);
throw new IllegalArgumentException(msg);
}
}

View File

@ -10,7 +10,6 @@ import org.elasticsearch.xpack.ml.job.config.AnalysisConfig;
import org.elasticsearch.xpack.ml.job.config.DataDescription;
import org.elasticsearch.xpack.ml.job.process.DataCountsReporter;
import org.elasticsearch.xpack.ml.job.process.autodetect.AutodetectProcess;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfigs;
/**
* Factory for creating the suitable writer depending on
@ -32,17 +31,14 @@ public final class DataToProcessWriterFactory {
*/
public static DataToProcessWriter create(boolean includeControlField, AutodetectProcess autodetectProcess,
DataDescription dataDescription, AnalysisConfig analysisConfig,
TransformConfigs transforms, DataCountsReporter dataCountsReporter, Logger logger) {
DataCountsReporter dataCountsReporter) {
switch (dataDescription.getFormat()) {
case JSON:
return new JsonDataToProcessWriter(includeControlField, autodetectProcess, dataDescription, analysisConfig,
transforms, dataCountsReporter, logger);
dataCountsReporter);
case DELIMITED:
return new CsvDataToProcessWriter(includeControlField, autodetectProcess, dataDescription, analysisConfig,
transforms, dataCountsReporter, logger);
case SINGLE_LINE:
return new SingleLineDataToProcessWriter(includeControlField, autodetectProcess, dataDescription, analysisConfig,
transforms, dataCountsReporter, logger);
dataCountsReporter);
default:
throw new IllegalArgumentException();
}

View File

@ -0,0 +1,38 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.job.process.autodetect.writer;
import org.elasticsearch.xpack.ml.utils.time.DateTimeFormatterTimestampConverter;
import org.elasticsearch.xpack.ml.utils.time.TimestampConverter;
import java.time.ZoneOffset;
import java.time.format.DateTimeParseException;
import java.util.Locale;
/**
 * A {@link DateTransformer} that parses a String timestamp as a date according to a
 * configured time format and returns the equivalent milliseconds since the epoch.
 */
public class DateFormatDateTransformer implements DateTransformer {

    /** The format string; kept so that it can be quoted in parse-failure messages. */
    private final String timeFormat;

    /** Converter built once from {@link #timeFormat} and reused for every timestamp. */
    private final TimestampConverter dateToEpochConverter;

    /**
     * @param timeFormat the pattern passed to
     *                   {@code DateTimeFormatterTimestampConverter.ofPattern} together with {@link ZoneOffset#UTC}
     */
    public DateFormatDateTransformer(String timeFormat) {
        this.timeFormat = timeFormat;
        this.dateToEpochConverter = DateTimeFormatterTimestampConverter.ofPattern(timeFormat, ZoneOffset.UTC);
    }

    /**
     * @param timestamp the text to parse with the configured format
     * @return the value produced by the converter's {@code toEpochMillis}
     * @throws CannotParseTimestampException if the converter raises a {@link DateTimeParseException};
     *                                       the original exception is attached as the cause
     */
    @Override
    public long transform(String timestamp) throws CannotParseTimestampException {
        final long epochMillis;
        try {
            epochMillis = dateToEpochConverter.toEpochMillis(timestamp);
        } catch (DateTimeParseException parseFailure) {
            throw new CannotParseTimestampException(
                    String.format(Locale.ROOT, "Cannot parse date '%s' with format string '%s'", timestamp, timeFormat),
                    parseFailure);
        }
        return epochMillis;
    }
}

View File

@ -0,0 +1,19 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.job.process.autodetect.writer;
/**
* An interface for transforming a String timestamp into epoch_millis.
* <p>
* How the text is interpreted (for example as a formatted date or as a numeric
* epoch value) is left to the implementing class.
*/
public interface DateTransformer {
/**
* Converts the given timestamp text into milliseconds since the epoch.
*
* @param timestamp A String representing a timestamp
* @return Milliseconds since the epoch that the timestamp corresponds to
* @throws CannotParseTimestampException If the timestamp cannot be parsed
*/
long transform(String timestamp) throws CannotParseTimestampException;
}

View File

@ -0,0 +1,35 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.job.process.autodetect.writer;
import java.util.Locale;
/**
 * A transformer that parses a String timestamp as a double holding an epoch
 * value and returns it as whole milliseconds since the epoch.
 * <p>
 * If {@code isMillisecond} is true the parsed value is taken to be in
 * milliseconds already and is returned with any fraction truncated; otherwise
 * it is taken to be in seconds, truncated to whole seconds and multiplied by 1000.
 */
public class DoubleDateTransformer implements DateTransformer {

    private static final long MS_IN_SECOND = 1000;

    private final boolean isMillisecond;

    /**
     * @param isMillisecond true if the incoming timestamps are epoch milliseconds,
     *                      false if they are epoch seconds
     */
    public DoubleDateTransformer(boolean isMillisecond) {
        this.isMillisecond = isMillisecond;
    }

    /**
     * @param timestamp the text to parse as a floating point epoch value
     * @return milliseconds since the epoch
     * @throws CannotParseTimestampException if the text is null, is not a number, is NaN or
     *                                       infinite, or does not fit in a long once expressed in milliseconds
     */
    @Override
    public long transform(String timestamp) throws CannotParseTimestampException {
        if (timestamp == null) {
            // Double.parseDouble(null) would escape as a NullPointerException rather than the declared exception
            throw cannotParse(timestamp, null);
        }

        final double parsed;
        try {
            parsed = Double.parseDouble(timestamp);
        } catch (NumberFormatException e) {
            throw cannotParse(timestamp, e);
        }

        // "NaN" and "Infinity" are accepted by the parser, but narrowing them to long silently yields
        // 0 or Long.MAX_VALUE/MIN_VALUE. The negated comparison is deliberately written so NaN fails it too.
        if (!(Math.abs(parsed) < (double) Long.MAX_VALUE)) {
            throw cannotParse(timestamp, null);
        }

        long longValue = (long) parsed;
        if (isMillisecond) {
            return longValue;
        }
        try {
            // multiplyExact turns a silent wrap-around into an error we can report
            return Math.multiplyExact(longValue, MS_IN_SECOND);
        } catch (ArithmeticException e) {
            throw cannotParse(timestamp, e);
        }
    }

    /** Builds the exception reported for any timestamp that cannot be used as an epoch value. */
    private static CannotParseTimestampException cannotParse(String timestamp, Throwable cause) {
        String message = String.format(Locale.ROOT, "Cannot parse timestamp '%s' as epoch value", timestamp);
        return new CannotParseTimestampException(message, cause);
    }
}

View File

@ -8,12 +8,12 @@ package org.elasticsearch.xpack.ml.job.process.autodetect.writer;
import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonParser;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.common.logging.Loggers;
import org.elasticsearch.xpack.ml.job.config.AnalysisConfig;
import org.elasticsearch.xpack.ml.job.process.autodetect.state.DataCounts;
import org.elasticsearch.xpack.ml.job.config.DataDescription;
import org.elasticsearch.xpack.ml.job.process.autodetect.AutodetectProcess;
import org.elasticsearch.xpack.ml.job.process.DataCountsReporter;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfigs;
import java.io.IOException;
import java.io.InputStream;
@ -31,10 +31,11 @@ import java.util.Map;
*/
class JsonDataToProcessWriter extends AbstractDataToProcessWriter {
private static final Logger LOGGER = Loggers.getLogger(JsonDataToProcessWriter.class);
public JsonDataToProcessWriter(boolean includeControlField, AutodetectProcess autodetectProcess, DataDescription dataDescription,
AnalysisConfig analysisConfig, TransformConfigs transforms, DataCountsReporter dataCountsReporter,
Logger logger) {
super(includeControlField, autodetectProcess, dataDescription, analysisConfig, transforms, dataCountsReporter, logger);
AnalysisConfig analysisConfig, DataCountsReporter dataCountsReporter) {
super(includeControlField, autodetectProcess, dataDescription, analysisConfig, dataCountsReporter, LOGGER);
}
/**
@ -61,7 +62,7 @@ class JsonDataToProcessWriter extends AbstractDataToProcessWriter {
private void writeJson(JsonParser parser) throws IOException {
Collection<String> analysisFields = inputFields();
buildTransforms(analysisFields.toArray(new String[0]));
buildFieldIndexMapping(analysisFields.toArray(new String[0]));
int numFields = outputFieldCount();
String[] input = new String[numFields];
@ -70,7 +71,7 @@ class JsonDataToProcessWriter extends AbstractDataToProcessWriter {
// We never expect to get the control field
boolean[] gotFields = new boolean[analysisFields.size()];
JsonRecordReader recordReader = new SimpleJsonRecordReader(parser, inFieldIndexes, logger);
JsonRecordReader recordReader = new SimpleJsonRecordReader(parser, inFieldIndexes, LOGGER);
long inputFieldCount = recordReader.read(input, gotFields);
while (inputFieldCount >= 0) {
Arrays.fill(record, "");
@ -87,7 +88,7 @@ class JsonDataToProcessWriter extends AbstractDataToProcessWriter {
record[inOut.outputIndex] = (field == null) ? "" : field;
}
applyTransformsAndWrite(input, record, inputFieldCount);
transformTimeAndWrite(record, inputFieldCount);
inputFieldCount = recordReader.read(input, gotFields);
}

View File

@ -1,71 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.job.process.autodetect.writer;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.xpack.ml.job.config.AnalysisConfig;
import org.elasticsearch.xpack.ml.job.process.DataCountsReporter;
import org.elasticsearch.xpack.ml.job.process.autodetect.state.DataCounts;
import org.elasticsearch.xpack.ml.job.config.DataDescription;
import org.elasticsearch.xpack.ml.job.process.autodetect.AutodetectProcess;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfigs;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Collection;
import java.util.Map;
/**
 * Writer for unstructured input in which each line of the stream is one record.
 * Every line is handed to the transforms as a single-field input record (the
 * header consists of the one field {@code raw}) and the transformed record is
 * then written by {@code applyTransformsAndWrite}.
 * <p>
 * This writer is expected to be used only together with transforms that
 * extract the time and the other fields used in the analysis.
 */
public class SingleLineDataToProcessWriter extends AbstractDataToProcessWriter {

    /** Name of the single input field that holds the whole line. */
    private static final String RAW = "raw";

    protected SingleLineDataToProcessWriter(boolean includeControlField, AutodetectProcess autodetectProcess,
            DataDescription dataDescription, AnalysisConfig analysisConfig, TransformConfigs transformConfigs,
            DataCountsReporter dataCountsReporter, Logger logger) {
        super(includeControlField, autodetectProcess, dataDescription, analysisConfig, transformConfigs, dataCountsReporter, logger);
    }

    /**
     * Reads the stream line by line as UTF-8 and passes each line through the transforms.
     *
     * @param inputStream the raw data; closed when this method returns
     * @return the incremental stats from the data counts reporter
     * @throws IOException if reading the stream or writing a record fails
     */
    @Override
    public DataCounts write(InputStream inputStream) throws IOException {
        dataCountsReporter.startNewIncrementalCount();

        try (BufferedReader lineReader = new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8))) {
            buildTransforms(new String[] {RAW});

            // One output buffer is reused for every line and blanked before each use
            String[] outputRecord = new String[outputFieldCount()];

            String line;
            while ((line = lineReader.readLine()) != null) {
                Arrays.fill(outputRecord, "");
                applyTransformsAndWrite(new String[] {line}, outputRecord, 1);
            }
            dataCountsReporter.finishReporting();
        }

        return dataCountsReporter.incrementalStats();
    }

    /**
     * Always returns true: the only header field is the synthetic {@code raw} field.
     */
    @Override
    protected boolean checkForMissingFields(Collection<String> inputFields, Map<String, Integer> inputFieldIndexes,
            String[] header) {
        return true;
    }
}

View File

@ -18,13 +18,13 @@ enum Level {
INFLUENCER("infl"),
PARTITION("part");
private final String m_Key;
private final String key;
Level(String key) {
m_Key = key;
this.key = key;
}
public String asString() {
return m_Key;
return key;
}
}

View File

@ -1,35 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.rest.validate;
import org.elasticsearch.client.node.NodeClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.rest.BaseRestHandler;
import org.elasticsearch.rest.RestController;
import org.elasticsearch.rest.RestRequest;
import org.elasticsearch.rest.action.AcknowledgedRestListener;
import org.elasticsearch.xpack.ml.MlPlugin;
import org.elasticsearch.xpack.ml.action.ValidateTransformAction;
import java.io.IOException;
/**
 * REST handler registered for {@code POST} on {@code MlPlugin.BASE_PATH + "_validate/transform"}.
 * The request body is parsed into a {@code ValidateTransformAction.Request} and executed
 * through the node client.
 */
public class RestValidateTransformAction extends BaseRestHandler {

    public RestValidateTransformAction(Settings settings, RestController controller) {
        super(settings);
        controller.registerHandler(RestRequest.Method.POST, MlPlugin.BASE_PATH + "_validate/transform", this);
    }

    /**
     * Parses the request up front and returns a consumer that runs the validate action,
     * answering on the channel through an {@code AcknowledgedRestListener}.
     */
    @Override
    protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient client) throws IOException {
        ValidateTransformAction.Request validateTransformRequest =
                ValidateTransformAction.Request.parseRequest(restRequest.contentOrSourceParamParser());
        return channel ->
                client.execute(ValidateTransformAction.INSTANCE, validateTransformRequest, new AcknowledgedRestListener<>(channel));
    }
}

View File

@ -1,35 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.rest.validate;
import org.elasticsearch.client.node.NodeClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.rest.BaseRestHandler;
import org.elasticsearch.rest.RestController;
import org.elasticsearch.rest.RestRequest;
import org.elasticsearch.rest.action.AcknowledgedRestListener;
import org.elasticsearch.xpack.ml.MlPlugin;
import org.elasticsearch.xpack.ml.action.ValidateTransformsAction;
import java.io.IOException;
/**
 * REST handler registered for {@code POST} on {@code MlPlugin.BASE_PATH + "_validate/transforms"}.
 * The request body is parsed into a {@code ValidateTransformsAction.Request} and executed
 * through the node client.
 */
public class RestValidateTransformsAction extends BaseRestHandler {

    public RestValidateTransformsAction(Settings settings, RestController controller) {
        super(settings);
        controller.registerHandler(RestRequest.Method.POST, MlPlugin.BASE_PATH + "_validate/transforms", this);
    }

    /**
     * Parses the request up front and returns a consumer that runs the validate action,
     * answering on the channel through an {@code AcknowledgedRestListener}.
     */
    @Override
    protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient client) throws IOException {
        ValidateTransformsAction.Request validateTransformsRequest =
                ValidateTransformsAction.Request.PARSER.apply(restRequest.contentOrSourceParamParser(), null);
        return channel ->
                client.execute(ValidateTransformsAction.INSTANCE, validateTransformsRequest, new AcknowledgedRestListener<>(channel));
    }
}

View File

@ -1,56 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms;
import java.util.List;
import java.util.StringJoiner;
import org.apache.logging.log4j.Logger;
/**
 * Transform that joins the values of its input fields into a single output
 * field, optionally separated by a delimiter.
 */
public class Concat extends Transform {

    private static final String EMPTY_STRING = "";

    private final String delimiter;

    /**
     * Creates a concatenation that places nothing between the joined values.
     */
    public Concat(List<TransformIndex> readIndexes, List<TransformIndex> writeIndexes, Logger logger) {
        this(EMPTY_STRING, readIndexes, writeIndexes, logger);
    }

    /**
     * Creates a concatenation that places {@code join} between the joined values.
     */
    public Concat(String join, List<TransformIndex> readIndexes, List<TransformIndex> writeIndexes, Logger logger) {
        super(readIndexes, writeIndexes, logger);
        this.delimiter = join;
    }

    public String getDelimiter() {
        return delimiter;
    }

    /**
     * Joins the values found at every read index, in order, and stores the result
     * at the first write index. Only the first write index is ever used.
     *
     * @return {@code TransformResult.FAIL} if there are no write indexes, otherwise {@code TransformResult.OK}
     */
    @Override
    public TransformResult transform(String[][] readWriteArea) throws TransformException {
        if (writeIndexes.isEmpty()) {
            return TransformResult.FAIL;
        }

        TransformIndex target = writeIndexes.get(0);

        StringJoiner joined = new StringJoiner(delimiter);
        for (TransformIndex source : readIndexes) {
            String value = readWriteArea[source.array][source.index];
            joined.add(value);
        }

        readWriteArea[target.array][target.index] = joined.toString();
        return TransformResult.OK;
    }
}

View File

@ -1,173 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
/**
* Transform inputs and outputs can be chained together. This class provides
* methods for finding the chains of dependencies in a list of transforms. The
* results are ordered lists of transforms that should be executed in order
* starting at index 0.
* <p>
* A transform A depends on a transform B when one of A's inputs is listed
* among B's outputs.
*/
public final class DependencySorter {
/**
* Hide public constructor
*/
private DependencySorter() {
}
/**
* For the input field get the chain of transforms that must be executed to
* get that field. The returned list is ordered so that the ones at the end
* of the list are dependent on those at the beginning.
* <p>
* Note if there is a circular dependency in the list of transforms this
* will cause a stack overflow. Check with
* {@linkplain org.elasticsearch.xpack.ml.job.config.transform.verification.TransformConfigsVerifier#checkForCircularDependencies(List)}
* first.
*
* @param input      The name of the field that is required
* @param transforms All the configured transforms
* @return List of transforms ordered by dependencies
*/
public static List<TransformConfig> findDependencies(String input, List<TransformConfig> transforms) {
return findDependencies(Arrays.asList(input), transforms);
}
/**
* For the list of input fields get the chain of transforms that must be
* executed to get those fields. The returned list is ordered so that the
* ones at the end of the list are dependent on those at the beginning.
* Transforms that do not contribute to any of the fields are left out.
* <p>
* Note if there is a circular dependency in the list of transforms this
* will cause a stack overflow. Check with
* {@linkplain org.elasticsearch.xpack.ml.job.config.transform.verification.TransformConfigsVerifier#checkForCircularDependencies(List)}
* first.
*
* @param inputs     The names of the fields that are required
* @param transforms All the configured transforms
* @return List of transforms ordered by dependencies
*/
public static List<TransformConfig> findDependencies(List<String> inputs, List<TransformConfig> transforms) {
List<TransformConfig> dependencies = new LinkedList<>();
ListIterator<TransformConfig> itr = transforms.listIterator();
while (itr.hasNext()) {
TransformConfig tc = itr.next();
for (String input : inputs) {
// tc produces one of the required fields, so tc and everything it needs must run
if (tc.getOutputs().contains(input)) {
findDependenciesRecursive(tc, transforms, dependencies);
}
}
}
return dependencies;
}
/**
* Recursively find the transform dependencies and add them to the
* dependency list. The transforms that produce the inputs of
* <code>transform</code> are added before <code>transform</code> itself.
*
* @param transform    The transform whose dependencies are wanted
* @param transforms   All the configured transforms
* @param dependencies Transforms are appended to this list
*/
private static void findDependenciesRecursive(TransformConfig transform, List<TransformConfig> transforms,
List<TransformConfig> dependencies) {
// Already in the result, nothing more to do
int index = dependencies.indexOf(transform);
if (index >= 0) {
return;
}
ListIterator<TransformConfig> itr = transforms.listIterator();
while (itr.hasNext()) {
TransformConfig tc = itr.next();
for (String input : transform.getInputs()) {
if (tc.getOutputs().contains(input)) {
findDependenciesRecursive(tc, transforms, dependencies);
}
}
}
// Added only after the recursion above, which is why a cycle never terminates
dependencies.add(transform);
}
/**
* Return an ordered list of transforms (the same size as the input list)
* that is sorted in terms of dependencies. The input list is not modified;
* the sorting works on a copy.
* <p>
* Circular dependencies are not detected by this method. Check with
* {@linkplain org.elasticsearch.xpack.ml.job.config.transform.verification.TransformConfigsVerifier#checkForCircularDependencies(List)}
* first.
*
* @param transforms The transforms to sort
* @return List of transforms ordered by dependencies
*/
public static List<TransformConfig> sortByDependency(List<TransformConfig> transforms) {
List<TransformConfig> orderedDependencies = new LinkedList<>();
List<TransformConfig> transformsCopy = new LinkedList<>(transforms);
transformsCopy = orderDependenciesRecursive(transformsCopy, orderedDependencies);
// Each call orders at least the head of the list, so repeat until nothing is left
while (transformsCopy.isEmpty() == false) {
transformsCopy = orderDependenciesRecursive(transformsCopy, orderedDependencies);
}
return orderedDependencies;
}
/**
* Find the dependencies of the head of the <code>transforms</code> list
* adding them to the <code>dependencies</code> list. The returned list is a
* copy of the input <code>transforms</code> with the dependent transforms
* (i.e. those that have been ordered and added to <code>dependencies</code>)
* removed.
* <p>
* In the case where the input <code>transforms</code> list contains
* multiple chains of dependencies this function should be called multiple
* times using its return value as the input <code>transforms</code>
* parameter
* <p>
* To avoid concurrent modification of the transforms list a new copy is
* made for each recursive call and a new modified list returned
*
* @param transforms
* The transforms still to be ordered. The head of this list is
* removed from it by this call.
* @param dependencies
* Transforms are added to this list
* @return As transforms are moved from <code>transforms</code> to
* <code>dependencies</code> this list is a new copy of the
* <code>transforms</code> input with the moved transforms removed.
*/
private static List<TransformConfig> orderDependenciesRecursive(List<TransformConfig> transforms, List<TransformConfig> dependencies) {
if (transforms.isEmpty()) {
return transforms;
}
// Take the head off the list; it is the transform being ordered by this call
ListIterator<TransformConfig> itr = transforms.listIterator();
TransformConfig transform = itr.next();
itr.remove();
// Already ordered by an earlier call
int index = dependencies.indexOf(transform);
if (index >= 0) {
return transforms;
}
while (itr.hasNext()) {
TransformConfig tc = itr.next();
for (String input : transform.getInputs()) {
if (tc.getOutputs().contains(input)) {
// The recursive call orders the head of the copied list (not necessarily tc) and
// returns the list without it; iteration then restarts on that returned list
transforms = orderDependenciesRecursive(new LinkedList<TransformConfig>(transforms), dependencies);
itr = transforms.listIterator();
}
}
}
// No remaining transform produces an input of this one, so it can be placed now
dependencies.add(transform);
return transforms;
}
}

View File

@ -1,34 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms;
import java.util.List;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.xpack.ml.job.config.Condition;
/**
 * Abstract base class for exclude filters. It only stores the
 * {@link Condition} the concrete filters test their input against.
 */
public abstract class ExcludeFilter extends Transform {

    private final Condition condition;

    /**
     * No validation happens here: the condition should have been verified
     * by now and it <i>must</i> have a valid value &amp; operator.
     *
     * @param condition the condition the filter applies
     * @param readIndexes passed on to the {@link Transform} constructor
     * @param writeIndexes passed on to the {@link Transform} constructor
     * @param logger passed on to the {@link Transform} constructor
     */
    public ExcludeFilter(Condition condition, List<TransformIndex> readIndexes,
            List<TransformIndex> writeIndexes, Logger logger) {
        super(readIndexes, writeIndexes, logger);
        this.condition = condition;
    }

    /**
     * @return the condition given at construction
     */
    public Condition getCondition() {
        return condition;
    }
}

View File

@ -1,85 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms;
import java.util.List;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.xpack.ml.job.config.Condition;
import org.elasticsearch.xpack.ml.job.config.Operator;
/**
 * Parses a numeric value from a field and compares it against a hard
 * value using a certain {@link Operator}
 */
public class ExcludeFilterNumeric extends ExcludeFilter {

    private final double filterValue;

    /**
     * The condition should have been verified by now. If its value cannot be
     * parsed as a double a warning is logged and the filter value falls back
     * to 0.0; the operator of the condition is used as given.
     *
     * @param condition the condition holding the operator and the filter value
     * @param readIndexes the fields to test
     * @param writeIndexes passed on to the base class; not written by this transform
     * @param logger receives the warning about an unparsable filter value
     */
    public ExcludeFilterNumeric(Condition condition, List<TransformIndex> readIndexes,
            List<TransformIndex> writeIndexes, Logger logger) {
        super(condition, readIndexes, writeIndexes, logger);
        filterValue = parseFilterValue(getCondition().getValue());
    }

    /**
     * If no condition is given the default is &lt; (less than) with a filter
     * value of 0.0, meaning that only negative values are excluded.
     *
     * @param readIndexes the fields to test
     * @param writeIndexes passed on to the base class; not written by this transform
     * @param logger passed on to the base class
     */
    public ExcludeFilterNumeric(List<TransformIndex> readIndexes,
            List<TransformIndex> writeIndexes, Logger logger) {
        super(new Condition(Operator.LT, "0.0"),
                readIndexes, writeIndexes, logger);
        filterValue = 0.0;
    }

    /**
     * Parse the filter value of the condition, logging a warning and falling
     * back to 0.0 when it is not a number.
     */
    private double parseFilterValue(String fieldValue) {
        try {
            return Double.parseDouble(fieldValue);
        } catch (NumberFormatException e) {
            logger.warn("Exclude transform cannot parse a number from field '" + fieldValue + "'. Using default 0.0");
            return 0.0;
        }
    }

    /**
     * Returns {@link TransformResult#EXCLUDE} if the value should be excluded,
     * that is as soon as one of the read fields parses as a number that
     * satisfies the condition. Fields that are missing or not numeric are
     * skipped.
     *
     * @param readWriteArea the arrays the read indexes point into
     * @return {@link TransformResult#EXCLUDE} or {@link TransformResult#OK}
     */
    @Override
    public TransformResult transform(String[][] readWriteArea)
            throws TransformException {
        for (TransformIndex readIndex : readIndexes) {
            String field = readWriteArea[readIndex.array][readIndex.index];
            if (field == null) {
                // Double.parseDouble(null) throws NullPointerException, not
                // NumberFormatException; treat a missing value like any other
                // non-numeric field instead of failing the whole record.
                continue;
            }
            try {
                double value = Double.parseDouble(field);
                if (getCondition().getOperator().test(value, filterValue)) {
                    return TransformResult.EXCLUDE;
                }
            } catch (NumberFormatException ignored) {
                // Deliberately ignored: a non-numeric field cannot satisfy a
                // numeric condition, so it never causes an exclusion.
            }
        }
        return TransformResult.OK;
    }

    /**
     * @return the numeric value fields are compared against
     */
    public double filterValue() {
        return filterValue;
    }
}

View File

@ -1,49 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.xpack.ml.job.config.Condition;
/**
 * Exclude filter that matches fields against a regex. The regex is the
 * value of the condition and is compiled once at construction.
 */
public class ExcludeFilterRegex extends ExcludeFilter {

    private final Pattern pattern;

    /**
     * @param condition its value is compiled as the regular expression
     * @param readIndexes the fields to test
     * @param writeIndexes passed on to the base class; not written by this transform
     * @param logger passed on to the base class
     */
    public ExcludeFilterRegex(Condition condition, List<TransformIndex> readIndexes,
            List<TransformIndex> writeIndexes, Logger logger) {
        super(condition, readIndexes, writeIndexes, logger);
        pattern = Pattern.compile(getCondition().getValue());
    }

    /**
     * Returns {@link TransformResult#EXCLUDE} if the record matches the regex,
     * i.e. as soon as one of the read fields matches it in its entirety.
     */
    @Override
    public TransformResult transform(String[][] readWriteArea)
            throws TransformException {
        for (TransformIndex readIndex : readIndexes) {
            String candidate = readWriteArea[readIndex.array][readIndex.index];
            Matcher candidateMatcher = pattern.matcher(candidate);
            // matches() requires the whole field to match, not a substring
            if (candidateMatcher.matches()) {
                return TransformResult.EXCLUDE;
            }
        }
        return TransformResult.OK;
    }
}

View File

@ -1,47 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms;
import java.util.List;
import org.apache.logging.log4j.Logger;
/**
 * Split a hostname into Highest Registered Domain and sub domain.
 * TODO Reimplement porting the code from C++
 */
public class HighestRegisteredDomain extends Transform {

    /**
     * Immutable class for the domain split results
     */
    public static class DomainSplit {
        // final so that the class really is immutable, as documented
        private final String subDomain;
        private final String highestRegisteredDomain;

        private DomainSplit(String subDomain, String highestRegisteredDomain) {
            this.subDomain = subDomain;
            this.highestRegisteredDomain = highestRegisteredDomain;
        }

        /**
         * @return the sub domain part of the split
         */
        public String getSubDomain() {
            return subDomain;
        }

        /**
         * @return the highest registered domain part of the split
         */
        public String getHighestRegisteredDomain() {
            return highestRegisteredDomain;
        }
    }

    /**
     * @param readIndexes passed on to the {@link Transform} constructor
     * @param writeIndexes passed on to the {@link Transform} constructor
     * @param logger passed on to the {@link Transform} constructor
     */
    public HighestRegisteredDomain(List<TransformIndex> readIndexes, List<TransformIndex> writeIndexes, Logger logger) {
        super(readIndexes, writeIndexes, logger);
    }

    /**
     * Not implemented yet (see the TODO on the class): nothing is read or
     * written and the result is always {@link TransformResult#FAIL}.
     */
    @Override
    public TransformResult transform(String[][] readWriteArea) {
        return TransformResult.FAIL;
    }
}

View File

@ -1,46 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.logging.log4j.Logger;
/**
 * Transform that searches its first input field for a regular expression
 * and copies the capture groups of the first hit to the outputs.
 */
public class RegexExtract extends Transform {

    private final Pattern pattern;

    /**
     * @param regex the regular expression, compiled once here
     * @param readIndexes only the first entry is read
     * @param writeIndexes one entry per capture group to extract
     * @param logger receives a warning when a field does not match
     */
    public RegexExtract(String regex, List<TransformIndex> readIndexes,
            List<TransformIndex> writeIndexes, Logger logger) {
        super(readIndexes, writeIndexes, logger);
        pattern = Pattern.compile(regex);
    }

    /**
     * @return {@link TransformResult#OK} when the regex is found in the
     *         field, otherwise {@link TransformResult#FAIL} after logging a
     *         warning
     */
    @Override
    public TransformResult transform(String[][] readWriteArea)
            throws TransformException {
        TransformIndex source = readIndexes.get(0);
        String field = readWriteArea[source.array][source.index];
        Matcher match = pattern.matcher(field);
        if (match.find() == false) {
            logger.warn("Transform 'extract' failed to match field: " + field);
            return TransformResult.FAIL;
        }
        // Copy no more groups than there are outputs, and no more outputs
        // than there are groups. Group 0 (the whole match) is not copied.
        int groupsToCopy = Math.min(writeIndexes.size(), match.groupCount());
        for (int group = 1; group <= groupsToCopy; group++) {
            TransformIndex target = writeIndexes.get(group - 1);
            readWriteArea[target.array][target.index] = match.group(group);
        }
        return TransformResult.OK;
    }
}

View File

@ -1,53 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;
import org.apache.logging.log4j.Logger;
/**
 * Transform that splits its first input field around matches of a regular
 * expression and writes the parts to the outputs in order.
 */
public class RegexSplit extends Transform {

    private final Pattern pattern;

    /**
     * @param regex the delimiter expression, compiled once here
     * @param readIndexes only the first entry is read
     * @param writeIndexes one entry per expected part
     * @param logger receives a warning when the part count differs from the output count
     */
    public RegexSplit(String regex, List<TransformIndex> readIndexes,
            List<TransformIndex> writeIndexes, Logger logger) {
        super(readIndexes, writeIndexes, logger);
        pattern = Pattern.compile(regex);
    }

    /**
     * Always returns {@link TransformResult#OK}; a mismatch between the
     * number of parts and the number of outputs is only logged.
     */
    @Override
    public TransformResult transform(String[][] readWriteArea)
            throws TransformException {
        TransformIndex source = readIndexes.get(0);
        String field = readWriteArea[source.array][source.index];
        String[] parts = pattern.split(field);
        int outputCount = writeIndexes.size();
        if (parts.length != outputCount) {
            logger.warn(String.format(Locale.ROOT,
                    "Transform 'split' has %d output(s) but splitting value '%s' resulted to %d part(s)",
                    outputCount, field, parts.length));
        }
        // Surplus parts are dropped, surplus outputs are left untouched
        int copyCount = Math.min(parts.length, outputCount);
        for (int part = 0; part < copyCount; part++) {
            TransformIndex target = writeIndexes.get(part);
            readWriteArea[target.array][target.index] = parts[part];
        }
        return TransformResult.OK;
    }
}

View File

@ -1,49 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms;
import java.util.List;
import java.util.Locale;
import java.util.function.Function;
import org.apache.logging.log4j.Logger;
/**
 * Transform that applies a String-to-String function to exactly one input
 * field and writes the result to exactly one output. Instances are created
 * through the static factory methods.
 */
public class StringTransform extends Transform {

    private final Function<String, String> convertFunction;

    /**
     * @param convertFunction the conversion applied to the input field
     * @param readIndexes must contain exactly one entry
     * @param writeIndexes must contain exactly one entry
     * @param logger passed on to the {@link Transform} constructor
     * @throws IllegalArgumentException if there is not exactly one read and one write index
     */
    private StringTransform(Function<String, String> convertFunction,
            List<TransformIndex> readIndexes, List<TransformIndex> writeIndexes, Logger logger) {
        super(readIndexes, writeIndexes, logger);
        this.convertFunction = convertFunction;
        if (readIndexes.size() != 1 || writeIndexes.size() != 1) {
            // Say what was wrong; the exception used to carry no message at all
            throw new IllegalArgumentException("A string transform requires exactly 1 input and 1 output but got "
                    + readIndexes.size() + " input(s) and " + writeIndexes.size() + " output(s)");
        }
    }

    /**
     * Converts the input field and stores the result in the output.
     *
     * @return always {@link TransformResult#OK}
     */
    @Override
    public TransformResult transform(String[][] readWriteArea) throws TransformException {
        TransformIndex readIndex = readIndexes.get(0);
        TransformIndex writeIndex = writeIndexes.get(0);
        String input = readWriteArea[readIndex.array][readIndex.index];
        readWriteArea[writeIndex.array][writeIndex.index] = convertFunction.apply(input);
        return TransformResult.OK;
    }

    /**
     * Create a transform that lower-cases its input using {@link Locale#ROOT}.
     */
    public static StringTransform createLowerCase(List<TransformIndex> readIndexes,
            List<TransformIndex> writeIndexes, Logger logger) {
        return new StringTransform(s -> s.toLowerCase(Locale.ROOT), readIndexes, writeIndexes, logger);
    }

    /**
     * Create a transform that upper-cases its input using {@link Locale#ROOT}.
     */
    public static StringTransform createUpperCase(List<TransformIndex> readIndexes,
            List<TransformIndex> writeIndexes, Logger logger) {
        return new StringTransform(s -> s.toUpperCase(Locale.ROOT), readIndexes, writeIndexes, logger);
    }

    /**
     * Create a transform that removes leading and trailing whitespace from
     * its input with {@link String#trim()}.
     */
    public static StringTransform createTrim(List<TransformIndex> readIndexes,
            List<TransformIndex> writeIndexes, Logger logger) {
        return new StringTransform(String::trim, readIndexes, writeIndexes, logger);
    }
}

View File

@ -1,103 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms;
import java.util.List;
import java.util.Objects;
import org.apache.logging.log4j.Logger;
/**
 * Abstract transform class.
 * Instances are created with lists of indexes telling the transform which
 * field(s) to read from and where to write to.
 * The read/write area is passed in the {@linkplain #transform(String[][])}
 * function.
 * <p>
 * Some transforms may fail and processing of the record continues
 * regardless; for others a failure is terminal, meaning the record should
 * not be processed further.
 */
public abstract class Transform {

    /**
     * OK means the transform was successful,
     * FAIL means the transform failed but it's ok to continue processing,
     * EXCLUDE means that no further processing should take place and the
     * record should be discarded
     */
    public enum TransformResult {
        OK, FAIL, EXCLUDE
    }

    /**
     * Address of one field in the read/write area: <code>array</code> selects
     * one of the arrays and <code>index</code> the position within it.
     */
    public static class TransformIndex {
        public final int array;
        public final int index;

        /**
         * @param a selects the array in the read/write area
         * @param b the position within that array
         */
        public TransformIndex(int a, int b) {
            this.array = a;
            this.index = b;
        }

        @Override
        public int hashCode() {
            return Objects.hash(array, index);
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (obj == null) {
                return false;
            }
            if (getClass() != obj.getClass()) {
                return false;
            }
            TransformIndex other = (TransformIndex) obj;
            // Both fields are primitive ints: compare them directly rather
            // than boxing them for Objects.equals
            return this.array == other.array && this.index == other.index;
        }
    }

    protected final Logger logger;
    protected final List<TransformIndex> readIndexes;
    protected final List<TransformIndex> writeIndexes;

    /**
     * @param readIndexes Read inputs from these indexes
     * @param writeIndexes Outputs are written to these indexes
     * @param logger Logger made available to the concrete transforms
     */
    public Transform(List<TransformIndex> readIndexes, List<TransformIndex> writeIndexes, Logger logger) {
        this.logger = logger;
        this.readIndexes = readIndexes;
        this.writeIndexes = writeIndexes;
    }

    /**
     * The indexes for the inputs
     */
    public final List<TransformIndex> getReadIndexes() {
        return readIndexes;
    }

    /**
     * The write output indexes
     */
    public final List<TransformIndex> getWriteIndexes() {
        return writeIndexes;
    }

    /**
     * Transform function.
     * The read write array of arrays area typically contains an input array,
     * scratch area array and the output array. The scratch area is used in the
     * case where the transform is chained, so that it reads from/writes to an
     * intermediate area.
     *
     * @param readWriteArea the arrays addressed by the read and write indexes
     * @return the outcome of the transform, see {@link TransformResult}
     * @throws TransformException if the transform cannot be applied
     */
    public abstract TransformResult transform(String[][] readWriteArea)
            throws TransformException;
}

View File

@ -1,122 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
import org.elasticsearch.xpack.ml.job.config.transform.TransformType;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformIndex;
/**
 * Create transforms from the configuration object.
 * Transforms need to know where to read strings from and where to
 * write the output to, hence the input, scratch area and output maps
 * required by the create method.
 */
public class TransformFactory {

    public static final int INPUT_ARRAY_INDEX = 0;
    public static final int SCRATCH_ARRAY_INDEX = 1;
    public static final int OUTPUT_ARRAY_INDEX = 2;

    /**
     * Create the transform described by <code>transformConfig</code>.
     *
     * @param transformConfig the type, inputs, outputs, arguments and
     *        condition of the transform
     * @param inputIndexesMap field name to index in the input array
     * @param scratchAreaIndexesMap field name to index in the scratch array
     * @param outputIndexesMap field name to index in the output array
     * @param logger handed to the created transform
     * @return the transform for the configured type
     * @throws IllegalStateException if an input of the transform is not
     *         present in any of the three maps
     * @throws IllegalArgumentException if the transform type is unknown
     */
    public Transform create(TransformConfig transformConfig,
            Map<String, Integer> inputIndexesMap,
            Map<String, Integer> scratchAreaIndexesMap,
            Map<String, Integer> outputIndexesMap,
            Logger logger) {
        List<TransformIndex> readIndexes = resolveReadIndexes(transformConfig,
                inputIndexesMap, scratchAreaIndexesMap, outputIndexesMap);
        List<TransformIndex> writeIndexes = resolveWriteIndexes(transformConfig,
                scratchAreaIndexesMap, outputIndexesMap);

        TransformType type = transformConfig.type();
        switch (type) {
            case DOMAIN_SPLIT:
                return new HighestRegisteredDomain(readIndexes, writeIndexes, logger);
            case CONCAT:
                // The optional first argument is handed to Concat
                if (transformConfig.getArguments().isEmpty()) {
                    return new Concat(readIndexes, writeIndexes, logger);
                } else {
                    return new Concat(transformConfig.getArguments().get(0),
                            readIndexes, writeIndexes, logger);
                }
            case REGEX_EXTRACT:
                return new RegexExtract(transformConfig.getArguments().get(0), readIndexes,
                        writeIndexes, logger);
            case REGEX_SPLIT:
                return new RegexSplit(transformConfig.getArguments().get(0), readIndexes,
                        writeIndexes, logger);
            case EXCLUDE:
                if (transformConfig.getCondition().getOperator().expectsANumericArgument()) {
                    return new ExcludeFilterNumeric(transformConfig.getCondition(),
                            readIndexes, writeIndexes, logger);
                } else {
                    return new ExcludeFilterRegex(transformConfig.getCondition(), readIndexes,
                            writeIndexes, logger);
                }
            case LOWERCASE:
                return StringTransform.createLowerCase(readIndexes, writeIndexes, logger);
            case UPPERCASE:
                return StringTransform.createUpperCase(readIndexes, writeIndexes, logger);
            case TRIM:
                return StringTransform.createTrim(readIndexes, writeIndexes, logger);
            default:
                // This code will never be hit - it's to
                // keep the compiler happy.
                throw new IllegalArgumentException("Unknown transform type " + type);
        }
    }

    /**
     * Resolve where each input of the transform is read from. The input
     * array is preferred, then the scratch area, then the output array.
     */
    private static List<TransformIndex> resolveReadIndexes(TransformConfig transformConfig,
            Map<String, Integer> inputIndexesMap,
            Map<String, Integer> scratchAreaIndexesMap,
            Map<String, Integer> outputIndexesMap) {
        List<TransformIndex> readIndexes = new ArrayList<>();
        for (String field : transformConfig.getInputs()) {
            Integer index = inputIndexesMap.get(field);
            if (index != null) {
                readIndexes.add(new TransformIndex(INPUT_ARRAY_INDEX, index));
                continue;
            }
            index = scratchAreaIndexesMap.get(field);
            if (index != null) {
                readIndexes.add(new TransformIndex(SCRATCH_ARRAY_INDEX, index));
                continue;
            }
            // also check the outputs array for this input
            index = outputIndexesMap.get(field);
            if (index != null) {
                // The index comes from the output map so it addresses the
                // output array. It used to be paired with SCRATCH_ARRAY_INDEX,
                // which reads an unrelated (or non-existent) scratch slot.
                readIndexes.add(new TransformIndex(OUTPUT_ARRAY_INDEX, index));
                continue;
            }
            throw new IllegalStateException("Transform input '" + field +
                    "' cannot be found");
        }
        return readIndexes;
    }

    /**
     * Resolve where each output of the transform is written to. The output
     * array is preferred, then the scratch area. An output found in neither
     * map is skipped without error.
     */
    private static List<TransformIndex> resolveWriteIndexes(TransformConfig transformConfig,
            Map<String, Integer> scratchAreaIndexesMap,
            Map<String, Integer> outputIndexesMap) {
        List<TransformIndex> writeIndexes = new ArrayList<>();
        for (String field : transformConfig.getOutputs()) {
            Integer index = outputIndexesMap.get(field);
            if (index != null) {
                writeIndexes.add(new TransformIndex(OUTPUT_ARRAY_INDEX, index));
                continue;
            }
            index = scratchAreaIndexesMap.get(field);
            if (index != null) {
                writeIndexes.add(new TransformIndex(SCRATCH_ARRAY_INDEX, index));
            }
        }
        return writeIndexes;
    }
}

View File

@ -1,43 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms.date;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.xpack.ml.transforms.TransformException;
import org.elasticsearch.xpack.ml.utils.time.DateTimeFormatterTimestampConverter;
import org.elasticsearch.xpack.ml.utils.time.TimestampConverter;
import java.time.ZoneOffset;
import java.time.format.DateTimeParseException;
import java.util.List;
import java.util.Locale;
/**
 * A date transform that parses a String timestamp according to a time
 * format pattern and converts it to the equivalent epoch value in
 * milliseconds.
 */
public class DateFormatTransform extends DateTransform {

    private final String timeFormat;
    private final TimestampConverter dateToEpochConverter;

    /**
     * @param timeFormat the pattern timestamps are parsed with; also quoted
     *        in the error message when parsing fails
     * @param readIndexes passed on to {@link DateTransform}
     * @param writeIndexes passed on to {@link DateTransform}
     * @param logger passed on to {@link DateTransform}
     */
    public DateFormatTransform(String timeFormat, List<TransformIndex> readIndexes, List<TransformIndex> writeIndexes, Logger logger) {
        super(readIndexes, writeIndexes, logger);
        this.timeFormat = timeFormat;
        // NOTE(review): UTC is handed to the converter factory; presumably it is
        // the zone applied when the pattern carries none - confirm
        dateToEpochConverter = DateTimeFormatterTimestampConverter.ofPattern(timeFormat, ZoneOffset.UTC);
    }

    /**
     * @throws ParseTimestampException if the field cannot be parsed with the
     *         configured format
     */
    @Override
    protected long toEpochMs(String field) throws TransformException {
        try {
            return dateToEpochConverter.toEpochMillis(field);
        } catch (DateTimeParseException parseFailure) {
            throw new ParseTimestampException(
                    String.format(Locale.ROOT, "Cannot parse date '%s' with format string '%s'", field, timeFormat));
        }
    }
}

View File

@ -1,62 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms.date;
import java.util.List;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.xpack.ml.transforms.Transform;
import org.elasticsearch.xpack.ml.transforms.TransformException;
/**
 * Base class of the date transforms. Subclasses supply the conversion of a
 * field to epoch milliseconds; this class introduces the {@link #epochMs()}
 * method and writes the result, in seconds, to the output.
 */
public abstract class DateTransform extends Transform {

    protected static final int SECONDS_TO_MS = 1000;

    // Result of the most recent successful conversion, in milliseconds
    private long epochMs;

    /**
     * @param readIndexes the first entry is the field holding the timestamp
     * @param writeIndexes the first entry receives the epoch seconds
     * @param logger passed on to the {@link Transform} constructor
     */
    public DateTransform(List<TransformIndex> readIndexes, List<TransformIndex> writeIndexes, Logger logger) {
        super(readIndexes, writeIndexes, logger);
    }

    /**
     * The epoch time from the last transform
     */
    public long epochMs() {
        return epochMs;
    }

    /**
     * Expects 1 input and 1 output. The input field is converted to epoch
     * milliseconds, which are remembered for {@link #epochMs()}, and the
     * value divided down to whole seconds is written to the output.
     *
     * @throws ParseTimestampException if there is no read or write index or
     *         the input field is null
     */
    @Override
    public final TransformResult transform(String[][] readWriteArea) throws TransformException {
        if (readIndexes.isEmpty()) {
            throw new ParseTimestampException("Cannot parse null string");
        }
        if (writeIndexes.isEmpty()) {
            throw new ParseTimestampException("No write index for the datetime format transform");
        }

        TransformIndex source = readIndexes.get(0);
        String field = readWriteArea[source.array][source.index];
        if (field == null) {
            throw new ParseTimestampException("Cannot parse null string");
        }

        epochMs = toEpochMs(field);

        long epochSeconds = epochMs / SECONDS_TO_MS;
        TransformIndex target = writeIndexes.get(0);
        readWriteArea[target.array][target.index] = Long.toString(epochSeconds);
        return TransformResult.OK;
    }

    /**
     * Convert the (non-null) field to epoch milliseconds.
     *
     * @param field the raw timestamp
     * @return the timestamp in milliseconds since the epoch
     * @throws TransformException if the field cannot be converted
     */
    protected abstract long toEpochMs(String field) throws TransformException;
}

View File

@ -1,42 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms.date;
import java.util.List;
import java.util.Locale;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.xpack.ml.transforms.TransformException;
/**
 * A date transform that parses a String timestamp as a double holding an
 * epoch time. The number is truncated to a whole number first; if
 * isMillisecond is true it is taken to be in milliseconds already,
 * otherwise it is taken to be in seconds and scaled up to milliseconds.
 */
public class DoubleDateTransform extends DateTransform {

    private final boolean isMillisecond;

    /**
     * @param isMillisecond true if the timestamps are in milliseconds, false
     *        if they are in seconds
     * @param readIndexes passed on to {@link DateTransform}
     * @param writeIndexes passed on to {@link DateTransform}
     * @param logger passed on to {@link DateTransform}
     */
    public DoubleDateTransform(boolean isMillisecond, List<TransformIndex> readIndexes,
            List<TransformIndex> writeIndexes, Logger logger) {
        super(readIndexes, writeIndexes, logger);
        this.isMillisecond = isMillisecond;
    }

    /**
     * @throws ParseTimestampException if the field is not a number
     */
    @Override
    protected long toEpochMs(String field) throws TransformException {
        try {
            // The fractional part is dropped before any scaling takes place
            long wholeValue = (long) Double.parseDouble(field);
            if (isMillisecond) {
                return wholeValue;
            }
            return wholeValue * SECONDS_TO_MS;
        } catch (NumberFormatException e) {
            throw new ParseTimestampException(
                    String.format(Locale.ROOT, "Cannot parse timestamp '%s' as epoch value", field));
        }
    }
}

View File

@ -1,16 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms.date;
import org.elasticsearch.xpack.ml.transforms.TransformException;
/**
 * The {@link TransformException} thrown by the date transforms when a
 * timestamp cannot be parsed.
 */
public class ParseTimestampException extends TransformException {

    /**
     * @param message description of the failure
     */
    public ParseTimestampException(String message) {
        super(message);
    }
}

View File

@ -56,7 +56,6 @@ job.config.condition.invalid.value.null = Invalid condition: the value field can
job.config.condition.invalid.value.numeric = Invalid condition value: cannot parse a double from string ''{0}''
job.config.condition.invalid.value.regex = Invalid condition value: ''{0}'' is not a valid regular expression
job.config.condition.unknown.operator = Unknown condition operator ''{0}''
job.config.dataformat.requires.transform = When the data format is {0}, transforms are required.
job.config.detectionrule.condition.categorical.invalid.option = Invalid detector rule: a categorical rule_condition does not support {0}
job.config.detectionrule.condition.categorical.missing.option = Invalid detector rule: a categorical rule_condition requires {0} to be set
job.config.detectionrule.condition.invalid.fieldname = Invalid detector rule: field_name has to be one of {0}; actual was ''{1}''
@ -118,19 +117,6 @@ job.config.update.results.retention.days.invalid = Invalid update value for resu
job.config.update.datafeed.config.parse.error = JSON parse error reading the update value for datafeed_config
job.config.update.datafeed.config.cannot.be.null = Invalid update value for datafeed_config: null
job.config.transform.circular.dependency = Transform type {0} with inputs {1} has a circular dependency
job.config.transform.condition.required = A condition must be defined for transform ''{0}''
job.config.transform.duplicated.output.name = Transform ''{0}'' has an output with the same name as the summary count field. Transform outputs cannot use the summary count field, please review your configuration
job.config.transform.extract.groups.should.match.output.count = Transform ''{0}'' expects {1} output(s) but regex ''{2}'' captures {3} group(s)
job.config.transform.inputs.contain.empty.string = Transform type {0} contains empty input
job.config.transform.invalid.argument = Transform ''{0}'' has invalid argument ''{1}''
job.config.transform.invalid.argument.count = Transform type {0} expected {1} argument(s), got {2}
job.config.transform.invalid.input.count = Transform type {0} expected {1} input(s), got {2}
job.config.transform.invalid.output.count = Transform type {0} expected {1} output(s), got {2}
job.config.transform.outputs.contain.empty.string = Transform type {0} contains empty output
job.config.transform.outputs.unused = None of the outputs of transform ''{0}'' are used. Please review your configuration
job.config.transform.output.name.used.more.than.once = Transform output name ''{0}'' is used more than once
job.config.transform.unknown.type = Unknown TransformType ''{0}''
job.config.unknown.function = Unknown function ''{0}''
job.index.already.exists = Cannot create index ''{0}'' as it already exists
@ -164,9 +150,6 @@ json.job.config.parse.error = JSON parse error reading the job configuration
json.detector.config.mapping.error = JSON mapping error reading the detector configuration
json.detector.config.parse.error = JSON parse error reading the detector configuration
json.transform.config.mapping.error = JSON mapping error reading the transform configuration
json.transform.config.parse.error = JSON parse error reading the transform configuration
rest.action.not.allowed.for.datafeed.job = This action is not allowed for a datafeed job
rest.invalid.datetime.params = Query param ''{0}'' with value ''{1}'' cannot be parsed as a date or converted to a number (epoch).

View File

@ -14,8 +14,6 @@ import org.elasticsearch.xpack.ml.job.config.IgnoreDowntime;
import org.elasticsearch.xpack.ml.job.config.Job;
import org.elasticsearch.xpack.ml.job.config.ModelDebugConfig;
import org.elasticsearch.xpack.ml.action.util.QueryPage;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
import org.elasticsearch.xpack.ml.job.config.transform.TransformType;
import org.elasticsearch.xpack.ml.support.AbstractStreamableTestCase;
import java.util.ArrayList;
@ -43,11 +41,6 @@ public class GetJobsActionResponseTests extends AbstractStreamableTestCase<GetJo
Collections.singletonList(new Detector.Builder("metric", "some_field").build())).build();
AnalysisLimits analysisLimits = new AnalysisLimits(randomNonNegativeLong(), randomNonNegativeLong());
DataDescription dataDescription = randomBoolean() ? new DataDescription.Builder().build() : null;
int numTransformers = randomIntBetween(0, 32);
List<TransformConfig> transformConfigList = new ArrayList<>(numTransformers);
for (int i = 0; i < numTransformers; i++) {
transformConfigList.add(new TransformConfig(TransformType.UPPERCASE.prettyName()));
}
ModelDebugConfig modelDebugConfig = randomBoolean() ? new ModelDebugConfig(randomDouble(), randomAsciiOfLength(10)) : null;
IgnoreDowntime ignoreDowntime = randomFrom(IgnoreDowntime.values());
Long normalizationWindowDays = randomBoolean() ? randomLong() : null;
@ -59,7 +52,7 @@ public class GetJobsActionResponseTests extends AbstractStreamableTestCase<GetJo
String modelSnapshotId = randomBoolean() ? randomAsciiOfLength(10) : null;
String indexName = randomAsciiOfLength(10);
Job job = new Job(jobId, description, createTime, finishedTime, lastDataTime,
timeout, analysisConfig, analysisLimits, dataDescription, transformConfigList,
timeout, analysisConfig, analysisLimits, dataDescription,
modelDebugConfig, ignoreDowntime, normalizationWindowDays, backgroundPersistInterval,
modelSnapshotRetentionDays, resultsRetentionDays, customConfig, modelSnapshotId, indexName);

View File

@ -1,33 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.action;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.xpack.ml.action.ValidateTransformAction.Request;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
import org.elasticsearch.xpack.ml.job.config.transform.TransformType;
import org.elasticsearch.xpack.ml.support.AbstractStreamableXContentTestCase;
/**
 * Serialisation tests for {@link ValidateTransformAction.Request}.
 */
public class ValidateTransformActionRequestTests extends AbstractStreamableXContentTestCase<ValidateTransformAction.Request> {

    /**
     * Builds a request around a transform of a randomly chosen type.
     */
    @Override
    protected Request createTestInstance() {
        String typeName = randomFrom(TransformType.values()).prettyName();
        return new Request(new TransformConfig(typeName));
    }

    @Override
    protected Request createBlankInstance() {
        return new Request();
    }

    @Override
    protected Request parseInstance(XContentParser parser) {
        return Request.parseRequest(parser);
    }
}

View File

@ -1,41 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.action;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.xpack.ml.action.ValidateTransformsAction.Request;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
import org.elasticsearch.xpack.ml.job.config.transform.TransformType;
import org.elasticsearch.xpack.ml.support.AbstractStreamableXContentTestCase;
import java.util.ArrayList;
import java.util.List;
/**
 * Serialisation tests for {@link ValidateTransformsAction.Request}.
 */
public class ValidateTransformsActionRequestTests extends AbstractStreamableXContentTestCase<ValidateTransformsAction.Request> {

    /**
     * Builds a request holding a random number of transforms, each of a
     * randomly chosen type.
     */
    @Override
    protected Request createTestInstance() {
        int transformCount = randomInt(10);
        List<TransformConfig> transforms = new ArrayList<>();
        while (transforms.size() < transformCount) {
            String typeName = randomFrom(TransformType.values()).prettyName();
            transforms.add(new TransformConfig(typeName));
        }
        return new Request(transforms);
    }

    @Override
    protected Request createBlankInstance() {
        return new Request();
    }

    @Override
    protected Request parseInstance(XContentParser parser) {
        return Request.PARSER.apply(parser, null);
    }
}

View File

@ -67,13 +67,6 @@ public class ConditionTests extends AbstractSerializingTestCase<Condition> {
return Condition.PARSER.apply(parser, null);
}
/**
 * Parsing a condition that has a value but no operator must fail with a
 * message naming the missing operator field.
 */
public void testInvalidTransformName() throws Exception {
    BytesArray conditionWithoutOperator = new BytesArray("{ \"value\":\"someValue\" }");
    XContentParser parser = XContentFactory.xContent(conditionWithoutOperator)
            .createParser(NamedXContentRegistry.EMPTY, conditionWithoutOperator);
    IllegalArgumentException failure =
            expectThrows(IllegalArgumentException.class, () -> Condition.PARSER.apply(parser, null));
    String message = failure.getMessage();
    assertThat(message, containsString("Required [operator]"));
}
public void testVerifyArgsNumericArgs() {
new Condition(Operator.LTE, "100");
new Condition(Operator.GT, "10.0");

View File

@ -25,15 +25,11 @@ public class DataFormatTests extends ESTestCase {
assertEquals(DataFormat.JSON, DataFormat.forString("json"));
assertEquals(DataFormat.JSON, DataFormat.forString("JSON"));
assertEquals(DataFormat.SINGLE_LINE, DataFormat.forString("single_line"));
assertEquals(DataFormat.SINGLE_LINE, DataFormat.forString("SINGLE_LINE"));
}
/** Pins the ordinal value of each data format: JSON is 0, DELIMITED is 1, SINGLE_LINE is 2. */
public void testValidOrdinals() {
    DataFormat[] expectedOrder = { DataFormat.JSON, DataFormat.DELIMITED, DataFormat.SINGLE_LINE };
    for (int ordinal = 0; ordinal < expectedOrder.length; ordinal++) {
        assertThat(expectedOrder[ordinal].ordinal(), equalTo(ordinal));
    }
}
public void testwriteTo() throws Exception {
@ -50,13 +46,6 @@ public class DataFormatTests extends ESTestCase {
assertThat(in.readVInt(), equalTo(1));
}
}
try (BytesStreamOutput out = new BytesStreamOutput()) {
DataFormat.SINGLE_LINE.writeTo(out);
try (StreamInput in = out.bytes().streamInput()) {
assertThat(in.readVInt(), equalTo(2));
}
}
}
public void testReadFrom() throws Exception {
@ -72,12 +61,6 @@ public class DataFormatTests extends ESTestCase {
assertThat(DataFormat.readFromStream(in), equalTo(DataFormat.DELIMITED));
}
}
try (BytesStreamOutput out = new BytesStreamOutput()) {
out.writeVInt(2);
try (StreamInput in = out.bytes().streamInput()) {
assertThat(DataFormat.readFromStream(in), equalTo(DataFormat.SINGLE_LINE));
}
}
}
public void testInvalidReadFrom() throws Exception {

View File

@ -10,8 +10,6 @@ import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.ml.job.messages.Messages;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
import org.elasticsearch.xpack.ml.job.config.transform.TransformType;
import org.elasticsearch.xpack.ml.support.AbstractSerializingTestCase;
import java.util.ArrayList;
@ -58,7 +56,6 @@ public class JobTests extends AbstractSerializingTestCase<Job> {
assertNull(job.getBackgroundPersistInterval());
assertNull(job.getModelSnapshotRetentionDays());
assertNull(job.getResultsRetentionDays());
assertEquals(Collections.emptyList(), job.getTransforms());
assertNotNull(job.allFields());
assertFalse(job.allFields().isEmpty());
}
@ -298,92 +295,6 @@ public class JobTests extends AbstractSerializingTestCase<Job> {
builder.build();
}
/**
 * Expects the build to fail while a domain split transform is set on the default job
 * builder, and to succeed once the analysis config is replaced by one whose detector
 * reads the transform's first default output.
 */
public void testCheckTransformOutputIsUsed_throws() {
    Job.Builder jobBuilder = buildJobBuilder("foo");

    TransformConfig domainSplit = new TransformConfig(TransformType.Names.DOMAIN_SPLIT_NAME);
    domainSplit.setInputs(Arrays.asList("dns"));
    jobBuilder.setTransforms(Arrays.asList(domainSplit));

    expectThrows(IllegalArgumentException.class, jobBuilder::build);

    // Point a detector at the transform's first default output and build again
    Detector.Builder detectorOnOutput = new Detector.Builder();
    detectorOnOutput.setFunction(Detector.MIN);
    detectorOnOutput.setFieldName(TransformType.DOMAIN_SPLIT.defaultOutputNames().get(0));
    jobBuilder.setAnalysisConfig(new AnalysisConfig.Builder(Collections.singletonList(detectorOnOutput.build())));
    jobBuilder.build();
}
/**
 * Expects the build to fail when a transform's output has the same name as the
 * analysis config's summary count field.
 */
public void testCheckTransformDuplicatOutput_outputIsSummaryCountField() {
    Job.Builder jobBuilder = buildJobBuilder("foo");

    AnalysisConfig.Builder analysisConfig = createAnalysisConfig();
    analysisConfig.setSummaryCountFieldName("summaryCountField");
    jobBuilder.setAnalysisConfig(analysisConfig);

    TransformConfig domainSplit = new TransformConfig(TransformType.Names.DOMAIN_SPLIT_NAME);
    domainSplit.setInputs(Arrays.asList("dns"));
    domainSplit.setOutputs(Arrays.asList("summaryCountField"));
    jobBuilder.setTransforms(Arrays.asList(domainSplit));

    expectThrows(IllegalArgumentException.class, jobBuilder::build);
}
/**
 * Expects the build to fail when an extract transform, writing to an output named
 * "summaryCountField", is set on the default job builder.
 */
public void testCheckTransformOutputIsUsed_outputIsSummaryCountField() {
    Job.Builder jobBuilder = buildJobBuilder("foo");

    TransformConfig extract = new TransformConfig(TransformType.Names.EXTRACT_NAME);
    extract.setInputs(Arrays.asList("dns"));
    extract.setOutputs(Arrays.asList("summaryCountField"));
    extract.setArguments(Arrays.asList("(.*)"));
    jobBuilder.setTransforms(Arrays.asList(extract));

    expectThrows(IllegalArgumentException.class, jobBuilder::build);
}
/** Expects the build to succeed when the only transform is an exclude filter with a condition. */
public void testCheckTransformOutputIsUsed_transformHasNoOutput() {
    Job.Builder jobBuilder = buildJobBuilder("foo");

    // The exclude filter has no output
    TransformConfig excludeFilter = new TransformConfig(TransformType.Names.EXCLUDE_NAME);
    excludeFilter.setCondition(new Condition(Operator.MATCH, "whitelisted_host"));
    excludeFilter.setInputs(Arrays.asList("dns"));
    jobBuilder.setTransforms(Arrays.asList(excludeFilter));

    jobBuilder.build();
}
/**
 * Expects the "data format requires transform" error when the SINGLE_LINE format is
 * selected and no transforms are set on the builder.
 */
public void testVerify_GivenDataFormatIsSingleLineAndNullTransforms() {
    String expectedMessage =
            Messages.getMessage(Messages.JOB_CONFIG_DATAFORMAT_REQUIRES_TRANSFORM, DataDescription.DataFormat.SINGLE_LINE);

    Job.Builder jobBuilder = buildJobBuilder("foo");
    DataDescription.Builder singleLineDescription = new DataDescription.Builder();
    singleLineDescription.setFormat(DataDescription.DataFormat.SINGLE_LINE);
    jobBuilder.setDataDescription(singleLineDescription);

    IllegalArgumentException failure = expectThrows(IllegalArgumentException.class, jobBuilder::build);
    assertEquals(expectedMessage, failure.getMessage());
}
/**
 * Expects the "data format requires transform" error when the SINGLE_LINE format is
 * selected and the transform list is explicitly empty.
 */
public void testVerify_GivenDataFormatIsSingleLineAndEmptyTransforms() {
    String expectedMessage =
            Messages.getMessage(Messages.JOB_CONFIG_DATAFORMAT_REQUIRES_TRANSFORM, DataDescription.DataFormat.SINGLE_LINE);

    Job.Builder jobBuilder = buildJobBuilder("foo");
    jobBuilder.setTransforms(new ArrayList<>());
    DataDescription.Builder singleLineDescription = new DataDescription.Builder();
    singleLineDescription.setFormat(DataDescription.DataFormat.SINGLE_LINE);
    jobBuilder.setDataDescription(singleLineDescription);

    IllegalArgumentException failure = expectThrows(IllegalArgumentException.class, jobBuilder::build);
    assertEquals(expectedMessage, failure.getMessage());
}
/**
 * Expects the build to succeed when the SINGLE_LINE format is selected and a trim
 * transform (input "raw", output "time") is configured.
 */
public void testVerify_GivenDataFormatIsSingleLineAndNonEmptyTransforms() {
    TransformConfig trim = new TransformConfig("trim");
    trim.setInputs(Arrays.asList("raw"));
    trim.setOutputs(Arrays.asList("time"));
    List<TransformConfig> transformList = new ArrayList<>();
    transformList.add(trim);

    Job.Builder jobBuilder = buildJobBuilder("foo");
    jobBuilder.setTransforms(transformList);

    DataDescription.Builder singleLineDescription = new DataDescription.Builder();
    singleLineDescription.setFormat(DataDescription.DataFormat.SINGLE_LINE);
    jobBuilder.setDataDescription(singleLineDescription);

    jobBuilder.build();
}
public void testVerify_GivenNegativeRenormalizationWindowDays() {
String errorMessage = Messages.getMessage(Messages.JOB_CONFIG_FIELD_VALUE_TOO_LOW,
"renormalizationWindowDays", 0, -1);
@ -488,23 +399,12 @@ public class JobTests extends AbstractSerializingTestCase<Job> {
builder.setDataDescription(dataDescription);
}
String[] outputs;
TransformType[] transformTypes ;
AnalysisConfig ac = analysisConfig.build();
if (randomBoolean()) {
transformTypes = new TransformType[] {TransformType.TRIM, TransformType.LOWERCASE};
outputs = new String[] {ac.getDetectors().get(0).getFieldName(), ac.getDetectors().get(0).getOverFieldName()};
} else {
transformTypes = new TransformType[] {TransformType.TRIM};
outputs = new String[] {ac.getDetectors().get(0).getFieldName()};
}
List<TransformConfig> transformConfigList = new ArrayList<>(transformTypes.length);
for (int i = 0; i < transformTypes.length; i++) {
TransformConfig tc = new TransformConfig(transformTypes[i].prettyName());
tc.setInputs(Collections.singletonList("input" + i));
tc.setOutputs(Collections.singletonList(outputs[i]));
transformConfigList.add(tc);
}
builder.setTransforms(transformConfigList);
if (randomBoolean()) {
builder.setModelDebugConfig(new ModelDebugConfig(randomDouble(), randomAsciiOfLength(10)));
}

View File

@ -1,196 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.job.config.transform;
import org.elasticsearch.common.ParsingException;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.ml.job.config.Condition;
import org.elasticsearch.xpack.ml.job.config.Operator;
import org.elasticsearch.xpack.ml.support.AbstractSerializingTestCase;
import java.util.Arrays;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.instanceOf;
/**
 * Serialisation, output-defaulting, equality and parse-failure tests for {@link TransformConfig}.
 */
public class TransformConfigTests extends AbstractSerializingTestCase<TransformConfig> {

    /**
     * Creates a config of a random transform type; inputs, outputs, arguments and the
     * condition are each set or left unset at random.
     */
    @Override
    protected TransformConfig createTestInstance() {
        TransformType transformType = randomFrom(TransformType.values());
        TransformConfig config = new TransformConfig(transformType.prettyName());
        if (randomBoolean()) {
            config.setInputs(Arrays.asList(generateRandomStringArray(0, 10, false)));
        }
        if (randomBoolean()) {
            config.setOutputs(Arrays.asList(generateRandomStringArray(0, 10, false)));
        }
        if (randomBoolean()) {
            config.setArguments(Arrays.asList(generateRandomStringArray(0, 10, false)));
        }
        if (randomBoolean()) {
            // no need to randomize, it is properly randomly tested in ConditionTest
            config.setCondition(new Condition(Operator.LT, Double.toString(randomDouble())));
        }
        return config;
    }

    @Override
    protected Writeable.Reader<TransformConfig> instanceReader() {
        return TransformConfig::new;
    }

    @Override
    protected TransformConfig parseInstance(XContentParser parser) {
        return TransformConfig.PARSER.apply(parser, null);
    }

    /** With no outputs set, the outputs reported for a concat transform are ["concat"]. */
    public void testGetOutputs_GivenNoExplicitOutputsSpecified() {
        TransformConfig config = new TransformConfig("concat");

        assertEquals(Arrays.asList("concat"), config.getOutputs());
    }

    /**
     * With an explicitly empty output list the same ["concat"] outputs are expected.
     * Previously this test never set any outputs, making it a duplicate of
     * {@link #testGetOutputs_GivenNoExplicitOutputsSpecified()}.
     */
    public void testGetOutputs_GivenEmptyOutputsSpecified() {
        TransformConfig config = new TransformConfig("concat");
        config.setOutputs(Arrays.<String>asList());

        assertEquals(Arrays.asList("concat"), config.getOutputs());
    }

    /** Explicitly set outputs are returned unchanged. */
    public void testGetOutputs_GivenOutputsSpecified() {
        TransformConfig config = new TransformConfig("concat");
        config.setOutputs(Arrays.asList("o1", "o2"));

        assertEquals(Arrays.asList("o1", "o2"), config.getOutputs());
    }

    /** Constructing a config with an unrecognised transform name is rejected. */
    public void testVerify_GivenUnknownTransform() {
        ESTestCase.expectThrows(IllegalArgumentException.class, () -> new TransformConfig("unknown+transform"));
    }

    public void testEquals_GivenSameReference() {
        TransformConfig config = new TransformConfig(TransformType.CONCAT.prettyName());
        assertTrue(config.equals(config));
    }

    public void testEquals_GivenDifferentClass() {
        TransformConfig config = new TransformConfig(TransformType.CONCAT.prettyName());
        assertFalse(config.equals("a string"));
    }

    public void testEquals_GivenNull() {
        TransformConfig config = new TransformConfig(TransformType.CONCAT.prettyName());
        assertFalse(config.equals(null));
    }

    /** Two configs with identical type, inputs, outputs, arguments and condition are equal both ways. */
    public void testEquals_GivenEqualTransform() {
        TransformConfig config1 = new TransformConfig("concat");
        config1.setInputs(Arrays.asList("input1", "input2"));
        config1.setOutputs(Arrays.asList("output"));
        config1.setArguments(Arrays.asList("-"));
        config1.setCondition(new Condition(Operator.EQ, "5"));

        TransformConfig config2 = new TransformConfig("concat");
        config2.setInputs(Arrays.asList("input1", "input2"));
        config2.setOutputs(Arrays.asList("output"));
        config2.setArguments(Arrays.asList("-"));
        config2.setCondition(new Condition(Operator.EQ, "5"));

        assertTrue(config1.equals(config2));
        assertTrue(config2.equals(config1));
    }

    public void testEquals_GivenDifferentType() {
        TransformConfig config1 = new TransformConfig("concat");
        TransformConfig config2 = new TransformConfig("lowercase");

        assertFalse(config1.equals(config2));
        assertFalse(config2.equals(config1));
    }

    public void testEquals_GivenDifferentInputs() {
        TransformConfig config1 = new TransformConfig("concat");
        config1.setInputs(Arrays.asList("input1", "input2"));

        TransformConfig config2 = new TransformConfig("concat");
        config2.setInputs(Arrays.asList("input1", "input3"));

        assertFalse(config1.equals(config2));
        assertFalse(config2.equals(config1));
    }

    public void testEquals_GivenDifferentOutputs() {
        TransformConfig config1 = new TransformConfig("concat");
        config1.setInputs(Arrays.asList("input1", "input2"));
        config1.setOutputs(Arrays.asList("output1"));

        TransformConfig config2 = new TransformConfig("concat");
        config2.setInputs(Arrays.asList("input1", "input2"));
        config2.setOutputs(Arrays.asList("output2"));

        assertFalse(config1.equals(config2));
        assertFalse(config2.equals(config1));
    }

    public void testEquals_GivenDifferentArguments() {
        TransformConfig config1 = new TransformConfig("concat");
        config1.setInputs(Arrays.asList("input1", "input2"));
        config1.setOutputs(Arrays.asList("output"));
        config1.setArguments(Arrays.asList("-"));

        TransformConfig config2 = new TransformConfig("concat");
        config2.setInputs(Arrays.asList("input1", "input2"));
        config2.setOutputs(Arrays.asList("output"));
        config2.setArguments(Arrays.asList("--"));

        assertFalse(config1.equals(config2));
        assertFalse(config2.equals(config1));
    }

    public void testEquals_GivenDifferentConditions() {
        TransformConfig config1 = new TransformConfig("concat");
        config1.setInputs(Arrays.asList("input1", "input2"));
        config1.setOutputs(Arrays.asList("output"));
        config1.setArguments(Arrays.asList("-"));
        config1.setCondition(new Condition(Operator.MATCH, "foo"));

        TransformConfig config2 = new TransformConfig("concat");
        config2.setInputs(Arrays.asList("input1", "input2"));
        config2.setOutputs(Arrays.asList("output"));
        config2.setArguments(Arrays.asList("-"));
        config2.setCondition(new Condition(Operator.MATCH, "bar"));

        assertFalse(config1.equals(config2));
        assertFalse(config2.equals(config1));
    }

    /**
     * Parsing a config whose transform name is the empty string fails with a
     * {@link ParsingException} whose root cause is an {@link IllegalArgumentException}
     * reporting the unknown transform type.
     */
    public void testInvalidTransformName() throws Exception {
        BytesArray json = new BytesArray("{ \"transform\":\"\" }");
        XContentParser parser = XContentFactory.xContent(json).createParser(NamedXContentRegistry.EMPTY, json);

        ParsingException ex = expectThrows(ParsingException.class,
                () -> TransformConfig.PARSER.apply(parser, null));

        assertThat(ex.getMessage(), containsString("[transform] failed to parse field [transform]"));
        Throwable cause = ex.getRootCause();
        assertNotNull(cause);
        assertThat(cause, instanceOf(IllegalArgumentException.class));
        assertThat(cause.getMessage(),
                containsString("Unknown [transformType]: []"));
    }
}

View File

@ -1,81 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.job.config.transform;
import org.elasticsearch.common.io.stream.Writeable.Reader;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.xpack.ml.job.config.Condition;
import org.elasticsearch.xpack.ml.job.config.Operator;
import org.elasticsearch.xpack.ml.support.AbstractSerializingTestCase;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/**
 * Serialisation tests for {@link TransformConfigs} plus a check of the aggregated
 * input and output field names.
 */
public class TransformConfigsTests extends AbstractSerializingTestCase<TransformConfigs> {

    /** Wraps {@code randomInt(10)} randomly populated transform configs. */
    @Override
    protected TransformConfigs createTestInstance() {
        int transformCount = randomInt(10);
        List<TransformConfig> configs = new ArrayList<>(transformCount);
        while (configs.size() < transformCount) {
            configs.add(randomTransformConfig());
        }
        return new TransformConfigs(configs);
    }

    /**
     * Builds a config of a random type; inputs, outputs, arguments and the condition
     * are each set or left unset at random.
     */
    private TransformConfig randomTransformConfig() {
        TransformType pickedType = randomFrom(TransformType.values());
        TransformConfig config = new TransformConfig(pickedType.prettyName());
        if (randomBoolean()) {
            config.setInputs(Arrays.asList(generateRandomStringArray(0, 10, false)));
        }
        if (randomBoolean()) {
            config.setOutputs(Arrays.asList(generateRandomStringArray(0, 10, false)));
        }
        if (randomBoolean()) {
            config.setArguments(Arrays.asList(generateRandomStringArray(0, 10, false)));
        }
        if (randomBoolean()) {
            // no need to randomize, it is properly randomly tested in ConditionTest
            config.setCondition(new Condition(Operator.EQ, Double.toString(randomDouble())));
        }
        return config;
    }

    @Override
    protected Reader<TransformConfigs> instanceReader() {
        return TransformConfigs::new;
    }

    @Override
    protected TransformConfigs parseInstance(XContentParser parser) {
        return TransformConfigs.PARSER.apply(parser, null);
    }

    /**
     * Three concat transforms with overlapping inputs must report the union of their
     * inputs and the union of their outputs.
     */
    public void testInputOutputFieldNames() {
        List<TransformConfig> concats = new ArrayList<>();
        concats.add(createConcatTransform(Arrays.asList("a", "b", "c"), Arrays.asList("c1")));
        concats.add(createConcatTransform(Arrays.asList("d", "e", "c"), Arrays.asList("c2")));
        concats.add(createConcatTransform(Arrays.asList("f", "a", "c"), Arrays.asList("c3")));
        TransformConfigs tcs = new TransformConfigs(concats);

        Set<String> expectedInputs = new HashSet<>(Arrays.asList("a", "b", "c", "d", "e", "f"));
        assertEquals(expectedInputs, tcs.inputFieldNames());

        Set<String> expectedOutputs = new HashSet<>(Arrays.asList("c1", "c2", "c3"));
        assertEquals(expectedOutputs, tcs.outputFieldNames());
    }

    /** Creates a concat transform with the given inputs and outputs. */
    private TransformConfig createConcatTransform(List<String> inputs, List<String> outputs) {
        TransformConfig concat = new TransformConfig(TransformType.CONCAT.prettyName());
        concat.setInputs(inputs);
        concat.setOutputs(outputs);
        return concat;
    }
}

View File

@ -1,66 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.job.config.transform;
import com.fasterxml.jackson.core.JsonProcessingException;
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.test.ESTestCase;
import java.io.IOException;
/**
 * Checks that {@link TransformConfig} can be parsed from JSON where inputs/outputs are
 * given either as a single string or as an array of strings.
 */
public class TransformSerialisationTests extends ESTestCase {

    /** A single string is accepted for inputs/outputs and becomes a one-element list. */
    public void testDeserialise_singleFieldAsArray() throws JsonProcessingException, IOException {
        TransformConfig withDefaultOutputs = parseTransform("{\"inputs\":\"dns\", \"transform\":\"domain_split\"}");

        assertEquals(1, withDefaultOutputs.getInputs().size());
        assertEquals("dns", withDefaultOutputs.getInputs().get(0));
        assertEquals("domain_split", withDefaultOutputs.getTransform());
        // No outputs were given, so the two names below are what the config reports by itself
        assertEquals(2, withDefaultOutputs.getOutputs().size());
        assertEquals("subDomain", withDefaultOutputs.getOutputs().get(0));
        assertEquals("hrd", withDefaultOutputs.getOutputs().get(1));

        TransformConfig withExplicitOutput =
                parseTransform("{\"inputs\":\"dns\", \"transform\":\"domain_split\", \"outputs\":\"catted\"}");

        assertEquals(1, withExplicitOutput.getInputs().size());
        assertEquals("dns", withExplicitOutput.getInputs().get(0));
        assertEquals("domain_split", withExplicitOutput.getTransform());
        assertEquals(1, withExplicitOutput.getOutputs().size());
        assertEquals("catted", withExplicitOutput.getOutputs().get(0));
    }

    /** Array-valued inputs/outputs are parsed element by element, preserving order. */
    public void testDeserialise_fieldsArray() throws JsonProcessingException, IOException {
        TransformConfig domainSplit = parseTransform("{\"inputs\":[\"dns\"], \"transform\":\"domain_split\"}");

        assertEquals(1, domainSplit.getInputs().size());
        assertEquals("dns", domainSplit.getInputs().get(0));
        assertEquals("domain_split", domainSplit.getTransform());

        TransformConfig concat =
                parseTransform("{\"inputs\":[\"a\", \"b\", \"c\"], \"transform\":\"concat\", \"outputs\":[\"catted\"]}");

        assertEquals(3, concat.getInputs().size());
        assertEquals("a", concat.getInputs().get(0));
        assertEquals("b", concat.getInputs().get(1));
        assertEquals("c", concat.getInputs().get(2));
        assertEquals("concat", concat.getTransform());
        assertEquals(1, concat.getOutputs().size());
        assertEquals("catted", concat.getOutputs().get(0));
    }

    /** Runs the given JSON through {@code TransformConfig.PARSER}. */
    private static TransformConfig parseTransform(String json) throws IOException {
        XContentParser parser = XContentFactory.xContent(json).createParser(NamedXContentRegistry.EMPTY, json);
        return TransformConfig.PARSER.apply(parser, null);
    }
}

View File

@ -1,52 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.job.config.transform;
import org.elasticsearch.test.ESTestCase;
import java.util.EnumSet;
import java.util.Set;
/**
 * Tests the string lookup and the ordinal values of {@link TransformType}.
 */
public class TransformTypeTests extends ESTestCase {

    /** Every type's pretty name equals its toString() and resolves back to the same type. */
    public void testFromString() {
        Set<TransformType> all = EnumSet.allOf(TransformType.class);

        for (TransformType type : all) {
            assertEquals(type.prettyName(), type.toString());

            TransformType created = TransformType.fromString(type.prettyName());
            assertEquals(type, created);
        }
    }

    /** An unrecognised name is rejected with an IllegalArgumentException. */
    public void testFromString_UnknownType() {
        ESTestCase.expectThrows(IllegalArgumentException.class, () -> TransformType.fromString("random_type"));
    }

    /** Pins the external name of each transform type. */
    public void testForString() {
        // assertEquals takes (expected, actual); the enum constant is the expected value
        assertEquals(TransformType.DOMAIN_SPLIT, TransformType.fromString("domain_split"));
        assertEquals(TransformType.CONCAT, TransformType.fromString("concat"));
        assertEquals(TransformType.REGEX_EXTRACT, TransformType.fromString("extract"));
        assertEquals(TransformType.REGEX_SPLIT, TransformType.fromString("split"));
        assertEquals(TransformType.EXCLUDE, TransformType.fromString("exclude"));
        assertEquals(TransformType.LOWERCASE, TransformType.fromString("lowercase"));
        assertEquals(TransformType.UPPERCASE, TransformType.fromString("uppercase"));
        assertEquals(TransformType.TRIM, TransformType.fromString("trim"));
    }

    /** Pins the ordinal value of each transform type. */
    public void testValidOrdinals() {
        assertEquals(0, TransformType.DOMAIN_SPLIT.ordinal());
        assertEquals(1, TransformType.CONCAT.ordinal());
        assertEquals(2, TransformType.REGEX_EXTRACT.ordinal());
        assertEquals(3, TransformType.REGEX_SPLIT.ordinal());
        assertEquals(4, TransformType.EXCLUDE.ordinal());
        assertEquals(5, TransformType.LOWERCASE.ordinal());
        assertEquals(6, TransformType.UPPERCASE.ordinal());
        assertEquals(7, TransformType.TRIM.ordinal());
    }
}

View File

@ -5,78 +5,48 @@
*/
package org.elasticsearch.xpack.ml.job.process.autodetect.writer;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.ml.job.config.AnalysisConfig;
import org.elasticsearch.xpack.ml.job.config.DataDescription;
import org.elasticsearch.xpack.ml.job.config.Detector;
import org.elasticsearch.xpack.ml.job.config.Condition;
import org.elasticsearch.xpack.ml.job.config.Operator;
import org.elasticsearch.xpack.ml.job.process.autodetect.AutodetectProcess;
import org.elasticsearch.xpack.ml.job.process.autodetect.writer.AbstractDataToProcessWriter.InputOutputMap;
import org.elasticsearch.xpack.ml.job.process.DataCountsReporter;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfigs;
import org.elasticsearch.xpack.ml.job.config.transform.TransformType;
import org.elasticsearch.xpack.ml.transforms.Concat;
import org.elasticsearch.xpack.ml.transforms.HighestRegisteredDomain;
import org.elasticsearch.xpack.ml.transforms.RegexSplit;
import org.elasticsearch.xpack.ml.transforms.StringTransform;
import org.elasticsearch.xpack.ml.transforms.Transform;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformIndex;
import org.junit.Assert;
import org.junit.Before;
import org.mockito.Mockito;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import static org.mockito.Matchers.anyLong;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
/**
* Testing methods of AbstractDataToProcessWriter but uses the concrete
* instances.
* <p>
* Asserts that the transforms have the right input and outputs.
* Testing methods of AbstractDataToProcessWriter but uses the concrete instances.
*/
public class AbstractDataToProcessWriterTests extends ESTestCase {
private AutodetectProcess autodetectProcess;
private DataCountsReporter dataCountsReporter;
private Logger jobLogger;
/** Replaces the writer's collaborators with fresh Mockito mocks before each test. */
@Before
public void setUpMocks() {
    jobLogger = Mockito.mock(Logger.class);
    dataCountsReporter = Mockito.mock(DataCountsReporter.class);
    autodetectProcess = Mockito.mock(AutodetectProcess.class);
}
public void testInputFields_MulitpleInputsSingleOutput() throws IOException {
public void testInputFields() throws IOException {
DataDescription.Builder dd = new DataDescription.Builder();
dd.setTimeField("time_field");
Detector.Builder detector = new Detector.Builder("metric", "value");
detector.setByFieldName("host-metric");
detector.setDetectorDescription("metric(value) by host-metric");
detector.setByFieldName("metric");
detector.setPartitionFieldName("host");
detector.setDetectorDescription("metric(value) by metric partitionfield=host");
AnalysisConfig ac = new AnalysisConfig.Builder(Arrays.asList(detector.build())).build();
TransformConfig tc = new TransformConfig(TransformType.Names.CONCAT_NAME);
tc.setInputs(Arrays.asList("host", "metric"));
tc.setOutputs(Arrays.asList("host-metric"));
TransformConfigs transforms = new TransformConfigs(Arrays.asList(tc));
AbstractDataToProcessWriter writer =
new CsvDataToProcessWriter(true, autodetectProcess, dd.build(), ac, transforms, dataCountsReporter, jobLogger);
new CsvDataToProcessWriter(true, autodetectProcess, dd.build(), ac, dataCountsReporter);
writer.writeHeader();
@ -88,314 +58,32 @@ public class AbstractDataToProcessWriterTests extends ESTestCase {
assertTrue(inputFields.contains("metric"));
String[] header = { "time_field", "metric", "host", "value" };
writer.buildTransforms(header);
List<Transform> trs = writer.postDateTransforms;
assertEquals(1, trs.size());
Transform tr = trs.get(0);
List<TransformIndex> readIndexes = tr.getReadIndexes();
assertEquals(readIndexes.get(0), new TransformIndex(0, 2));
assertEquals(readIndexes.get(1), new TransformIndex(0, 1));
List<TransformIndex> writeIndexes = tr.getWriteIndexes();
assertEquals(writeIndexes.get(0), new TransformIndex(2, 1));
writer.buildFieldIndexMapping(header);
Map<String, Integer> inputIndexes = writer.getInputFieldIndexes();
assertEquals(4, inputIndexes.size());
Assert.assertEquals(new Integer(0), inputIndexes.get("time_field"));
Assert.assertEquals(new Integer(1), inputIndexes.get("metric"));
Assert.assertEquals(new Integer(2), inputIndexes.get("host"));
Assert.assertEquals(new Integer(3), inputIndexes.get("value"));
assertEquals(new Integer(0), inputIndexes.get("time_field"));
assertEquals(new Integer(1), inputIndexes.get("metric"));
assertEquals(new Integer(2), inputIndexes.get("host"));
assertEquals(new Integer(3), inputIndexes.get("value"));
Map<String, Integer> outputIndexes = writer.getOutputFieldIndexes();
assertEquals(4, outputIndexes.size());
Assert.assertEquals(new Integer(0), outputIndexes.get("time_field"));
Assert.assertEquals(new Integer(1), outputIndexes.get("host-metric"));
Assert.assertEquals(new Integer(2), outputIndexes.get("value"));
Assert.assertEquals(new Integer(3), outputIndexes.get(LengthEncodedWriter.CONTROL_FIELD_NAME));
assertEquals(5, outputIndexes.size());
assertEquals(new Integer(0), outputIndexes.get("time_field"));
assertEquals(new Integer(1), outputIndexes.get("host"));
assertEquals(new Integer(2), outputIndexes.get("metric"));
assertEquals(new Integer(3), outputIndexes.get("value"));
assertEquals(new Integer(4), outputIndexes.get(LengthEncodedWriter.CONTROL_FIELD_NAME));
List<InputOutputMap> inOutMaps = writer.getInputOutputMap();
assertEquals(1, inOutMaps.size());
assertEquals(inOutMaps.get(0).inputIndex, 3);
assertEquals(inOutMaps.get(0).outputIndex, 2);
}
/**
 * A domain split transform reads the single "domain" input and both of its default
 * outputs are used by the detector (as by and over fields). Checks the writer's input
 * fields, the input/output index maps and the transform's read/write indexes.
 */
public void testInputFields_SingleInputMulitpleOutputs() throws IOException {
    DataDescription.Builder dd = new DataDescription.Builder();
    dd.setTimeField("time_field");

    Detector.Builder detector = new Detector.Builder("metric", "value");
    detector.setByFieldName(TransformType.DOMAIN_SPLIT.defaultOutputNames().get(0));
    detector.setOverFieldName(TransformType.DOMAIN_SPLIT.defaultOutputNames().get(1));
    AnalysisConfig ac = new AnalysisConfig.Builder(Arrays.asList(detector.build())).build();

    TransformConfig tc = new TransformConfig(TransformType.Names.DOMAIN_SPLIT_NAME);
    tc.setInputs(Arrays.asList("domain"));
    TransformConfigs transforms = new TransformConfigs(Arrays.asList(tc));

    AbstractDataToProcessWriter writer =
            new CsvDataToProcessWriter(true, autodetectProcess, dd.build(), ac, transforms, dataCountsReporter, jobLogger);
    writer.writeHeader();

    Set<String> inputFields = new HashSet<>(writer.inputFields());
    assertEquals(3, inputFields.size());
    assertTrue(inputFields.contains("time_field"));
    assertTrue(inputFields.contains("value"));
    assertTrue(inputFields.contains("domain"));

    String[] header = { "time_field", "domain", "value" };
    writer.buildTransforms(header);
    List<Transform> trs = writer.postDateTransforms;
    assertEquals(1, trs.size());

    Map<String, Integer> inputIndexes = writer.getInputFieldIndexes();
    assertEquals(3, inputIndexes.size());
    Assert.assertEquals(Integer.valueOf(0), inputIndexes.get("time_field"));
    Assert.assertEquals(Integer.valueOf(1), inputIndexes.get("domain"));
    Assert.assertEquals(Integer.valueOf(2), inputIndexes.get("value"));

    Map<String, Integer> outputIndexes = writer.getOutputFieldIndexes();
    List<String> allOutputs = new ArrayList<>(TransformType.DOMAIN_SPLIT.defaultOutputNames());
    allOutputs.add("value");
    Collections.sort(allOutputs); // outputs are in alphabetical order
    assertEquals(5, outputIndexes.size()); // time + control field + outputs
    Assert.assertEquals(Integer.valueOf(0), outputIndexes.get("time_field"));
    int count = 1;
    for (String f : allOutputs) {
        Assert.assertEquals(Integer.valueOf(count++), outputIndexes.get(f));
    }
    Assert.assertEquals(Integer.valueOf(allOutputs.size() + 1), outputIndexes.get(LengthEncodedWriter.CONTROL_FIELD_NAME));

    List<InputOutputMap> inOutMaps = writer.getInputOutputMap();
    assertEquals(1, inOutMaps.size());
    assertEquals(2, inOutMaps.get(0).inputIndex);
    assertEquals(allOutputs.indexOf("value") + 1, inOutMaps.get(0).outputIndex);

    Transform tr = trs.get(0);
    assertEquals(new TransformIndex(0, 1), tr.getReadIndexes().get(0));

    // One write index per default output, offset by 1 for the leading time field
    List<TransformIndex> writeIndexes = new ArrayList<>();
    for (String outputName : TransformType.DOMAIN_SPLIT.defaultOutputNames()) {
        writeIndexes.add(new TransformIndex(2, allOutputs.indexOf(outputName) + 1));
    }
    assertEquals(writeIndexes, tr.getWriteIndexes());
}
/**
 * Only one output of the transform is used: the detector's by field is the domain
 * split transform's first default output and the second output is not referenced.
 */
public void testInputFields_SingleInputMulitpleOutputs_OnlyOneOutputUsed() throws IOException {
    DataDescription.Builder dd = new DataDescription.Builder();
    dd.setTimeField("time_field");

    Detector.Builder detector = new Detector.Builder("metric", "value");
    detector.setByFieldName(TransformType.DOMAIN_SPLIT.defaultOutputNames().get(0));
    AnalysisConfig ac = new AnalysisConfig.Builder(Arrays.asList(detector.build())).build();

    TransformConfig tc = new TransformConfig(TransformType.Names.DOMAIN_SPLIT_NAME);
    tc.setInputs(Arrays.asList("domain"));
    TransformConfigs transforms = new TransformConfigs(Arrays.asList(tc));

    AbstractDataToProcessWriter writer =
            new CsvDataToProcessWriter(true, autodetectProcess, dd.build(), ac, transforms, dataCountsReporter, jobLogger);
    writer.writeHeader();

    Set<String> inputFields = new HashSet<>(writer.inputFields());
    assertEquals(3, inputFields.size());
    assertTrue(inputFields.contains("time_field"));
    assertTrue(inputFields.contains("value"));
    assertTrue(inputFields.contains("domain"));

    String[] header = { "time_field", "domain", "value" };
    writer.buildTransforms(header);
    List<Transform> trs = writer.postDateTransforms;
    assertEquals(1, trs.size());

    Map<String, Integer> inputIndexes = writer.getInputFieldIndexes();
    assertEquals(3, inputIndexes.size());
    Assert.assertEquals(Integer.valueOf(0), inputIndexes.get("time_field"));
    Assert.assertEquals(Integer.valueOf(1), inputIndexes.get("domain"));
    Assert.assertEquals(Integer.valueOf(2), inputIndexes.get("value"));

    Map<String, Integer> outputIndexes = writer.getOutputFieldIndexes();
    List<String> allOutputs = new ArrayList<>();
    allOutputs.add(TransformType.DOMAIN_SPLIT.defaultOutputNames().get(0));
    allOutputs.add("value");
    Collections.sort(allOutputs); // outputs are in alphabetical order
    assertEquals(4, outputIndexes.size()); // time + control field + outputs
    Assert.assertEquals(Integer.valueOf(0), outputIndexes.get("time_field"));
    int count = 1;
    for (String f : allOutputs) {
        Assert.assertEquals(Integer.valueOf(count++), outputIndexes.get(f));
    }
    Assert.assertEquals(Integer.valueOf(allOutputs.size() + 1), outputIndexes.get(LengthEncodedWriter.CONTROL_FIELD_NAME));

    List<InputOutputMap> inOutMaps = writer.getInputOutputMap();
    assertEquals(1, inOutMaps.size());
    assertEquals(2, inOutMaps.get(0).inputIndex);
    assertEquals(allOutputs.indexOf("value") + 1, inOutMaps.get(0).outputIndex);

    Transform tr = trs.get(0);
    assertEquals(new TransformIndex(0, 1), tr.getReadIndexes().get(0));
    TransformIndex ti = new TransformIndex(2, allOutputs.indexOf(TransformType.DOMAIN_SPLIT.defaultOutputNames().get(0)) + 1);
    assertEquals(ti, tr.getWriteIndexes().get(0));
}
/**
 * Builds transforms for a config holding a concat transform whose output is
 * the time field ("datetime") alongside a domain split transform, and checks
 * that the concat lands in the date input transforms while the domain split
 * lands in the post-date transforms.
 */
public void testBuildTransforms_ChainedTransforms() throws IOException {
    DataDescription.Builder dd = new DataDescription.Builder();
    dd.setTimeField("datetime");

    Detector.Builder detector = new Detector.Builder("metric", "value");
    detector.setByFieldName(TransformType.DOMAIN_SPLIT.defaultOutputNames().get(0));
    AnalysisConfig ac = new AnalysisConfig.Builder(Arrays.asList(detector.build())).build();

    // concat produces the time field from the separate date and time columns
    TransformConfig concatTc = new TransformConfig(TransformType.Names.CONCAT_NAME);
    concatTc.setInputs(Arrays.asList("date", "time"));
    concatTc.setOutputs(Arrays.asList("datetime"));

    TransformConfig hrdTc = new TransformConfig(TransformType.Names.DOMAIN_SPLIT_NAME);
    hrdTc.setInputs(Arrays.asList("domain"));

    TransformConfigs transforms = new TransformConfigs(Arrays.asList(concatTc, hrdTc));

    AbstractDataToProcessWriter writer =
            new CsvDataToProcessWriter(true, autodetectProcess, dd.build(), ac, transforms, dataCountsReporter, jobLogger);
    writer.writeHeader();

    // "datetime" itself is not an input: it is derived from "date" and "time"
    Set<String> inputFields = new HashSet<>(writer.inputFields());
    assertEquals(4, inputFields.size());
    assertTrue(inputFields.contains("date"));
    assertTrue(inputFields.contains("time"));
    assertTrue(inputFields.contains("value"));
    assertTrue(inputFields.contains("domain"));

    String[] header = { "date", "time", "domain", "value" };
    writer.buildTransforms(header);

    List<Transform> trs = writer.dateInputTransforms;
    assertEquals(1, trs.size());
    assertTrue(trs.get(0) instanceof Concat);

    trs = writer.postDateTransforms;
    assertEquals(1, trs.size());
    assertTrue(trs.get(0) instanceof HighestRegisteredDomain);

    // Input indexes follow the position of each field in the header
    Map<String, Integer> inputIndexes = writer.getInputFieldIndexes();
    assertEquals(4, inputIndexes.size());
    Assert.assertEquals(Integer.valueOf(0), inputIndexes.get("date"));
    Assert.assertEquals(Integer.valueOf(1), inputIndexes.get("time"));
    Assert.assertEquals(Integer.valueOf(2), inputIndexes.get("domain"));
    Assert.assertEquals(Integer.valueOf(3), inputIndexes.get("value"));
}
/**
 * A record matched by the exclude transform must not be written to the
 * process or counted, whereas a record that does not match is written
 * and reported as usual.
 */
public void testApplyTransforms_transformReturnsExclude() throws IOException {
    DataDescription.Builder description = new DataDescription.Builder();
    description.setTimeField("datetime");

    Detector.Builder byMetric = new Detector.Builder("metric", "value");
    byMetric.setByFieldName("metric");
    AnalysisConfig config = new AnalysisConfig.Builder(Arrays.asList(byMetric.build())).build();

    // Exclude every record whose "metric" field matches "metricA"
    TransformConfig exclude = new TransformConfig(TransformType.EXCLUDE.prettyName());
    exclude.setInputs(Arrays.asList("metric"));
    exclude.setCondition(new Condition(Operator.MATCH, "metricA"));
    TransformConfigs transformConfigs = new TransformConfigs(Arrays.asList(exclude));

    AbstractDataToProcessWriter writer = new CsvDataToProcessWriter(
            true, autodetectProcess, description.build(), config, transformConfigs, dataCountsReporter, jobLogger);
    writer.writeHeader();
    writer.buildTransforms(new String[] { "datetime", "metric", "value" });

    String[] output = new String[3];

    // First record: metricA is excluded, so nothing may be written or counted
    String[] excludedRecord = { "1", "metricA", "0" };
    boolean excludedWasWritten = writer.applyTransformsAndWrite(excludedRecord, output, 3);
    assertFalse(excludedWasWritten);
    verify(autodetectProcess, never()).writeRecord(output);
    verify(dataCountsReporter, never()).reportRecordWritten(anyLong(), anyLong());

    // Start the reporter's call counts afresh before the second record
    Mockito.reset(dataCountsReporter);

    // Second record: metricB does not match the exclude condition
    String[] acceptedRecord = new String[] { "2", "metricB", "0" };
    String[] expectedOutput = { "2", null, null };
    boolean acceptedWasWritten = writer.applyTransformsAndWrite(acceptedRecord, output, 3);
    assertTrue(acceptedWasWritten);
    verify(autodetectProcess, times(1)).writeRecord(expectedOutput);
    verify(dataCountsReporter, times(1)).reportRecordWritten(3, 2000);
}
/**
 * Configures three transforms that feed one another - split writes "date",
 * uppercase reads "date" and writes "DATE", concat reads "DATE" and "time"
 * and writes the time field "datetime" - but registers them out of that
 * order, then asserts the writer's date input transforms come back as
 * split, uppercase, concat.
 */
public void testBuildTransforms_DateTransformsAreSorted() throws IOException {
DataDescription.Builder dd = new DataDescription.Builder();
dd.setTimeField("datetime");
Detector.Builder detector = new Detector.Builder("metric", "value");
detector.setByFieldName("type");
AnalysisConfig ac = new AnalysisConfig.Builder(Arrays.asList(detector.build())).build();
// Last link of the chain: produces the time field
TransformConfig concatTc = new TransformConfig(TransformType.Names.CONCAT_NAME);
concatTc.setInputs(Arrays.asList("DATE", "time"));
concatTc.setOutputs(Arrays.asList("datetime"));
// Middle link: consumes the output of the split
TransformConfig upperTc = new TransformConfig(TransformType.Names.UPPERCASE_NAME);
upperTc.setInputs(Arrays.asList("date"));
upperTc.setOutputs(Arrays.asList("DATE"));
// First link: the only transform that reads a raw header field
TransformConfig splitTc = new TransformConfig(TransformType.Names.SPLIT_NAME);
splitTc.setInputs(Arrays.asList("date-somethingelse"));
splitTc.setOutputs(Arrays.asList("date"));
splitTc.setArguments(Arrays.asList("-"));
// Deliberately not listed in the order the transforms feed one another
TransformConfigs transforms = new TransformConfigs(Arrays.asList(upperTc, concatTc, splitTc));
AbstractDataToProcessWriter writer =
new CsvDataToProcessWriter(true, autodetectProcess, dd.build(), ac, transforms, dataCountsReporter, jobLogger);
writer.writeHeader();
String[] header = { "date-somethingelse", "time", "type", "value" };
writer.buildTransforms(header);
// the date input transforms should be in this order
List<Transform> trs = writer.dateInputTransforms;
assertEquals(3, trs.size());
assertTrue(trs.get(0) instanceof RegexSplit);
assertTrue(trs.get(1) instanceof StringTransform);
assertTrue(trs.get(2) instanceof Concat);
// NOTE(review): inOutMaps is not declared anywhere in this method as shown, so the
// assertions below look like they belong to a different test - confirm before relying on them.
assertEquals(4, inOutMaps.size());
assertEquals(inOutMaps.get(0).inputIndex, 0);
assertEquals(inOutMaps.get(0).outputIndex, 0);
assertEquals(inOutMaps.get(1).inputIndex, 2);
assertEquals(inOutMaps.get(1).outputIndex, 1);
assertEquals(inOutMaps.get(2).inputIndex, 1);
assertEquals(inOutMaps.get(2).outputIndex, 2);
assertEquals(inOutMaps.get(3).inputIndex, 3);
assertEquals(inOutMaps.get(3).outputIndex, 3);
}
}

View File

@ -5,7 +5,6 @@
*/
package org.elasticsearch.xpack.ml.job.process.autodetect.writer;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.ml.job.config.AnalysisConfig;
import org.elasticsearch.xpack.ml.job.process.autodetect.state.DataCounts;
@ -14,9 +13,6 @@ import org.elasticsearch.xpack.ml.job.config.DataDescription.DataFormat;
import org.elasticsearch.xpack.ml.job.config.Detector;
import org.elasticsearch.xpack.ml.job.process.autodetect.AutodetectProcess;
import org.elasticsearch.xpack.ml.job.process.DataCountsReporter;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfigs;
import org.elasticsearch.xpack.ml.job.config.transform.TransformType;
import org.junit.Before;
import org.mockito.Mockito;
import org.mockito.invocation.InvocationOnMock;
@ -43,11 +39,9 @@ import static org.mockito.Mockito.when;
public class CsvDataToProcessWriterTests extends ESTestCase {
private AutodetectProcess autodetectProcess;
private List<TransformConfig> transforms;
private DataDescription.Builder dataDescription;
private AnalysisConfig analysisConfig;
private DataCountsReporter dataCountsReporter;
private Logger jobLogger;
private List<String[]> writtenRecords;
@ -55,7 +49,6 @@ public class CsvDataToProcessWriterTests extends ESTestCase {
public void setUpMocks() throws IOException {
autodetectProcess = Mockito.mock(AutodetectProcess.class);
dataCountsReporter = Mockito.mock(DataCountsReporter.class);
jobLogger = Mockito.mock(Logger.class);
writtenRecords = new ArrayList<>();
doAnswer(new Answer<Void>() {
@ -68,8 +61,6 @@ public class CsvDataToProcessWriterTests extends ESTestCase {
}
}).when(autodetectProcess).writeRecord(any(String[].class));
transforms = new ArrayList<>();
dataDescription = new DataDescription.Builder();
dataDescription.setFieldDelimiter(',');
dataDescription.setFormat(DataFormat.DELIMITED);
@ -79,8 +70,7 @@ public class CsvDataToProcessWriterTests extends ESTestCase {
analysisConfig = new AnalysisConfig.Builder(Arrays.asList(detector)).build();
}
public void testWrite_GivenTimeFormatIsEpochAndDataIsValid()
throws IOException {
public void testWrite_GivenTimeFormatIsEpochAndDataIsValid() throws IOException {
StringBuilder input = new StringBuilder();
input.append("time,metric,value\n");
input.append("1,foo,1.0\n");
@ -101,40 +91,7 @@ public class CsvDataToProcessWriterTests extends ESTestCase {
verify(dataCountsReporter).finishReporting();
}
/**
 * Runs an uppercase transform from "value" to "transformed", with the
 * detector re-pointed at the transformed field, over CSV rows in which the
 * transform's input is populated in the first record and empty in the second.
 */
public void testWrite_GivenTransformAndEmptyField() throws IOException {
    TransformConfig uppercase = new TransformConfig("uppercase");
    uppercase.setInputs(Arrays.asList("value"));
    uppercase.setOutputs(Arrays.asList("transformed"));
    transforms.add(uppercase);

    // Swap the configured detector for a copy that analyses the transform's output
    Detector.Builder retargeted = new Detector.Builder(analysisConfig.getDetectors().get(0));
    retargeted.setFieldName("transformed");
    analysisConfig.getDetectors().set(0, retargeted.build());

    String csv = "time,metric,value\n"
            + "1,,foo\n"
            + "2,,\n";
    InputStream csvStream = createInputStream(csv);

    CsvDataToProcessWriter writer = createWriter();
    writer.writeHeader();
    writer.write(csvStream);
    verify(dataCountsReporter, times(1)).startNewIncrementalCount();

    // The last element of every record is the control field
    List<String[]> expected = Arrays.asList(
            new String[] { "time", "transformed", "." },
            new String[] { "1", "FOO", "" },
            new String[] { "2", "", "" });
    assertWrittenRecordsEqualTo(expected);

    verify(dataCountsReporter).finishReporting();
}
public void testWrite_GivenTimeFormatIsEpochAndTimestampsAreOutOfOrder()
throws IOException {
public void testWrite_GivenTimeFormatIsEpochAndTimestampsAreOutOfOrder() throws IOException {
StringBuilder input = new StringBuilder();
input.append("time,metric,value\n");
input.append("3,foo,3.0\n");
@ -157,8 +114,7 @@ public class CsvDataToProcessWriterTests extends ESTestCase {
verify(dataCountsReporter).finishReporting();
}
public void testWrite_GivenTimeFormatIsEpochAndAllRecordsAreOutOfOrder()
throws IOException {
public void testWrite_GivenTimeFormatIsEpochAndAllRecordsAreOutOfOrder() throws IOException {
StringBuilder input = new StringBuilder();
input.append("time,metric,value\n");
input.append("1,foo,1.0\n");
@ -182,8 +138,7 @@ public class CsvDataToProcessWriterTests extends ESTestCase {
verify(dataCountsReporter).finishReporting();
}
public void testWrite_GivenTimeFormatIsEpochAndSomeTimestampsWithinLatencySomeOutOfOrder()
throws IOException {
public void testWrite_GivenTimeFormatIsEpochAndSomeTimestampsWithinLatencySomeOutOfOrder() throws IOException {
AnalysisConfig.Builder builder = new AnalysisConfig.Builder(Arrays.asList(new Detector.Builder("metric", "value").build()));
builder.setLatency(2L);
analysisConfig = builder.build();
@ -216,8 +171,7 @@ public class CsvDataToProcessWriterTests extends ESTestCase {
verify(dataCountsReporter).finishReporting();
}
public void testWrite_NullByte()
throws IOException {
public void testWrite_NullByte() throws IOException {
AnalysisConfig.Builder builder = new AnalysisConfig.Builder(Arrays.asList(new Detector.Builder("metric", "value").build()));
builder.setLatency(0L);
analysisConfig = builder.build();
@ -225,7 +179,7 @@ public class CsvDataToProcessWriterTests extends ESTestCase {
StringBuilder input = new StringBuilder();
input.append("metric,value,time\n");
input.append("foo,4.0,1\n");
input.append("\0"); // the csv reader skips over this line
input.append("\0"); // the csv reader treats this as a line (even though it doesn't end with \n) and skips over it
input.append("foo,5.0,2\n");
input.append("foo,3.0,3\n");
input.append("bar,4.0,4\n");
@ -245,7 +199,7 @@ public class CsvDataToProcessWriterTests extends ESTestCase {
expectedRecords.add(new String[] { "4", "4.0", "" });
assertWrittenRecordsEqualTo(expectedRecords);
verify(dataCountsReporter, times(1)).reportMissingField();
verify(dataCountsReporter, times(2)).reportMissingField();
verify(dataCountsReporter, times(1)).reportRecordWritten(2, 1000);
verify(dataCountsReporter, times(1)).reportRecordWritten(2, 2000);
verify(dataCountsReporter, times(1)).reportRecordWritten(2, 3000);
@ -270,83 +224,7 @@ public class CsvDataToProcessWriterTests extends ESTestCase {
assertEquals(0L, counts.getInputRecordCount());
}
/**
 * The time field "datetime" is not present in the CSV input; it is produced
 * by a concat transform over the "date" and "time-of-day" columns and then
 * parsed with the configured time format.
 */
public void testWrite_GivenDateTimeFieldIsOutputOfTransform() throws IOException {
    TransformConfig concat = new TransformConfig("concat");
    concat.setInputs(Arrays.asList("date", "time-of-day"));
    concat.setOutputs(Arrays.asList("datetime"));
    transforms.add(concat);

    // Replace the default description with one whose time field is the transform's output
    DataDescription.Builder description = new DataDescription.Builder();
    description.setFieldDelimiter(',');
    description.setTimeField("datetime");
    description.setFormat(DataFormat.DELIMITED);
    description.setTimeFormat("yyyy-MM-ddHH:mm:ssX");
    dataDescription = description;

    CsvDataToProcessWriter writer = createWriter();
    writer.writeHeader();

    String csv = "date,time-of-day,metric,value\n"
            + "1970-01-01,00:00:01Z,foo,5.0\n"
            + "1970-01-01,00:00:02Z,foo,6.0\n";
    writer.write(createInputStream(csv));
    verify(dataCountsReporter, times(1)).startNewIncrementalCount();

    // The last element of every record is the control field
    List<String[]> expected = Arrays.asList(
            new String[] { "datetime", "value", "." },
            new String[] { "1", "5.0", "" },
            new String[] { "2", "6.0", "" });
    assertWrittenRecordsEqualTo(expected);

    verify(dataCountsReporter).finishReporting();
}
/**
 * Two transforms are chained - concat joins "dns1" and "dns2" into "dns",
 * uppercase turns "dns" into "dns_upper" - but are registered with the
 * consumer first. The written records must still carry the upper-cased,
 * concatenated value.
 */
public void testWrite_GivenChainedTransforms_SortsByDependencies() throws IOException {
    TransformConfig toUpper = new TransformConfig(TransformType.Names.UPPERCASE_NAME);
    toUpper.setInputs(Arrays.asList("dns"));
    toUpper.setOutputs(Arrays.asList("dns_upper"));

    TransformConfig joinDns = new TransformConfig(TransformType.Names.CONCAT_NAME);
    joinDns.setInputs(Arrays.asList("dns1", "dns2"));
    joinDns.setArguments(Arrays.asList("."));
    joinDns.setOutputs(Arrays.asList("dns"));

    // Registered consumer-first: uppercase reads what concat writes
    transforms.add(toUpper);
    transforms.add(joinDns);

    Detector.Builder byUpperDns = new Detector.Builder("metric", "value");
    byUpperDns.setByFieldName("dns_upper");
    analysisConfig = new AnalysisConfig.Builder(Arrays.asList(byUpperDns.build())).build();

    String csv = "time,dns1,dns2,value\n"
            + "1,www,foo.com,1.0\n"
            + "2,www,bar.com,2.0\n";
    InputStream csvStream = createInputStream(csv);

    CsvDataToProcessWriter writer = createWriter();
    writer.writeHeader();
    writer.write(csvStream);
    verify(dataCountsReporter, times(1)).startNewIncrementalCount();

    // The last element of every record is the control field
    List<String[]> expected = Arrays.asList(
            new String[] { "time", "dns_upper", "value", "." },
            new String[] { "1", "WWW.FOO.COM", "1.0", "" },
            new String[] { "2", "WWW.BAR.COM", "2.0", "" });
    assertWrittenRecordsEqualTo(expected);

    verify(dataCountsReporter).finishReporting();
}
public void testWrite_GivenMisplacedQuoteMakesRecordExtendOverTooManyLines()
throws IOException {
public void testWrite_GivenMisplacedQuoteMakesRecordExtendOverTooManyLines() throws IOException {
StringBuilder input = new StringBuilder();
input.append("time,metric,value\n");
@ -372,7 +250,7 @@ public class CsvDataToProcessWriterTests extends ESTestCase {
private CsvDataToProcessWriter createWriter() {
return new CsvDataToProcessWriter(true, autodetectProcess, dataDescription.build(), analysisConfig,
new TransformConfigs(transforms), dataCountsReporter, jobLogger);
dataCountsReporter);
}
private void assertWrittenRecordsEqualTo(List<String[]> expectedRecords) {

View File

@ -5,14 +5,12 @@
*/
package org.elasticsearch.xpack.ml.job.process.autodetect.writer;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.ml.job.config.AnalysisConfig;
import org.elasticsearch.xpack.ml.job.config.DataDescription;
import org.elasticsearch.xpack.ml.job.config.DataDescription.DataFormat;
import org.elasticsearch.xpack.ml.job.process.autodetect.AutodetectProcess;
import org.elasticsearch.xpack.ml.job.process.DataCountsReporter;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfigs;
import static org.mockito.Mockito.mock;
@ -31,15 +29,8 @@ public class DataToProcessWriterFactoryTests extends ESTestCase {
assertTrue(createWriter(dataDescription.build()) instanceof CsvDataToProcessWriter);
}
/** The factory is expected to hand back a single-line writer for the SINGLE_LINE data format. */
public void testCreate_GivenDataFormatIsSingleLine() {
    DataDescription.Builder singleLine = new DataDescription.Builder();
    singleLine.setFormat(DataFormat.SINGLE_LINE);

    DataToProcessWriter created = createWriter(singleLine.build());

    assertTrue(created instanceof SingleLineDataToProcessWriter);
}
private static DataToProcessWriter createWriter(DataDescription dataDescription) {
return DataToProcessWriterFactory.create(true, mock(AutodetectProcess.class), dataDescription,
mock(AnalysisConfig.class), mock(TransformConfigs.class), mock(DataCountsReporter.class), mock(Logger.class));
mock(AnalysisConfig.class), mock(DataCountsReporter.class));
}
}

View File

@ -1,141 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.job.process.autodetect.writer;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.ml.job.config.AnalysisConfig;
import org.elasticsearch.xpack.ml.job.config.DataDescription;
import org.elasticsearch.xpack.ml.job.config.DataDescription.DataFormat;
import org.elasticsearch.xpack.ml.job.config.Detector;
import org.elasticsearch.xpack.ml.job.process.DataCountsReporter;
import org.elasticsearch.xpack.ml.job.process.autodetect.AutodetectProcess;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfigs;
import org.elasticsearch.xpack.ml.job.config.transform.TransformType;
import org.junit.Before;
import org.mockito.Mockito;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import static org.mockito.Matchers.any;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.verify;
/**
 * Checks that a concat transform over "host" and "metric" is applied to the
 * records handed to the (mocked) autodetect process, for both delimited and
 * JSON input.
 */
public class DataWithTransformsToProcessWriterTests extends ESTestCase {
    private AutodetectProcess autodetectProcess;
    private DataCountsReporter dataCountsReporter;
    private Logger logger;

    /** Copies of every record passed to the mocked process, in call order. */
    private List<String[]> writtenRecords;

    @Before
    public void setUpMocks() throws IOException {
        autodetectProcess = Mockito.mock(AutodetectProcess.class);
        dataCountsReporter = Mockito.mock(DataCountsReporter.class);
        logger = Mockito.mock(Logger.class);
        writtenRecords = new ArrayList<>();

        // Store a copy of each written record rather than the array instance passed in
        doAnswer(new Answer<Void>() {
            @Override
            public Void answer(InvocationOnMock invocation) throws Throwable {
                String[] written = (String[]) invocation.getArguments()[0];
                writtenRecords.add(Arrays.copyOf(written, written.length));
                return null;
            }
        }).when(autodetectProcess).writeRecord(any(String[].class));
    }

    public void testCsvWriteWithConcat() throws IOException {
        String csv = "time,host,metric,value\n"
                + "1,hostA,foo,3.0\n"
                + "2,hostB,bar,2.0\n"
                + "2,hostA,bar,2.0\n";
        InputStream csvStream = createInputStream(csv);

        AbstractDataToProcessWriter writer = createWriter(true);
        writer.writeHeader();
        writer.write(csvStream);

        assertWrittenRecordsEqualTo(expectedConcatRecords());
        verify(dataCountsReporter).finishReporting();
    }

    public void testJsonWriteWithConcat() throws IOException {
        String json = "{\"time\" : 1, \"host\" : \"hostA\", \"metric\" : \"foo\", \"value\" : 3.0}\n"
                + "{\"time\" : 2, \"host\" : \"hostB\", \"metric\" : \"bar\", \"value\" : 2.0}\n"
                + "{\"time\" : 2, \"host\" : \"hostA\", \"metric\" : \"bar\", \"value\" : 2.0}\n";
        InputStream jsonStream = createInputStream(json);

        AbstractDataToProcessWriter writer = createWriter(false);
        writer.writeHeader();
        writer.write(jsonStream);

        assertWrittenRecordsEqualTo(expectedConcatRecords());
        verify(dataCountsReporter).finishReporting();
    }

    /** Header plus three data records; the last element of each is the control field. */
    private static List<String[]> expectedConcatRecords() {
        List<String[]> expected = new ArrayList<>();
        expected.add(new String[]{"time", "concat", "value", "."});
        expected.add(new String[]{"1", "hostAfoo", "3.0", ""});
        expected.add(new String[]{"2", "hostBbar", "2.0", ""});
        expected.add(new String[]{"2", "hostAbar", "2.0", ""});
        return expected;
    }

    private static InputStream createInputStream(String input) {
        byte[] utf8 = input.getBytes(StandardCharsets.UTF_8);
        return new ByteArrayInputStream(utf8);
    }

    /**
     * Creates a writer with an epoch time format, a detector keyed by the
     * "concat" field and a single concat transform over "host" and "metric".
     *
     * @param doCsv {@code true} for a delimited (CSV) writer, {@code false} for a JSON writer
     */
    private AbstractDataToProcessWriter createWriter(boolean doCsv) {
        DataDescription.Builder description = new DataDescription.Builder();
        description.setFieldDelimiter(',');
        description.setFormat(doCsv ? DataFormat.DELIMITED : DataFormat.JSON);
        description.setTimeFormat(DataDescription.EPOCH);

        Detector.Builder byConcat = new Detector.Builder("metric", "value");
        byConcat.setByFieldName("concat");
        AnalysisConfig config = new AnalysisConfig.Builder(Arrays.asList(byConcat.build())).build();

        TransformConfig concat = new TransformConfig(TransformType.Names.CONCAT_NAME);
        concat.setInputs(Arrays.asList("host", "metric"));
        TransformConfigs transformConfigs = new TransformConfigs(Arrays.asList(concat));

        if (!doCsv) {
            return new JsonDataToProcessWriter(true, autodetectProcess, description.build(), config, transformConfigs,
                    dataCountsReporter, logger);
        }
        return new CsvDataToProcessWriter(true, autodetectProcess, description.build(), config, transformConfigs,
                dataCountsReporter, logger);
    }

    private void assertWrittenRecordsEqualTo(List<String[]> expectedRecords) {
        assertEquals(expectedRecords.size(), writtenRecords.size());
        int index = 0;
        for (String[] expectedRecord : expectedRecords) {
            assertArrayEquals(expectedRecord, writtenRecords.get(index));
            index++;
        }
    }
}

View File

@ -0,0 +1,25 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.job.process.autodetect.writer;
import org.elasticsearch.test.ESTestCase;
/**
 * Tests for {@link DateFormatDateTransformer} covering one timestamp that
 * matches the configured format and one that does not.
 */
public class DateFormatDateTransformerTests extends ESTestCase {

    private static final String TIME_FORMAT = "yyyy-MM-dd HH:mm:ssXXX";

    public void testTransform_GivenValidTimestamp() throws CannotParseTimestampException {
        DateFormatDateTransformer underTest = new DateFormatDateTransformer(TIME_FORMAT);

        assertEquals(1388534400000L, underTest.transform("2014-01-01 00:00:00Z"));
    }

    public void testTransform_GivenInvalidTimestamp() throws CannotParseTimestampException {
        DateFormatDateTransformer underTest = new DateFormatDateTransformer(TIME_FORMAT);

        CannotParseTimestampException thrown =
                expectThrows(CannotParseTimestampException.class, () -> underTest.transform("invalid"));

        // The message is expected to quote both the rejected value and the format string
        assertEquals("Cannot parse date 'invalid' with format string 'yyyy-MM-dd HH:mm:ssXXX'", thrown.getMessage());
    }
}

View File

@ -0,0 +1,31 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.job.process.autodetect.writer;
import org.elasticsearch.test.ESTestCase;
/**
 * Tests for {@link DoubleDateTransformer}: the same input "1000" with each
 * value of the constructor flag, plus an input that is not a number.
 */
public class DoubleDateTransformerTests extends ESTestCase {

    public void testTransform_GivenTimestampIsNotMilliseconds() throws CannotParseTimestampException {
        DoubleDateTransformer underTest = new DoubleDateTransformer(false);

        // With the flag false, "1000" is expected to come back as 1000000
        assertEquals(1000000, underTest.transform("1000"));
    }

    public void testTransform_GivenTimestampIsMilliseconds() throws CannotParseTimestampException {
        DoubleDateTransformer underTest = new DoubleDateTransformer(true);

        // With the flag true, "1000" is expected to come back unchanged
        assertEquals(1000, underTest.transform("1000"));
    }

    public void testTransform_GivenTimestampIsNotValidDouble() throws CannotParseTimestampException {
        DoubleDateTransformer underTest = new DoubleDateTransformer(false);

        CannotParseTimestampException thrown =
                expectThrows(CannotParseTimestampException.class, () -> underTest.transform("invalid"));

        assertEquals("Cannot parse timestamp 'invalid' as epoch value", thrown.getMessage());
    }
}

View File

@ -5,7 +5,6 @@
*/
package org.elasticsearch.xpack.ml.job.process.autodetect.writer;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.ml.job.config.AnalysisConfig;
@ -14,9 +13,6 @@ import org.elasticsearch.xpack.ml.job.config.DataDescription.DataFormat;
import org.elasticsearch.xpack.ml.job.config.Detector;
import org.elasticsearch.xpack.ml.job.process.autodetect.AutodetectProcess;
import org.elasticsearch.xpack.ml.job.process.DataCountsReporter;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfigs;
import org.elasticsearch.xpack.ml.job.config.transform.TransformType;
import org.junit.Before;
import org.mockito.Mockito;
import org.mockito.invocation.InvocationOnMock;
@ -41,9 +37,7 @@ public class JsonDataToProcessWriterTests extends ESTestCase {
private AutodetectProcess autodetectProcess;
private DataCountsReporter dataCountsReporter;
private Logger logger;
private List<TransformConfig> transforms;
private DataDescription.Builder dataDescription;
private AnalysisConfig analysisConfig;
@ -53,7 +47,6 @@ public class JsonDataToProcessWriterTests extends ESTestCase {
public void setUpMocks() throws IOException {
autodetectProcess = Mockito.mock(AutodetectProcess.class);
dataCountsReporter = Mockito.mock(DataCountsReporter.class);
logger = Mockito.mock(Logger.class);
writtenRecords = new ArrayList<>();
doAnswer(new Answer<Void>() {
@ -66,7 +59,6 @@ public class JsonDataToProcessWriterTests extends ESTestCase {
}
}).when(autodetectProcess).writeRecord(any(String[].class));
transforms = new ArrayList<>();
dataDescription = new DataDescription.Builder();
dataDescription.setFormat(DataFormat.JSON);
@ -284,85 +276,13 @@ public class JsonDataToProcessWriterTests extends ESTestCase {
verify(dataCountsReporter).finishReporting();
}
/**
 * The time field "datetime" is absent from the JSON documents; it is produced
 * by a concat transform over "date" and "time-of-day" and then parsed with the
 * configured time format.
 */
public void testWrite_GivenDateTimeFieldIsOutputOfTransform() throws Exception {
    TransformConfig concat = new TransformConfig("concat");
    concat.setInputs(Arrays.asList("date", "time-of-day"));
    concat.setOutputs(Arrays.asList("datetime"));
    transforms.add(concat);

    // NOTE(review): the format is set to DELIMITED although this test feeds JSON documents - confirm intended
    DataDescription.Builder description = new DataDescription.Builder();
    description.setFieldDelimiter(',');
    description.setTimeField("datetime");
    description.setFormat(DataFormat.DELIMITED);
    description.setTimeFormat("yyyy-MM-ddHH:mm:ssX");
    dataDescription = description;

    JsonDataToProcessWriter writer = createWriter();
    writer.writeHeader();

    // Two documents back to back, with no separator between them
    String json = "{\"date\":\"1970-01-01\", \"time-of-day\":\"00:00:01Z\", \"value\":\"5.0\"}"
            + "{\"date\":\"1970-01-01\", \"time-of-day\":\"00:00:02Z\", \"value\":\"6.0\"}";
    writer.write(createInputStream(json));
    verify(dataCountsReporter, times(1)).startNewIncrementalCount();

    // The last element of every record is the control field
    List<String[]> expected = Arrays.asList(
            new String[]{"datetime", "value", "."},
            new String[]{"1", "5.0", ""},
            new String[]{"2", "6.0", ""});
    assertWrittenRecordsEqualTo(expected);

    verify(dataCountsReporter).finishReporting();
}
/**
 * Two transforms are chained - concat joins "dns1" and "dns2" into "dns",
 * uppercase turns "dns" into "dns_upper" - but are registered with the
 * consumer first. The records written from the JSON input must still carry
 * the upper-cased, concatenated value.
 */
public void testWrite_GivenChainedTransforms_SortsByDependencies() throws Exception {
    TransformConfig toUpper = new TransformConfig(TransformType.Names.UPPERCASE_NAME);
    toUpper.setInputs(Arrays.asList("dns"));
    toUpper.setOutputs(Arrays.asList("dns_upper"));

    TransformConfig joinDns = new TransformConfig(TransformType.Names.CONCAT_NAME);
    joinDns.setInputs(Arrays.asList("dns1", "dns2"));
    joinDns.setArguments(Arrays.asList("."));
    joinDns.setOutputs(Arrays.asList("dns"));

    // Registered consumer-first: uppercase reads what concat writes
    transforms.add(toUpper);
    transforms.add(joinDns);

    Detector.Builder byUpperDns = new Detector.Builder("metric", "value");
    byUpperDns.setByFieldName("dns_upper");
    analysisConfig = new AnalysisConfig.Builder(Arrays.asList(byUpperDns.build())).build();

    // Two documents back to back, with no separator between them
    String json = "{\"time\":\"1\", \"dns1\":\"www\", \"dns2\":\"foo.com\", \"value\":\"1.0\"}"
            + "{\"time\":\"2\", \"dns1\":\"www\", \"dns2\":\"bar.com\", \"value\":\"2.0\"}";
    InputStream jsonStream = createInputStream(json);

    JsonDataToProcessWriter writer = createWriter();
    writer.writeHeader();
    writer.write(jsonStream);
    verify(dataCountsReporter, times(1)).startNewIncrementalCount();

    // The last element of every record is the control field
    List<String[]> expected = Arrays.asList(
            new String[]{"time", "dns_upper", "value", "."},
            new String[]{"1", "WWW.FOO.COM", "1.0", ""},
            new String[]{"2", "WWW.BAR.COM", "2.0", ""});
    assertWrittenRecordsEqualTo(expected);

    verify(dataCountsReporter).finishReporting();
}
/**
 * Wraps the UTF-8 encoding of {@code input} in an in-memory stream.
 *
 * @param input the text to expose as a stream
 * @return a stream over the UTF-8 bytes of {@code input}
 */
private static InputStream createInputStream(String input) {
    byte[] utf8 = input.getBytes(StandardCharsets.UTF_8);
    return new ByteArrayInputStream(utf8);
}
private JsonDataToProcessWriter createWriter() {
return new JsonDataToProcessWriter(true, autodetectProcess, dataDescription.build(), analysisConfig,
new TransformConfigs(transforms), dataCountsReporter, logger);
dataCountsReporter);
}
private void assertWrittenRecordsEqualTo(List<String[]> expectedRecords) {

View File

@ -1,181 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.job.process.autodetect.writer;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.ml.job.config.AnalysisConfig;
import org.elasticsearch.xpack.ml.job.config.DataDescription;
import org.elasticsearch.xpack.ml.job.config.DataDescription.DataFormat;
import org.elasticsearch.xpack.ml.job.config.Detector;
import org.elasticsearch.xpack.ml.job.process.DataCountsReporter;
import org.elasticsearch.xpack.ml.job.process.autodetect.AutodetectProcess;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfigs;
import org.junit.Before;
import org.mockito.Mockito;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import static org.mockito.Matchers.any;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.verifyNoMoreInteractions;
public class SingleLineDataToProcessWriterTests extends ESTestCase {
private AutodetectProcess autodetectProcess;
private DataDescription.Builder dataDescription;
private AnalysisConfig analysisConfig;
private List<TransformConfig> transformConfigs;
private DataCountsReporter dataCountsReporter;
private List<String[]> writtenRecords;
@Before
public void setUpMocks() throws IOException {
autodetectProcess = Mockito.mock(AutodetectProcess.class);
dataCountsReporter = Mockito.mock(DataCountsReporter.class);
writtenRecords = new ArrayList<>();
doAnswer(new Answer<Void>() {
@Override
public Void answer(InvocationOnMock invocation) throws Throwable {
String[] record = (String[]) invocation.getArguments()[0];
String[] copy = Arrays.copyOf(record, record.length);
writtenRecords.add(copy);
return null;
}
}).when(autodetectProcess).writeRecord(any(String[].class));
dataDescription = new DataDescription.Builder();
dataDescription.setFieldDelimiter(',');
dataDescription.setFormat(DataFormat.SINGLE_LINE);
dataDescription.setTimeFormat("yyyy-MM-dd HH:mm:ssX");
Detector.Builder detector = new Detector.Builder("count", null);
detector.setByFieldName("message");
analysisConfig = new AnalysisConfig.Builder(Arrays.asList(detector.build())).build();
transformConfigs = new ArrayList<>();
}
public void testWrite_GivenDataIsValid() throws Exception {
TransformConfig transformConfig = new TransformConfig("extract");
transformConfig.setInputs(Arrays.asList("raw"));
transformConfig.setOutputs(Arrays.asList("time", "message"));
transformConfig.setArguments(Arrays.asList("(.{20}) (.*)"));
transformConfigs.add(transformConfig);
StringBuilder input = new StringBuilder();
input.append("2015-04-29 10:00:00Z This is message 1\n");
input.append("2015-04-29 11:00:00Z This is message 2\r");
input.append("2015-04-29 12:00:00Z This is message 3\r\n");
InputStream inputStream = createInputStream(input.toString());
SingleLineDataToProcessWriter writer = createWriter();
writer.writeHeader();
writer.write(inputStream);
verify(dataCountsReporter, times(1)).getLatestRecordTime();
verify(dataCountsReporter, times(1)).startNewIncrementalCount();
verify(dataCountsReporter, times(1)).setAnalysedFieldsPerRecord(1);
verify(dataCountsReporter, times(1)).reportRecordWritten(1, 1430301600000L);
verify(dataCountsReporter, times(1)).reportRecordWritten(1, 1430305200000L);
verify(dataCountsReporter, times(1)).reportRecordWritten(1, 1430308800000L);
verify(dataCountsReporter, times(1)).incrementalStats();
List<String[]> expectedRecords = new ArrayList<>();
// The final field is the control field
expectedRecords.add(new String[]{"time", "message", "."});
expectedRecords.add(new String[]{"1430301600", "This is message 1", ""});
expectedRecords.add(new String[]{"1430305200", "This is message 2", ""});
expectedRecords.add(new String[]{"1430308800", "This is message 3", ""});
assertWrittenRecordsEqualTo(expectedRecords);
verify(dataCountsReporter).finishReporting();
verifyNoMoreInteractions(dataCountsReporter);
}
public void testWrite_GivenDataContainsInvalidRecords() throws Exception {
TransformConfig transformConfig = new TransformConfig("extract");
transformConfig.setInputs(Arrays.asList("raw"));
transformConfig.setOutputs(Arrays.asList("time", "message"));
transformConfig.setArguments(Arrays.asList("(.{20}) (.*)"));
transformConfigs.add(transformConfig);
StringBuilder input = new StringBuilder();
input.append("2015-04-29 10:00:00Z This is message 1\n");
input.append("No transform\n");
input.append("Transform can apply but no date to be parsed\n");
input.append("\n");
input.append("2015-04-29 12:00:00Z This is message 3\n");
InputStream inputStream = createInputStream(input.toString());
SingleLineDataToProcessWriter writer = createWriter();
writer.writeHeader();
writer.write(inputStream);
verify(dataCountsReporter, times(1)).getLatestRecordTime();
verify(dataCountsReporter, times(1)).startNewIncrementalCount();
verify(dataCountsReporter, times(1)).setAnalysedFieldsPerRecord(1);
verify(dataCountsReporter, times(1)).reportRecordWritten(1, 1430301600000L);
verify(dataCountsReporter, times(1)).reportRecordWritten(1, 1430308800000L);
verify(dataCountsReporter, times(3)).reportDateParseError(1);
verify(dataCountsReporter, times(1)).incrementalStats();
List<String[]> expectedRecords = new ArrayList<>();
// The final field is the control field
expectedRecords.add(new String[]{"time", "message", "."});
expectedRecords.add(new String[]{"1430301600", "This is message 1", ""});
expectedRecords.add(new String[]{"1430308800", "This is message 3", ""});
assertWrittenRecordsEqualTo(expectedRecords);
verify(dataCountsReporter).finishReporting();
verifyNoMoreInteractions(dataCountsReporter);
}
public void testWrite_GivenNoTransforms() throws Exception {
StringBuilder input = new StringBuilder();
input.append("2015-04-29 10:00:00Z This is message 1\n");
InputStream inputStream = createInputStream(input.toString());
SingleLineDataToProcessWriter writer = createWriter();
writer.writeHeader();
writer.write(inputStream);
verify(dataCountsReporter, times(1)).startNewIncrementalCount();
verify(dataCountsReporter, times(1)).setAnalysedFieldsPerRecord(1);
verify(dataCountsReporter, times(1)).reportDateParseError(1);
verify(dataCountsReporter, times(1)).incrementalStats();
List<String[]> expectedRecords = new ArrayList<>();
// The final field is the control field
expectedRecords.add(new String[]{"time", "message", "."});
assertWrittenRecordsEqualTo(expectedRecords);
verify(dataCountsReporter).getLatestRecordTime();
verify(dataCountsReporter).finishReporting();
verifyNoMoreInteractions(dataCountsReporter);
}
private static InputStream createInputStream(String input) {
return new ByteArrayInputStream(input.getBytes(StandardCharsets.UTF_8));
}
private SingleLineDataToProcessWriter createWriter() {
return new SingleLineDataToProcessWriter(true, autodetectProcess, dataDescription.build(),
analysisConfig, new TransformConfigs(transformConfigs), dataCountsReporter, Mockito.mock(Logger.class));
}
private void assertWrittenRecordsEqualTo(List<String[]> expectedRecords) {
assertEquals(expectedRecords.size(), writtenRecords.size());
for (int i = 0; i < expectedRecords.size(); i++) {
assertArrayEquals(expectedRecords.get(i), writtenRecords.get(i));
}
}
}

View File

@ -1,97 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms;
import static org.elasticsearch.xpack.ml.transforms.TransformTestUtils.createIndexArray;
import static org.mockito.Mockito.mock;
import java.util.List;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformIndex;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformResult;
/**
 * Tests for the {@link Concat} transform. Every test hands the transform a read/write area made
 * of three arrays - input (row 0), scratch (row 1) and output (row 2) - and a
 * {@link TransformIndex} addresses one cell as (row, position).
 */
public class ConcatTests extends ESTestCase {

    public void testMultipleInputs() throws TransformException {
        List<TransformIndex> reads = createIndexArray(
                new TransformIndex(0, 1), new TransformIndex(0, 2), new TransformIndex(0, 4));
        List<TransformIndex> writes = createIndexArray(new TransformIndex(2, 1));
        Concat concat = new Concat(reads, writes, mock(Logger.class));

        String[] output = new String[2];
        String[][] readWriteArea = {letters(), new String[0], output};

        assertEquals(TransformResult.OK, concat.transform(readWriteArea));
        // Only the addressed output cell is filled in.
        assertNull(output[0]);
        assertEquals("bce", output[1]);
    }

    public void testWithDelimiter() throws TransformException {
        List<TransformIndex> reads = createIndexArray(
                new TransformIndex(0, 1), new TransformIndex(0, 2), new TransformIndex(0, 4));
        List<TransformIndex> writes = createIndexArray(new TransformIndex(2, 1));
        Concat concat = new Concat("--", reads, writes, mock(Logger.class));

        String[] output = new String[2];
        String[][] readWriteArea = {letters(), new String[0], output};

        assertEquals(TransformResult.OK, concat.transform(readWriteArea));
        assertNull(output[0]);
        assertEquals("b--c--e", output[1]);
    }

    public void testZeroInputs() throws TransformException {
        List<TransformIndex> reads = createIndexArray();
        List<TransformIndex> writes = createIndexArray(new TransformIndex(2, 0));
        Concat concat = new Concat(reads, writes, mock(Logger.class));

        String[] output = new String[1];
        String[][] readWriteArea = {letters(), new String[0], output};

        // With nothing to read the result is the empty string rather than null.
        assertEquals(TransformResult.OK, concat.transform(readWriteArea));
        assertEquals("", output[0]);
    }

    public void testNoOutput() throws TransformException {
        List<TransformIndex> reads = createIndexArray(
                new TransformIndex(0, 1), new TransformIndex(0, 2), new TransformIndex(0, 3));
        List<TransformIndex> writes = createIndexArray();
        Concat concat = new Concat(reads, writes, mock(Logger.class));

        String[] output = new String[1];
        String[][] readWriteArea = {letters(), new String[0], output};

        assertEquals(TransformResult.FAIL, concat.transform(readWriteArea));
        assertNull(output[0]);
    }

    public void testScratchAreaInputs() throws TransformException {
        // Two cells are read from the input row and two from the scratch row; the result goes
        // back into the scratch row.
        List<TransformIndex> reads = createIndexArray(
                new TransformIndex(0, 1), new TransformIndex(0, 2),
                new TransformIndex(1, 0), new TransformIndex(1, 2));
        List<TransformIndex> writes = createIndexArray(new TransformIndex(1, 4));
        Concat concat = new Concat(reads, writes, mock(Logger.class));

        String[] scratch = {"a", "b", "c", "d", null};
        String[][] readWriteArea = {letters(), scratch, new String[1]};

        assertEquals(TransformResult.OK, concat.transform(readWriteArea));
        assertEquals("bcac", scratch[4]);
    }

    /** Returns a fresh copy of the input row shared by all tests. */
    private static String[] letters() {
        return new String[]{"a", "b", "c", "d", "e"};
    }
}

View File

@ -1,272 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
import org.elasticsearch.xpack.ml.job.config.transform.TransformType;
/**
 * Tests for {@link DependencySorter}: finding the transforms that a field (or list of fields)
 * depends on, and ordering transforms so that a transform producing a field comes before the
 * transforms consuming it.
 */
public class DependencySorterTests extends ESTestCase {

    public void testFindDependencies_GivenNoDependencies() {
        List<TransformConfig> transforms = new ArrayList<>();
        List<TransformConfig> deps = DependencySorter.findDependencies("metricField", transforms);
        assertEquals(0, deps.size());
    }

    public void testFindDependencies_Given1Dependency() {
        List<TransformConfig> transforms = new ArrayList<>();

        TransformConfig concat = createConcatTransform(Arrays.asList("ina", "inb"), Arrays.asList("ab"));
        transforms.add(concat);

        TransformConfig concat2 = createConcatTransform(Arrays.asList("inc", "ind"), Arrays.asList("cd"));
        transforms.add(concat2);

        List<TransformConfig> deps = DependencySorter.findDependencies("cd", transforms);
        assertEquals(1, deps.size());
        // Expected value first, so that a failure message reports the values the right way round.
        assertEquals(concat2, deps.get(0));
    }

    public void testFindDependencies_Given2Dependencies() {
        List<TransformConfig> transforms = new ArrayList<>();

        TransformConfig concat = createConcatTransform(Arrays.asList("ina", "inb"), Arrays.asList("ab"));
        transforms.add(concat);

        TransformConfig concat2 = createConcatTransform(Arrays.asList("inc", "ind"), Arrays.asList("cd"));
        transforms.add(concat2);

        List<TransformConfig> deps = DependencySorter.findDependencies(Arrays.asList("cd", "ab"), transforms);
        assertEquals(2, deps.size());
        assertTrue(deps.contains(concat));
        assertTrue(deps.contains(concat2));
    }

    public void testFindDependencies_GivenChainOfDependencies() {
        List<TransformConfig> transforms = new ArrayList<>();

        TransformConfig concat = createConcatTransform(Arrays.asList("ina", "inb"), Arrays.asList("ab"));
        transforms.add(concat);

        // Consumes the "ab" output of the first transform.
        TransformConfig dependentConcat = createConcatTransform(Arrays.asList("ab", "inc"), Arrays.asList("abc"));
        transforms.add(dependentConcat);

        List<TransformConfig> deps = DependencySorter.findDependencies("abc", transforms);
        assertEquals(2, deps.size());
        assertEquals(concat, deps.get(0));
        assertEquals(dependentConcat, deps.get(1));
    }

    /**
     * 2 separate inputs with chain of dependencies one of which is shared
     */
    public void testFindDependencies_Given2ChainsAndSharedDependencys() {
        List<TransformConfig> transforms = new ArrayList<>();

        TransformConfig dependentConcat1 = createConcatTransform(Arrays.asList("ab", "inc"), Arrays.asList("abc"));
        transforms.add(dependentConcat1);

        TransformConfig dependentConcat2 = createConcatTransform(Arrays.asList("ab", "ind"), Arrays.asList("abd"));
        transforms.add(dependentConcat2);

        // The shared producer of "ab" is deliberately added last.
        TransformConfig concat = createConcatTransform(Arrays.asList("ina", "inb"), Arrays.asList("ab"));
        transforms.add(concat);

        List<TransformConfig> deps = DependencySorter.findDependencies(Arrays.asList("abc", "abd"), transforms);
        assertEquals(3, deps.size());
        assertEquals(concat, deps.get(0));
        assertEquals(dependentConcat1, deps.get(1));
        assertEquals(dependentConcat2, deps.get(2));
    }

    public void testSortByDependency_NoDependencies() {
        List<TransformConfig> transforms = new ArrayList<>();

        TransformConfig concat = createConcatTransform(Arrays.asList("ina", "inb"), Arrays.asList("ab"));
        transforms.add(concat);

        TransformConfig hrd1 = createHrdTransform(Arrays.asList("dns"), Arrays.asList("subdomain", "hrd"));
        transforms.add(hrd1);

        TransformConfig hrd2 = createHrdTransform(Arrays.asList("dns2"), Arrays.asList("subdomain"));
        transforms.add(hrd2);

        List<TransformConfig> orderedDeps = DependencySorter.sortByDependency(transforms);
        assertEquals(transforms.size(), orderedDeps.size());
    }

    public void testSortByDependency_SingleChain() {
        List<TransformConfig> transforms = new ArrayList<>();

        // Chain of 3 dependencies
        TransformConfig chain1Hrd = createHrdTransform(Arrays.asList("ab"), Arrays.asList("subdomain", "hrd"));
        transforms.add(chain1Hrd);

        TransformConfig chain1Concat = createConcatTransform(Arrays.asList("ina", "inb"), Arrays.asList("ab"));
        transforms.add(chain1Concat);

        TransformConfig chain1Concat2 = createConcatTransform(Arrays.asList("subdomain", "port"), Arrays.asList());
        transforms.add(chain1Concat2);

        List<TransformConfig> orderedDeps = DependencySorter.sortByDependency(transforms);
        assertEquals(transforms.size(), orderedDeps.size());

        // assertEquals rather than assertTrue(index == n) so a failure reports the actual index.
        int chain1ConcatIndex = orderedDeps.indexOf(chain1Concat);
        assertEquals(0, chain1ConcatIndex);
        int chain1HrdIndex = orderedDeps.indexOf(chain1Hrd);
        assertEquals(1, chain1HrdIndex);
        int chain1Concat2Index = orderedDeps.indexOf(chain1Concat2);
        assertEquals(2, chain1Concat2Index);

        assertTrue(chain1ConcatIndex < chain1HrdIndex);
        assertTrue(chain1HrdIndex < chain1Concat2Index);
    }

    public void testSortByDependency_3ChainsInOrder() {
        List<TransformConfig> transforms = new ArrayList<>();

        // Chain of 1
        TransformConfig noChainHrd = createHrdTransform(Arrays.asList("dns"), Arrays.asList("subdomain"));
        transforms.add(noChainHrd);

        // Chain of 2 dependencies
        TransformConfig chain1Concat = createConcatTransform(Arrays.asList("ina", "inb"), Arrays.asList("ab"));
        transforms.add(chain1Concat);
        TransformConfig chain1Hrd = createHrdTransform(Arrays.asList("ab"), Arrays.asList("subdomain", "hrd"));
        transforms.add(chain1Hrd);

        // Chain of 2 dependencies
        TransformConfig chain2Concat2 = createConcatTransform(Arrays.asList("cd", "ine"), Arrays.asList("cde"));
        transforms.add(chain2Concat2);
        TransformConfig chain2Concat = createConcatTransform(Arrays.asList("inc", "ind"), Arrays.asList("cd"));
        transforms.add(chain2Concat);

        List<TransformConfig> orderedDeps = DependencySorter.sortByDependency(transforms);
        assertEquals(transforms.size(), orderedDeps.size());

        int chain1ConcatIndex = orderedDeps.indexOf(chain1Concat);
        assertTrue(chain1ConcatIndex >= 0);
        int chain1HrdIndex = orderedDeps.indexOf(chain1Hrd);
        assertTrue(chain1HrdIndex >= 1);
        assertTrue(chain1ConcatIndex < chain1HrdIndex);

        int chain2ConcatIndex = orderedDeps.indexOf(chain2Concat);
        assertTrue(chain2ConcatIndex >= 0);
        int chain2Concat2Index = orderedDeps.indexOf(chain2Concat2);
        assertTrue(chain2Concat2Index >= 1);
        assertTrue(chain2ConcatIndex < chain2Concat2Index);
    }

    public void testSortByDependency_3ChainsOutOfOrder() {
        List<TransformConfig> transforms = new ArrayList<>();

        TransformConfig chain1Hrd = createHrdTransform(Arrays.asList("ab"), Arrays.asList("subdomain", "hrd"));
        transforms.add(chain1Hrd);

        TransformConfig chain2Concat2 = createConcatTransform(Arrays.asList("cd", "ine"), Arrays.asList("cde"));
        transforms.add(chain2Concat2);

        TransformConfig chain1Concat = createConcatTransform(Arrays.asList("ina", "inb"), Arrays.asList("ab"));
        transforms.add(chain1Concat);

        TransformConfig noChainHrd = createHrdTransform(Arrays.asList("dns"), Arrays.asList("subdomain"));
        transforms.add(noChainHrd);

        TransformConfig chain2Concat = createConcatTransform(Arrays.asList("inc", "ind"), Arrays.asList("cd"));
        transforms.add(chain2Concat);

        List<TransformConfig> orderedDeps = DependencySorter.sortByDependency(transforms);
        assertEquals(transforms.size(), orderedDeps.size());

        int chain1ConcatIndex = orderedDeps.indexOf(chain1Concat);
        assertTrue(chain1ConcatIndex >= 0);
        int chain1HrdIndex = orderedDeps.indexOf(chain1Hrd);
        assertTrue(chain1HrdIndex >= 0);
        assertTrue(chain1ConcatIndex < chain1HrdIndex);

        int chain2ConcatIndex = orderedDeps.indexOf(chain2Concat);
        assertTrue(chain2ConcatIndex >= 0);
        int chain2Concat2Index = orderedDeps.indexOf(chain2Concat2);
        assertTrue(chain2Concat2Index >= 0);
        assertTrue(chain2ConcatIndex < chain2Concat2Index);
    }

    /** Creates a concat transform config with the given input and output field names. */
    private TransformConfig createConcatTransform(List<String> inputs, List<String> outputs) {
        return createTransform(TransformType.CONCAT, inputs, outputs);
    }

    /** Creates a domain-split transform config with the given input and output field names. */
    private TransformConfig createHrdTransform(List<String> inputs, List<String> outputs) {
        return createTransform(TransformType.DOMAIN_SPLIT, inputs, outputs);
    }

    /** Shared factory: a transform config of the given type with its inputs and outputs set. */
    private TransformConfig createTransform(TransformType type, List<String> inputs, List<String> outputs) {
        TransformConfig transform = new TransformConfig(type.prettyName());
        transform.setInputs(inputs);
        transform.setOutputs(outputs);
        return transform;
    }
}

View File

@ -1,115 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms;
import static org.elasticsearch.xpack.ml.transforms.TransformTestUtils.createIndexArray;
import static org.mockito.Mockito.mock;
import java.util.List;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.ml.job.config.Condition;
import org.elasticsearch.xpack.ml.job.config.Operator;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformIndex;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformResult;
/**
 * Tests for {@link ExcludeFilterNumeric}: for each comparison operator, values satisfying the
 * condition are expected to yield {@code EXCLUDE} and all other values {@code OK}.
 */
public class ExcludeFilterNumericTests extends ESTestCase {

    public void testEq() throws TransformException {
        ExcludeFilterNumeric filter = createTransform(Operator.EQ, "5.0");
        String[][] readWriteArea = singleInputArea("5");
        assertEquals(TransformResult.EXCLUDE, filter.transform(readWriteArea));

        readWriteArea[0][0] = "5.10000";
        assertEquals(TransformResult.OK, filter.transform(readWriteArea));
    }

    public void testGT() throws TransformException {
        ExcludeFilterNumeric filter = createTransform(Operator.GT, "10.000");
        String[][] readWriteArea = singleInputArea("100");
        assertEquals(TransformResult.EXCLUDE, filter.transform(readWriteArea));

        readWriteArea[0][0] = "1.0";
        assertEquals(TransformResult.OK, filter.transform(readWriteArea));
    }

    public void testGTE() throws TransformException {
        ExcludeFilterNumeric filter = createTransform(Operator.GTE, "10.000");
        String[][] readWriteArea = singleInputArea("100");
        assertEquals(TransformResult.EXCLUDE, filter.transform(readWriteArea));

        // The boundary value itself is excluded as well.
        readWriteArea[0][0] = "10";
        assertEquals(TransformResult.EXCLUDE, filter.transform(readWriteArea));

        readWriteArea[0][0] = "9.5";
        assertEquals(TransformResult.OK, filter.transform(readWriteArea));
    }

    public void testLT() throws TransformException {
        ExcludeFilterNumeric filter = createTransform(Operator.LT, "2000");
        String[][] readWriteArea = singleInputArea("100.2");
        assertEquals(TransformResult.EXCLUDE, filter.transform(readWriteArea));

        readWriteArea[0][0] = "2005.0000";
        assertEquals(TransformResult.OK, filter.transform(readWriteArea));
    }

    public void testLTE() throws TransformException {
        ExcludeFilterNumeric filter = createTransform(Operator.LTE, "2000");
        String[][] readWriteArea = singleInputArea("100.2");
        assertEquals(TransformResult.EXCLUDE, filter.transform(readWriteArea));

        // The boundary value itself is excluded as well.
        readWriteArea[0][0] = "2000.0000";
        assertEquals(TransformResult.EXCLUDE, filter.transform(readWriteArea));

        readWriteArea[0][0] = "9000.5";
        assertEquals(TransformResult.OK, filter.transform(readWriteArea));
    }

    /** Builds a read/write area whose input row holds one value; scratch and output are empty. */
    private static String[][] singleInputArea(String value) {
        String[] input = {value};
        String[] scratch = {};
        String[] output = {};
        return new String[][]{input, scratch, output};
    }

    /** Creates a filter applying the given condition to the first input field, writing nothing. */
    private ExcludeFilterNumeric createTransform(Operator op, String filterValue) {
        List<TransformIndex> firstInputField = createIndexArray(new TransformIndex(0, 0));
        List<TransformIndex> noOutputs = createIndexArray();
        return new ExcludeFilterNumeric(new Condition(op, filterValue), firstInputField, noOutputs, mock(Logger.class));
    }
}

View File

@ -1,116 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms;
import static org.elasticsearch.xpack.ml.transforms.TransformTestUtils.createIndexArray;
import static org.mockito.Mockito.mock;
import java.util.List;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.ml.job.config.Condition;
import org.elasticsearch.xpack.ml.job.config.Operator;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformIndex;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformResult;
/**
 * Tests for {@link ExcludeFilterRegex}: inputs matching the configured pattern are expected to
 * yield {@code EXCLUDE}, everything else {@code OK}.
 */
public class ExcludeFilterTests extends ESTestCase {

    public void testTransform_matches() throws TransformException {
        ExcludeFilterRegex filter = createFilter("cat", createIndexArray(new TransformIndex(0, 0)));

        String[][] readWriteArea = {new String[]{"cat"}, new String[0], new String[0]};

        assertEquals(TransformResult.EXCLUDE, filter.transform(readWriteArea));
    }

    public void testTransform_noMatches() throws TransformException {
        ExcludeFilterRegex filter = createFilter("boat", createIndexArray(new TransformIndex(0, 0)));

        String[][] readWriteArea = {new String[]{"cat"}, new String[0], new String[0]};

        assertEquals(TransformResult.OK, filter.transform(readWriteArea));
    }

    public void testTransform_matchesRegex() throws TransformException {
        ExcludeFilterRegex filter = createFilter("metric[0-9]+", createIndexArray(new TransformIndex(0, 0)));

        String[][] readWriteArea = {new String[]{"metric01"}, new String[0], new String[3]};
        assertEquals(TransformResult.EXCLUDE, filter.transform(readWriteArea));

        // A value with extra trailing characters is not excluded.
        readWriteArea[0] = new String[]{"metric02-A"};
        assertEquals(TransformResult.OK, filter.transform(readWriteArea));
    }

    public void testTransform_matchesMultipleInputs() throws TransformException {
        List<TransformIndex> threeInputFields = createIndexArray(
                new TransformIndex(0, 0),
                new TransformIndex(0, 1),
                new TransformIndex(0, 2));
        ExcludeFilterRegex filter = createFilter("boat", threeInputFields);

        // Only the last of the three inputs matches.
        String[][] readWriteArea = {new String[]{"cat", "hat", "boat"}, new String[0], new String[0]};

        assertEquals(TransformResult.EXCLUDE, filter.transform(readWriteArea));
    }

    public void testTransform() throws TransformException {
        // Negative lookahead: the pattern matches anything that does not start with "latency.total".
        ExcludeFilterRegex filter = createFilter("^(?!latency\\.total).*$", createIndexArray(new TransformIndex(0, 0)));

        String[][] readWriteArea = {new String[]{"utilization.total"}, new String[0], new String[3]};
        TransformResult result = filter.transform(readWriteArea);
        assertEquals(TransformResult.EXCLUDE, result);

        readWriteArea[0] = new String[]{"latency.total"};
        result = filter.transform(readWriteArea);
        assertEquals(TransformResult.OK, result);
    }

    /** Creates a regex exclude filter over the given read indexes with no write indexes. */
    private static ExcludeFilterRegex createFilter(String regex, List<TransformIndex> readIndexes) {
        List<TransformIndex> writeIndexes = createIndexArray();
        Condition matchCondition = new Condition(Operator.MATCH, regex);
        return new ExcludeFilterRegex(matchCondition, readIndexes, writeIndexes, mock(Logger.class));
    }
}

View File

@ -1,448 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms;
import org.elasticsearch.test.ESTestCase;
// TODO Reimplement
public class HighestRegisteredDomainTests extends ESTestCase {
// private void checkHighestRegisteredDomain(String fullName, String
// registeredNameExpected)
// {
// InternetDomainName effectiveTLD = InternetDomainName.from(fullName);
//
// effectiveTLD = effectiveTLD.topPrivateDomain();
// assertTrue(effectiveTLD.isTopPrivateDomain());
// String registeredName = effectiveTLD.toString();
//
// assertEquals(registeredNameExpected, registeredName);
// }
//
// private void checkIsPublicSuffix(String suffix)
// {
// InternetDomainName effectiveTLD = InternetDomainName.from(suffix);
// assertTrue(effectiveTLD.isPublicSuffix());
// }
//
// private void testDomainSplit(String subDomainExpected,
// String domainExpected, String hostName)
// {
// HighestRegisteredDomain.DomainSplit split =
// HighestRegisteredDomain.lookup(hostName);
//
// assertEquals(subDomainExpected, split.getSubDomain());
// assertEquals(domainExpected, split.getHighestRegisteredDomain());
// }
//
// @Test
// public void testDomainSplit()
// {
// testDomainSplit("", "", "");
// testDomainSplit("", "", ".");
//
// // Test cases from
// https://github.com/john-kurkowski/tldextract/tree/master/tldextract/tests
// testDomainSplit("www", "google.com", "www.google.com");
// testDomainSplit("www.maps", "google.co.uk", "www.maps.google.co.uk");
// testDomainSplit("www", "theregister.co.uk", "www.theregister.co.uk");
// testDomainSplit("", "gmail.com", "gmail.com");
// testDomainSplit("media.forums", "theregister.co.uk",
// "media.forums.theregister.co.uk");
// testDomainSplit("www", "www.com", "www.www.com");
// testDomainSplit("", "www.com", "www.com");
// testDomainSplit("", "internalunlikelyhostname",
// "internalunlikelyhostname");
// testDomainSplit("internalunlikelyhostname", "bizarre",
// "internalunlikelyhostname.bizarre");
// testDomainSplit("", "internalunlikelyhostname.info",
// "internalunlikelyhostname.info"); // .info is a valid TLD
// testDomainSplit("internalunlikelyhostname", "information",
// "internalunlikelyhostname.information");
// testDomainSplit("", "216.22.0.192", "216.22.0.192");
// testDomainSplit("", "::1", "::1");
// testDomainSplit("", "FE80:0000:0000:0000:0202:B3FF:FE1E:8329",
// "FE80:0000:0000:0000:0202:B3FF:FE1E:8329");
// testDomainSplit("216.22", "project.coop", "216.22.project.coop");
// testDomainSplit("www", "xn--h1alffa9f.xn--p1ai",
// "www.xn--h1alffa9f.xn--p1ai");
// testDomainSplit("", "", "");
// testDomainSplit("www", "parliament.uk", "www.parliament.uk");
// testDomainSplit("www", "parliament.co.uk", "www.parliament.co.uk");
// testDomainSplit("www.a", "cgs.act.edu.au", "www.a.cgs.act.edu.au");
// testDomainSplit("www", "google.com.au", "www.google.com.au");
// testDomainSplit("www", "metp.net.cn", "www.metp.net.cn");
// testDomainSplit("www", "waiterrant.blogspot.com",
// "www.waiterrant.blogspot.com");
//
// testDomainSplit("", "kittens.blogspot.co.uk", "kittens.blogspot.co.uk");
// testDomainSplit("", "ml.s3.amazonaws.com",
// "ml.s3.amazonaws.com");
// testDomainSplit("daves_bucket", "ml.s3.amazonaws.com",
// "daves_bucket.ml.s3.amazonaws.com");
//
// testDomainSplit("example", "example", "example.example");
// testDomainSplit("b.example", "example", "b.example.example");
// testDomainSplit("a.b.example", "example", "a.b.example.example");
//
// testDomainSplit("example", "local", "example.local");
// testDomainSplit("b.example", "local", "b.example.local");
// testDomainSplit("a.b.example", "local", "a.b.example.local");
//
// testDomainSplit("r192494180984795-1-1041782-channel-live.ums",
// "ustream.tv", "r192494180984795-1-1041782-channel-live.ums.ustream.tv");
//
// testDomainSplit("192.168.62.9", "prelert.com",
// "192.168.62.9.prelert.com");
//
// // These are not a valid DNS names
// testDomainSplit("kerberos.http.192.168", "62.222",
// "kerberos.http.192.168.62.222");
// testDomainSplit("192.168", "62.9\143\127", "192.168.62.9\143\127");
// }
//
// @Test
// public void testTooLongDnsName()
// {
// // no part of the DNS name can be longer than 63 octets
// String dnsLongerThan254Chars =
// "davesbucketdavesbucketdavesbucketdavesbucketdavesbucketdaves.bucketdavesbucketdavesbucketdavesbucketdavesbucketdaves.bucketdav
// esbucketdavesbucketdavesbucketdavesbucket.davesbucketdavesbucketdavesbucketdavesbucket.davesbucketdavesbucket.ml.s3.a
// mazonaws.com";
// String hrd = "ml.s3.amazonaws.com";
// testDomainSplit(dnsLongerThan254Chars.substring(0,
// dnsLongerThan254Chars.length() - (hrd.length() + 1)),
// hrd, dnsLongerThan254Chars);
//
// // this one needs sanitising
// dnsLongerThan254Chars =
// "_davesbucketdavesbucketdavesbucketdavesbucket-davesbucketdaves.-bucketdavesbucketdavesbucketdavesbucketdavesbucketdaves.bucket
// davesbucketdavesbucketdavesbucketdavesbucket.davesbucketdavesbucketdavesbucketdavesbucket.davesbucketdavesbucket.ml.s3.ama
// zonaws.com";
// hrd = "ml.s3.amazonaws.com";
// testDomainSplit(dnsLongerThan254Chars.substring(0,
// dnsLongerThan254Chars.length() - (hrd.length() + 1)),
// hrd, dnsLongerThan254Chars);
//
// String bad =
// "0u1aof\209\1945\188hI4\236\197\205J\244\188\247\223\190F\2135\229gVE7\230i\215\231\205Qzay\225UJ\192pw\216\231\204\194\216\
// 193QV4g\196\207Whpvx.fVxl\194BjA\245kbYk\211XG\235\198\218B\252\219\225S\197\217I\2538n\229\244\213\252\215Ly\226NW\242\248\
// 244Q\220\245\221c\207\189\205Hxq5\224\240.\189Jt4\243\245t\244\198\199p\210\1987r\2050L\239sR0M\190w\238\223\234L\226\2242D\233
// \210\206\195h\199\206tA\214J\192C\224\191b\188\201\251\198M\244h\206.\198\242l\2114\191JBU\198h\207\215w\243\228R\1924\242\208\19
// 1CV\208p\197gDW\198P\217\195X\191Fp\196\197J\193\245\2070\196zH\197\243\253g\239.adz.beacon.base.net";
// hrd = "base.net";
// testDomainSplit(bad.substring(0, bad.length() - (hrd.length() +1)), hrd,
// bad);
// }
//
// @Test
// public void testDomainSplit_SanitisedDomains()
// {
// testDomainSplit("_example", "local", "_example.local");
// testDomainSplit("www._maps", "google.co.uk", "www._maps.google.co.uk");
// testDomainSplit("-forum", "theregister.co.uk",
// "-forum.theregister.co.uk");
//
// testDomainSplit("www._yourmp", "parliament.uk",
// "www._yourmp.parliament.uk");
// testDomainSplit("www.-a", "cgs.act.edu.au", "www.-a.cgs.act.edu.au");
//
// testDomainSplit("", "-foundation.org", "-foundation.org");
// testDomainSplit("www", "-foundation.org", "www.-foundation.org");
// testDomainSplit("", "_nfsv4idmapdomain", "_nfsv4idmapdomain");
// testDomainSplit("_nfsv4idmapdomain", "prelert.com",
// "_nfsv4idmapdomain.prelert.com");
//
// testDomainSplit("lb._dns-sd._udp.0.123.168", "192.in-addr.arpa",
// "lb._dns-sd._udp.0.123.168.192.in-addr.arpa");
// testDomainSplit("_kerberos._http.192.168", "62.222",
// "_kerberos._http.192.168.62.222");
// }
//
// @Test
// public void testHighestRegisteredDomainCases()
// {
// // Any copyright is dedicated to the Public Domain.
// // http://creativecommons.org/publicdomain/zero/1.0/
//
// // Domain parts starting with _ aren't valid
// assertFalse(InternetDomainName.isValid("_nfsv4idmapdomain.prelert.com"));
//
// // Mixed case.
// checkIsPublicSuffix("COM");
// checkHighestRegisteredDomain("example.COM", "example.com");
// checkHighestRegisteredDomain("WwW.example.COM", "example.com");
//
// // These pass steve's test but fail here. Example isn't a valid
// (declared, not active) TLD
//// checkIsPublicSuffix("example");
//// checkTopLevelDomain("example.example", "example.example");
//// checkTopLevelDomain("b.example.example", "example.example");
//// checkTopLevelDomain("a.b.example.example", "example.example");
//
// // Listed, but non-Internet, TLD.
// // checkIsPublicSuffix("local"); // These pass Steve's tests but not
// public suffix here
// //checkIsPublicSuffix("example.local", "");
// //checkIsPublicSuffix("b.example.local", "");
// //checkIsPublicSuffix("a.b.example.local", "");
//
// // TLD with only 1 rule.
// checkIsPublicSuffix("biz");
// checkHighestRegisteredDomain("domain.biz", "domain.biz");
// checkHighestRegisteredDomain("b.domain.biz", "domain.biz");
// checkHighestRegisteredDomain("a.b.domain.biz", "domain.biz");
// // TLD with some 2-level rules.
// // checkPublicSuffix("com", "");
// checkHighestRegisteredDomain("example.com", "example.com");
// checkHighestRegisteredDomain("b.example.com", "example.com");
// checkHighestRegisteredDomain("a.b.example.com", "example.com");
// checkIsPublicSuffix("uk.com");
// checkHighestRegisteredDomain("example.uk.com", "example.uk.com");
// checkHighestRegisteredDomain("b.example.uk.com", "example.uk.com");
// checkHighestRegisteredDomain("a.b.example.uk.com", "example.uk.com");
// checkHighestRegisteredDomain("test.ac", "test.ac");
// // TLD with only 1 (wildcard) rule.
//
// // cy passes Steve's test but is not considered a valid TLD here
// // gov.cy is.
// checkIsPublicSuffix("gov.cy");
// checkHighestRegisteredDomain("c.gov.cy", "c.gov.cy"); // changed to pass
// test - inserted .gov, .net
// checkHighestRegisteredDomain("b.c.net.cy", "c.net.cy");
// checkHighestRegisteredDomain("a.b.c.net.cy", "c.net.cy");
//
// // More complex TLD.
// checkIsPublicSuffix("jp"); // jp is valid because you can have any 2nd
// level domain
// checkIsPublicSuffix("ac.jp");
// checkIsPublicSuffix("kyoto.jp");
// checkIsPublicSuffix("c.kobe.jp");
// checkIsPublicSuffix("ide.kyoto.jp");
// checkHighestRegisteredDomain("test.jp", "test.jp");
// checkHighestRegisteredDomain("www.test.jp", "test.jp");
// checkHighestRegisteredDomain("test.ac.jp", "test.ac.jp");
// checkHighestRegisteredDomain("www.test.ac.jp", "test.ac.jp");
// checkHighestRegisteredDomain("test.kyoto.jp", "test.kyoto.jp");
// checkHighestRegisteredDomain("b.ide.kyoto.jp", "b.ide.kyoto.jp");
// checkHighestRegisteredDomain("a.b.ide.kyoto.jp", "b.ide.kyoto.jp");
// checkHighestRegisteredDomain("b.c.kobe.jp", "b.c.kobe.jp");
// checkHighestRegisteredDomain("a.b.c.kobe.jp", "b.c.kobe.jp");
// checkHighestRegisteredDomain("city.kobe.jp", "city.kobe.jp");
// checkHighestRegisteredDomain("www.city.kobe.jp", "city.kobe.jp");
//
//
// // TLD with a wildcard rule and exceptions.
//// checkIsPublicSuffix("ck"); // Passes Steve's test but is not considered
// a valid TLD here
//// checkIsPublicSuffix("test.ck");
//// checkTopLevelDomain("b.test.ck", "b.test.ck");
//// checkTopLevelDomain("a.b.test.ck", "b.test.ck");
//// checkTopLevelDomain("www.ck", "www.ck");
//// checkTopLevelDomain("www.www.ck", "www.ck");
//
// // US K12.
// checkIsPublicSuffix("us");
// checkIsPublicSuffix("ak.us");
// checkIsPublicSuffix("k12.ak.us");
// checkHighestRegisteredDomain("test.us", "test.us");
// checkHighestRegisteredDomain("www.test.us", "test.us");
// checkHighestRegisteredDomain("test.ak.us", "test.ak.us");
// checkHighestRegisteredDomain("www.test.ak.us", "test.ak.us");
// checkHighestRegisteredDomain("test.k12.ak.us", "test.k12.ak.us");
// checkHighestRegisteredDomain("www.test.k12.ak.us", "test.k12.ak.us");
//
// // IDN labels.
// checkIsPublicSuffix("公司.cn");
// checkIsPublicSuffix("中国");
// checkHighestRegisteredDomain("食狮.com.cn", "食狮.com.cn");
// checkHighestRegisteredDomain("食狮.公司.cn", "食狮.公司.cn");
// checkHighestRegisteredDomain("www.食狮.公司.cn", "食狮.公司.cn");
// checkHighestRegisteredDomain("shishi.公司.cn", "shishi.公司.cn");
// checkHighestRegisteredDomain("食狮.中国", "食狮.中国");
// checkHighestRegisteredDomain("www.食狮.中国", "食狮.中国");
// checkHighestRegisteredDomain("shishi.中国", "shishi.中国");
//
// // Same as above, but punycoded.
// checkIsPublicSuffix("xn--55qx5d.cn");
// checkIsPublicSuffix("xn--fiqs8s");
// checkHighestRegisteredDomain("xn--85x722f.com.cn", "xn--85x722f.com.cn");
// checkHighestRegisteredDomain("xn--85x722f.xn--55qx5d.cn",
// "xn--85x722f.xn--55qx5d.cn");
// checkHighestRegisteredDomain("www.xn--85x722f.xn--55qx5d.cn",
// "xn--85x722f.xn--55qx5d.cn");
// checkHighestRegisteredDomain("shishi.xn--55qx5d.cn",
// "shishi.xn--55qx5d.cn");
// checkHighestRegisteredDomain("xn--85x722f.xn--fiqs8s",
// "xn--85x722f.xn--fiqs8s");
// checkHighestRegisteredDomain("www.xn--85x722f.xn--fiqs8s",
// "xn--85x722f.xn--fiqs8s");
// checkHighestRegisteredDomain("shishi.xn--fiqs8s", "shishi.xn--fiqs8s");
// }
//
// @Test
// public void testSanitiseDomainName()
// {
// String ok_domain = "nfsv4idmapdomain.prelert.com";
// assertTrue(InternetDomainName.isValid(ok_domain));
// assertTrue(HighestRegisteredDomain.sanitiseDomainName(ok_domain) ==
// ok_domain);
// ok_domain = "nfsv4idmapdomain\u3002ml\uFF0Ecom";
// assertTrue(InternetDomainName.isValid(ok_domain));
// assertTrue(HighestRegisteredDomain.sanitiseDomainName(ok_domain) ==
// ok_domain);
// ok_domain = "www.test.ac\uFF61jp";
// assertTrue(InternetDomainName.isValid(ok_domain));
// assertTrue(HighestRegisteredDomain.sanitiseDomainName(ok_domain) ==
// ok_domain);
// ok_domain = "xn--85x722f.com.cn";
// assertTrue(InternetDomainName.isValid(ok_domain));
// assertTrue(HighestRegisteredDomain.sanitiseDomainName(ok_domain) ==
// ok_domain);
// ok_domain = "x_n--85x722f.com.cn";
// assertTrue(InternetDomainName.isValid(ok_domain));
// assertTrue(HighestRegisteredDomain.sanitiseDomainName(ok_domain) ==
// ok_domain);
// ok_domain = "食狮.com.cn";
// assertTrue(InternetDomainName.isValid(ok_domain));
// assertTrue(HighestRegisteredDomain.sanitiseDomainName(ok_domain) ==
// ok_domain);
//
// String bad_domain = "_nfsv4idmapdomain.prelert.com";
// assertFalse(InternetDomainName.isValid(bad_domain));
// String sanitisedDomain =
// HighestRegisteredDomain.sanitiseDomainName(bad_domain);
// assertTrue(sanitisedDomain != ok_domain);
// assertEquals("p_nfsv4idmapdomain.pprelert.com", sanitisedDomain);
// assertEquals(bad_domain,
// HighestRegisteredDomain.desanitise(sanitisedDomain));
//
// bad_domain = "_www.test.ac\uFF61jp";
// assertFalse(InternetDomainName.isValid(bad_domain));
// sanitisedDomain = HighestRegisteredDomain.sanitiseDomainName(bad_domain);
// assertTrue(sanitisedDomain != ok_domain);
// assertEquals(HighestRegisteredDomain.replaceDots("p_www.test.ac\uFF61jp"),
// sanitisedDomain);
// assertEquals(HighestRegisteredDomain.replaceDots(bad_domain),
// HighestRegisteredDomain.desanitise(sanitisedDomain));
//
// bad_domain = "_xn--85x722f.com.cn";
// assertFalse(InternetDomainName.isValid(bad_domain));
// sanitisedDomain = HighestRegisteredDomain.sanitiseDomainName(bad_domain);
// assertTrue(sanitisedDomain != ok_domain);
// assertEquals("p_xn--85x722f.com.cn", sanitisedDomain);
// assertEquals(bad_domain,
// HighestRegisteredDomain.desanitise(sanitisedDomain));
//
// bad_domain = "-foundation.org";
// assertFalse(InternetDomainName.isValid(bad_domain));
// sanitisedDomain = HighestRegisteredDomain.sanitiseDomainName(bad_domain);
// assertTrue(sanitisedDomain != ok_domain);
// assertEquals("p-foundation.org", sanitisedDomain);
// assertEquals(bad_domain,
// HighestRegisteredDomain.desanitise(sanitisedDomain));
// }
//
// /**
// * Get sub domain only
// * @throws TransformException
// */
// @Test
// public void testTransform_SingleOutput() throws TransformException
// {
// List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(0,
// 2));
// List<TransformIndex> writeIndexes = createIndexArray(new
// TransformIndex(2, 0));
//
// HighestRegisteredDomain transform = new HighestRegisteredDomain(
// readIndexes, writeIndexes, mock(Logger.class));
//
// String [] input = {"", "", "www.test.ac.jp"};
// String [] scratch = {};
// String [] output = new String [2];
// String [][] readWriteArea = {input, scratch, output};
//
// transform.transform(readWriteArea);
// assertEquals("www", output[0]);
// assertNull(output[1]);
//
// input[2] = "a.b.domain.biz";
// transform.transform(readWriteArea);
// assertEquals("a.b", output[0]);
// assertNull(output[1]);
// }
//
//
//
// @Test
// public void testTransform_AllOutputs() throws TransformException
// {
// List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(0,
// 2));
// List<TransformIndex> writeIndexes = createIndexArray(new
// TransformIndex(2, 0), new TransformIndex(2, 1));
//
// HighestRegisteredDomain transform = new HighestRegisteredDomain(
// readIndexes, writeIndexes, mock(Logger.class));
//
//
// String [] input = {"", "", "www.test.ac.jp"};
// String [] scratch = {};
// String [] output = new String [2];
// String [][] readWriteArea = {input, scratch, output};
//
// transform.transform(readWriteArea);
// assertEquals("www", output[0]);
// assertEquals("test.ac.jp", output[1]);
//
// input[2] = "a.b.domain.biz";
// transform.transform(readWriteArea);
// assertEquals("a.b", output[0]);
// assertEquals("domain.biz", output[1]);
// }
//
// @Test
// public void testTransformTrimWhiteSpace() throws TransformException
// {
// List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(1,
// 2));
// List<TransformIndex> writeIndexes = createIndexArray(new
// TransformIndex(2, 0), new TransformIndex(2, 1));
//
// HighestRegisteredDomain transform = new HighestRegisteredDomain(
// readIndexes, writeIndexes, mock(Logger.class));
//
// String [] input = {};
// String [] scratch = {"", "", " time.apple.com "};
// String [] output = new String [2];
// String [][] readWriteArea = {input, scratch, output};
//
// transform.transform(readWriteArea);
// assertEquals("time", output[0]);
// assertEquals("apple.com", output[1]);
// }
//
// @Test
// public void testTransform_WriteToScratch() throws TransformException
// {
// List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(1,
// 2));
// List<TransformIndex> writeIndexes = createIndexArray(new
// TransformIndex(2, 0), new TransformIndex(2, 1));
//
// HighestRegisteredDomain transform = new HighestRegisteredDomain(
// readIndexes, writeIndexes, mock(Logger.class));
//
// String [] input = {};
// String [] scratch = {"", "", " time.apple.com "};
// String [] output = new String [2];
// String [][] readWriteArea = {input, scratch, output};
//
// assertEquals(TransformResult.OK, transform.transform(readWriteArea));
// assertEquals("time", output[0]);
// assertEquals("apple.com", output[1]);
// }
}

View File

@ -1,40 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms;
import static org.elasticsearch.xpack.ml.transforms.TransformTestUtils.createIndexArray;
import static org.mockito.Mockito.mock;
import java.util.List;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformIndex;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformResult;
/**
 * Tests for {@link RegexExtract}.
 */
public class RegexExtractTests extends ESTestCase {

    /**
     * Runs a regex with three capture groups over a single input field and
     * checks that the three captured values end up in the three output slots.
     */
    public void testTransform() throws TransformException {
        // Read from slot 0 of the first array in the read/write area.
        List<TransformIndex> sources = createIndexArray(new TransformIndex(0, 0));
        // Write to slots 0..2 of the third array in the read/write area.
        List<TransformIndex> targets = createIndexArray(
                new TransformIndex(2, 0),
                new TransformIndex(2, 1),
                new TransformIndex(2, 2));

        String pattern = "Tag=\"Windfarm ([0-9]+)\\.Turbine ([0-9]+)\\.(.*)\"";
        RegexExtract extractor = new RegexExtract(pattern, sources, targets, mock(Logger.class));

        String[] captured = new String[3];
        String[][] workArea = {
                new String[] {"Tag=\"Windfarm 04.Turbine 06.Temperature\""}, // input
                new String[0],                                                 // scratch
                captured                                                       // output
        };

        TransformResult outcome = extractor.transform(workArea);

        assertEquals(TransformResult.OK, outcome);
        assertEquals("04", captured[0]);
        assertEquals("06", captured[1]);
        assertEquals("Temperature", captured[2]);
    }
}

View File

@ -1,54 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms;
import static org.elasticsearch.xpack.ml.transforms.TransformTestUtils.createIndexArray;
import static org.mockito.Mockito.mock;
import java.util.List;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformIndex;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformResult;
/**
 * Tests for {@link RegexSplit}.
 */
public class RegexSplitTests extends ESTestCase {

    /**
     * Splits a single input field on {@code ":"} and checks what is written to
     * the output array when the input has exactly as many tokens as configured
     * outputs, more tokens than outputs, a single token, and no content at all.
     */
    public void testTransform() throws TransformException {
        List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(0, 0));
        List<TransformIndex> writeIndexes = createIndexArray(new TransformIndex(2, 0),
                new TransformIndex(2, 1), new TransformIndex(2, 2));

        String regex = ":";
        RegexSplit transform = new RegexSplit(regex, readIndexes, writeIndexes, mock(Logger.class));

        String[] input = {"A:B:C"};
        String[] scratch = {};
        String[] output = new String[3];
        String[][] readWriteArea = {input, scratch, output};

        // Three tokens, three outputs.
        // Note: expected value goes first so that a failure message reports
        // "expected"/"actual" the right way round.
        assertEquals(TransformResult.OK, transform.transform(readWriteArea));
        assertArrayEquals(new String[]{"A", "B", "C"}, readWriteArea[2]);

        // Four tokens, three outputs: only the first three are expected in the output.
        readWriteArea[0] = new String[]{"A:B:C:D"};
        readWriteArea[2] = new String[]{"", "", ""};
        assertEquals(TransformResult.OK, transform.transform(readWriteArea));
        assertArrayEquals(new String[]{"A", "B", "C"}, readWriteArea[2]);

        // A single token written into a one-element output array.
        readWriteArea[0] = new String[]{"A"};
        readWriteArea[2] = new String[]{""};
        assertEquals(TransformResult.OK, transform.transform(readWriteArea));
        assertArrayEquals(new String[]{"A"}, readWriteArea[2]);

        // Empty input leaves a single empty string in the output.
        readWriteArea[0] = new String[]{""};
        readWriteArea[2] = new String[]{""};
        assertEquals(TransformResult.OK, transform.transform(readWriteArea));
        assertArrayEquals(new String[]{""}, readWriteArea[2]);
    }
}

View File

@ -1,169 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformIndex;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformResult;
import java.util.List;
import java.util.Locale;
import static org.elasticsearch.xpack.ml.transforms.TransformTestUtils.createIndexArray;
import static org.mockito.Mockito.mock;
/**
 * Tests for the upper-case, lower-case and trim flavours of {@link StringTransform}.
 *
 * For each flavour the tests check that the factory method rejects zero or two
 * inputs and zero or two outputs with an {@link IllegalArgumentException}, and
 * that a single input mapped to a single output produces the expected string.
 */
public class StringTransformTests extends ESTestCase {

    // ---- upper case ------------------------------------------------------

    public void testUpperCaseTransform_GivenZeroInputs() throws TransformException {
        List<TransformIndex> in = createIndexArray();
        List<TransformIndex> out = createIndexArray(new TransformIndex(2, 1));

        expectThrows(IllegalArgumentException.class,
                () -> StringTransform.createUpperCase(in, out, mock(Logger.class)));
    }

    public void testUpperCaseTransform_GivenTwoInputs() throws TransformException {
        List<TransformIndex> in = createIndexArray(new TransformIndex(0, 0), new TransformIndex(0, 1));
        List<TransformIndex> out = createIndexArray(new TransformIndex(2, 1));

        expectThrows(IllegalArgumentException.class,
                () -> StringTransform.createUpperCase(in, out, mock(Logger.class)));
    }

    public void testUpperCaseTransform_GivenZeroOutputs() throws TransformException {
        List<TransformIndex> in = createIndexArray(new TransformIndex(0, 1));
        List<TransformIndex> out = createIndexArray();

        expectThrows(IllegalArgumentException.class,
                () -> StringTransform.createUpperCase(in, out, mock(Logger.class)));
    }

    public void testUpperCaseTransform_GivenTwoOutputs() throws TransformException {
        List<TransformIndex> in = createIndexArray(new TransformIndex(0, 1));
        List<TransformIndex> out = createIndexArray(new TransformIndex(1, 1), new TransformIndex(1, 2));

        expectThrows(IllegalArgumentException.class,
                () -> StringTransform.createUpperCase(in, out, mock(Logger.class)));
    }

    public void testUpperCaseTransform_GivenSingleInputAndSingleOutput() throws TransformException {
        List<TransformIndex> in = createIndexArray(new TransformIndex(0, 1));
        List<TransformIndex> out = createIndexArray(new TransformIndex(2, 0));
        StringTransform toUpper = StringTransform.createUpperCase(in, out, mock(Logger.class));

        String written = transformToSingleOutput(toUpper, "aa", "aBcD", "cc", "dd", "ee");

        assertEquals("aBcD".toUpperCase(Locale.ROOT), written);
    }

    // ---- lower case ------------------------------------------------------

    public void testLowerCaseTransform_GivenZeroInputs() throws TransformException {
        List<TransformIndex> in = createIndexArray();
        List<TransformIndex> out = createIndexArray(new TransformIndex(2, 1));

        expectThrows(IllegalArgumentException.class,
                () -> StringTransform.createLowerCase(in, out, mock(Logger.class)));
    }

    public void testLowerCaseTransform_GivenTwoInputs() throws TransformException {
        List<TransformIndex> in = createIndexArray(new TransformIndex(0, 0), new TransformIndex(0, 1));
        List<TransformIndex> out = createIndexArray(new TransformIndex(2, 1));

        expectThrows(IllegalArgumentException.class,
                () -> StringTransform.createLowerCase(in, out, mock(Logger.class)));
    }

    public void testLowerCaseTransform_GivenZeroOutputs() throws TransformException {
        List<TransformIndex> in = createIndexArray(new TransformIndex(0, 1));
        List<TransformIndex> out = createIndexArray();

        expectThrows(IllegalArgumentException.class,
                () -> StringTransform.createLowerCase(in, out, mock(Logger.class)));
    }

    public void testLowerCaseTransform_GivenTwoOutputs() throws TransformException {
        List<TransformIndex> in = createIndexArray(new TransformIndex(0, 1));
        List<TransformIndex> out = createIndexArray(new TransformIndex(1, 1), new TransformIndex(1, 2));

        expectThrows(IllegalArgumentException.class,
                () -> StringTransform.createLowerCase(in, out, mock(Logger.class)));
    }

    public void testLowerCaseTransform_GivenSingleInputAndSingleOutput() throws TransformException {
        List<TransformIndex> in = createIndexArray(new TransformIndex(0, 1));
        List<TransformIndex> out = createIndexArray(new TransformIndex(2, 0));
        StringTransform toLower = StringTransform.createLowerCase(in, out, mock(Logger.class));

        String written = transformToSingleOutput(toLower, "aa", "AbCde", "cc", "dd", "ee");

        assertEquals("AbCde".toLowerCase(Locale.ROOT), written);
    }

    // ---- trim ------------------------------------------------------------

    public void testTrimTransform_GivenZeroInputs() throws TransformException {
        List<TransformIndex> in = createIndexArray();
        List<TransformIndex> out = createIndexArray(new TransformIndex(2, 1));

        expectThrows(IllegalArgumentException.class,
                () -> StringTransform.createTrim(in, out, mock(Logger.class)));
    }

    public void testTrimTransform_GivenTwoInputs() throws TransformException {
        List<TransformIndex> in = createIndexArray(new TransformIndex(0, 0), new TransformIndex(0, 1));
        List<TransformIndex> out = createIndexArray(new TransformIndex(2, 1));

        expectThrows(IllegalArgumentException.class,
                () -> StringTransform.createTrim(in, out, mock(Logger.class)));
    }

    public void testTrimTransform_GivenZeroOutputs() throws TransformException {
        List<TransformIndex> in = createIndexArray(new TransformIndex(0, 1));
        List<TransformIndex> out = createIndexArray();

        expectThrows(IllegalArgumentException.class,
                () -> StringTransform.createTrim(in, out, mock(Logger.class)));
    }

    public void testTrimTransform_GivenTwoOutputs() throws TransformException {
        List<TransformIndex> in = createIndexArray(new TransformIndex(0, 1));
        List<TransformIndex> out = createIndexArray(new TransformIndex(1, 1), new TransformIndex(1, 2));

        expectThrows(IllegalArgumentException.class,
                () -> StringTransform.createTrim(in, out, mock(Logger.class)));
    }

    public void testTrimTransform_GivenSingleInputAndSingleOutput() throws TransformException {
        List<TransformIndex> in = createIndexArray(new TransformIndex(0, 1));
        List<TransformIndex> out = createIndexArray(new TransformIndex(2, 0));
        StringTransform trimmer = StringTransform.createTrim(in, out, mock(Logger.class));

        String written = transformToSingleOutput(trimmer, " a ", "\t b ", " c", "d", "e");

        assertEquals("\t b".trim(), written);
    }

    /**
     * Runs {@code transform} over a read/write area made of the given input
     * fields, an empty scratch array and a one-element output array, asserts
     * that the transform reports {@link TransformResult#OK}, and returns the
     * value left in the single output slot.
     */
    private static String transformToSingleOutput(StringTransform transform, String... fields)
            throws TransformException {
        String[] result = new String[1];
        String[][] area = {fields, new String[0], result};
        assertEquals(TransformResult.OK, transform.transform(area));
        return result[0];
    }
}

View File

@ -1,127 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms;
import static org.mockito.Mockito.mock;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.ml.job.config.Condition;
import org.elasticsearch.xpack.ml.job.config.Operator;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
import org.elasticsearch.xpack.ml.job.config.transform.TransformType;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformIndex;
/**
 * Tests for {@link TransformFactory}: mapping of configured field names to
 * read/write indexes, handling of optional arguments, and creation of a
 * transform for every {@link TransformType}.
 */
public class TransformFactoryTests extends ESTestCase {

    /**
     * Checks that the field names in a concat config are resolved to the
     * positions given in the input and output maps.
     */
    public void testIndexesMapping() {
        TransformConfig conf = new TransformConfig(TransformType.CONCAT.prettyName());
        conf.setInputs(Arrays.asList("field1", "field2"));
        conf.setOutputs(Arrays.asList("concatted"));

        Map<String, Integer> inputMap = new HashMap<>();
        inputMap.put("field1", 5);
        inputMap.put("field2", 3);
        Map<String, Integer> scratchMap = new HashMap<>();
        Map<String, Integer> outputMap = new HashMap<>();
        outputMap.put("concatted", 2);

        Transform tr = new TransformFactory().create(conf, inputMap, scratchMap,
                outputMap, mock(Logger.class));
        assertTrue(tr instanceof Concat);

        // Expected value first, so failure messages label the values correctly.
        List<TransformIndex> inputIndexes = tr.getReadIndexes();
        assertEquals(new TransformIndex(0, 5), inputIndexes.get(0));
        assertEquals(new TransformIndex(0, 3), inputIndexes.get(1));

        List<TransformIndex> outputIndexes = tr.getWriteIndexes();
        assertEquals(new TransformIndex(2, 2), outputIndexes.get(0));
    }

    /**
     * Checks that a concat transform created without arguments has an empty
     * delimiter, and that a supplied argument is used as the delimiter.
     */
    public void testConcatWithOptionalArgs() {
        TransformConfig conf = new TransformConfig(TransformType.CONCAT.prettyName());
        conf.setInputs(Arrays.asList("field1", "field2"));
        conf.setOutputs(Arrays.asList("concatted"));

        Map<String, Integer> inputMap = new HashMap<>();
        inputMap.put("field1", 5);
        inputMap.put("field2", 3);
        Map<String, Integer> scratchMap = new HashMap<>();
        Map<String, Integer> outputMap = new HashMap<>();
        outputMap.put("concatted", 2);

        Transform tr = new TransformFactory().create(conf, inputMap, scratchMap,
                outputMap, mock(Logger.class));
        assertTrue(tr instanceof Concat);
        assertEquals("", ((Concat) tr).getDelimiter());

        conf.setArguments(Arrays.asList("delimiter"));
        tr = new TransformFactory().create(conf, inputMap, scratchMap,
                outputMap, mock(Logger.class));
        assertTrue(tr instanceof Concat);
        assertEquals("delimiter", ((Concat) tr).getDelimiter());
    }

    /**
     * Creates a transform for every {@link TransformType} value; the test
     * passes as long as the factory does not throw for any of them.
     */
    public void testAllTypesCreated() {
        EnumSet<TransformType> all = EnumSet.allOf(TransformType.class);

        Map<String, Integer> inputIndexes = new HashMap<>();
        Map<String, Integer> scratchMap = new HashMap<>();
        Map<String, Integer> outputIndexes = new HashMap<>();

        for (TransformType type : all) {
            TransformConfig conf = TransformTestUtils.createValidTransform(type);
            // Register every field the config mentions so the factory can resolve it.
            conf.getInputs().forEach(input -> inputIndexes.put(input, 0));
            conf.getOutputs().forEach(output -> outputIndexes.put(output, 0));

            // throws IllegalArgumentException if it doesn't handle the type
            new TransformFactory().create(conf, inputIndexes, scratchMap,
                    outputIndexes, mock(Logger.class));
        }
    }

    /**
     * Checks that an exclude config yields a numeric filter for a numeric
     * condition and a regex filter for a {@code MATCH} condition.
     */
    public void testExcludeTransformsCreated() {
        Map<String, Integer> inputIndexes = new HashMap<>();
        Map<String, Integer> scratchMap = new HashMap<>();
        Map<String, Integer> outputIndexes = new HashMap<>();

        TransformConfig conf = new TransformConfig(TransformType.EXCLUDE.prettyName());
        conf.setInputs(new ArrayList<>());
        conf.setOutputs(new ArrayList<>());
        conf.setCondition(new Condition(Operator.LT, "2000"));

        ExcludeFilterNumeric numericTransform =
                (ExcludeFilterNumeric) new TransformFactory().create(conf, inputIndexes,
                        scratchMap, outputIndexes, mock(Logger.class));
        assertEquals(Operator.LT, numericTransform.getCondition().getOperator());
        assertEquals(2000, numericTransform.filterValue(), 0.0000001);

        conf.setCondition(new Condition(Operator.MATCH, "aaaaa"));
        ExcludeFilterRegex regexTransform =
                (ExcludeFilterRegex) new TransformFactory().create(conf, inputIndexes,
                        scratchMap, outputIndexes, mock(Logger.class));
        assertEquals(Operator.MATCH, regexTransform.getCondition().getOperator());
        assertEquals("aaaaa", regexTransform.getCondition().getValue());
    }
}

View File

@ -1,83 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms;
import java.util.ArrayList;
import java.util.List;
import java.util.function.BiFunction;
import org.elasticsearch.xpack.ml.job.config.Condition;
import org.elasticsearch.xpack.ml.job.config.Operator;
import org.elasticsearch.xpack.ml.job.config.transform.IntRange;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
import org.elasticsearch.xpack.ml.job.config.transform.TransformType;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformIndex;
/**
 * Static helpers shared by the transform tests.
 */
public final class TransformTestUtils {

    private TransformTestUtils() {
    }

    /**
     * Collects the given indexes into a new mutable list, preserving order.
     *
     * @param indexs the indexes to collect; may be empty
     * @return a new list containing {@code indexs} in the order given
     */
    public static List<TransformIndex> createIndexArray(TransformIndex... indexs) {
        List<TransformIndex> result = new ArrayList<>(indexs.length);
        for (TransformIndex index : indexs) {
            result.add(index);
        }
        return result;
    }

    /**
     * Builds a config for {@code type} whose number of inputs, arguments and
     * outputs is derived from the ranges the type declares (see
     * {@link #getValidCount(IntRange)}). Inputs and outputs are named
     * {@code "0"}, {@code "1"}, ...; a condition of {@code EQ 100} is set when
     * the type reports that it has a condition, otherwise the condition is
     * {@code null}.
     *
     * @param type the transform type to build a config for
     * @return the populated config
     */
    public static TransformConfig createValidTransform(TransformType type) {
        List<String> inputs = createValidArgs(type.arityRange(), type,
                (arg, t) -> Integer.toString(arg));
        List<String> args = createValidArgs(type.argumentsRange(), type,
                TransformTestUtils::createValidArgument);
        List<String> outputs = createValidArgs(type.outputsRange(), type,
                (arg, t) -> Integer.toString(arg));

        Condition condition = null;
        if (type.hasCondition()) {
            condition = new Condition(Operator.EQ, "100");
        }

        TransformConfig tr = new TransformConfig(type.toString());
        tr.setInputs(inputs);
        tr.setArguments(args);
        tr.setOutputs(outputs);
        tr.setCondition(condition);
        return tr;
    }

    /**
     * Produces as many values as {@link #getValidCount(IntRange)} returns for
     * {@code range}, calling {@code argumentCreator} with the zero-based
     * position and the transform type for each one.
     */
    private static List<String> createValidArgs(IntRange range, TransformType type,
            BiFunction<Integer, TransformType, String> argumentCreator) {
        int validCount = getValidCount(range);
        List<String> args = new ArrayList<>();
        for (int arg = 0; arg < validCount; ++arg) {
            args.add(argumentCreator.apply(arg, type));
        }
        return args;
    }

    /**
     * Returns an argument string for position {@code argNumber} of the given
     * type: a regex containing a capture group for {@code REGEX_EXTRACT}, and
     * the position as a decimal string for the other listed types.
     *
     * @throws IllegalArgumentException if {@code type} is not one of the listed
     *         cases, so that a newly added type is noticed by the tests
     */
    private static String createValidArgument(int argNumber, TransformType type) {
        switch (type) {
            case REGEX_EXTRACT:
                return Integer.toString(argNumber) + ".Foo ([0-9]+)";
            case CONCAT:
            case DOMAIN_SPLIT:
            case EXCLUDE:
            case LOWERCASE:
            case REGEX_SPLIT:
            case TRIM:
            case UPPERCASE:
                return Integer.toString(argNumber);
            default:
                // Name the offending type; a bare exception gives no clue which one is unhandled.
                throw new IllegalArgumentException("No valid argument defined for transform type [" + type + "]");
        }
    }

    /**
     * Picks a count that lies inside {@code range}: the upper bound when the
     * range has one, otherwise the lower bound.
     */
    private static int getValidCount(IntRange range) {
        return range.hasUpperBound() ? range.upper() : range.lower();
    }
}

View File

@ -1,121 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms.date;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformIndex;
import org.elasticsearch.xpack.ml.transforms.TransformException;
import java.util.Collections;
import java.util.List;
import static org.elasticsearch.xpack.ml.transforms.TransformTestUtils.createIndexArray;
import static org.mockito.Mockito.mock;
/**
 * Tests for {@link DateFormatTransform}.
 * <p>
 * Every test hands the transform a read/write area laid out as
 * {@code {input, scratch, output}}. The read and write {@link TransformIndex}
 * values used here address that area: e.g. {@code (1, 0)} is paired with data
 * placed in the first scratch slot and {@code (2, 0)} with the first output slot.
 */
public class DateFormatTransformTests extends ESTestCase {

    /** A timestamp matching the format yields epoch milliseconds and writes epoch seconds to the output. */
    public void testTransform_GivenValidTimestamp() throws TransformException {
        List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(0, 0));
        List<TransformIndex> writeIndexes = createIndexArray(new TransformIndex(2, 0));
        DateFormatTransform transformer = new DateFormatTransform("yyyy-MM-dd HH:mm:ss.SSSXXX",
                readIndexes, writeIndexes, mock(Logger.class));

        String[] input = {"2014-01-01 13:42:56.500Z"};
        String[] scratch = {};
        String[] output = new String[1];
        String[][] readWriteArea = {input, scratch, output};

        transformer.transform(readWriteArea);

        assertEquals(1388583776500L, transformer.epochMs());
        assertEquals("1388583776", output[0]);
    }

    /** A pattern lacking the day of month is rejected at construction time. */
    public void testTransform_GivenInvalidFormat() throws TransformException {
        IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
                () -> new DateFormatTransform("yyyy-MM HH:mm:ss", Collections.emptyList(), Collections.emptyList(),
                        mock(Logger.class)));

        assertEquals("Timestamp cannot be derived from pattern: yyyy-MM HH:mm:ss", e.getMessage());
    }

    /** Input that does not match the format makes {@code transform} throw {@link ParseTimestampException}. */
    public void testTransform_GivenInvalidTimestamp() throws TransformException {
        List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(0, 0));
        List<TransformIndex> writeIndexes = createIndexArray(new TransformIndex(2, 0));
        DateFormatTransform transformer = new DateFormatTransform("yyyy-MM-dd HH:mm:ss", readIndexes, writeIndexes,
                mock(Logger.class));

        String[] input = {"invalid"};
        String[] scratch = {};
        String[] output = new String[1];
        String[][] readWriteArea = {input, scratch, output};

        ParseTimestampException e = expectThrows(ParseTimestampException.class,
                () -> transformer.transform(readWriteArea));

        assertEquals("Cannot parse date 'invalid' with format string 'yyyy-MM-dd HH:mm:ss'", e.getMessage());
    }

    /** A {@code null} value in the slot being read is reported as a {@link ParseTimestampException}. */
    public void testTransform_GivenNull() throws TransformException {
        List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(1, 0));
        List<TransformIndex> writeIndexes = createIndexArray(new TransformIndex(2, 0));
        DateFormatTransform transformer = new DateFormatTransform("yyyy-MM-dd HH:mm:ss", readIndexes, writeIndexes,
                mock(Logger.class));

        String[] input = {};
        String[] scratch = {null};
        String[] output = new String[1];
        String[][] readWriteArea = {input, scratch, output};

        expectThrows(ParseTimestampException.class, () -> transformer.transform(readWriteArea));
    }

    /** The pattern "e" is rejected at construction time with an {@link IllegalArgumentException}. */
    public void testTransform_GivenBadFormat() throws TransformException {
        List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(0, 0));
        List<TransformIndex> writeIndexes = createIndexArray(new TransformIndex(2, 0));

        expectThrows(IllegalArgumentException.class,
                () -> new DateFormatTransform("e", readIndexes, writeIndexes, mock(Logger.class)));
    }

    /** The timestamp can be read from the scratch area instead of the input area. */
    public void testTransform_FromScratchArea() throws TransformException {
        List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(1, 0));
        List<TransformIndex> writeIndexes = createIndexArray(new TransformIndex(2, 0));
        DateFormatTransform transformer = new DateFormatTransform("yyyy-MM-dd HH:mm:ssXXX", readIndexes, writeIndexes,
                mock(Logger.class));

        String[] input = {};
        String[] scratch = {"2014-01-01 00:00:00Z"};
        String[] output = new String[1];
        String[][] readWriteArea = {input, scratch, output};

        transformer.transform(readWriteArea);

        assertEquals(1388534400000L, transformer.epochMs());
        assertEquals("1388534400", output[0]);
    }

    /** Quoted literal brackets in the pattern are matched against the brackets surrounding the timestamp. */
    public void testTransform_WithBrackets() throws TransformException {
        List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(0, 0));
        List<TransformIndex> writeIndexes = createIndexArray(new TransformIndex(2, 0));
        DateFormatTransform transformer = new DateFormatTransform("'['yyyy-MM-dd HH:mm:ssX']'",
                readIndexes, writeIndexes, mock(Logger.class));

        String[] input = {"[2014-06-23 00:00:00Z]"};
        String[] scratch = {};
        String[] output = new String[1];
        String[][] readWriteArea = {input, scratch, output};

        transformer.transform(readWriteArea);

        // 2014-06-23T00:00:00Z is 1403481600 seconds after the epoch; check the
        // parsed value rather than only the absence of an exception.
        assertEquals(1403481600000L, transformer.epochMs());
        assertEquals("1403481600", output[0]);
    }
}

View File

@ -1,89 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms.date;
import static org.elasticsearch.xpack.ml.transforms.TransformTestUtils.createIndexArray;
import static org.mockito.Mockito.mock;
import java.util.List;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformIndex;
import org.elasticsearch.xpack.ml.transforms.TransformException;
/**
 * Tests for {@link DoubleDateTransform}.
 * <p>
 * Every test hands the transform a read/write area laid out as
 * {@code {input, scratch, output}}. The boolean constructor argument states
 * whether the incoming timestamp is already in milliseconds; in both cases the
 * tests expect {@code epochMs()} in milliseconds and the written output in seconds.
 */
public class DoubleDateTransformTests extends ESTestCase {

    /** A timestamp given in seconds is scaled up for {@code epochMs()} and written out unchanged. */
    public void testTransform_GivenTimestampIsNotMilliseconds() throws TransformException {
        List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(0, 0));
        List<TransformIndex> writeIndexes = createIndexArray(new TransformIndex(2, 0));
        DoubleDateTransform transformer = new DoubleDateTransform(false,
                readIndexes, writeIndexes, mock(Logger.class));

        String[] input = {"1000"};
        String[] scratch = {};
        String[] output = new String[1];
        String[][] readWriteArea = {input, scratch, output};

        transformer.transform(readWriteArea);

        assertEquals(1000000, transformer.epochMs());
        assertEquals("1000", output[0]);
    }

    /** A timestamp given in milliseconds is kept for {@code epochMs()} and written out in seconds. */
    public void testTransform_GivenTimestampIsMilliseconds() throws TransformException {
        List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(0, 0));
        List<TransformIndex> writeIndexes = createIndexArray(new TransformIndex(2, 0));
        DoubleDateTransform transformer = new DoubleDateTransform(true,
                readIndexes, writeIndexes, mock(Logger.class));

        String[] input = {"1000"};
        String[] scratch = {};
        String[] output = new String[1];
        String[][] readWriteArea = {input, scratch, output};

        transformer.transform(readWriteArea);

        assertEquals(1000, transformer.epochMs());
        assertEquals("1", output[0]);
    }

    /** Non-numeric input makes {@code transform} throw {@link ParseTimestampException}. */
    public void testTransform_GivenTimestampIsNotValidDouble() throws TransformException {
        List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(0, 0));
        List<TransformIndex> writeIndexes = createIndexArray(new TransformIndex(2, 0));
        DoubleDateTransform transformer = new DoubleDateTransform(false,
                readIndexes, writeIndexes, mock(Logger.class));

        String[] input = {"invalid"};
        String[] scratch = {};
        String[] output = new String[1];
        String[][] readWriteArea = {input, scratch, output};

        ParseTimestampException e = expectThrows(ParseTimestampException.class,
                () -> transformer.transform(readWriteArea));

        assertEquals("Cannot parse timestamp 'invalid' as epoch value", e.getMessage());
    }

    /** The timestamp can be read from the scratch area instead of the input area. */
    public void testTransform_InputFromScratchArea() throws TransformException {
        List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(1, 0));
        List<TransformIndex> writeIndexes = createIndexArray(new TransformIndex(2, 0));
        DoubleDateTransform transformer = new DoubleDateTransform(false,
                readIndexes, writeIndexes, mock(Logger.class));

        String[] input = {};
        String[] scratch = {"1000"};
        String[] output = new String[1];
        String[][] readWriteArea = {input, scratch, output};

        transformer.transform(readWriteArea);

        // Same value and units as the input-area case above, so the same results
        // are expected; assert them rather than only the absence of an exception.
        assertEquals(1000000, transformer.epochMs());
        assertEquals("1000", output[0]);
    }
}

View File

@ -1,14 +0,0 @@
{
"xpack.ml.validate_transform": {
"methods": [ "POST" ],
"url": {
"path": "/_xpack/ml/_validate/transform",
"paths": [ "/_xpack/ml/_validate/transform" ],
"params": {}
},
"body": {
"description" : "The transform",
"required" : true
}
}
}

View File

@ -1,14 +0,0 @@
{
"xpack.ml.validate_transforms": {
"methods": [ "POST" ],
"url": {
"path": "/_xpack/ml/_validate/transforms",
"paths": [ "/_xpack/ml/_validate/transforms" ],
"params": {}
},
"body": {
"description" : "The transforms",
"required" : true
}
}
}

View File

@ -1,63 +0,0 @@
---
# Sends a single concat transform with two inputs and one output to
# xpack.ml.validate_transform and expects the response to be acknowledged.
"Test valid transform":
- do:
xpack.ml.validate_transform:
body: >
{
"transform":"concat",
"inputs": [ "one", "two" ],
"outputs": [ "oneplustwo" ]
}
- match: { acknowledged: true }
---
# Sends a concat transform with only one input to xpack.ml.validate_transform
# and expects an error matching the regex below (two or more inputs expected, got 1).
"Test invalid transform":
- do:
catch: /Transform type concat expected \[2‥\+∞\) input\(s\), got 1/
xpack.ml.validate_transform:
body: >
{
"transform":"concat",
"inputs": [ "justone" ],
"outputs": [ "stilljustone" ]
}
---
# Sends a list of two transforms (a concat and a domain_split) to
# xpack.ml.validate_transforms and expects the response to be acknowledged.
"Test valid transforms":
- do:
xpack.ml.validate_transforms:
body: >
{
"transforms": [
{
"transform":"concat",
"inputs": [ "one", "two" ],
"outputs": [ "oneplustwo" ]
},
{
"transform":"domain_split",
"inputs": [ "domain" ],
"outputs": [ "sub_domain", "highest_registered_domain" ]
}
]
}
- match: { acknowledged: true }
---
# Sends two concat transforms that feed each other (the first outputs "three",
# which the second takes as input; the second outputs "one", which the first
# takes as input) and expects a circular dependency error matching the regex below.
"Test invalid transforms":
- do:
catch: /Transform type concat with inputs \[one, two\] has a circular dependency/
xpack.ml.validate_transforms:
body: >
{
"transforms": [
{
"transform":"concat",
"inputs": [ "one", "two" ],
"outputs": [ "three" ]
},
{
"transform":"concat",
"inputs": [ "two", "three" ],
"outputs": [ "one" ]
}
]
}