Removing transforms and the SINGLE_LINE input format (elastic/elasticsearch#790)

Most transforms will be replaced with Painless scripts.
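
For illustration, a concat-style transform could be reproduced as a Painless
script. This is a sketch only; the field names, and the idea of hosting it in
a datafeed script_field, are assumptions rather than part of this change:

    // Painless: emulate the old concat transform over two hypothetical fields
    doc['field_a'].value + doc['field_b'].value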

The exception is the DateTransform, whose functionality is now simplified
to what existed before the other transforms were added.

The SINGLE_LINE format relied on transforms to extract fields, so it has
also been removed; this is reasonable, as that functionality strays into
Logstash territory.

Relates elastic/elasticsearch#630

Closes elastic/elasticsearch#39

Original commit: elastic/x-pack-elasticsearch@a593d3e0ad
David Roberts 2017-01-25 15:51:50 +00:00 committed by GitHub
parent 99c9d3733f
commit 4b366f8ef6
77 changed files with 265 additions and 6045 deletions

@@ -63,8 +63,6 @@ import org.elasticsearch.xpack.ml.action.UpdateDatafeedStatusAction;
import org.elasticsearch.xpack.ml.action.UpdateJobStatusAction;
import org.elasticsearch.xpack.ml.action.UpdateModelSnapshotAction;
import org.elasticsearch.xpack.ml.action.ValidateDetectorAction;
import org.elasticsearch.xpack.ml.action.ValidateTransformAction;
import org.elasticsearch.xpack.ml.action.ValidateTransformsAction;
import org.elasticsearch.xpack.ml.datafeed.DatafeedJobRunner;
import org.elasticsearch.xpack.ml.job.JobManager;
import org.elasticsearch.xpack.ml.job.metadata.MlInitializationService;
@@ -112,8 +110,6 @@ import org.elasticsearch.xpack.ml.rest.results.RestGetCategoriesAction;
import org.elasticsearch.xpack.ml.rest.results.RestGetInfluencersAction;
import org.elasticsearch.xpack.ml.rest.results.RestGetRecordsAction;
import org.elasticsearch.xpack.ml.rest.validate.RestValidateDetectorAction;
import org.elasticsearch.xpack.ml.rest.validate.RestValidateTransformAction;
import org.elasticsearch.xpack.ml.rest.validate.RestValidateTransformsAction;
import org.elasticsearch.xpack.ml.utils.NamedPipeHelper;
import java.io.IOException;
@@ -255,8 +251,6 @@ public class MlPlugin extends Plugin implements ActionPlugin {
new RestCloseJobAction(settings, restController),
new RestFlushJobAction(settings, restController),
new RestValidateDetectorAction(settings, restController),
new RestValidateTransformAction(settings, restController),
new RestValidateTransformsAction(settings, restController),
new RestGetCategoriesAction(settings, restController),
new RestGetModelSnapshotsAction(settings, restController),
new RestRevertModelSnapshotAction(settings, restController),
@@ -295,8 +289,6 @@ public class MlPlugin extends Plugin implements ActionPlugin {
new ActionHandler<>(CloseJobAction.INSTANCE, CloseJobAction.TransportAction.class),
new ActionHandler<>(FlushJobAction.INSTANCE, FlushJobAction.TransportAction.class),
new ActionHandler<>(ValidateDetectorAction.INSTANCE, ValidateDetectorAction.TransportAction.class),
new ActionHandler<>(ValidateTransformAction.INSTANCE, ValidateTransformAction.TransportAction.class),
new ActionHandler<>(ValidateTransformsAction.INSTANCE, ValidateTransformsAction.TransportAction.class),
new ActionHandler<>(GetCategoriesAction.INSTANCE, GetCategoriesAction.TransportAction.class),
new ActionHandler<>(GetModelSnapshotsAction.INSTANCE, GetModelSnapshotsAction.TransportAction.class),
new ActionHandler<>(RevertModelSnapshotAction.INSTANCE, RevertModelSnapshotAction.TransportAction.class),

@@ -1,164 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.action;
import org.elasticsearch.action.Action;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.ActionRequest;
import org.elasticsearch.action.ActionRequestBuilder;
import org.elasticsearch.action.ActionRequestValidationException;
import org.elasticsearch.action.support.ActionFilters;
import org.elasticsearch.action.support.HandledTransportAction;
import org.elasticsearch.action.support.master.AcknowledgedResponse;
import org.elasticsearch.client.ElasticsearchClient;
import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.transport.TransportService;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
import org.elasticsearch.xpack.ml.job.config.transform.verification.TransformConfigVerifier;
import java.io.IOException;
import java.util.Objects;
public class ValidateTransformAction
extends Action<ValidateTransformAction.Request, ValidateTransformAction.Response, ValidateTransformAction.RequestBuilder> {
public static final ValidateTransformAction INSTANCE = new ValidateTransformAction();
public static final String NAME = "cluster:admin/ml/validate/transform";
protected ValidateTransformAction() {
super(NAME);
}
@Override
public RequestBuilder newRequestBuilder(ElasticsearchClient client) {
return new RequestBuilder(client, INSTANCE);
}
@Override
public Response newResponse() {
return new Response();
}
public static class RequestBuilder extends ActionRequestBuilder<Request, Response, RequestBuilder> {
protected RequestBuilder(ElasticsearchClient client, ValidateTransformAction action) {
super(client, action, new Request());
}
}
public static class Request extends ActionRequest implements ToXContent {
private TransformConfig transform;
public static Request parseRequest(XContentParser parser) {
TransformConfig transform = TransformConfig.PARSER.apply(parser, null);
return new Request(transform);
}
Request() {
this.transform = null;
}
public Request(TransformConfig transform) {
this.transform = transform;
}
public TransformConfig getTransform() {
return transform;
}
@Override
public ActionRequestValidationException validate() {
return null;
}
@Override
public void writeTo(StreamOutput out) throws IOException {
super.writeTo(out);
transform.writeTo(out);
}
@Override
public void readFrom(StreamInput in) throws IOException {
super.readFrom(in);
transform = new TransformConfig(in);
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
transform.toXContent(builder, params);
return builder;
}
@Override
public int hashCode() {
return Objects.hash(transform);
}
@Override
public boolean equals(Object obj) {
if (obj == null) {
return false;
}
if (getClass() != obj.getClass()) {
return false;
}
Request other = (Request) obj;
return Objects.equals(transform, other.transform);
}
}
public static class Response extends AcknowledgedResponse {
public Response() {
super();
}
public Response(boolean acknowledged) {
super(acknowledged);
}
@Override
public void readFrom(StreamInput in) throws IOException {
super.readFrom(in);
readAcknowledged(in);
}
@Override
public void writeTo(StreamOutput out) throws IOException {
super.writeTo(out);
writeAcknowledged(out);
}
}
public static class TransportAction extends HandledTransportAction<Request, Response> {
@Inject
public TransportAction(Settings settings, TransportService transportService, ClusterService clusterService, ThreadPool threadPool,
ActionFilters actionFilters, IndexNameExpressionResolver indexNameExpressionResolver) {
super(settings, ValidateTransformAction.NAME, threadPool, transportService, actionFilters, indexNameExpressionResolver,
Request::new);
}
@Override
protected void doExecute(Request request, ActionListener<Response> listener) {
TransformConfigVerifier.verify(request.getTransform());
listener.onResponse(new Response(true));
}
}
}

@@ -1,173 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.action;
import org.elasticsearch.action.Action;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.ActionRequest;
import org.elasticsearch.action.ActionRequestBuilder;
import org.elasticsearch.action.ActionRequestValidationException;
import org.elasticsearch.action.support.ActionFilters;
import org.elasticsearch.action.support.HandledTransportAction;
import org.elasticsearch.action.support.master.AcknowledgedResponse;
import org.elasticsearch.client.ElasticsearchClient;
import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.ConstructingObjectParser;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.transport.TransportService;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
import org.elasticsearch.xpack.ml.job.config.transform.verification.TransformConfigsVerifier;
import java.io.IOException;
import java.util.List;
import java.util.Objects;
public class ValidateTransformsAction
extends Action<ValidateTransformsAction.Request, ValidateTransformsAction.Response, ValidateTransformsAction.RequestBuilder> {
public static final ValidateTransformsAction INSTANCE = new ValidateTransformsAction();
public static final String NAME = "cluster:admin/ml/validate/transforms";
protected ValidateTransformsAction() {
super(NAME);
}
@Override
public RequestBuilder newRequestBuilder(ElasticsearchClient client) {
return new RequestBuilder(client, INSTANCE);
}
@Override
public Response newResponse() {
return new Response();
}
public static class RequestBuilder extends ActionRequestBuilder<Request, Response, RequestBuilder> {
protected RequestBuilder(ElasticsearchClient client, ValidateTransformsAction action) {
super(client, action, new Request());
}
}
public static class Request extends ActionRequest implements ToXContent {
public static final ParseField TRANSFORMS = new ParseField("transforms");
@SuppressWarnings("unchecked")
public static final ConstructingObjectParser<Request, Void> PARSER = new ConstructingObjectParser<>(NAME,
a -> new Request((List<TransformConfig>) a[0]));
static {
PARSER.declareObjectArray(ConstructingObjectParser.constructorArg(), TransformConfig.PARSER, TRANSFORMS);
}
private List<TransformConfig> transforms;
Request() {
this.transforms = null;
}
public Request(List<TransformConfig> transforms) {
this.transforms = transforms;
}
public List<TransformConfig> getTransforms() {
return transforms;
}
@Override
public ActionRequestValidationException validate() {
return null;
}
@Override
public void writeTo(StreamOutput out) throws IOException {
super.writeTo(out);
out.writeList(transforms);
}
@Override
public void readFrom(StreamInput in) throws IOException {
super.readFrom(in);
transforms = in.readList(TransformConfig::new);
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
builder.array(TRANSFORMS.getPreferredName(), transforms.toArray(new Object[transforms.size()]));
builder.endObject();
return builder;
}
@Override
public int hashCode() {
return Objects.hash(transforms);
}
@Override
public boolean equals(Object obj) {
if (obj == null) {
return false;
}
if (getClass() != obj.getClass()) {
return false;
}
Request other = (Request) obj;
return Objects.equals(transforms, other.transforms);
}
}
public static class Response extends AcknowledgedResponse {
public Response() {
super();
}
public Response(boolean acknowledged) {
super(acknowledged);
}
@Override
public void readFrom(StreamInput in) throws IOException {
super.readFrom(in);
readAcknowledged(in);
}
@Override
public void writeTo(StreamOutput out) throws IOException {
super.writeTo(out);
writeAcknowledged(out);
}
}
public static class TransportAction extends HandledTransportAction<Request, Response> {
@Inject
public TransportAction(Settings settings, TransportService transportService, ClusterService clusterService, ThreadPool threadPool,
ActionFilters actionFilters, IndexNameExpressionResolver indexNameExpressionResolver) {
super(settings, ValidateTransformsAction.NAME, threadPool, transportService, actionFilters, indexNameExpressionResolver,
Request::new);
}
@Override
protected void doExecute(Request request, ActionListener<Response> listener) {
TransformConfigsVerifier.verify(request.getTransforms());
listener.onResponse(new Response(true));
}
}
}

@@ -41,8 +41,7 @@ public class DataDescription extends ToXContentToBytes implements Writeable {
*/
public enum DataFormat implements Writeable {
JSON("json"),
DELIMITED("delimited"),
SINGLE_LINE("single_line");
/**
* Delimited used to be called delineated. We keep supporting that for backwards

@@ -18,9 +18,6 @@ import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser.Token;
import org.elasticsearch.xpack.ml.job.messages.Messages;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfigs;
import org.elasticsearch.xpack.ml.job.config.transform.verification.TransformConfigsVerifier;
import org.elasticsearch.xpack.ml.utils.MlStrings;
import org.elasticsearch.xpack.ml.utils.time.TimeUtils;
@@ -64,7 +61,6 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContent
public static final ParseField MODEL_SNAPSHOT_RETENTION_DAYS = new ParseField("model_snapshot_retention_days");
public static final ParseField RESULTS_RETENTION_DAYS = new ParseField("results_retention_days");
public static final ParseField TIMEOUT = new ParseField("timeout");
public static final ParseField TRANSFORMS = new ParseField("transforms");
public static final ParseField MODEL_SNAPSHOT_ID = new ParseField("model_snapshot_id");
public static final ParseField INDEX_NAME = new ParseField("index_name");
@@ -107,7 +103,6 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContent
PARSER.declareObject(Builder::setAnalysisConfig, AnalysisConfig.PARSER, ANALYSIS_CONFIG);
PARSER.declareObject(Builder::setAnalysisLimits, AnalysisLimits.PARSER, ANALYSIS_LIMITS);
PARSER.declareObject(Builder::setDataDescription, DataDescription.PARSER, DATA_DESCRIPTION);
PARSER.declareObjectArray(Builder::setTransforms, TransformConfig.PARSER, TRANSFORMS);
PARSER.declareObject(Builder::setModelDebugConfig, ModelDebugConfig.PARSER, MODEL_DEBUG_CONFIG);
PARSER.declareField(Builder::setIgnoreDowntime, (p, c) -> IgnoreDowntime.fromString(p.text()), IGNORE_DOWNTIME, ValueType.STRING);
PARSER.declareLong(Builder::setTimeout, TIMEOUT);
@@ -130,7 +125,6 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContent
private final AnalysisConfig analysisConfig;
private final AnalysisLimits analysisLimits;
private final DataDescription dataDescription;
private final List<TransformConfig> transforms;
private final ModelDebugConfig modelDebugConfig;
private final IgnoreDowntime ignoreDowntime;
private final Long renormalizationWindowDays;
@@ -143,7 +137,7 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContent
public Job(String jobId, String description, Date createTime, Date finishedTime, Date lastDataTime, long timeout,
AnalysisConfig analysisConfig, AnalysisLimits analysisLimits, DataDescription dataDescription,
List<TransformConfig> transforms, ModelDebugConfig modelDebugConfig, IgnoreDowntime ignoreDowntime,
Long renormalizationWindowDays, Long backgroundPersistInterval, Long modelSnapshotRetentionDays, Long resultsRetentionDays,
Map<String, Object> customSettings, String modelSnapshotId, String indexName) {
this.jobId = jobId;
@@ -155,7 +149,6 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContent
this.analysisConfig = analysisConfig;
this.analysisLimits = analysisLimits;
this.dataDescription = dataDescription;
this.transforms = transforms;
this.modelDebugConfig = modelDebugConfig;
this.ignoreDowntime = ignoreDowntime;
this.renormalizationWindowDays = renormalizationWindowDays;
@@ -177,7 +170,6 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContent
analysisConfig = new AnalysisConfig(in);
analysisLimits = in.readOptionalWriteable(AnalysisLimits::new);
dataDescription = in.readOptionalWriteable(DataDescription::new);
transforms = in.readList(TransformConfig::new);
modelDebugConfig = in.readOptionalWriteable(ModelDebugConfig::new);
ignoreDowntime = in.readOptionalWriteable(IgnoreDowntime::fromStream);
renormalizationWindowDays = in.readOptionalLong();
@@ -302,10 +294,6 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContent
return dataDescription;
}
public List<TransformConfig> getTransforms() {
return transforms;
}
/**
* The duration of the renormalization window in days
*
@@ -342,7 +330,7 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContent
/**
* Get a list of all input data fields mentioned in the job configuration,
* namely analysis fields, time field and transform input fields.
*
* @return the list of fields - never <code>null</code>
*/
@@ -354,16 +342,6 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContent
allFields.addAll(analysisConfig.analysisFields());
}
// transform input fields
if (transforms != null) {
for (TransformConfig tc : transforms) {
List<String> inputFields = tc.getInputs();
if (inputFields != null) {
allFields.addAll(inputFields);
}
}
}
// time field
if (dataDescription != null) {
String timeField = dataDescription.getTimeField();
@@ -399,7 +377,6 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContent
analysisConfig.writeTo(out);
out.writeOptionalWriteable(analysisLimits);
out.writeOptionalWriteable(dataDescription);
out.writeList(transforms);
out.writeOptionalWriteable(modelDebugConfig);
out.writeOptionalWriteable(ignoreDowntime);
out.writeOptionalLong(renormalizationWindowDays);
@@ -439,9 +416,6 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContent
if (dataDescription != null) {
builder.field(DATA_DESCRIPTION.getPreferredName(), dataDescription, params);
}
if (transforms != null) {
builder.field(TRANSFORMS.getPreferredName(), transforms);
}
if (modelDebugConfig != null) {
builder.field(MODEL_DEBUG_CONFIG.getPreferredName(), modelDebugConfig, params);
}
@@ -488,7 +462,7 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContent
&& (this.timeout == that.timeout)
&& Objects.equals(this.analysisConfig, that.analysisConfig)
&& Objects.equals(this.analysisLimits, that.analysisLimits) && Objects.equals(this.dataDescription, that.dataDescription)
&& Objects.equals(this.modelDebugConfig, that.modelDebugConfig) && Objects.equals(this.transforms, that.transforms)
&& Objects.equals(this.ignoreDowntime, that.ignoreDowntime)
&& Objects.equals(this.renormalizationWindowDays, that.renormalizationWindowDays)
&& Objects.equals(this.backgroundPersistInterval, that.backgroundPersistInterval)
@@ -502,7 +476,7 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContent
@Override
public int hashCode() {
return Objects.hash(jobId, description, createTime, finishedTime, lastDataTime, timeout, analysisConfig,
analysisLimits, dataDescription, modelDebugConfig, transforms, renormalizationWindowDays,
backgroundPersistInterval, modelSnapshotRetentionDays, resultsRetentionDays, ignoreDowntime, customSettings,
modelSnapshotId, indexName);
}
@@ -533,7 +507,6 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContent
private AnalysisConfig analysisConfig;
private AnalysisLimits analysisLimits;
private List<TransformConfig> transforms = new ArrayList<>();
private DataDescription dataDescription;
private Date createTime;
private Date finishedTime;
@@ -560,7 +533,6 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContent
this.id = job.getId();
this.description = job.getDescription();
this.analysisConfig = job.getAnalysisConfig();
this.transforms = job.getTransforms();
this.dataDescription = job.getDataDescription();
this.createTime = job.getCreateTime();
this.finishedTime = job.getFinishedTime();
@@ -628,10 +600,6 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContent
this.lastDataTime = lastDataTime;
}
public void setTransforms(List<TransformConfig> transforms) {
this.transforms = transforms;
}
public void setDataDescription(DataDescription.Builder description) {
dataDescription = description.build();
}
@@ -677,19 +645,6 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContent
throw new IllegalArgumentException(Messages.getMessage(Messages.JOB_CONFIG_MISSING_ANALYSISCONFIG));
}
if (transforms != null && transforms.isEmpty() == false) {
TransformConfigsVerifier.verify(transforms);
checkTransformOutputIsUsed();
} else {
if (dataDescription != null && dataDescription.getFormat() == DataDescription.DataFormat.SINGLE_LINE) {
String msg = Messages.getMessage(
Messages.JOB_CONFIG_DATAFORMAT_REQUIRES_TRANSFORM,
DataDescription.DataFormat.SINGLE_LINE);
throw new IllegalArgumentException(msg);
}
}
checkValueNotLessThan(0, "timeout", timeout);
checkValueNotLessThan(0, "renormalizationWindowDays", renormalizationWindowDays);
checkValueNotLessThan(MIN_BACKGROUND_PERSIST_INTERVAL, "backgroundPersistInterval", backgroundPersistInterval);
@@ -732,7 +687,7 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContent
return new Job(
id, description, createTime, finishedTime, lastDataTime, timeout, analysisConfig, analysisLimits,
dataDescription, transforms, modelDebugConfig, ignoreDowntime, renormalizationWindowDays,
backgroundPersistInterval, modelSnapshotRetentionDays, resultsRetentionDays, customSettings, modelSnapshotId,
indexName
);
@@ -743,41 +698,5 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContent
throw new IllegalArgumentException(Messages.getMessage(Messages.JOB_CONFIG_FIELD_VALUE_TOO_LOW, name, minVal, value));
}
}
/**
* Transform outputs should be used in either the date field,
* as an analysis field or input to another transform
*/
private boolean checkTransformOutputIsUsed() {
Set<String> usedFields = new TransformConfigs(transforms).inputFieldNames();
usedFields.addAll(analysisConfig.analysisFields());
String summaryCountFieldName = analysisConfig.getSummaryCountFieldName();
boolean isSummarised = !Strings.isNullOrEmpty(summaryCountFieldName);
if (isSummarised) {
usedFields.remove(summaryCountFieldName);
}
String timeField = dataDescription == null ? DataDescription.DEFAULT_TIME_FIELD : dataDescription.getTimeField();
usedFields.add(timeField);
for (TransformConfig tc : transforms) {
// if the type has no default outputs it doesn't need an output
boolean usesAnOutput = tc.type().defaultOutputNames().isEmpty()
|| tc.getOutputs().stream().anyMatch(outputName -> usedFields.contains(outputName));
if (isSummarised && tc.getOutputs().contains(summaryCountFieldName)) {
String msg = Messages.getMessage(Messages.JOB_CONFIG_TRANSFORM_DUPLICATED_OUTPUT_NAME, tc.type().prettyName());
throw new IllegalArgumentException(msg);
}
if (!usesAnOutput) {
String msg = Messages.getMessage(Messages.JOB_CONFIG_TRANSFORM_OUTPUTS_UNUSED,
tc.type().prettyName());
throw new IllegalArgumentException(msg);
}
}
return false;
}
}
}

@@ -1,105 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.job.config.transform;
import java.util.Objects;
public class IntRange {
public enum BoundType {
OPEN, CLOSED
}
public static class Bound {
private final int value;
private final BoundType boundType;
public Bound(int value, BoundType boundType) {
this.value = value;
this.boundType = Objects.requireNonNull(boundType);
}
}
private static String PLUS_INFINITY = "+\u221E";
private static String MINUS_INFINITY = "-\u221E";
private static char LEFT_BRACKET = '(';
private static char RIGHT_BRACKET = ')';
private static char LEFT_SQUARE_BRACKET = '[';
private static char RIGHT_SQUARE_BRACKET = ']';
private static char BOUNDS_SEPARATOR = '\u2025';
private final Bound lower;
private final Bound upper;
private IntRange(Bound lower, Bound upper) {
this.lower = Objects.requireNonNull(lower);
this.upper = Objects.requireNonNull(upper);
}
public boolean contains(int value) {
int lowerIncludedValue = lower.boundType == BoundType.CLOSED ? lower.value : lower.value + 1;
int upperIncludedValue = upper.boundType == BoundType.CLOSED ? upper.value : upper.value - 1;
return value >= lowerIncludedValue && value <= upperIncludedValue;
}
public boolean hasLowerBound() {
return lower.value != Integer.MIN_VALUE;
}
public boolean hasUpperBound() {
return upper.value != Integer.MAX_VALUE;
}
public int lower() {
return lower.value;
}
public int upper() {
return upper.value;
}
@Override
public String toString() {
StringBuilder builder = new StringBuilder();
builder.append(hasLowerBound() && lower.boundType == BoundType.CLOSED ? LEFT_SQUARE_BRACKET : LEFT_BRACKET);
builder.append(hasLowerBound() ? lower.value : MINUS_INFINITY);
builder.append(BOUNDS_SEPARATOR);
builder.append(hasUpperBound() ? upper.value : PLUS_INFINITY);
builder.append(hasUpperBound() && upper.boundType == BoundType.CLOSED ? RIGHT_SQUARE_BRACKET : RIGHT_BRACKET);
return builder.toString();
}
public static IntRange singleton(int value) {
return closed(value, value);
}
public static IntRange closed(int lower, int upper) {
return new IntRange(closedBound(lower), closedBound(upper));
}
public static IntRange open(int lower, int upper) {
return new IntRange(openBound(lower), openBound(upper));
}
public static IntRange openClosed(int lower, int upper) {
return new IntRange(openBound(lower), closedBound(upper));
}
public static IntRange closedOpen(int lower, int upper) {
return new IntRange(closedBound(lower), openBound(upper));
}
public static IntRange atLeast(int lower) {
return closed(lower, Integer.MAX_VALUE);
}
private static Bound openBound(int value) {
return new Bound(value, BoundType.OPEN);
}
private static Bound closedBound(int value) {
return new Bound(value, BoundType.CLOSED);
}
}
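
For context, the removed IntRange distinguished open and closed bounds in
contains(); a minimal Java sketch of its semantics, using only the factory
methods shown above:

    // Bound semantics of the removed IntRange (see contains() above)
    IntRange range = IntRange.closedOpen(1, 3);        // prints as "[1‥3)"
    assert range.contains(1);                          // closed lower bound includes 1
    assert !range.contains(3);                         // open upper bound excludes 3
    assert !IntRange.atLeast(2).hasUpperBound();       // upper bound is Integer.MAX_VALUE
    assert IntRange.singleton(5).toString().equals("[5‥5]");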

@@ -1,190 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.job.config.transform;
import org.elasticsearch.action.support.ToXContentToBytes;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.xcontent.ConstructingObjectParser;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.xpack.ml.job.config.Condition;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
/**
* Represents an API data transform
*/
// NORELEASE: to be replaced by ingest (https://github.com/elastic/prelert-legacy/issues/39)
public class TransformConfig extends ToXContentToBytes implements Writeable {
// Serialisation strings
public static final ParseField TYPE = new ParseField("transform");
public static final ParseField TRANSFORM = new ParseField("transform");
public static final ParseField CONDITION = new ParseField("condition");
public static final ParseField ARGUMENTS = new ParseField("arguments");
public static final ParseField INPUTS = new ParseField("inputs");
public static final ParseField OUTPUTS = new ParseField("outputs");
public static final ConstructingObjectParser<TransformConfig, Void> PARSER = new ConstructingObjectParser<>(
TYPE.getPreferredName(), objects -> new TransformConfig((String) objects[0]));
static {
PARSER.declareString(ConstructingObjectParser.constructorArg(), TYPE);
PARSER.declareStringArray(TransformConfig::setInputs, INPUTS);
PARSER.declareStringArray(TransformConfig::setArguments, ARGUMENTS);
PARSER.declareStringArray(TransformConfig::setOutputs, OUTPUTS);
PARSER.declareObject(TransformConfig::setCondition, Condition.PARSER, CONDITION);
}
private List<String> inputs;
private String type;
private List<String> arguments;
private List<String> outputs;
private Condition condition;
// lazily initialized:
private transient TransformType lazyType;
public TransformConfig(String type) {
this.type = type;
lazyType = TransformType.fromString(type);
try {
outputs = lazyType.defaultOutputNames();
} catch (IllegalArgumentException e) {
outputs = Collections.emptyList();
}
arguments = Collections.emptyList();
}
@SuppressWarnings("unchecked")
public TransformConfig(StreamInput in) throws IOException {
this(in.readString());
inputs = (List<String>) in.readGenericValue();
arguments = (List<String>) in.readGenericValue();
outputs = (List<String>) in.readGenericValue();
if (in.readBoolean()) {
condition = new Condition(in);
}
}
public List<String> getInputs() {
return inputs;
}
public void setInputs(List<String> fields) {
inputs = fields;
}
/**
* Transform type see {@linkplain TransformType.Names}
*/
public String getTransform() {
return type;
}
public List<String> getArguments() {
return arguments;
}
public void setArguments(List<String> args) {
arguments = args;
}
public List<String> getOutputs() {
return outputs;
}
public void setOutputs(List<String> outputs) {
this.outputs = outputs;
}
/**
* The condition object which may or may not be defined for this
* transform
*
* @return May be <code>null</code>
*/
public Condition getCondition() {
return condition;
}
public void setCondition(Condition condition) {
this.condition = condition;
}
/**
* This field shouldn't be serialised as it's created dynamically.
* Type may be null when the class is constructed.
*/
public TransformType type() {
return lazyType;
}
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeString(type);
out.writeGenericValue(inputs);
out.writeGenericValue(arguments);
out.writeGenericValue(outputs);
if (condition != null) {
out.writeBoolean(true);
condition.writeTo(out);
} else {
out.writeBoolean(false);
}
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
builder.field(TYPE.getPreferredName(), type);
if (inputs != null) {
builder.field(INPUTS.getPreferredName(), inputs);
}
if (arguments != null) {
builder.field(ARGUMENTS.getPreferredName(), arguments);
}
if (outputs != null) {
builder.field(OUTPUTS.getPreferredName(), outputs);
}
if (condition != null) {
builder.field(CONDITION.getPreferredName(), condition);
}
builder.endObject();
return builder;
}
@Override
public int hashCode() {
return Objects.hash(inputs, type, outputs, arguments, condition);
}
@Override
public boolean equals(Object obj) {
if (this == obj) {
return true;
}
if (obj == null) {
return false;
}
if (getClass() != obj.getClass()) {
return false;
}
TransformConfig other = (TransformConfig) obj;
return Objects.equals(this.type, other.type)
&& Objects.equals(this.inputs, other.inputs)
&& Objects.equals(this.outputs, other.outputs)
&& Objects.equals(this.arguments, other.arguments)
&& Objects.equals(this.condition, other.condition);
}
}
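
For context, constructing the removed TransformConfig populated the outputs
with the type's default output names; a short Java sketch (assumes java.util
imports):

    // Defaults applied by the removed TransformConfig constructor
    TransformConfig tc = new TransformConfig("lowercase");
    assert tc.getOutputs().equals(Collections.singletonList("lowercase")); // LOWERCASE default
    assert tc.getArguments().isEmpty();   // arguments default to an empty list
    assert tc.getInputs() == null;        // inputs are unset until setInputs(...)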

@@ -1,107 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.job.config.transform;
import org.elasticsearch.action.support.ToXContentToBytes;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.xcontent.ConstructingObjectParser;
import org.elasticsearch.common.xcontent.XContentBuilder;
import java.io.IOException;
import java.util.HashSet;
import java.util.List;
import java.util.Objects;
import java.util.Set;
/**
* Utility class for methods involving arrays of transforms
*/
public class TransformConfigs extends ToXContentToBytes implements Writeable {
public static final ParseField TRANSFORMS = new ParseField("transforms");
@SuppressWarnings("unchecked")
public static final ConstructingObjectParser<TransformConfigs, Void> PARSER = new ConstructingObjectParser<>(
TRANSFORMS.getPreferredName(), a -> new TransformConfigs((List<TransformConfig>) a[0]));
static {
PARSER.declareObjectArray(ConstructingObjectParser.constructorArg(), TransformConfig.PARSER, TRANSFORMS);
}
private List<TransformConfig> transforms;
public TransformConfigs(List<TransformConfig> transforms) {
this.transforms = Objects.requireNonNull(transforms);
}
public TransformConfigs(StreamInput in) throws IOException {
transforms = in.readList(TransformConfig::new);
}
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeList(transforms);
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
builder.field(TRANSFORMS.getPreferredName(), transforms);
builder.endObject();
return builder;
}
public List<TransformConfig> getTransforms() {
return transforms;
}
/**
* Set of all the field names that are required as inputs to transforms
*/
public Set<String> inputFieldNames() {
Set<String> fields = new HashSet<>();
for (TransformConfig t : transforms) {
fields.addAll(t.getInputs());
}
return fields;
}
/**
* Set of all the field names that are outputted (i.e. created) by
* transforms
*/
public Set<String> outputFieldNames() {
Set<String> fields = new HashSet<>();
for (TransformConfig t : transforms) {
fields.addAll(t.getOutputs());
}
return fields;
}
@Override
public int hashCode() {
return Objects.hash(transforms);
}
@Override
public boolean equals(Object obj) {
if (obj == null) {
return false;
}
if (getClass() != obj.getClass()) {
return false;
}
TransformConfigs other = (TransformConfigs) obj;
return Objects.equals(transforms, other.transforms);
}
}

@@ -1,156 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.job.config.transform;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import java.io.IOException;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.List;
import java.util.Set;
/**
* Enum type representing the different transform functions
* with functions for converting between the enum and its
* pretty name i.e. human readable string.
*/
public enum TransformType implements ToXContent, Writeable {
// Name, arity, arguments, outputs, default output names, has condition
DOMAIN_SPLIT(Names.DOMAIN_SPLIT_NAME, IntRange.singleton(1), IntRange.singleton(0),
IntRange.closed(1, 2), Arrays.asList("subDomain", "hrd")),
CONCAT(Names.CONCAT_NAME, IntRange.atLeast(2), IntRange.closed(0, 1), IntRange.singleton(1),
Arrays.asList("concat")),
REGEX_EXTRACT(Names.EXTRACT_NAME, IntRange.singleton(1), IntRange.singleton(1), IntRange.atLeast(1),
Arrays.asList("extract"), false),
REGEX_SPLIT(Names.SPLIT_NAME, IntRange.singleton(1), IntRange.singleton(1), IntRange.atLeast(1),
Arrays.asList("split"), false),
EXCLUDE(Names.EXCLUDE_NAME, IntRange.atLeast(1), IntRange.singleton(0), IntRange.singleton(0),
Arrays.asList(), true),
LOWERCASE(Names.LOWERCASE_NAME, IntRange.singleton(1), IntRange.singleton(0), IntRange.singleton(1),
Arrays.asList("lowercase")),
UPPERCASE(Names.UPPERCASE_NAME, IntRange.singleton(1), IntRange.singleton(0), IntRange.singleton(1),
Arrays.asList("uppercase")),
TRIM(Names.TRIM_NAME, IntRange.singleton(1), IntRange.singleton(0), IntRange.singleton(1),
Arrays.asList("trim"));
/**
* Transform names.
*
* Enums cannot use static fields in their constructors as the
* enum values are initialised before the statics.
* Having the static fields in a nested class means they are created
* when required.
*/
public class Names {
public static final String DOMAIN_SPLIT_NAME = "domain_split";
public static final String CONCAT_NAME = "concat";
public static final String EXTRACT_NAME = "extract";
public static final String SPLIT_NAME = "split";
public static final String EXCLUDE_NAME = "exclude";
public static final String LOWERCASE_NAME = "lowercase";
public static final String UPPERCASE_NAME = "uppercase";
public static final String TRIM_NAME = "trim";
private Names() {
}
}
private final IntRange arityRange;
private final IntRange argumentsRange;
private final IntRange outputsRange;
private final String prettyName;
private final List<String> defaultOutputNames;
private final boolean hasCondition;
TransformType(String prettyName, IntRange arityIntRange,
IntRange argumentsIntRange, IntRange outputsIntRange,
List<String> defaultOutputNames) {
this(prettyName, arityIntRange, argumentsIntRange, outputsIntRange, defaultOutputNames, false);
}
TransformType(String prettyName, IntRange arityIntRange,
IntRange argumentsIntRange, IntRange outputsIntRange,
List<String> defaultOutputNames, boolean hasCondition) {
this.arityRange = arityIntRange;
this.argumentsRange = argumentsIntRange;
this.outputsRange = outputsIntRange;
this.prettyName = prettyName;
this.defaultOutputNames = defaultOutputNames;
this.hasCondition = hasCondition;
}
/**
* The count IntRange of inputs the transform expects.
*/
public IntRange arityRange() {
return this.arityRange;
}
/**
* The count IntRange of arguments the transform expects.
*/
public IntRange argumentsRange() {
return this.argumentsRange;
}
/**
* The count IntRange of outputs the transform expects.
*/
public IntRange outputsRange() {
return this.outputsRange;
}
public String prettyName() {
return prettyName;
}
public List<String> defaultOutputNames() {
return defaultOutputNames;
}
public boolean hasCondition() {
return hasCondition;
}
@Override
public String toString() {
return prettyName();
}
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeVInt(ordinal());
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.value(prettyName);
return builder;
}
/**
* Get the enum for the given pretty name.
* The static function valueOf() cannot be overridden so use
* this method instead when converting from the pretty name
* to the enum.
*/
public static TransformType fromString(String prettyName) throws IllegalArgumentException {
Set<TransformType> all = EnumSet.allOf(TransformType.class);
for (TransformType type : all) {
if (type.prettyName().equals(prettyName)) {
return type;
}
}
throw new IllegalArgumentException("Unknown [transformType]: [" + prettyName + "]");
}
}

@@ -1,15 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.job.config.transform.verification;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
@FunctionalInterface
public interface ArgumentVerifier {
void verify(String argument, TransformConfig tc) throws ElasticsearchParseException;
}

@@ -1,30 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.job.config.transform.verification;
import org.elasticsearch.xpack.ml.job.messages.Messages;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
import java.util.List;
import java.util.regex.Pattern;
public class RegexExtractVerifier implements ArgumentVerifier {
@Override
public void verify(String arg, TransformConfig tc) {
new RegexPatternVerifier().verify(arg, tc);
Pattern pattern = Pattern.compile(arg);
int groupCount = pattern.matcher("").groupCount();
List<String> outputs = tc.getOutputs();
int outputCount = outputs == null ? 0 : outputs.size();
if (groupCount != outputCount) {
String msg = Messages.getMessage(Messages.JOB_CONFIG_TRANSFORM_EXTRACT_GROUPS_SHOULD_MATCH_OUTPUT_COUNT,
tc.getTransform(), outputCount, arg, groupCount);
throw new IllegalArgumentException(msg);
}
}
}

@@ -1,25 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.job.config.transform.verification;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.xpack.ml.job.messages.Messages;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
public class RegexPatternVerifier implements ArgumentVerifier {
@Override
public void verify(String arg, TransformConfig tc) throws ElasticsearchParseException {
try {
Pattern.compile(arg);
} catch (PatternSyntaxException e) {
String msg = Messages.getMessage(Messages.JOB_CONFIG_TRANSFORM_INVALID_ARGUMENT, tc.getTransform(), arg);
throw new IllegalArgumentException(msg);
}
}
}

@@ -1,149 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.job.config.transform.verification;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.xpack.ml.job.messages.Messages;
import org.elasticsearch.xpack.ml.job.config.transform.IntRange;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
import org.elasticsearch.xpack.ml.job.config.transform.TransformType;
import java.util.List;
public final class TransformConfigVerifier {
private TransformConfigVerifier() {
// Hide default constructor
}
/**
* Checks the transform configuration is valid
* <ol>
* <li>Checks there are the correct number of inputs for a given transform
* type and that those inputs are not empty strings</li>
* <li>Check the number of arguments is correct for the transform type and
* verify the argument (i.e. is it a valid regex)</li>
* <li>Check there is a valid number of outputs for the transform type and
* those outputs are not empty strings</li>
* <li>If the transform has a condition verify it</li>
* </ol>
*/
public static boolean verify(TransformConfig tc) throws ElasticsearchParseException {
TransformType type;
try {
type = tc.type();
} catch (IllegalArgumentException e) {
throw new ElasticsearchParseException(Messages.getMessage(Messages.JOB_CONFIG_TRANSFORM_UNKNOWN_TYPE, tc.getTransform()));
}
checkCondition(tc, type);
checkInputs(tc, type);
checkArguments(tc, type);
checkOutputs(tc, type);
return true;
}
private static void checkCondition(TransformConfig tc, TransformType type) {
if (type.hasCondition()) {
if (tc.getCondition() == null) {
throw new IllegalArgumentException(
Messages.getMessage(Messages.JOB_CONFIG_TRANSFORM_CONDITION_REQUIRED, type.prettyName()));
}
}
}
private static void checkInputs(TransformConfig tc, TransformType type) {
List<String> inputs = tc.getInputs();
checkValidInputCount(tc, type, inputs);
checkInputsAreNonEmptyStrings(tc, inputs);
}
private static void checkValidInputCount(TransformConfig tc, TransformType type, List<String> inputs) {
int inputsSize = (inputs == null) ? 0 : inputs.size();
if (!type.arityRange().contains(inputsSize)) {
String msg = Messages.getMessage(Messages.JOB_CONFIG_TRANSFORM_INVALID_INPUT_COUNT,
tc.getTransform(), rangeAsString(type.arityRange()), inputsSize);
throw new IllegalArgumentException(msg);
}
}
private static void checkInputsAreNonEmptyStrings(TransformConfig tc, List<String> inputs) {
if (containsEmptyString(inputs)) {
String msg = Messages.getMessage(Messages.JOB_CONFIG_TRANSFORM_INPUTS_CONTAIN_EMPTY_STRING, tc.getTransform());
throw new IllegalArgumentException(msg);
}
}
private static boolean containsEmptyString(List<String> strings) {
return strings.stream().anyMatch(s -> s.trim().isEmpty());
}
private static void checkArguments(TransformConfig tc, TransformType type) {
checkArgumentsCountValid(tc, type);
checkArgumentsValid(tc, type);
}
private static void checkArgumentsCountValid(TransformConfig tc, TransformType type) {
List<String> arguments = tc.getArguments();
int argumentsSize = (arguments == null) ? 0 : arguments.size();
if (!type.argumentsRange().contains(argumentsSize)) {
String msg = Messages.getMessage(Messages.JOB_CONFIG_TRANSFORM_INVALID_ARGUMENT_COUNT,
tc.getTransform(), rangeAsString(type.argumentsRange()), argumentsSize);
throw new IllegalArgumentException(msg);
}
}
private static void checkArgumentsValid(TransformConfig tc, TransformType type) {
if (tc.getArguments() != null) {
ArgumentVerifier av = argumentVerifierForType(type);
for (String argument : tc.getArguments()) {
av.verify(argument, tc);
}
}
}
private static ArgumentVerifier argumentVerifierForType(TransformType type) {
switch (type) {
case REGEX_EXTRACT:
return new RegexExtractVerifier();
case REGEX_SPLIT:
return new RegexPatternVerifier();
default:
return (argument, config) -> {};
}
}
private static void checkOutputs(TransformConfig tc, TransformType type) {
List<String> outputs = tc.getOutputs();
checkValidOutputCount(tc, type, outputs);
checkOutputsAreNonEmptyStrings(tc, outputs);
}
private static void checkValidOutputCount(TransformConfig tc, TransformType type, List<String> outputs) {
int outputsSize = (outputs == null) ? 0 : outputs.size();
if (!type.outputsRange().contains(outputsSize)) {
String msg = Messages.getMessage(Messages.JOB_CONFIG_TRANSFORM_INVALID_OUTPUT_COUNT,
tc.getTransform(), rangeAsString(type.outputsRange()), outputsSize);
throw new IllegalArgumentException(msg);
}
}
private static void checkOutputsAreNonEmptyStrings(TransformConfig tc, List<String> outputs) {
if (containsEmptyString(outputs)) {
String msg = Messages.getMessage(
Messages.JOB_CONFIG_TRANSFORM_OUTPUTS_CONTAIN_EMPTY_STRING, tc.getTransform());
throw new IllegalArgumentException(msg);
}
}
private static String rangeAsString(IntRange range) {
if (range.hasLowerBound() && range.hasUpperBound() && range.lower() == range.upper()) {
return String.valueOf(range.lower());
}
return range.toString();
}
}
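
For context, a Java sketch of how the removed verifier rejected a bad
configuration (concat requires at least two inputs, per TransformType; assumes
java.util imports):

    // The removed TransformConfigVerifier throws on an invalid input count
    TransformConfig concat = new TransformConfig("concat");
    concat.setInputs(Collections.singletonList("one_field"));
    // throws IllegalArgumentException: concat expects at least 2 inputs
    TransformConfigVerifier.verify(concat);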

@@ -1,120 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.job.config.transform.verification;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.xpack.ml.job.messages.Messages;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
public class TransformConfigsVerifier {
private TransformConfigsVerifier() {
}
/**
* Checks the transform configurations are valid
* <ol>
* <li>Call {@linkplain TransformConfigVerifier#verify(TransformConfig)} on each transform</li>
* <li>Check all the transform output field names are unique</li>
* <li>Check there are no circular dependencies in the transforms</li>
* </ol>
*/
public static boolean verify(List<TransformConfig> transforms) throws ElasticsearchParseException {
for (TransformConfig tr : transforms) {
TransformConfigVerifier.verify(tr);
}
String duplicatedName = outputNamesAreUnique(transforms);
if (duplicatedName != null) {
String msg = Messages.getMessage(Messages.JOB_CONFIG_TRANSFORM_OUTPUT_NAME_USED_MORE_THAN_ONCE, duplicatedName);
throw new IllegalArgumentException(msg);
}
// Check for circular dependencies
int index = checkForCircularDependencies(transforms);
if (index >= 0) {
TransformConfig tc = transforms.get(index);
String msg = Messages.getMessage(Messages.JOB_CONFIG_TRANSFORM_CIRCULAR_DEPENDENCY, tc.type(), tc.getInputs());
throw new IllegalArgumentException(msg);
}
return true;
}
/**
* Returns null if all transform output names are unique,
* or the first duplicated name if there are duplicates
*/
private static String outputNamesAreUnique(List<TransformConfig> transforms) {
Set<String> fields = new HashSet<>();
for (TransformConfig t : transforms) {
for (String output : t.getOutputs()) {
if (fields.contains(output)) {
return output;
}
fields.add(output);
}
}
return null;
}
/**
* Find circular dependencies in the list of transforms.
* This might be because a transform's input is its output
* or because of a transitive dependency.
*
* If there is a circular dependency the index of the transform
* in the <code>transforms</code> list at the start of the chain
* is returned else -1
*
* @return -1 if no circular dependencies else the index of the
* transform at the start of the circular chain
*/
public static int checkForCircularDependencies(List<TransformConfig> transforms) {
for (int i=0; i<transforms.size(); i++) {
Set<Integer> chain = new HashSet<>();
chain.add(i);
TransformConfig tc = transforms.get(i);
if (checkCircularDependenciesRecursive(tc, transforms, chain) == false) {
return i;
}
}
return -1;
}
private static boolean checkCircularDependenciesRecursive(TransformConfig transform, List<TransformConfig> transforms,
Set<Integer> chain) {
boolean result = true;
for (int i=0; i<transforms.size(); i++) {
TransformConfig tc = transforms.get(i);
for (String input : transform.getInputs()) {
if (tc.getOutputs().contains(input)) {
Integer index = i;
if (chain.contains(index)) {
return false;
}
chain.add(index);
result = result && checkCircularDependenciesRecursive(tc, transforms, chain);
}
}
}
return result;
}
}
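
The circular-dependency check above walks each transform's input chain and reports the index of the first transform whose chain revisits itself. A standalone sketch of the same idea, with TransformConfig replaced by a hypothetical minimal Step type so the snippet is self-contained:

import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

// Editor's sketch of the cycle check above; Step is a hypothetical stand-in
// for TransformConfig, carrying only input and output field names.
public class CircularDependencySketch {
    static class Step {
        final List<String> inputs;
        final List<String> outputs;
        Step(List<String> inputs, List<String> outputs) {
            this.inputs = inputs;
            this.outputs = outputs;
        }
    }

    // Returns the index of the first step that starts a circular chain, or -1.
    static int findCircularDependency(List<Step> steps) {
        for (int i = 0; i < steps.size(); i++) {
            Set<Integer> chain = new HashSet<>();
            chain.add(i);
            if (isAcyclic(steps.get(i), steps, chain) == false) {
                return i;
            }
        }
        return -1;
    }

    private static boolean isAcyclic(Step step, List<Step> steps, Set<Integer> chain) {
        for (int i = 0; i < steps.size(); i++) {
            Step producer = steps.get(i);
            for (String input : step.inputs) {
                if (producer.outputs.contains(input)) {
                    if (chain.add(i) == false) {
                        return false; // producer already in the chain: a cycle
                    }
                    if (isAcyclic(producer, steps, chain) == false) {
                        return false;
                    }
                }
            }
        }
        return true;
    }

    public static void main(String[] args) {
        // a -> b and b -> a form a cycle, so index 0 is reported
        Step toB = new Step(Arrays.asList("a"), Arrays.asList("b"));
        Step toA = new Step(Arrays.asList("b"), Arrays.asList("a"));
        System.out.println(findCircularDependency(Arrays.asList(toB, toA))); // 0
    }
}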

View File

@@ -76,7 +76,6 @@ public final class Messages {
     public static final String JOB_CONFIG_CONDITION_INVALID_VALUE_NUMBER = "job.config.condition.invalid.value.numeric";
     public static final String JOB_CONFIG_CONDITION_INVALID_VALUE_REGEX = "job.config.condition.invalid.value.regex";
     public static final String JOB_CONFIG_CONDITION_UNKNOWN_OPERATOR = "job.config.condition.unknown.operator";
-    public static final String JOB_CONFIG_DATAFORMAT_REQUIRES_TRANSFORM = "job.config.dataformat.requires.transform";
     public static final String JOB_CONFIG_DETECTION_RULE_CONDITION_CATEGORICAL_INVALID_OPTION = "job.config.detectionrule.condition."
             + "categorical.invalid.option";
     public static final String JOB_CONFIG_DETECTION_RULE_CONDITION_CATEGORICAL_MISSING_OPTION = "job.config.detectionrule.condition."
@@ -158,21 +157,6 @@ public final class Messages {
     public static final String JOB_CONFIG_UPDATE_DATAFEED_CONFIG_PARSE_ERROR = "job.config.update.datafeed.config.parse.error";
     public static final String JOB_CONFIG_UPDATE_DATAFEED_CONFIG_CANNOT_BE_NULL = "job.config.update.datafeed.config.cannot.be.null";
-    public static final String JOB_CONFIG_TRANSFORM_CIRCULAR_DEPENDENCY = "job.config.transform.circular.dependency";
-    public static final String JOB_CONFIG_TRANSFORM_CONDITION_REQUIRED = "job.config.transform.condition.required";
-    public static final String JOB_CONFIG_TRANSFORM_DUPLICATED_OUTPUT_NAME = "job.config.transform.duplicated.output.name";
-    public static final String JOB_CONFIG_TRANSFORM_EXTRACT_GROUPS_SHOULD_MATCH_OUTPUT_COUNT = "job.config.transform.extract.groups.should."
-            + "match.output.count";
-    public static final String JOB_CONFIG_TRANSFORM_INPUTS_CONTAIN_EMPTY_STRING = "job.config.transform.inputs.contain.empty.string";
-    public static final String JOB_CONFIG_TRANSFORM_INVALID_ARGUMENT = "job.config.transform.invalid.argument";
-    public static final String JOB_CONFIG_TRANSFORM_INVALID_ARGUMENT_COUNT = "job.config.transform.invalid.argument.count";
-    public static final String JOB_CONFIG_TRANSFORM_INVALID_INPUT_COUNT = "job.config.transform.invalid.input.count";
-    public static final String JOB_CONFIG_TRANSFORM_INVALID_OUTPUT_COUNT = "job.config.transform.invalid.output.count";
-    public static final String JOB_CONFIG_TRANSFORM_OUTPUTS_CONTAIN_EMPTY_STRING = "job.config.transform.outputs.contain.empty.string";
-    public static final String JOB_CONFIG_TRANSFORM_OUTPUTS_UNUSED = "job.config.transform.outputs.unused";
-    public static final String JOB_CONFIG_TRANSFORM_OUTPUT_NAME_USED_MORE_THAN_ONCE = "job.config.transform.output.name.used.more.than"
-            + ".once";
-    public static final String JOB_CONFIG_TRANSFORM_UNKNOWN_TYPE = "job.config.transform.unknown.type";
     public static final String JOB_CONFIG_UNKNOWN_FUNCTION = "job.config.unknown.function";
     public static final String JOB_INDEX_ALREADY_EXISTS = "job.index.already.exists";
@@ -207,9 +191,6 @@ public final class Messages {
     public static final String JSON_DETECTOR_CONFIG_MAPPING = "json.detector.config.mapping.error";
     public static final String JSON_DETECTOR_CONFIG_PARSE = "json.detector.config.parse.error";
-    public static final String JSON_TRANSFORM_CONFIG_MAPPING = "json.transform.config.mapping.error";
-    public static final String JSON_TRANSFORM_CONFIG_PARSE = "json.transform.config.parse.error";
     public static final String REST_ACTION_NOT_ALLOWED_FOR_DATAFEED_JOB = "rest.action.not.allowed.for.datafeed.job";
     public static final String REST_INVALID_DATETIME_PARAMS = "rest.invalid.datetime.params";

View File

@@ -24,7 +24,6 @@ import org.elasticsearch.xpack.ml.job.process.autodetect.params.InterimResultsParams;
 import org.elasticsearch.xpack.ml.job.process.autodetect.writer.DataToProcessWriter;
 import org.elasticsearch.xpack.ml.job.process.autodetect.writer.DataToProcessWriterFactory;
 import org.elasticsearch.xpack.ml.job.process.CountingInputStream;
-import org.elasticsearch.xpack.ml.job.config.transform.TransformConfigs;
 import org.elasticsearch.xpack.ml.utils.ExceptionsHelper;
 import java.io.Closeable;
@@ -77,7 +76,7 @@ public class AutodetectCommunicator implements Closeable {
     private DataToProcessWriter createProcessWriter(Optional<DataDescription> dataDescription) {
         return DataToProcessWriterFactory.create(true, autodetectProcess, dataDescription.orElse(job.getDataDescription()),
-                job.getAnalysisConfig(), new TransformConfigs(job.getTransforms()), dataCountsReporter, LOGGER);
+                job.getAnalysisConfig(), dataCountsReporter);
     }
     public DataCounts writeToJob(InputStream inputStream, DataLoadParams params) throws IOException {

View File

@@ -10,17 +10,6 @@ import org.elasticsearch.xpack.ml.job.config.AnalysisConfig;
 import org.elasticsearch.xpack.ml.job.config.DataDescription;
 import org.elasticsearch.xpack.ml.job.process.autodetect.AutodetectProcess;
 import org.elasticsearch.xpack.ml.job.process.DataCountsReporter;
-import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
-import org.elasticsearch.xpack.ml.job.config.transform.TransformConfigs;
-import org.elasticsearch.xpack.ml.transforms.DependencySorter;
-import org.elasticsearch.xpack.ml.transforms.Transform;
-import org.elasticsearch.xpack.ml.transforms.Transform.TransformIndex;
-import org.elasticsearch.xpack.ml.transforms.Transform.TransformResult;
-import org.elasticsearch.xpack.ml.transforms.TransformException;
-import org.elasticsearch.xpack.ml.transforms.TransformFactory;
-import org.elasticsearch.xpack.ml.transforms.date.DateFormatTransform;
-import org.elasticsearch.xpack.ml.transforms.date.DateTransform;
-import org.elasticsearch.xpack.ml.transforms.date.DoubleDateTransform;
 import java.io.IOException;
 import java.util.ArrayList;
@@ -39,46 +28,36 @@ import java.util.Set;
 public abstract class AbstractDataToProcessWriter implements DataToProcessWriter {
-    protected static final int TIME_FIELD_OUT_INDEX = 0;
-    private static final int MS_IN_SECOND = 1000;
-    protected final boolean includeControlField;
+    private static final int TIME_FIELD_OUT_INDEX = 0;
+    private static final long MS_IN_SECOND = 1000;
+    private final boolean includeControlField;
     protected final AutodetectProcess autodetectProcess;
     protected final DataDescription dataDescription;
     protected final AnalysisConfig analysisConfig;
     protected final DataCountsReporter dataCountsReporter;
-    protected final Logger logger;
-    protected final TransformConfigs transformConfigs;
-    protected List<Transform> dateInputTransforms;
-    protected DateTransform dateTransform;
-    protected List<Transform> postDateTransforms;
+    private final Logger logger;
+    private final DateTransformer dateTransformer;
     protected Map<String, Integer> inFieldIndexes;
     protected List<InputOutputMap> inputOutputMap;
-    private String[] scratchArea;
-    private String[][] readWriteArea;
     // epoch in seconds
     private long latestEpochMs;
     private long latestEpochMsThisUpload;
     protected AbstractDataToProcessWriter(boolean includeControlField, AutodetectProcess autodetectProcess,
                                           DataDescription dataDescription, AnalysisConfig analysisConfig,
-                                          TransformConfigs transformConfigs, DataCountsReporter dataCountsReporter, Logger logger) {
+                                          DataCountsReporter dataCountsReporter, Logger logger) {
         this.includeControlField = includeControlField;
         this.autodetectProcess = Objects.requireNonNull(autodetectProcess);
         this.dataDescription = Objects.requireNonNull(dataDescription);
         this.analysisConfig = Objects.requireNonNull(analysisConfig);
         this.dataCountsReporter = Objects.requireNonNull(dataCountsReporter);
         this.logger = Objects.requireNonNull(logger);
-        this.transformConfigs = Objects.requireNonNull(transformConfigs);
-        postDateTransforms = new ArrayList<>();
-        dateInputTransforms = new ArrayList<>();
         Date date = dataCountsReporter.getLatestRecordTime();
         latestEpochMsThisUpload = 0;
         latestEpochMs = 0;
@@ -86,75 +65,39 @@ public abstract class AbstractDataToProcessWriter implements DataToProcessWriter
             latestEpochMs = date.getTime();
         }
-        readWriteArea = new String[3][];
+        boolean isDateFormatString = dataDescription.isTransformTime() && !dataDescription.isEpochMs();
+        if (isDateFormatString) {
+            dateTransformer = new DateFormatDateTransformer(dataDescription.getTimeFormat());
+        } else {
+            dateTransformer = new DoubleDateTransformer(dataDescription.isEpochMs());
+        }
     }
     /**
-     * Create the transforms. This must be called before
-     * {@linkplain DataToProcessWriter#write(java.io.InputStream)}
-     * even if no transforms are configured as it creates the
-     * date transform and sets up the field mappings.<br>
+     * Set up the field index mappings.
+     * This must be called before {@linkplain DataToProcessWriter#write(java.io.InputStream)}.
      * <p>
     * Finds the required input indexes in the <code>header</code>
-     * and sets the mappings for the transforms so they know where
-     * to read their inputs and write outputs.
-     * <p>
-     * Transforms can be chained so some write their outputs to
-     * a scratch area which is input to another transform
+     * and sets the mappings to the corresponding output indexes.
     */
-    public void buildTransforms(String[] header) throws IOException {
+    void buildFieldIndexMapping(String[] header) throws IOException {
         Collection<String> inputFields = inputFields();
         inFieldIndexes = inputFieldIndexes(header, inputFields);
         checkForMissingFields(inputFields, inFieldIndexes, header);
-        Map<String, Integer> outFieldIndexes = outputFieldIndexes();
         inputOutputMap = createInputOutputMap(inFieldIndexes);
         dataCountsReporter.setAnalysedFieldsPerRecord(analysisConfig.analysisFields().size());
-        Map<String, Integer> scratchAreaIndexes = scratchAreaIndexes(inputFields, outputFields(),
-                dataDescription.getTimeField());
-        scratchArea = new String[scratchAreaIndexes.size()];
-        readWriteArea[TransformFactory.SCRATCH_ARRAY_INDEX] = scratchArea;
-        buildDateTransform(scratchAreaIndexes, outFieldIndexes);
-        List<TransformConfig> dateInputTransforms = DependencySorter.findDependencies(
-                dataDescription.getTimeField(), transformConfigs.getTransforms());
-        TransformFactory transformFactory = new TransformFactory();
-        for (TransformConfig config : dateInputTransforms) {
-            Transform tr = transformFactory.create(config, inFieldIndexes, scratchAreaIndexes,
-                    outFieldIndexes, logger);
-            this.dateInputTransforms.add(tr);
-        }
-        // get the transforms that don't input into the date
-        List<TransformConfig> postDateTransforms = new ArrayList<>();
-        for (TransformConfig tc : transformConfigs.getTransforms()) {
-            if (dateInputTransforms.contains(tc) == false) {
-                postDateTransforms.add(tc);
-            }
-        }
-        postDateTransforms = DependencySorter.sortByDependency(postDateTransforms);
-        for (TransformConfig config : postDateTransforms) {
-            Transform tr = transformFactory.create(config, inFieldIndexes, scratchAreaIndexes,
-                    outFieldIndexes, logger);
-            this.postDateTransforms.add(tr);
-        }
     }
     /**
      * Write the header.
-     * The header is created from the list of analysis input fields,
-     * the time field and the control field
+     * The header is created from the list of analysis input fields, the time field and the control field.
      */
     @Override
     public void writeHeader() throws IOException {
         Map<String, Integer> outFieldIndexes = outputFieldIndexes();
         // header is all the analysis input fields + the time field + control field
         int numFields = outFieldIndexes.size();
         String[] record = new String[numFields];
@@ -168,39 +111,6 @@ public abstract class AbstractDataToProcessWriter implements DataToProcessWriter
         autodetectProcess.writeRecord(record);
     }
-    protected void buildDateTransform(Map<String, Integer> scratchAreaIndexes, Map<String, Integer> outFieldIndexes) {
-        List<TransformIndex> readIndexes = new ArrayList<>();
-        Integer index = inFieldIndexes.get(dataDescription.getTimeField());
-        if (index != null) {
-            readIndexes.add(new TransformIndex(TransformFactory.INPUT_ARRAY_INDEX, index));
-        } else {
-            index = outFieldIndexes.get(dataDescription.getTimeField());
-            if (index != null) {
-                // date field could also be an output field
-                readIndexes.add(new TransformIndex(TransformFactory.OUTPUT_ARRAY_INDEX, index));
-            } else if (scratchAreaIndexes.containsKey(dataDescription.getTimeField())) {
-                index = scratchAreaIndexes.get(dataDescription.getTimeField());
-                readIndexes.add(new TransformIndex(TransformFactory.SCRATCH_ARRAY_INDEX, index));
-            } else {
-                throw new IllegalStateException(
-                        String.format(Locale.ROOT, "Transform input date field '%s' not found",
-                                dataDescription.getTimeField()));
-            }
-        }
-        List<TransformIndex> writeIndexes = new ArrayList<>();
-        writeIndexes.add(new TransformIndex(TransformFactory.OUTPUT_ARRAY_INDEX,
-                outFieldIndexes.get(dataDescription.getTimeField())));
-        boolean isDateFormatString = dataDescription.isTransformTime() && !dataDescription.isEpochMs();
-        if (isDateFormatString) {
-            dateTransform = new DateFormatTransform(dataDescription.getTimeFormat(), readIndexes, writeIndexes, logger);
-        } else {
-            dateTransform = new DoubleDateTransform(dataDescription.isEpochMs(), readIndexes, writeIndexes, logger);
-        }
-    }
     /**
      * Transform the input data and write to length encoded writer.<br>
      * <p>
@@ -210,33 +120,21 @@ public abstract class AbstractDataToProcessWriter implements DataToProcessWriter
      * First all the transforms whose outputs the Date transform relies
      * on are executed then the date transform then the remaining transforms.
      *
-     * @param input The record the transforms should read their input from. The contents should
-     *              align with the header parameter passed to {@linkplain #buildTransforms(String[])}
-     * @param output The record that will be written to the length encoded writer.
+     * @param record The record that will be written to the length encoded writer after the time has been transformed.
      *               This should be the same size as the number of output (analysis fields) i.e.
      *               the size of the map returned by {@linkplain #outputFieldIndexes()}
      * @param numberOfFieldsRead The total number read not just those included in the analysis
      */
-    protected boolean applyTransformsAndWrite(String[] input, String[] output, long numberOfFieldsRead)
-            throws IOException {
-        readWriteArea[TransformFactory.INPUT_ARRAY_INDEX] = input;
-        readWriteArea[TransformFactory.OUTPUT_ARRAY_INDEX] = output;
-        Arrays.fill(readWriteArea[TransformFactory.SCRATCH_ARRAY_INDEX], "");
-        if (!applyTransforms(dateInputTransforms, numberOfFieldsRead)) {
-            return false;
-        }
+    protected boolean transformTimeAndWrite(String[] record, long numberOfFieldsRead) throws IOException {
+        long epochMs;
         try {
-            dateTransform.transform(readWriteArea);
-        } catch (TransformException e) {
+            epochMs = dateTransformer.transform(record[TIME_FIELD_OUT_INDEX]);
+        } catch (CannotParseTimestampException e) {
             dataCountsReporter.reportDateParseError(numberOfFieldsRead);
             logger.error(e.getMessage());
             return false;
         }
-        long epochMs = dateTransform.epochMs();
         // Records have epoch seconds timestamp so compare for out of order in seconds
         if (epochMs / MS_IN_SECOND < latestEpochMs / MS_IN_SECOND - analysisConfig.getLatency()) {
             // out of order
@@ -250,38 +148,17 @@ public abstract class AbstractDataToProcessWriter implements DataToProcessWriter
             return false;
         }
-        // Now do the rest of the transforms
-        if (!applyTransforms(postDateTransforms, numberOfFieldsRead)) {
-            return false;
-        }
+        record[TIME_FIELD_OUT_INDEX] = Long.toString(epochMs / MS_IN_SECOND);
         latestEpochMs = Math.max(latestEpochMs, epochMs);
         latestEpochMsThisUpload = latestEpochMs;
-        autodetectProcess.writeRecord(output);
+        autodetectProcess.writeRecord(record);
         dataCountsReporter.reportRecordWritten(numberOfFieldsRead, latestEpochMs);
         return true;
     }
-    /**
-     * If false then the transform is excluded
-     */
-    private boolean applyTransforms(List<Transform> transforms, long inputFieldCount) {
-        for (Transform tr : transforms) {
-            try {
-                TransformResult result = tr.transform(readWriteArea);
-                if (result == TransformResult.EXCLUDE) {
-                    return false;
-                }
-            } catch (TransformException e) {
-                logger.warn(e);
-            }
-        }
-        return true;
-    }
     @Override
     public void flush() throws IOException {
         autodetectProcess.flushStream();
@@ -289,16 +166,11 @@ public abstract class AbstractDataToProcessWriter implements DataToProcessWriter
     /**
      * Get all the expected input fields i.e. all the fields we
-     * must see in the csv header.
-     * = transform input fields + analysis fields that aren't a transform output
-     * + the date field - the transform output field names
+     * must see in the csv header
     */
-    public final Collection<String> inputFields() {
+    final Collection<String> inputFields() {
         Set<String> requiredFields = new HashSet<>(analysisConfig.analysisFields());
         requiredFields.add(dataDescription.getTimeField());
-        requiredFields.addAll(transformConfigs.inputFieldNames());
-        requiredFields.removeAll(transformConfigs.outputFieldNames()); // inputs not in a transform
         return requiredFields;
     }
@@ -321,21 +193,10 @@ public abstract class AbstractDataToProcessWriter implements DataToProcessWriter
         return fieldIndexes;
     }
-    public Map<String, Integer> getInputFieldIndexes() {
+    Map<String, Integer> getInputFieldIndexes() {
         return inFieldIndexes;
     }
-    /**
-     * This output fields are the time field and all the fields
-     * configured for analysis
-     */
-    public final Collection<String> outputFields() {
-        List<String> outputFields = new ArrayList<>(analysisConfig.analysisFields());
-        outputFields.add(dataDescription.getTimeField());
-        return outputFields;
-    }
     /**
      * Create indexes of the output fields.
      * This is the time field and all the fields configured for analysis
@@ -368,7 +229,7 @@ public abstract class AbstractDataToProcessWriter implements DataToProcessWriter
      * The number of fields used in the analysis field,
      * the time field and (sometimes) the control field
      */
-    public int outputFieldCount() {
+    protected int outputFieldCount() {
         return analysisConfig.analysisFields().size() + (includeControlField ? 2 : 1);
     }
@@ -376,63 +237,28 @@ public abstract class AbstractDataToProcessWriter implements DataToProcessWriter
         return outputFieldIndexes();
     }
     /**
-     * Find all the scratch area fields. These are those that are input to a
-     * transform but are not written to the output or read from input. i.e. for
-     * the case where a transforms output is used exclusively by another
-     * transform
+     * Create a map of input index to output index. This does not include the time or control fields.
      *
-     * @param inputFields
-     *            Fields we expect in the header
-     * @param outputFields
-     *            Fields that are written to the analytics
-     * @param dateTimeField date field
+     * @param inFieldIndexes Map of field name to index in the input array
      */
-    protected final Map<String, Integer> scratchAreaIndexes(Collection<String> inputFields, Collection<String> outputFields,
-            String dateTimeField) {
-        Set<String> requiredFields = new HashSet<>(transformConfigs.outputFieldNames());
-        boolean dateTimeFieldIsTransformOutput = requiredFields.contains(dateTimeField);
-        requiredFields.addAll(transformConfigs.inputFieldNames());
-        requiredFields.removeAll(inputFields);
-        requiredFields.removeAll(outputFields);
-        // date time is a output of a transform AND the input to the date time transform
-        // so add it back into the scratch area
-        if (dateTimeFieldIsTransformOutput) {
-            requiredFields.add(dateTimeField);
-        }
-        int index = 0;
-        Map<String, Integer> result = new HashMap<String, Integer>();
-        for (String field : requiredFields) {
-            result.put(field, new Integer(index++));
-        }
-        return result;
-    }
-    /**
-     * For inputs that aren't transformed create a map of input index
-     * to output index. This does not include the time or control fields
-     *
-     * @param inFieldIndexes Map of field name -&gt; index in the input array
-     */
-    protected final List<InputOutputMap> createInputOutputMap(Map<String, Integer> inFieldIndexes) {
-        // where no transform
+    private List<InputOutputMap> createInputOutputMap(Map<String, Integer> inFieldIndexes) {
         List<InputOutputMap> inputOutputMap = new ArrayList<>();
-        int outIndex = TIME_FIELD_OUT_INDEX + 1;
+        int outIndex = TIME_FIELD_OUT_INDEX;
+        Integer inIndex = inFieldIndexes.get(dataDescription.getTimeField());
+        if (inIndex == null) {
+            throw new IllegalStateException(
+                    String.format(Locale.ROOT, "Input time field '%s' not found", dataDescription.getTimeField()));
+        }
+        inputOutputMap.add(new InputOutputMap(inIndex, outIndex));
         for (String field : analysisConfig.analysisFields()) {
-            Integer inIndex = inFieldIndexes.get(field);
+            ++outIndex;
+            inIndex = inFieldIndexes.get(field);
             if (inIndex != null) {
                 inputOutputMap.add(new InputOutputMap(inIndex, outIndex));
             }
-            ++outIndex;
         }
         return inputOutputMap;
@@ -442,7 +268,6 @@ public abstract class AbstractDataToProcessWriter implements DataToProcessWriter
         return inputOutputMap;
     }
     /**
     * Check that all the fields are present in the header.
     * Either return true or throw a MissingFieldException
@@ -453,7 +278,6 @@ public abstract class AbstractDataToProcessWriter implements DataToProcessWriter
     protected abstract boolean checkForMissingFields(Collection<String> inputFields, Map<String, Integer> inputFieldIndexes,
                                                      String[] header);
     /**
     * Input and output array indexes map
     */
@@ -466,6 +290,4 @@ public abstract class AbstractDataToProcessWriter implements DataToProcessWriter
             outputIndex = out;
         }
     }
 }

View File

@@ -3,11 +3,11 @@
  * or more contributor license agreements. Licensed under the Elastic License;
  * you may not use this file except in compliance with the Elastic License.
  */
-package org.elasticsearch.xpack.ml.transforms;
+package org.elasticsearch.xpack.ml.job.process.autodetect.writer;
-public abstract class TransformException extends Exception {
-    public TransformException(String message) {
-        super(message);
+public class CannotParseTimestampException extends Exception {
+    public CannotParseTimestampException(String message, Throwable cause) {
+        super(message, cause);
     }
 }

View File

@@ -6,12 +6,12 @@ package org.elasticsearch.xpack.ml.job.process.autodetect.writer;
 import org.apache.logging.log4j.Logger;
+import org.elasticsearch.common.logging.Loggers;
 import org.elasticsearch.xpack.ml.job.config.AnalysisConfig;
 import org.elasticsearch.xpack.ml.job.process.DataCountsReporter;
 import org.elasticsearch.xpack.ml.job.process.autodetect.state.DataCounts;
 import org.elasticsearch.xpack.ml.job.config.DataDescription;
 import org.elasticsearch.xpack.ml.job.process.autodetect.AutodetectProcess;
-import org.elasticsearch.xpack.ml.job.config.transform.TransformConfigs;
 import org.supercsv.io.CsvListReader;
 import org.supercsv.prefs.CsvPreference;
@@ -36,6 +36,9 @@ import java.util.Map;
  * line.
  */
 class CsvDataToProcessWriter extends AbstractDataToProcessWriter {
+    private static final Logger LOGGER = Loggers.getLogger(CsvDataToProcessWriter.class);
     /**
      * Maximum number of lines allowed within a single CSV record.
      * <p>
@@ -51,13 +54,13 @@ class CsvDataToProcessWriter extends AbstractDataToProcessWriter {
     public CsvDataToProcessWriter(boolean includeControlField, AutodetectProcess autodetectProcess,
                                   DataDescription dataDescription, AnalysisConfig analysisConfig,
-                                  TransformConfigs transforms, DataCountsReporter dataCountsReporter, Logger logger) {
-        super(includeControlField, autodetectProcess, dataDescription, analysisConfig, transforms, dataCountsReporter, logger);
+                                  DataCountsReporter dataCountsReporter) {
+        super(includeControlField, autodetectProcess, dataDescription, analysisConfig, dataCountsReporter, LOGGER);
     }
     /**
      * Read the csv inputIndex, transform to length encoded values and pipe to
-     * the OutputStream. If any of the expected fields in the transform inputs,
+     * the OutputStream. If any of the expected fields in the
      * analysis inputIndex or if the expected time field is missing from the CSV
      * header a exception is thrown
      */
@@ -74,15 +77,14 @@ class CsvDataToProcessWriter extends AbstractDataToProcessWriter {
         try (CsvListReader csvReader = new CsvListReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8), csvPref)) {
             String[] header = csvReader.getHeader(true);
             if (header == null) { // null if EoF
                 return dataCountsReporter.incrementalStats();
             }
             long inputFieldCount = Math.max(header.length - 1, 0); // time field doesn't count
-            buildTransforms(header);
-            //backing array for the inputIndex
+            buildFieldIndexMapping(header);
+            // backing array for the inputIndex
             String[] inputRecord = new String[header.length];
             int maxIndex = 0;
@@ -98,7 +100,7 @@ class CsvDataToProcessWriter extends AbstractDataToProcessWriter {
                 Arrays.fill(record, "");
                 if (maxIndex >= line.size()) {
-                    logger.warn("Not enough fields in csv record, expected at least " + maxIndex + ". " + line);
+                    LOGGER.warn("Not enough fields in csv record, expected at least " + maxIndex + ". " + line);
                     for (InputOutputMap inOut : inputOutputMap) {
                         if (inOut.inputIndex >= line.size()) {
@@ -117,7 +119,7 @@ class CsvDataToProcessWriter extends AbstractDataToProcessWriter {
                 }
                 fillRecordFromLine(line, inputRecord);
-                applyTransformsAndWrite(inputRecord, record, inputFieldCount);
+                transformTimeAndWrite(record, inputFieldCount);
             }
             // This function can throw
@@ -148,7 +150,7 @@ class CsvDataToProcessWriter extends AbstractDataToProcessWriter {
                 String msg = String.format(Locale.ROOT, "Field configured for analysis '%s' is not in the CSV header '%s'",
                         field, Arrays.toString(header));
-                logger.error(msg);
+                LOGGER.error(msg);
                 throw new IllegalArgumentException(msg);
             }
         }

View File

@@ -10,7 +10,6 @@ import org.elasticsearch.xpack.ml.job.config.AnalysisConfig;
 import org.elasticsearch.xpack.ml.job.config.DataDescription;
 import org.elasticsearch.xpack.ml.job.process.DataCountsReporter;
 import org.elasticsearch.xpack.ml.job.process.autodetect.AutodetectProcess;
-import org.elasticsearch.xpack.ml.job.config.transform.TransformConfigs;
 /**
  * Factory for creating the suitable writer depending on
@@ -32,17 +31,14 @@ public final class DataToProcessWriterFactory {
      */
     public static DataToProcessWriter create(boolean includeControlField, AutodetectProcess autodetectProcess,
                                              DataDescription dataDescription, AnalysisConfig analysisConfig,
-                                             TransformConfigs transforms, DataCountsReporter dataCountsReporter, Logger logger) {
+                                             DataCountsReporter dataCountsReporter) {
         switch (dataDescription.getFormat()) {
         case JSON:
             return new JsonDataToProcessWriter(includeControlField, autodetectProcess, dataDescription, analysisConfig,
-                    transforms, dataCountsReporter, logger);
+                    dataCountsReporter);
         case DELIMITED:
             return new CsvDataToProcessWriter(includeControlField, autodetectProcess, dataDescription, analysisConfig,
-                    transforms, dataCountsReporter, logger);
-        case SINGLE_LINE:
-            return new SingleLineDataToProcessWriter(includeControlField, autodetectProcess, dataDescription, analysisConfig,
-                    transforms, dataCountsReporter, logger);
+                    dataCountsReporter);
         default:
             throw new IllegalArgumentException();
         }

View File

@@ -0,0 +1,38 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.job.process.autodetect.writer;
import org.elasticsearch.xpack.ml.utils.time.DateTimeFormatterTimestampConverter;
import org.elasticsearch.xpack.ml.utils.time.TimestampConverter;
import java.time.ZoneOffset;
import java.time.format.DateTimeParseException;
import java.util.Locale;
/**
* A transformer that attempts to parse a String timestamp as a date according to a time format.
* It converts that to a long that represents the equivalent milliseconds since the epoch.
*/
public class DateFormatDateTransformer implements DateTransformer {
private final String timeFormat;
private final TimestampConverter dateToEpochConverter;
public DateFormatDateTransformer(String timeFormat) {
this.timeFormat = timeFormat;
dateToEpochConverter = DateTimeFormatterTimestampConverter.ofPattern(timeFormat, ZoneOffset.UTC);
}
@Override
public long transform(String timestamp) throws CannotParseTimestampException {
try {
return dateToEpochConverter.toEpochMillis(timestamp);
} catch (DateTimeParseException e) {
String message = String.format(Locale.ROOT, "Cannot parse date '%s' with format string '%s'", timestamp, timeFormat);
throw new CannotParseTimestampException(message, e);
}
}
}
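
For reference, a usage sketch (not part of the commit) exercising only the public API above; the time format and timestamp are illustrative:

import org.elasticsearch.xpack.ml.job.process.autodetect.writer.CannotParseTimestampException;
import org.elasticsearch.xpack.ml.job.process.autodetect.writer.DateFormatDateTransformer;

// Editor's sketch: parse a formatted timestamp into epoch milliseconds.
public class DateFormatDateTransformerExample {
    public static void main(String[] args) throws CannotParseTimestampException {
        DateFormatDateTransformer transformer = new DateFormatDateTransformer("yyyy-MM-dd HH:mm:ss");
        // Parsed in UTC and returned as milliseconds since the epoch.
        long epochMs = transformer.transform("2017-01-25 15:51:50");
        System.out.println(epochMs);
    }
}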

View File

@@ -0,0 +1,19 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.job.process.autodetect.writer;
/**
* An interface for transforming a String timestamp into epoch_millis.
*/
public interface DateTransformer {
/**
*
* @param timestamp A String representing a timestamp
* @return Milliseconds since the epoch that the timestamp corresponds to
* @throws CannotParseTimestampException If the timestamp cannot be parsed
*/
long transform(String timestamp) throws CannotParseTimestampException;
}
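
As a further illustration, a hypothetical third implementation of this interface for ISO-8601 instants; it assumes placement in the same package as DateTransformer and CannotParseTimestampException, and is not part of the commit:

import java.time.Instant;
import java.time.format.DateTimeParseException;

// Editor's sketch: parses timestamps like "2017-01-25T15:51:50Z".
public class IsoInstantDateTransformer implements DateTransformer {
    @Override
    public long transform(String timestamp) throws CannotParseTimestampException {
        try {
            return Instant.parse(timestamp).toEpochMilli();
        } catch (DateTimeParseException e) {
            throw new CannotParseTimestampException(
                    "Cannot parse '" + timestamp + "' as an ISO-8601 instant", e);
        }
    }
}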

View File

@@ -0,0 +1,35 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.job.process.autodetect.writer;
import java.util.Locale;
/**
* A transformer that attempts to parse a String timestamp
* as a double and converts it to a long representing
* milliseconds since the epoch. If isMillisecond is false, the value
* is treated as epoch seconds and multiplied up to milliseconds.
*/
public class DoubleDateTransformer implements DateTransformer {
private static final long MS_IN_SECOND = 1000;
private final boolean isMillisecond;
public DoubleDateTransformer(boolean isMillisecond) {
this.isMillisecond = isMillisecond;
}
@Override
public long transform(String timestamp) throws CannotParseTimestampException {
try {
long longValue = Double.valueOf(timestamp).longValue();
return isMillisecond ? longValue : longValue * MS_IN_SECOND;
} catch (NumberFormatException e) {
String message = String.format(Locale.ROOT, "Cannot parse timestamp '%s' as epoch value", timestamp);
throw new CannotParseTimestampException(message, e);
}
}
}
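
A usage sketch (not part of the commit) showing the two modes; because the value is parsed as a double, fractional input is also tolerated:

import org.elasticsearch.xpack.ml.job.process.autodetect.writer.CannotParseTimestampException;
import org.elasticsearch.xpack.ml.job.process.autodetect.writer.DoubleDateTransformer;

// Editor's sketch: the two modes of DoubleDateTransformer shown above.
public class DoubleDateTransformerExample {
    public static void main(String[] args) throws CannotParseTimestampException {
        // isMillisecond == false: input is epoch seconds, scaled up to milliseconds.
        DoubleDateTransformer fromSeconds = new DoubleDateTransformer(false);
        System.out.println(fromSeconds.transform("1485359510"));     // 1485359510000

        // isMillisecond == true: input is already epoch milliseconds, returned as-is.
        DoubleDateTransformer fromMillis = new DoubleDateTransformer(true);
        System.out.println(fromMillis.transform("1485359510000.0")); // 1485359510000
    }
}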

View File

@@ -8,12 +8,12 @@ package org.elasticsearch.xpack.ml.job.process.autodetect.writer;
 import com.fasterxml.jackson.core.JsonFactory;
 import com.fasterxml.jackson.core.JsonParser;
 import org.apache.logging.log4j.Logger;
+import org.elasticsearch.common.logging.Loggers;
 import org.elasticsearch.xpack.ml.job.config.AnalysisConfig;
 import org.elasticsearch.xpack.ml.job.process.autodetect.state.DataCounts;
 import org.elasticsearch.xpack.ml.job.config.DataDescription;
 import org.elasticsearch.xpack.ml.job.process.autodetect.AutodetectProcess;
 import org.elasticsearch.xpack.ml.job.process.DataCountsReporter;
-import org.elasticsearch.xpack.ml.job.config.transform.TransformConfigs;
 import java.io.IOException;
 import java.io.InputStream;
@@ -31,10 +31,11 @@ import java.util.Map;
  */
 class JsonDataToProcessWriter extends AbstractDataToProcessWriter {
+    private static final Logger LOGGER = Loggers.getLogger(JsonDataToProcessWriter.class);
     public JsonDataToProcessWriter(boolean includeControlField, AutodetectProcess autodetectProcess, DataDescription dataDescription,
-                                   AnalysisConfig analysisConfig, TransformConfigs transforms, DataCountsReporter dataCountsReporter,
-                                   Logger logger) {
-        super(includeControlField, autodetectProcess, dataDescription, analysisConfig, transforms, dataCountsReporter, logger);
+                                   AnalysisConfig analysisConfig, DataCountsReporter dataCountsReporter) {
+        super(includeControlField, autodetectProcess, dataDescription, analysisConfig, dataCountsReporter, LOGGER);
     }
     /**
@@ -61,7 +62,7 @@ class JsonDataToProcessWriter extends AbstractDataToProcessWriter {
     private void writeJson(JsonParser parser) throws IOException {
         Collection<String> analysisFields = inputFields();
-        buildTransforms(analysisFields.toArray(new String[0]));
+        buildFieldIndexMapping(analysisFields.toArray(new String[0]));
         int numFields = outputFieldCount();
         String[] input = new String[numFields];
@@ -70,7 +71,7 @@ class JsonDataToProcessWriter extends AbstractDataToProcessWriter {
         // We never expect to get the control field
         boolean[] gotFields = new boolean[analysisFields.size()];
-        JsonRecordReader recordReader = new SimpleJsonRecordReader(parser, inFieldIndexes, logger);
+        JsonRecordReader recordReader = new SimpleJsonRecordReader(parser, inFieldIndexes, LOGGER);
         long inputFieldCount = recordReader.read(input, gotFields);
         while (inputFieldCount >= 0) {
             Arrays.fill(record, "");
@@ -87,7 +88,7 @@ class JsonDataToProcessWriter extends AbstractDataToProcessWriter {
                 record[inOut.outputIndex] = (field == null) ? "" : field;
             }
-            applyTransformsAndWrite(input, record, inputFieldCount);
+            transformTimeAndWrite(record, inputFieldCount);
             inputFieldCount = recordReader.read(input, gotFields);
         }

View File

@@ -1,71 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.job.process.autodetect.writer;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.xpack.ml.job.config.AnalysisConfig;
import org.elasticsearch.xpack.ml.job.process.DataCountsReporter;
import org.elasticsearch.xpack.ml.job.process.autodetect.state.DataCounts;
import org.elasticsearch.xpack.ml.job.config.DataDescription;
import org.elasticsearch.xpack.ml.job.process.autodetect.AutodetectProcess;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfigs;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Collection;
import java.util.Map;
/**
* This writer is used for reading unstructured input data where each
* record is a single line. The writer applies transforms and pipes
* the records into length encoded output.
* <p>
* This writer is expected only to be used in combination with transforms
* that will extract the time and the other fields used in the analysis.
* <p>
* Records for which no time can be extracted will be ignored.
*/
public class SingleLineDataToProcessWriter extends AbstractDataToProcessWriter {
private static final String RAW = "raw";
protected SingleLineDataToProcessWriter(boolean includeControlField, AutodetectProcess autodetectProcess,
DataDescription dataDescription, AnalysisConfig analysisConfig,
TransformConfigs transformConfigs, DataCountsReporter dataCountsReporter, Logger logger) {
super(includeControlField, autodetectProcess, dataDescription, analysisConfig, transformConfigs, dataCountsReporter, logger);
}
@Override
public DataCounts write(InputStream inputStream) throws IOException {
dataCountsReporter.startNewIncrementalCount();
try (BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8))) {
String[] header = {RAW};
buildTransforms(header);
int numFields = outputFieldCount();
String[] record = new String[numFields];
for (String line = bufferedReader.readLine(); line != null;
line = bufferedReader.readLine()) {
Arrays.fill(record, "");
applyTransformsAndWrite(new String[]{line}, record, 1);
}
dataCountsReporter.finishReporting();
}
return dataCountsReporter.incrementalStats();
}
@Override
protected boolean checkForMissingFields(Collection<String> inputFields,
Map<String, Integer> inputFieldIndexes, String[] header) {
return true;
}
}

View File

@@ -18,13 +18,13 @@ enum Level {
     INFLUENCER("infl"),
     PARTITION("part");
-    private final String m_Key;
+    private final String key;
     Level(String key) {
-        m_Key = key;
+        this.key = key;
     }
     public String asString() {
-        return m_Key;
+        return key;
     }
 }

View File

@@ -1,35 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.rest.validate;
import org.elasticsearch.client.node.NodeClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.rest.BaseRestHandler;
import org.elasticsearch.rest.RestController;
import org.elasticsearch.rest.RestRequest;
import org.elasticsearch.rest.action.AcknowledgedRestListener;
import org.elasticsearch.xpack.ml.MlPlugin;
import org.elasticsearch.xpack.ml.action.ValidateTransformAction;
import java.io.IOException;
public class RestValidateTransformAction extends BaseRestHandler {
public RestValidateTransformAction(Settings settings, RestController controller) {
super(settings);
controller.registerHandler(RestRequest.Method.POST, MlPlugin.BASE_PATH + "_validate/transform", this);
}
@Override
protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient client) throws IOException {
XContentParser parser = restRequest.contentOrSourceParamParser();
ValidateTransformAction.Request validateDetectorRequest = ValidateTransformAction.Request.parseRequest(parser);
return channel ->
client.execute(ValidateTransformAction.INSTANCE, validateDetectorRequest, new AcknowledgedRestListener<>(channel));
}
}

View File

@@ -1,35 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.rest.validate;
import org.elasticsearch.client.node.NodeClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.rest.BaseRestHandler;
import org.elasticsearch.rest.RestController;
import org.elasticsearch.rest.RestRequest;
import org.elasticsearch.rest.action.AcknowledgedRestListener;
import org.elasticsearch.xpack.ml.MlPlugin;
import org.elasticsearch.xpack.ml.action.ValidateTransformsAction;
import java.io.IOException;
public class RestValidateTransformsAction extends BaseRestHandler {
public RestValidateTransformsAction(Settings settings, RestController controller) {
super(settings);
controller.registerHandler(RestRequest.Method.POST, MlPlugin.BASE_PATH + "_validate/transforms", this);
}
@Override
protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient client) throws IOException {
XContentParser parser = restRequest.contentOrSourceParamParser();
ValidateTransformsAction.Request validateDetectorRequest = ValidateTransformsAction.Request.PARSER.apply(parser, null);
return channel ->
client.execute(ValidateTransformsAction.INSTANCE, validateDetectorRequest, new AcknowledgedRestListener<>(channel));
}
}

View File

@@ -1,56 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms;
import java.util.List;
import java.util.StringJoiner;
import org.apache.logging.log4j.Logger;
/**
* Concatenate input fields
*/
public class Concat extends Transform {
private static final String EMPTY_STRING = "";
private final String delimiter;
public Concat(List<TransformIndex> readIndexes, List<TransformIndex> writeIndexes, Logger logger) {
super(readIndexes, writeIndexes, logger);
delimiter = EMPTY_STRING;
}
public Concat(String join, List<TransformIndex> readIndexes, List<TransformIndex> writeIndexes, Logger logger) {
super(readIndexes, writeIndexes, logger);
delimiter = join;
}
public String getDelimiter() {
return delimiter;
}
/**
* Concat has only 1 output field
*/
@Override
public TransformResult transform(String[][] readWriteArea)
throws TransformException {
if (writeIndexes.isEmpty()) {
return TransformResult.FAIL;
}
TransformIndex writeIndex = writeIndexes.get(0);
StringJoiner joiner = new StringJoiner(delimiter);
for (TransformIndex i : readIndexes) {
joiner.add(readWriteArea[i.array][i.index]);
}
readWriteArea[writeIndex.array][writeIndex.index] = joiner.toString();
return TransformResult.OK;
}
}
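
For reference, a usage sketch of this (now removed) transform. The TransformIndex(array, index) constructor shape is inferred from the calls elsewhere in this diff, and the literal array indexes 0 and 1 stand in for the TransformFactory.*_ARRAY_INDEX constants, whose values the diff does not show:

import java.util.Arrays;
import java.util.List;

import org.apache.logging.log4j.Logger;
import org.elasticsearch.common.logging.Loggers;
import org.elasticsearch.xpack.ml.transforms.Concat;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformIndex;
import org.elasticsearch.xpack.ml.transforms.TransformException;

// Editor's sketch: concatenate two fields of the input array into one field
// of the output array, joined by "-".
public class ConcatExample {
    public static void main(String[] args) throws TransformException {
        Logger logger = Loggers.getLogger(ConcatExample.class);
        List<TransformIndex> reads = Arrays.asList(new TransformIndex(0, 0), new TransformIndex(0, 1));
        List<TransformIndex> writes = Arrays.asList(new TransformIndex(1, 0));
        Concat concat = new Concat("-", reads, writes, logger);

        String[][] readWriteArea = { { "host", "42" }, { "" } };
        concat.transform(readWriteArea);
        System.out.println(readWriteArea[1][0]); // host-42
    }
}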

View File

@@ -1,173 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
/**
* Transform inputs and outputs can be chained together. This class provides
* methods for finding the chains of dependencies in a list of transforms. The
* results are ordered lists of transforms that should be executed in order,
* starting at index 0.
*/
public final class DependencySorter {
/**
* Hide public constructor
*/
private DependencySorter() {
}
/**
* For the input field get the chain of transforms that must be executed to
* get that field. The returned list is ordered so that the ones at the end
* of the list are dependent on those at the beginning.
* <p>
* Note if there is a circular dependency in the list of transforms this
* will cause a stack overflow. Check with
* {@linkplain org.elasticsearch.xpack.ml.job.config.transform.verification.TransformConfigsVerifier#checkForCircularDependencies(List)}
* first.
*
* @return List of transforms ordered by dependencies
*/
public static List<TransformConfig> findDependencies(String input, List<TransformConfig> transforms) {
return findDependencies(Arrays.asList(input), transforms);
}
/**
* For the list of input fields get the chain of transforms that must be
* executed to get those fields. The returned list is ordered so that the
* ones at the end of the list are dependent on those at the beginning
* <p>
* Note if there is a circular dependency in the list of transforms this
* will cause a stack overflow. Check with
* {@linkplain org.elasticsearch.xpack.ml.job.config.transform.verification.TransformConfigsVerifier#checkForCircularDependencies(List)}
* first.
*
* @return List of transforms ordered by dependencies
*/
public static List<TransformConfig> findDependencies(List<String> inputs, List<TransformConfig> transforms) {
List<TransformConfig> dependencies = new LinkedList<>();
ListIterator<TransformConfig> itr = transforms.listIterator();
while (itr.hasNext()) {
TransformConfig tc = itr.next();
for (String input : inputs) {
if (tc.getOutputs().contains(input)) {
findDependenciesRecursive(tc, transforms, dependencies);
}
}
}
return dependencies;
}
/**
* Recursively find the transform dependencies and add them to the
* dependency list
*
*/
private static void findDependenciesRecursive(TransformConfig transform, List<TransformConfig> transforms,
List<TransformConfig> dependencies) {
int index = dependencies.indexOf(transform);
if (index >= 0) {
return;
}
ListIterator<TransformConfig> itr = transforms.listIterator();
while (itr.hasNext()) {
TransformConfig tc = itr.next();
for (String input : transform.getInputs()) {
if (tc.getOutputs().contains(input)) {
findDependenciesRecursive(tc, transforms, dependencies);
}
}
}
dependencies.add(transform);
}
/**
* Return an ordered list of transforms (the same size as the input list)
* that sorted in terms of dependencies.
* <p>
* Note if there is a circular dependency in the list of transforms this
* will cause a stack overflow. Check with
* {@linkplain org.elasticsearch.xpack.ml.job.config.transform.verification.TransformConfigsVerifier#checkForCircularDependencies(List)}
* first.
*
* @return List of transforms ordered by dependencies
*/
public static List<TransformConfig> sortByDependency(List<TransformConfig> transforms) {
List<TransformConfig> orderedDependencies = new LinkedList<>();
List<TransformConfig> transformsCopy = new LinkedList<>(transforms);
transformsCopy = orderDependenciesRecursive(transformsCopy, orderedDependencies);
while (transformsCopy.isEmpty() == false) {
transformsCopy = orderDependenciesRecursive(transformsCopy, orderedDependencies);
}
return orderedDependencies;
}
/**
* Find the dependencies of the head of the <code>transforms</code> list
* adding them to the <code>dependencies</code> list. The returned list is a
* copy of the input <code>transforms</code> with the dependent transforms
* (i.e. those that have been ordered and added to <code>dependencies</code>)
* removed.
* <p>
* In the case where the input <code>transforms</code> list contains
* multiple chains of dependencies this function should be called multiple
* times using its return value as the input <code>transforms</code>
* parameter
* <p>
* To avoid concurrent modification of the transforms list a new copy is
* made for each recursive call and a new modified list returned
*
* @param dependencies
* Transforms are added to this list
* @return As transforms are moved from <code>transforms</code> to
* <code>dependencies</code> this list is a new copy of the
* <code>transforms</code> input with the moved transforms removed.
*/
private static List<TransformConfig> orderDependenciesRecursive(List<TransformConfig> transforms, List<TransformConfig> dependencies) {
if (transforms.isEmpty()) {
return transforms;
}
ListIterator<TransformConfig> itr = transforms.listIterator();
TransformConfig transform = itr.next();
itr.remove();
int index = dependencies.indexOf(transform);
if (index >= 0) {
return transforms;
}
while (itr.hasNext()) {
TransformConfig tc = itr.next();
for (String input : transform.getInputs()) {
if (tc.getOutputs().contains(input)) {
transforms = orderDependenciesRecursive(new LinkedList<TransformConfig>(transforms), dependencies);
itr = transforms.listIterator();
}
}
}
dependencies.add(transform);
return transforms;
}
}
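
To make the ordering concrete, here is a minimal, hypothetical sketch (not code from this commit). The enclosing utility class is declared above this hunk, so it is called DependencySorter here purely for illustration; the transform names and fields are invented.

import java.util.Arrays;
import java.util.List;

import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;

public class DependencyOrderExample {
    public static void main(String[] args) {
        // "concat" produces the field that "uppercase" consumes.
        TransformConfig concat = new TransformConfig("concat");
        concat.setInputs(Arrays.asList("host", "port"));
        concat.setOutputs(Arrays.asList("hostport"));

        TransformConfig upper = new TransformConfig("uppercase");
        upper.setInputs(Arrays.asList("hostport"));
        upper.setOutputs(Arrays.asList("hostport_uc"));

        // DependencySorter stands in for the utility class defined above.
        List<TransformConfig> ordered = DependencySorter.sortByDependency(Arrays.asList(upper, concat));
        // ordered == [concat, upper]: a transform always follows its dependencies.
        System.out.println(ordered);
    }
}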

View File

@ -1,34 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms;
import java.util.List;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.xpack.ml.job.config.Condition;
/**
* Abstract base class for exclude filters
*/
public abstract class ExcludeFilter extends Transform {
private final Condition condition;
/**
* The condition should have been verified by now and it <i>must</i> have a
* valid value &amp; operator
*/
public ExcludeFilter(Condition condition, List<TransformIndex> readIndexes,
List<TransformIndex> writeIndexes, Logger logger) {
super(readIndexes, writeIndexes, logger);
this.condition = condition;
}
public Condition getCondition() {
return condition;
}
}

View File

@ -1,85 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms;
import java.util.List;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.xpack.ml.job.config.Condition;
import org.elasticsearch.xpack.ml.job.config.Operator;
/**
* Parses a numeric value from a field and compares it against a hard
* value using a certain {@link Operator}
*/
public class ExcludeFilterNumeric extends ExcludeFilter {
private final double filterValue;
/**
* The condition should have been verified by now, but if its value cannot
* be parsed as a number then a default filter value of 0.0 is used.
*/
public ExcludeFilterNumeric(Condition condition, List<TransformIndex> readIndexes,
List<TransformIndex> writeIndexes, Logger logger) {
super(condition, readIndexes, writeIndexes, logger);
filterValue = parseFilterValue(getCondition().getValue());
}
/**
* If no condition is given then the default of &lt; (less than) with a
* filter value of 0.0 is used, meaning that only negative values are excluded.
*/
public ExcludeFilterNumeric(List<TransformIndex> readIndexes,
List<TransformIndex> writeIndexes, Logger logger) {
super(new Condition(Operator.LT, "0.0"),
readIndexes, writeIndexes, logger);
filterValue = 0.0;
}
private double parseFilterValue(String fieldValue) {
double result = 0.0;
try {
result = Double.parseDouble(fieldValue);
} catch (NumberFormatException e) {
logger.warn("Exclude transform cannot parse a number from field '" + fieldValue + "'. Using default 0.0");
}
return result;
}
/**
* Returns {@link TransformResult#EXCLUDE} if the value should be excluded
*/
@Override
public TransformResult transform(String[][] readWriteArea)
throws TransformException {
TransformResult result = TransformResult.OK;
for (TransformIndex readIndex : readIndexes) {
String field = readWriteArea[readIndex.array][readIndex.index];
try {
double value = Double.parseDouble(field);
if (getCondition().getOperator().test(value, filterValue)) {
result = TransformResult.EXCLUDE;
break;
}
} catch (NumberFormatException e) {
// Not a number: a numeric condition cannot match this field, so skip it.
}
}
return result;
}
public double filterValue() {
return filterValue;
}
}
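
A minimal usage sketch for the numeric filter above; the threshold, the field value and the log4j LogManager-based logger are illustrative assumptions, not code from this commit.

import java.util.Arrays;
import java.util.Collections;

import org.apache.logging.log4j.LogManager;
import org.elasticsearch.xpack.ml.job.config.Condition;
import org.elasticsearch.xpack.ml.job.config.Operator;
import org.elasticsearch.xpack.ml.transforms.ExcludeFilterNumeric;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformIndex;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformResult;
import org.elasticsearch.xpack.ml.transforms.TransformException;

public class ExcludeNumericExample {
    public static void main(String[] args) throws TransformException {
        // Exclude any record whose first input field is greater than 100.
        ExcludeFilterNumeric exclude = new ExcludeFilterNumeric(
                new Condition(Operator.GT, "100"),
                Arrays.asList(new TransformIndex(0, 0)),
                Collections.emptyList(), // exclude filters write no output
                LogManager.getLogger(ExcludeNumericExample.class));

        String[][] readWriteArea = { { "250" }, {}, {} };
        TransformResult result = exclude.transform(readWriteArea);
        // result == TransformResult.EXCLUDE because 250 > 100
        System.out.println(result);
    }
}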

View File

@ -1,49 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.xpack.ml.job.config.Condition;
/**
* Matches a field against a regex
*/
public class ExcludeFilterRegex extends ExcludeFilter {
private final Pattern pattern;
public ExcludeFilterRegex(Condition condition, List<TransformIndex> readIndexes,
List<TransformIndex> writeIndexes, Logger logger) {
super(condition, readIndexes, writeIndexes, logger);
pattern = Pattern.compile(getCondition().getValue());
}
/**
* Returns {@link TransformResult#EXCLUDE} if the record matches the regex
*/
@Override
public TransformResult transform(String[][] readWriteArea)
throws TransformException {
TransformResult result = TransformResult.OK;
for (TransformIndex readIndex : readIndexes) {
String field = readWriteArea[readIndex.array][readIndex.index];
Matcher match = pattern.matcher(field);
if (match.matches()) {
result = TransformResult.EXCLUDE;
break;
}
}
return result;
}
}
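
The regex variant works the same way but tests the whole field against the condition's pattern. A hypothetical sketch, with the pattern and field value invented for illustration:

import java.util.Arrays;
import java.util.Collections;

import org.apache.logging.log4j.LogManager;
import org.elasticsearch.xpack.ml.job.config.Condition;
import org.elasticsearch.xpack.ml.job.config.Operator;
import org.elasticsearch.xpack.ml.transforms.ExcludeFilterRegex;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformIndex;
import org.elasticsearch.xpack.ml.transforms.TransformException;

public class ExcludeRegexExample {
    public static void main(String[] args) throws TransformException {
        // Exclude records whose first field is an internal host name.
        ExcludeFilterRegex exclude = new ExcludeFilterRegex(
                new Condition(Operator.MATCH, "host-\\d+\\.internal"),
                Arrays.asList(new TransformIndex(0, 0)),
                Collections.emptyList(),
                LogManager.getLogger(ExcludeRegexExample.class));

        String[][] readWriteArea = { { "host-42.internal" }, {}, {} };
        // Returns TransformResult.EXCLUDE: the whole field matches the pattern.
        System.out.println(exclude.transform(readWriteArea));
    }
}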

View File

@ -1,47 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms;
import java.util.List;
import org.apache.logging.log4j.Logger;
/**
* Split a hostname into Highest Registered Domain and sub domain.
* TODO Reimplement porting the code from C++
*/
public class HighestRegisteredDomain extends Transform {
/**
* Immutable class for the domain split results
*/
public static class DomainSplit {
private String subDomain;
private String highestRegisteredDomain;
private DomainSplit(String subDomain, String highestRegisteredDomain) {
this.subDomain = subDomain;
this.highestRegisteredDomain = highestRegisteredDomain;
}
public String getSubDomain() {
return subDomain;
}
public String getHighestRegisteredDomain() {
return highestRegisteredDomain;
}
}
public HighestRegisteredDomain(List<TransformIndex> readIndexes, List<TransformIndex> writeIndexes, Logger logger) {
super(readIndexes, writeIndexes, logger);
}
@Override
public TransformResult transform(String[][] readWriteArea) {
// Not yet implemented (see the TODO above): always fails until the C++ code is ported.
return TransformResult.FAIL;
}
}

View File

@ -1,46 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.logging.log4j.Logger;
public class RegexExtract extends Transform {
private final Pattern pattern;
public RegexExtract(String regex, List<TransformIndex> readIndexes,
List<TransformIndex> writeIndexes, Logger logger) {
super(readIndexes, writeIndexes, logger);
pattern = Pattern.compile(regex);
}
@Override
public TransformResult transform(String[][] readWriteArea)
throws TransformException {
TransformIndex readIndex = readIndexes.get(0);
String field = readWriteArea[readIndex.array][readIndex.index];
Matcher match = pattern.matcher(field);
if (match.find()) {
int maxMatches = Math.min(writeIndexes.size(), match.groupCount());
for (int i = 0; i < maxMatches; i++) {
TransformIndex index = writeIndexes.get(i);
readWriteArea[index.array][index.index] = match.group(i + 1);
}
return TransformResult.OK;
} else {
logger.warn("Transform 'extract' failed to match field: " + field);
}
return TransformResult.FAIL;
}
}
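
A short sketch of how capture groups map onto write indexes: group 1 goes to the first write index, group 2 to the second. The regex and field value are invented for illustration.

import java.util.Arrays;

import org.apache.logging.log4j.LogManager;
import org.elasticsearch.xpack.ml.transforms.RegexExtract;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformIndex;
import org.elasticsearch.xpack.ml.transforms.TransformException;

public class RegexExtractExample {
    public static void main(String[] args) throws TransformException {
        RegexExtract extract = new RegexExtract("(\\w+)-(\\d+)",
                Arrays.asList(new TransformIndex(0, 0)),
                Arrays.asList(new TransformIndex(2, 0), new TransformIndex(2, 1)),
                LogManager.getLogger(RegexExtractExample.class));

        String[][] area = { { "error-404" }, {}, new String[2] };
        extract.transform(area);
        // area[2][0] == "error", area[2][1] == "404"
        System.out.println(area[2][0] + " " + area[2][1]);
    }
}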

View File

@ -1,53 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;
import org.apache.logging.log4j.Logger;
public class RegexSplit extends Transform {
private final Pattern pattern;
public RegexSplit(String regex, List<TransformIndex> readIndexes,
List<TransformIndex> writeIndexes, Logger logger) {
super(readIndexes, writeIndexes, logger);
pattern = Pattern.compile(regex);
}
@Override
public TransformResult transform(String[][] readWriteArea)
throws TransformException {
TransformIndex readIndex = readIndexes.get(0);
String field = readWriteArea[readIndex.array][readIndex.index];
String[] split = pattern.split(field);
warnIfOutputCountIsNotMatched(split.length, field);
int count = Math.min(split.length, writeIndexes.size());
for (int i = 0; i < count; i++) {
TransformIndex index = writeIndexes.get(i);
readWriteArea[index.array][index.index] = split[i];
}
return TransformResult.OK;
}
private void warnIfOutputCountIsNotMatched(int splitCount, String field) {
if (splitCount != writeIndexes.size()) {
String warning = String.format(Locale.ROOT,
"Transform 'split' has %d output(s) but splitting value '%s' resulted to %d part(s)",
writeIndexes.size(), field, splitCount);
logger.warn(warning);
}
}
}
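
A sketch of the split behaviour, assuming three output columns; the values are invented. Note that a mismatch between the number of parts and the number of outputs only logs a warning, as implemented above.

import java.util.Arrays;

import org.apache.logging.log4j.LogManager;
import org.elasticsearch.xpack.ml.transforms.RegexSplit;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformIndex;
import org.elasticsearch.xpack.ml.transforms.TransformException;

public class RegexSplitExample {
    public static void main(String[] args) throws TransformException {
        // Split a comma-separated field into three output columns.
        RegexSplit split = new RegexSplit(",",
                Arrays.asList(new TransformIndex(0, 0)),
                Arrays.asList(new TransformIndex(2, 0), new TransformIndex(2, 1), new TransformIndex(2, 2)),
                LogManager.getLogger(RegexSplitExample.class));

        String[][] area = { { "GET,200,1234" }, {}, new String[3] };
        split.transform(area);
        // area[2] == { "GET", "200", "1234" }
        System.out.println(String.join("|", area[2]));
    }
}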

View File

@ -1,49 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms;
import java.util.List;
import java.util.Locale;
import java.util.function.Function;
import org.apache.logging.log4j.Logger;
public class StringTransform extends Transform {
private final Function<String, String> convertFunction;
private StringTransform(Function<String, String> convertFunction,
List<TransformIndex> readIndexes, List<TransformIndex> writeIndexes, Logger logger) {
super(readIndexes, writeIndexes, logger);
this.convertFunction = convertFunction;
if (readIndexes.size() != 1 || writeIndexes.size() != 1) {
throw new IllegalArgumentException();
}
}
@Override
public TransformResult transform(String[][] readWriteArea) throws TransformException {
TransformIndex readIndex = readIndexes.get(0);
TransformIndex writeIndex = writeIndexes.get(0);
String input = readWriteArea[readIndex.array][readIndex.index];
readWriteArea[writeIndex.array][writeIndex.index] = convertFunction.apply(input);
return TransformResult.OK;
}
public static StringTransform createLowerCase(List<TransformIndex> readIndexes,
List<TransformIndex> writeIndexes, Logger logger) {
return new StringTransform(s -> s.toLowerCase(Locale.ROOT), readIndexes, writeIndexes, logger);
}
public static StringTransform createUpperCase(List<TransformIndex> readIndexes,
List<TransformIndex> writeIndexes, Logger logger) {
return new StringTransform(s -> s.toUpperCase(Locale.ROOT), readIndexes, writeIndexes, logger);
}
public static StringTransform createTrim(List<TransformIndex> readIndexes,
List<TransformIndex> writeIndexes, Logger logger) {
return new StringTransform(s -> s.trim(), readIndexes, writeIndexes, logger);
}
}
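
A sketch of the simplest case, trimming a single field from the input array straight into the output array; the value and logger are invented.

import java.util.Arrays;

import org.apache.logging.log4j.LogManager;
import org.elasticsearch.xpack.ml.transforms.StringTransform;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformIndex;
import org.elasticsearch.xpack.ml.transforms.TransformException;

public class TrimExample {
    public static void main(String[] args) throws TransformException {
        StringTransform trim = StringTransform.createTrim(
                Arrays.asList(new TransformIndex(0, 0)),   // read input column 0
                Arrays.asList(new TransformIndex(2, 0)),   // write output column 0
                LogManager.getLogger(TrimExample.class));

        String[][] area = { { "  padded  " }, {}, new String[1] };
        trim.transform(area);
        // area[2][0] == "padded"
        System.out.println("[" + area[2][0] + "]");
    }
}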

View File

@ -1,103 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms;
import java.util.List;
import java.util.Objects;
import org.apache.logging.log4j.Logger;
/**
* Abstract transform class.
* Instances are created with maps telling it which field(s)
* to read from in the input array and where to write to.
* The read/write area is passed in the {@linkplain #transform(String[][])}
* function.
* <p>
* Some transforms may fail and processing will continue; for
* others a failure is terminal, meaning the record should not be
* processed further.
*/
public abstract class Transform {
/**
* OK means the transform was successful,
* FAIL means the transform failed but it's ok to continue processing
* EXCLUDE means that no further processing should take place and the record should be discarded
*/
public enum TransformResult {
OK, FAIL, EXCLUDE
}
public static class TransformIndex {
public final int array;
public final int index;
public TransformIndex(int a, int b) {
this.array = a;
this.index = b;
}
@Override
public int hashCode() {
return Objects.hash(array, index);
}
@Override
public boolean equals(Object obj) {
if (this == obj) {
return true;
}
if (obj == null) {
return false;
}
if (getClass() != obj.getClass()) {
return false;
}
TransformIndex other = (TransformIndex) obj;
return Objects.equals(this.array, other.array)
&& Objects.equals(this.index, other.index);
}
}
protected final Logger logger;
protected final List<TransformIndex> readIndexes;
protected final List<TransformIndex> writeIndexes;
/**
* @param readIndexes Read inputs from these indexes
* @param writeIndexes Outputs are written to these indexes
* @param logger Logger used to report transform warnings and failures
*/
public Transform(List<TransformIndex> readIndexes, List<TransformIndex> writeIndexes, Logger logger) {
this.logger = logger;
this.readIndexes = readIndexes;
this.writeIndexes = writeIndexes;
}
/**
* The indexes for the inputs
*/
public final List<TransformIndex> getReadIndexes() {
return readIndexes;
}
/**
* The write output indexes
*/
public final List<TransformIndex> getWriteIndexes() {
return writeIndexes;
}
/**
* Transform function.
* The read/write array of arrays typically contains an input array, a
* scratch area array and the output array. The scratch area is used when
* transforms are chained, so that intermediate values can be read from and written to it.
*/
public abstract TransformResult transform(String[][] readWriteArea)
throws TransformException;
}
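
The scratch array described in the javadoc above is what lets transforms chain. A hypothetical sketch in which a split writes its parts to the scratch area and an uppercase transform reads one back; the values and logger are invented.

import java.util.Arrays;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.xpack.ml.transforms.RegexSplit;
import org.elasticsearch.xpack.ml.transforms.StringTransform;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformIndex;
import org.elasticsearch.xpack.ml.transforms.TransformException;

public class ChainedTransformExample {
    public static void main(String[] args) throws TransformException {
        Logger logger = LogManager.getLogger(ChainedTransformExample.class);
        // Array 0: input record, array 1: scratch area, array 2: output record.
        String[][] area = { { "alpha beta" }, new String[2], new String[1] };

        // The first transform writes its parts into the scratch area...
        RegexSplit split = new RegexSplit("\\s+",
                Arrays.asList(new TransformIndex(0, 0)),
                Arrays.asList(new TransformIndex(1, 0), new TransformIndex(1, 1)),
                logger);
        // ...and the second reads one of them back from there.
        StringTransform upper = StringTransform.createUpperCase(
                Arrays.asList(new TransformIndex(1, 0)),
                Arrays.asList(new TransformIndex(2, 0)),
                logger);

        split.transform(area);  // scratch: { "alpha", "beta" }
        upper.transform(area);  // output: { "ALPHA" }
    }
}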

View File

@ -1,122 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
import org.elasticsearch.xpack.ml.job.config.transform.TransformType;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformIndex;
/**
* Create transforms from the configuration object.
* Transforms need to know where to read strings from and where to
* write the output to, hence the input and output maps required by
* the create method.
*/
public class TransformFactory {
public static final int INPUT_ARRAY_INDEX = 0;
public static final int SCRATCH_ARRAY_INDEX = 1;
public static final int OUTPUT_ARRAY_INDEX = 2;
public Transform create(TransformConfig transformConfig,
Map<String, Integer> inputIndexesMap,
Map<String, Integer> scratchAreaIndexesMap,
Map<String, Integer> outputIndexesMap,
Logger logger) {
int[] input = new int[transformConfig.getInputs().size()];
fillIndexArray(transformConfig.getInputs(), inputIndexesMap, input);
List<TransformIndex> readIndexes = new ArrayList<>();
for (String field : transformConfig.getInputs()) {
Integer index = inputIndexesMap.get(field);
if (index != null) {
readIndexes.add(new TransformIndex(INPUT_ARRAY_INDEX, index));
} else {
index = scratchAreaIndexesMap.get(field);
if (index != null) {
readIndexes.add(new TransformIndex(SCRATCH_ARRAY_INDEX, index));
} else if (outputIndexesMap.containsKey(field)) { // also check the outputs array for this input
index = outputIndexesMap.get(field);
readIndexes.add(new TransformIndex(SCRATCH_ARRAY_INDEX, index));
} else {
throw new IllegalStateException("Transform input '" + field +
"' cannot be found");
}
}
}
List<TransformIndex> writeIndexes = new ArrayList<>();
for (String field : transformConfig.getOutputs()) {
Integer index = outputIndexesMap.get(field);
if (index != null) {
writeIndexes.add(new TransformIndex(OUTPUT_ARRAY_INDEX, index));
} else {
index = scratchAreaIndexesMap.get(field);
if (index != null) {
writeIndexes.add(new TransformIndex(SCRATCH_ARRAY_INDEX, index));
}
}
}
TransformType type = transformConfig.type();
switch (type) {
case DOMAIN_SPLIT:
return new HighestRegisteredDomain(readIndexes, writeIndexes, logger);
case CONCAT:
if (transformConfig.getArguments().isEmpty()) {
return new Concat(readIndexes, writeIndexes, logger);
} else {
return new Concat(transformConfig.getArguments().get(0),
readIndexes, writeIndexes, logger);
}
case REGEX_EXTRACT:
return new RegexExtract(transformConfig.getArguments().get(0), readIndexes,
writeIndexes, logger);
case REGEX_SPLIT:
return new RegexSplit(transformConfig.getArguments().get(0), readIndexes,
writeIndexes, logger);
case EXCLUDE:
if (transformConfig.getCondition().getOperator().expectsANumericArgument()) {
return new ExcludeFilterNumeric(transformConfig.getCondition(),
readIndexes, writeIndexes, logger);
} else {
return new ExcludeFilterRegex(transformConfig.getCondition(), readIndexes,
writeIndexes, logger);
}
case LOWERCASE:
return StringTransform.createLowerCase(readIndexes, writeIndexes, logger);
case UPPERCASE:
return StringTransform.createUpperCase(readIndexes, writeIndexes, logger);
case TRIM:
return StringTransform.createTrim(readIndexes, writeIndexes, logger);
default:
// This code will never be hit - it's to
// keep the compiler happy.
throw new IllegalArgumentException("Unknown transform type " + type);
}
}
/**
* For each <code>field</code> fill the <code>indexArray</code>
* with the index from the <code>indexes</code> map.
*/
private static void fillIndexArray(List<String> fields, Map<String, Integer> indexes,
int[] indexArray) {
int i = 0;
for (String field : fields) {
Integer index = indexes.get(field);
if (index != null) {
indexArray[i++] = index;
}
}
}
}
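
A sketch of how a caller might drive the factory; the field names and index maps are invented, and the logger is assumed to come from log4j's LogManager.

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

import org.apache.logging.log4j.LogManager;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
import org.elasticsearch.xpack.ml.transforms.Transform;
import org.elasticsearch.xpack.ml.transforms.TransformFactory;

public class TransformFactoryExample {
    public static void main(String[] args) {
        // Column positions of the named fields in the input and output arrays.
        Map<String, Integer> inputIndexes = new HashMap<>();
        inputIndexes.put("raw", 0);
        Map<String, Integer> scratchIndexes = new HashMap<>();
        Map<String, Integer> outputIndexes = new HashMap<>();
        outputIndexes.put("clean", 0);

        TransformConfig config = new TransformConfig("trim");
        config.setInputs(Collections.singletonList("raw"));
        config.setOutputs(Collections.singletonList("clean"));

        Transform trim = new TransformFactory().create(config, inputIndexes, scratchIndexes, outputIndexes,
                LogManager.getLogger(TransformFactoryExample.class));
        // trim now reads readWriteArea[INPUT_ARRAY_INDEX][0] and writes readWriteArea[OUTPUT_ARRAY_INDEX][0].
    }
}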

View File

@ -1,43 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms.date;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.xpack.ml.transforms.TransformException;
import org.elasticsearch.xpack.ml.utils.time.DateTimeFormatterTimestampConverter;
import org.elasticsearch.xpack.ml.utils.time.TimestampConverter;
import java.time.ZoneOffset;
import java.time.format.DateTimeParseException;
import java.util.List;
import java.util.Locale;
/**
* A transform that attempts to parse a String timestamp
* according to a timeFormat. It converts that
* to a long that represents the equivalent epoch.
*/
public class DateFormatTransform extends DateTransform {
private final String timeFormat;
private final TimestampConverter dateToEpochConverter;
public DateFormatTransform(String timeFormat, List<TransformIndex> readIndexes, List<TransformIndex> writeIndexes, Logger logger) {
super(readIndexes, writeIndexes, logger);
this.timeFormat = timeFormat;
dateToEpochConverter = DateTimeFormatterTimestampConverter.ofPattern(timeFormat, ZoneOffset.UTC);
}
@Override
protected long toEpochMs(String field) throws TransformException {
try {
return dateToEpochConverter.toEpochMillis(field);
} catch (DateTimeParseException pe) {
String message = String.format(Locale.ROOT, "Cannot parse date '%s' with format string '%s'", field, timeFormat);
throw new ParseTimestampException(message);
}
}
}
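
A sketch with an invented pattern and timestamp; it assumes DateTimeFormatterTimestampConverter accepts this pattern and, as written above, interprets it in UTC.

import java.util.Arrays;

import org.apache.logging.log4j.LogManager;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformIndex;
import org.elasticsearch.xpack.ml.transforms.TransformException;
import org.elasticsearch.xpack.ml.transforms.date.DateFormatTransform;

public class DateFormatExample {
    public static void main(String[] args) throws TransformException {
        DateFormatTransform transform = new DateFormatTransform("yyyy-MM-dd HH:mm:ss",
                Arrays.asList(new TransformIndex(0, 0)),
                Arrays.asList(new TransformIndex(2, 0)),
                LogManager.getLogger(DateFormatExample.class));

        String[][] area = { { "2017-01-25 15:51:50" }, {}, new String[1] };
        transform.transform(area);
        // area[2][0] == "1485359510" (epoch seconds); epochMs() keeps millisecond precision.
        System.out.println(area[2][0] + " / " + transform.epochMs());
    }
}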

View File

@ -1,62 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms.date;
import java.util.List;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.xpack.ml.transforms.Transform;
import org.elasticsearch.xpack.ml.transforms.TransformException;
/**
* Abstract class that introduces the {@link #epochMs()} method for
* date transforms.
*/
public abstract class DateTransform extends Transform {
protected static final int SECONDS_TO_MS = 1000;
private long epochMs;
public DateTransform(List<TransformIndex> readIndexes, List<TransformIndex> writeIndexes, Logger logger) {
super(readIndexes, writeIndexes, logger);
}
/**
* The epoch time from the last transform
*/
public long epochMs() {
return epochMs;
}
/**
* Expects 1 input and 1 output.
*/
@Override
public final TransformResult transform(String[][] readWriteArea) throws TransformException {
if (readIndexes.isEmpty()) {
throw new ParseTimestampException("Cannot parse null string");
}
if (writeIndexes.isEmpty()) {
throw new ParseTimestampException("No write index for the datetime format transform");
}
TransformIndex i = readIndexes.get(0);
String field = readWriteArea[i.array][i.index];
if (field == null) {
throw new ParseTimestampException("Cannot parse null string");
}
epochMs = toEpochMs(field);
TransformIndex writeIndex = writeIndexes.get(0);
readWriteArea[writeIndex.array][writeIndex.index] = Long.toString(epochMs / SECONDS_TO_MS);
return TransformResult.OK;
}
protected abstract long toEpochMs(String field) throws TransformException;
}

View File

@ -1,42 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms.date;
import java.util.List;
import java.util.Locale;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.xpack.ml.transforms.TransformException;
/**
* A transform that attempts to parse a String timestamp
* as a double and convert it to a long that represents
* an epoch time in seconds.
* If isMillisecond is true, it assumes the number represents
* time in milliseconds and converts it to seconds.
*/
public class DoubleDateTransform extends DateTransform {
private final boolean isMillisecond;
public DoubleDateTransform(boolean isMillisecond, List<TransformIndex> readIndexes,
List<TransformIndex> writeIndexes, Logger logger) {
super(readIndexes, writeIndexes, logger);
this.isMillisecond = isMillisecond;
}
@Override
protected long toEpochMs(String field) throws TransformException {
try {
long longValue = Double.valueOf(field).longValue();
return isMillisecond ? longValue : longValue * SECONDS_TO_MS;
} catch (NumberFormatException e) {
String message = String.format(Locale.ROOT, "Cannot parse timestamp '%s' as epoch value", field);
throw new ParseTimestampException(message);
}
}
}
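
A sketch of both modes; the timestamp is invented (it happens to be 2017-01-25 15:51:50 UTC) and the logger is assumed.

import java.util.Arrays;

import org.apache.logging.log4j.LogManager;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformIndex;
import org.elasticsearch.xpack.ml.transforms.TransformException;
import org.elasticsearch.xpack.ml.transforms.date.DoubleDateTransform;

public class DoubleDateExample {
    public static void main(String[] args) throws TransformException {
        // isMillisecond == false: the input is already epoch seconds.
        DoubleDateTransform seconds = new DoubleDateTransform(false,
                Arrays.asList(new TransformIndex(0, 0)),
                Arrays.asList(new TransformIndex(2, 0)),
                LogManager.getLogger(DoubleDateExample.class));

        String[][] area = { { "1485359510" }, {}, new String[1] };
        seconds.transform(area);
        // seconds.epochMs() == 1485359510000L; the output field stays in seconds.
        // With isMillisecond == true, the input "1485359510000" would give the same result.
        System.out.println(area[2][0] + " / " + seconds.epochMs());
    }
}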

View File

@ -1,16 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms.date;
import org.elasticsearch.xpack.ml.transforms.TransformException;
public class ParseTimestampException extends TransformException {
public ParseTimestampException(String message) {
super(message);
}
}

View File

@ -56,7 +56,6 @@ job.config.condition.invalid.value.null = Invalid condition: the value field can
job.config.condition.invalid.value.numeric = Invalid condition value: cannot parse a double from string ''{0}''
job.config.condition.invalid.value.regex = Invalid condition value: ''{0}'' is not a valid regular expression
job.config.condition.unknown.operator = Unknown condition operator ''{0}''
job.config.dataformat.requires.transform = When the data format is {0}, transforms are required.
job.config.detectionrule.condition.categorical.invalid.option = Invalid detector rule: a categorical rule_condition does not support {0}
job.config.detectionrule.condition.categorical.missing.option = Invalid detector rule: a categorical rule_condition requires {0} to be set
job.config.detectionrule.condition.invalid.fieldname = Invalid detector rule: field_name has to be one of {0}; actual was ''{1}''
@ -118,19 +117,6 @@ job.config.update.results.retention.days.invalid = Invalid update value for resu
job.config.update.datafeed.config.parse.error = JSON parse error reading the update value for datafeed_config
job.config.update.datafeed.config.cannot.be.null = Invalid update value for datafeed_config: null
job.config.transform.circular.dependency = Transform type {0} with inputs {1} has a circular dependency
job.config.transform.condition.required = A condition must be defined for transform ''{0}''
job.config.transform.duplicated.output.name = Transform ''{0}'' has an output with the same name as the summary count field. Transform outputs cannot use the summary count field, please review your configuration
job.config.transform.extract.groups.should.match.output.count = Transform ''{0}'' expects {1} output(s) but regex ''{2}'' captures {3} group(s)
job.config.transform.inputs.contain.empty.string = Transform type {0} contains empty input
job.config.transform.invalid.argument = Transform ''{0}'' has invalid argument ''{1}''
job.config.transform.invalid.argument.count = Transform type {0} expected {1} argument(s), got {2}
job.config.transform.invalid.input.count = Transform type {0} expected {1} input(s), got {2}
job.config.transform.invalid.output.count = Transform type {0} expected {1} output(s), got {2}
job.config.transform.outputs.contain.empty.string = Transform type {0} contains empty output
job.config.transform.outputs.unused = None of the outputs of transform ''{0}'' are used. Please review your configuration
job.config.transform.output.name.used.more.than.once = Transform output name ''{0}'' is used more than once
job.config.transform.unknown.type = Unknown TransformType ''{0}''
job.config.unknown.function = Unknown function ''{0}''
job.index.already.exists = Cannot create index ''{0}'' as it already exists
@ -164,9 +150,6 @@ json.job.config.parse.error = JSON parse error reading the job configuration
json.detector.config.mapping.error = JSON mapping error reading the detector configuration
json.detector.config.parse.error = JSON parse error reading the detector configuration
json.transform.config.mapping.error = JSON mapping error reading the transform configuration
json.transform.config.parse.error = JSON parse error reading the transform configuration
rest.action.not.allowed.for.datafeed.job = This action is not allowed for a datafeed job
rest.invalid.datetime.params = Query param ''{0}'' with value ''{1}'' cannot be parsed as a date or converted to a number (epoch).

View File

@ -14,8 +14,6 @@ import org.elasticsearch.xpack.ml.job.config.IgnoreDowntime;
import org.elasticsearch.xpack.ml.job.config.Job;
import org.elasticsearch.xpack.ml.job.config.ModelDebugConfig;
import org.elasticsearch.xpack.ml.action.util.QueryPage;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
import org.elasticsearch.xpack.ml.job.config.transform.TransformType;
import org.elasticsearch.xpack.ml.support.AbstractStreamableTestCase;
import java.util.ArrayList;
@ -43,11 +41,6 @@ public class GetJobsActionResponseTests extends AbstractStreamableTestCase<GetJo
Collections.singletonList(new Detector.Builder("metric", "some_field").build())).build();
AnalysisLimits analysisLimits = new AnalysisLimits(randomNonNegativeLong(), randomNonNegativeLong());
DataDescription dataDescription = randomBoolean() ? new DataDescription.Builder().build() : null;
int numTransformers = randomIntBetween(0, 32);
List<TransformConfig> transformConfigList = new ArrayList<>(numTransformers);
for (int i = 0; i < numTransformers; i++) {
transformConfigList.add(new TransformConfig(TransformType.UPPERCASE.prettyName()));
}
ModelDebugConfig modelDebugConfig = randomBoolean() ? new ModelDebugConfig(randomDouble(), randomAsciiOfLength(10)) : null;
IgnoreDowntime ignoreDowntime = randomFrom(IgnoreDowntime.values());
Long normalizationWindowDays = randomBoolean() ? randomLong() : null;
@ -59,7 +52,7 @@ public class GetJobsActionResponseTests extends AbstractStreamableTestCase<GetJo
String modelSnapshotId = randomBoolean() ? randomAsciiOfLength(10) : null;
String indexName = randomAsciiOfLength(10);
Job job = new Job(jobId, description, createTime, finishedTime, lastDataTime,
timeout, analysisConfig, analysisLimits, dataDescription, transformConfigList,
timeout, analysisConfig, analysisLimits, dataDescription,
modelDebugConfig, ignoreDowntime, normalizationWindowDays, backgroundPersistInterval,
modelSnapshotRetentionDays, resultsRetentionDays, customConfig, modelSnapshotId, indexName);

View File

@ -1,33 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.action;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.xpack.ml.action.ValidateTransformAction.Request;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
import org.elasticsearch.xpack.ml.job.config.transform.TransformType;
import org.elasticsearch.xpack.ml.support.AbstractStreamableXContentTestCase;
public class ValidateTransformActionRequestTests extends AbstractStreamableXContentTestCase<ValidateTransformAction.Request> {
@Override
protected Request createTestInstance() {
TransformType transformType = randomFrom(TransformType.values());
TransformConfig transform = new TransformConfig(transformType.prettyName());
return new Request(transform);
}
@Override
protected Request createBlankInstance() {
return new Request();
}
@Override
protected Request parseInstance(XContentParser parser) {
return Request.parseRequest(parser);
}
}

View File

@ -1,41 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.action;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.xpack.ml.action.ValidateTransformsAction.Request;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
import org.elasticsearch.xpack.ml.job.config.transform.TransformType;
import org.elasticsearch.xpack.ml.support.AbstractStreamableXContentTestCase;
import java.util.ArrayList;
import java.util.List;
public class ValidateTransformsActionRequestTests extends AbstractStreamableXContentTestCase<ValidateTransformsAction.Request> {
@Override
protected Request createTestInstance() {
int size = randomInt(10);
List<TransformConfig> transforms = new ArrayList<>();
for (int i = 0; i < size; i++) {
TransformType transformType = randomFrom(TransformType.values());
TransformConfig transform = new TransformConfig(transformType.prettyName());
transforms.add(transform);
}
return new Request(transforms);
}
@Override
protected Request createBlankInstance() {
return new Request();
}
@Override
protected Request parseInstance(XContentParser parser) {
return Request.PARSER.apply(parser, null);
}
}

View File

@ -67,13 +67,6 @@ public class ConditionTests extends AbstractSerializingTestCase<Condition> {
return Condition.PARSER.apply(parser, null);
}
public void testInvalidTransformName() throws Exception {
BytesArray json = new BytesArray("{ \"value\":\"someValue\" }");
XContentParser parser = XContentFactory.xContent(json).createParser(NamedXContentRegistry.EMPTY, json);
IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> Condition.PARSER.apply(parser, null));
assertThat(ex.getMessage(), containsString("Required [operator]"));
}
public void testVerifyArgsNumericArgs() {
new Condition(Operator.LTE, "100");
new Condition(Operator.GT, "10.0");

View File

@ -25,15 +25,11 @@ public class DataFormatTests extends ESTestCase {
assertEquals(DataFormat.JSON, DataFormat.forString("json"));
assertEquals(DataFormat.JSON, DataFormat.forString("JSON"));
assertEquals(DataFormat.SINGLE_LINE, DataFormat.forString("single_line"));
assertEquals(DataFormat.SINGLE_LINE, DataFormat.forString("SINGLE_LINE"));
}
public void testValidOrdinals() {
assertThat(DataFormat.JSON.ordinal(), equalTo(0));
assertThat(DataFormat.DELIMITED.ordinal(), equalTo(1));
assertThat(DataFormat.SINGLE_LINE.ordinal(), equalTo(2));
}
public void testwriteTo() throws Exception {
@ -50,13 +46,6 @@ public class DataFormatTests extends ESTestCase {
assertThat(in.readVInt(), equalTo(1));
}
}
try (BytesStreamOutput out = new BytesStreamOutput()) {
DataFormat.SINGLE_LINE.writeTo(out);
try (StreamInput in = out.bytes().streamInput()) {
assertThat(in.readVInt(), equalTo(2));
}
}
}
public void testReadFrom() throws Exception {
@ -72,12 +61,6 @@ public class DataFormatTests extends ESTestCase {
assertThat(DataFormat.readFromStream(in), equalTo(DataFormat.DELIMITED));
}
}
try (BytesStreamOutput out = new BytesStreamOutput()) {
out.writeVInt(2);
try (StreamInput in = out.bytes().streamInput()) {
assertThat(DataFormat.readFromStream(in), equalTo(DataFormat.SINGLE_LINE));
}
}
}
public void testInvalidReadFrom() throws Exception {

View File

@ -10,8 +10,6 @@ import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.ml.job.messages.Messages;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
import org.elasticsearch.xpack.ml.job.config.transform.TransformType;
import org.elasticsearch.xpack.ml.support.AbstractSerializingTestCase;
import java.util.ArrayList;
@ -58,7 +56,6 @@ public class JobTests extends AbstractSerializingTestCase<Job> {
assertNull(job.getBackgroundPersistInterval());
assertNull(job.getModelSnapshotRetentionDays());
assertNull(job.getResultsRetentionDays());
assertEquals(Collections.emptyList(), job.getTransforms());
assertNotNull(job.allFields());
assertFalse(job.allFields().isEmpty());
}
@ -298,92 +295,6 @@ public class JobTests extends AbstractSerializingTestCase<Job> {
builder.build();
}
public void testCheckTransformOutputIsUsed_throws() {
Job.Builder builder = buildJobBuilder("foo");
TransformConfig tc = new TransformConfig(TransformType.Names.DOMAIN_SPLIT_NAME);
tc.setInputs(Arrays.asList("dns"));
builder.setTransforms(Arrays.asList(tc));
expectThrows(IllegalArgumentException.class, builder::build);
Detector.Builder newDetector = new Detector.Builder();
newDetector.setFunction(Detector.MIN);
newDetector.setFieldName(TransformType.DOMAIN_SPLIT.defaultOutputNames().get(0));
AnalysisConfig.Builder config = new AnalysisConfig.Builder(Collections.singletonList(newDetector.build()));
builder.setAnalysisConfig(config);
builder.build();
}
public void testCheckTransformDuplicatOutput_outputIsSummaryCountField() {
Job.Builder builder = buildJobBuilder("foo");
AnalysisConfig.Builder config = createAnalysisConfig();
config.setSummaryCountFieldName("summaryCountField");
builder.setAnalysisConfig(config);
TransformConfig tc = new TransformConfig(TransformType.Names.DOMAIN_SPLIT_NAME);
tc.setInputs(Arrays.asList("dns"));
tc.setOutputs(Arrays.asList("summaryCountField"));
builder.setTransforms(Arrays.asList(tc));
expectThrows(IllegalArgumentException.class, builder::build);
}
public void testCheckTransformOutputIsUsed_outputIsSummaryCountField() {
Job.Builder builder = buildJobBuilder("foo");
TransformConfig tc = new TransformConfig(TransformType.Names.EXTRACT_NAME);
tc.setInputs(Arrays.asList("dns"));
tc.setOutputs(Arrays.asList("summaryCountField"));
tc.setArguments(Arrays.asList("(.*)"));
builder.setTransforms(Arrays.asList(tc));
expectThrows(IllegalArgumentException.class, builder::build);
}
public void testCheckTransformOutputIsUsed_transformHasNoOutput() {
Job.Builder builder = buildJobBuilder("foo");
// The exclude filter has no output
TransformConfig tc = new TransformConfig(TransformType.Names.EXCLUDE_NAME);
tc.setCondition(new Condition(Operator.MATCH, "whitelisted_host"));
tc.setInputs(Arrays.asList("dns"));
builder.setTransforms(Arrays.asList(tc));
builder.build();
}
public void testVerify_GivenDataFormatIsSingleLineAndNullTransforms() {
String errorMessage = Messages.getMessage(
Messages.JOB_CONFIG_DATAFORMAT_REQUIRES_TRANSFORM,
DataDescription.DataFormat.SINGLE_LINE);
Job.Builder builder = buildJobBuilder("foo");
DataDescription.Builder dataDescription = new DataDescription.Builder();
dataDescription.setFormat(DataDescription.DataFormat.SINGLE_LINE);
builder.setDataDescription(dataDescription);
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, builder::build);
assertEquals(errorMessage, e.getMessage());
}
public void testVerify_GivenDataFormatIsSingleLineAndEmptyTransforms() {
String errorMessage = Messages.getMessage(
Messages.JOB_CONFIG_DATAFORMAT_REQUIRES_TRANSFORM,
DataDescription.DataFormat.SINGLE_LINE);
Job.Builder builder = buildJobBuilder("foo");
builder.setTransforms(new ArrayList<>());
DataDescription.Builder dataDescription = new DataDescription.Builder();
dataDescription.setFormat(DataDescription.DataFormat.SINGLE_LINE);
builder.setDataDescription(dataDescription);
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, builder::build);
assertEquals(errorMessage, e.getMessage());
}
public void testVerify_GivenDataFormatIsSingleLineAndNonEmptyTransforms() {
ArrayList<TransformConfig> transforms = new ArrayList<>();
TransformConfig transform = new TransformConfig("trim");
transform.setInputs(Arrays.asList("raw"));
transform.setOutputs(Arrays.asList("time"));
transforms.add(transform);
Job.Builder builder = buildJobBuilder("foo");
builder.setTransforms(transforms);
DataDescription.Builder dataDescription = new DataDescription.Builder();
dataDescription.setFormat(DataDescription.DataFormat.SINGLE_LINE);
builder.setDataDescription(dataDescription);
builder.build();
}
public void testVerify_GivenNegativeRenormalizationWindowDays() {
String errorMessage = Messages.getMessage(Messages.JOB_CONFIG_FIELD_VALUE_TOO_LOW,
"renormalizationWindowDays", 0, -1);
@ -488,23 +399,12 @@ public class JobTests extends AbstractSerializingTestCase<Job> {
builder.setDataDescription(dataDescription);
}
String[] outputs;
TransformType[] transformTypes ;
AnalysisConfig ac = analysisConfig.build();
if (randomBoolean()) {
transformTypes = new TransformType[] {TransformType.TRIM, TransformType.LOWERCASE};
outputs = new String[] {ac.getDetectors().get(0).getFieldName(), ac.getDetectors().get(0).getOverFieldName()};
} else {
transformTypes = new TransformType[] {TransformType.TRIM};
outputs = new String[] {ac.getDetectors().get(0).getFieldName()};
}
List<TransformConfig> transformConfigList = new ArrayList<>(transformTypes.length);
for (int i = 0; i < transformTypes.length; i++) {
TransformConfig tc = new TransformConfig(transformTypes[i].prettyName());
tc.setInputs(Collections.singletonList("input" + i));
tc.setOutputs(Collections.singletonList(outputs[i]));
transformConfigList.add(tc);
}
builder.setTransforms(transformConfigList);
if (randomBoolean()) {
builder.setModelDebugConfig(new ModelDebugConfig(randomDouble(), randomAsciiOfLength(10)));
}

View File

@ -1,196 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.job.config.transform;
import org.elasticsearch.common.ParsingException;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.ml.job.config.Condition;
import org.elasticsearch.xpack.ml.job.config.Operator;
import org.elasticsearch.xpack.ml.support.AbstractSerializingTestCase;
import java.util.Arrays;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.instanceOf;
public class TransformConfigTests extends AbstractSerializingTestCase<TransformConfig> {
@Override
protected TransformConfig createTestInstance() {
TransformType transformType = randomFrom(TransformType.values());
TransformConfig config = new TransformConfig(transformType.prettyName());
if (randomBoolean()) {
config.setInputs(Arrays.asList(generateRandomStringArray(0, 10, false)));
}
if (randomBoolean()) {
config.setOutputs(Arrays.asList(generateRandomStringArray(0, 10, false)));
}
if (randomBoolean()) {
config.setArguments(Arrays.asList(generateRandomStringArray(0, 10, false)));
}
if (randomBoolean()) {
// no need to randomize, it is properly randomly tested in ConditionTests
config.setCondition(new Condition(Operator.LT, Double.toString(randomDouble())));
}
return config;
}
@Override
protected Writeable.Reader<TransformConfig> instanceReader() {
return TransformConfig::new;
}
@Override
protected TransformConfig parseInstance(XContentParser parser) {
return TransformConfig.PARSER.apply(parser, null);
}
public void testGetOutputs_GivenNoExplicitOutputsSpecified() {
TransformConfig config = new TransformConfig("concat");
assertEquals(Arrays.asList("concat"), config.getOutputs());
}
public void testGetOutputs_GivenEmptyOutputsSpecified() {
TransformConfig config = new TransformConfig("concat");
assertEquals(Arrays.asList("concat"), config.getOutputs());
}
public void testGetOutputs_GivenOutputsSpecified() {
TransformConfig config = new TransformConfig("concat");
config.setOutputs(Arrays.asList("o1", "o2"));
assertEquals(Arrays.asList("o1", "o2"), config.getOutputs());
}
public void testVerify_GivenUnknownTransform() {
ESTestCase.expectThrows(IllegalArgumentException.class, () -> new TransformConfig("unknown+transform"));
}
public void testEquals_GivenSameReference() {
TransformConfig config = new TransformConfig(TransformType.CONCAT.prettyName());
assertTrue(config.equals(config));
}
public void testEquals_GivenDifferentClass() {
TransformConfig config = new TransformConfig(TransformType.CONCAT.prettyName());
assertFalse(config.equals("a string"));
}
public void testEquals_GivenNull() {
TransformConfig config = new TransformConfig(TransformType.CONCAT.prettyName());
assertFalse(config.equals(null));
}
public void testEquals_GivenEqualTransform() {
TransformConfig config1 = new TransformConfig("concat");
config1.setInputs(Arrays.asList("input1", "input2"));
config1.setOutputs(Arrays.asList("output"));
config1.setArguments(Arrays.asList("-"));
config1.setCondition(new Condition(Operator.EQ, "5"));
TransformConfig config2 = new TransformConfig("concat");
config2.setInputs(Arrays.asList("input1", "input2"));
config2.setOutputs(Arrays.asList("output"));
config2.setArguments(Arrays.asList("-"));
config2.setCondition(new Condition(Operator.EQ, "5"));
assertTrue(config1.equals(config2));
assertTrue(config2.equals(config1));
}
public void testEquals_GivenDifferentType() {
TransformConfig config1 = new TransformConfig("concat");
TransformConfig config2 = new TransformConfig("lowercase");
assertFalse(config1.equals(config2));
assertFalse(config2.equals(config1));
}
public void testEquals_GivenDifferentInputs() {
TransformConfig config1 = new TransformConfig("concat");
config1.setInputs(Arrays.asList("input1", "input2"));
TransformConfig config2 = new TransformConfig("concat");
config2.setInputs(Arrays.asList("input1", "input3"));
assertFalse(config1.equals(config2));
assertFalse(config2.equals(config1));
}
public void testEquals_GivenDifferentOutputs() {
TransformConfig config1 = new TransformConfig("concat");
config1.setInputs(Arrays.asList("input1", "input2"));
config1.setOutputs(Arrays.asList("output1"));
TransformConfig config2 = new TransformConfig("concat");
config2.setInputs(Arrays.asList("input1", "input2"));
config2.setOutputs(Arrays.asList("output2"));
assertFalse(config1.equals(config2));
assertFalse(config2.equals(config1));
}
public void testEquals_GivenDifferentArguments() {
TransformConfig config1 = new TransformConfig("concat");
config1.setInputs(Arrays.asList("input1", "input2"));
config1.setOutputs(Arrays.asList("output"));
config1.setArguments(Arrays.asList("-"));
TransformConfig config2 = new TransformConfig("concat");
config2.setInputs(Arrays.asList("input1", "input2"));
config2.setOutputs(Arrays.asList("output"));
config2.setArguments(Arrays.asList("--"));
assertFalse(config1.equals(config2));
assertFalse(config2.equals(config1));
}
public void testEquals_GivenDifferentConditions() {
TransformConfig config1 = new TransformConfig("concat");
config1.setInputs(Arrays.asList("input1", "input2"));
config1.setOutputs(Arrays.asList("output"));
config1.setArguments(Arrays.asList("-"));
config1.setCondition(new Condition(Operator.MATCH, "foo"));
TransformConfig config2 = new TransformConfig("concat");
config2.setInputs(Arrays.asList("input1", "input2"));
config2.setOutputs(Arrays.asList("output"));
config2.setArguments(Arrays.asList("-"));
config2.setCondition(new Condition(Operator.MATCH, "bar"));
assertFalse(config1.equals(config2));
assertFalse(config2.equals(config1));
}
public void testInvalidTransformName() throws Exception {
BytesArray json = new BytesArray("{ \"transform\":\"\" }");
XContentParser parser = XContentFactory.xContent(json).createParser(NamedXContentRegistry.EMPTY, json);
ParsingException ex = expectThrows(ParsingException.class,
() -> TransformConfig.PARSER.apply(parser, null));
assertThat(ex.getMessage(), containsString("[transform] failed to parse field [transform]"));
Throwable cause = ex.getRootCause();
assertNotNull(cause);
assertThat(cause, instanceOf(IllegalArgumentException.class));
assertThat(cause.getMessage(),
containsString("Unknown [transformType]: []"));
}
}

View File

@ -1,81 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.job.config.transform;
import org.elasticsearch.common.io.stream.Writeable.Reader;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.xpack.ml.job.config.Condition;
import org.elasticsearch.xpack.ml.job.config.Operator;
import org.elasticsearch.xpack.ml.support.AbstractSerializingTestCase;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
public class TransformConfigsTests extends AbstractSerializingTestCase<TransformConfigs> {
@Override
protected TransformConfigs createTestInstance() {
int size = randomInt(10);
List<TransformConfig> transforms = new ArrayList<>(size);
for (int i = 0; i < size; i++) {
TransformType transformType = randomFrom(TransformType.values());
TransformConfig config = new TransformConfig(transformType.prettyName());
if (randomBoolean()) {
config.setInputs(Arrays.asList(generateRandomStringArray(0, 10, false)));
}
if (randomBoolean()) {
config.setOutputs(Arrays.asList(generateRandomStringArray(0, 10, false)));
}
if (randomBoolean()) {
config.setArguments(Arrays.asList(generateRandomStringArray(0, 10, false)));
}
if (randomBoolean()) {
// no need to randomize, it is properly randomly tested in ConditionTests
config.setCondition(new Condition(Operator.EQ, Double.toString(randomDouble())));
}
transforms.add(config);
}
return new TransformConfigs(transforms);
}
@Override
protected Reader<TransformConfigs> instanceReader() {
return TransformConfigs::new;
}
@Override
protected TransformConfigs parseInstance(XContentParser parser) {
return TransformConfigs.PARSER.apply(parser, null);
}
public void testInputOutputFieldNames() {
List<TransformConfig> transforms = new ArrayList<>();
transforms.add(createConcatTransform(Arrays.asList("a", "b", "c"), Arrays.asList("c1")));
transforms.add(createConcatTransform(Arrays.asList("d", "e", "c"), Arrays.asList("c2")));
transforms.add(createConcatTransform(Arrays.asList("f", "a", "c"), Arrays.asList("c3")));
TransformConfigs tcs = new TransformConfigs(transforms);
List<String> inputNames = Arrays.asList("a", "b", "c", "d", "e", "f");
Set<String> inputSet = new HashSet<>(inputNames);
assertEquals(inputSet, tcs.inputFieldNames());
List<String> outputNames = Arrays.asList("c1", "c2", "c3");
Set<String> outputSet = new HashSet<>(outputNames);
assertEquals(outputSet, tcs.outputFieldNames());
}
private TransformConfig createConcatTransform(List<String> inputs, List<String> outputs) {
TransformConfig concat = new TransformConfig(TransformType.CONCAT.prettyName());
concat.setInputs(inputs);
concat.setOutputs(outputs);
return concat;
}
}

View File

@ -1,66 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.job.config.transform;
import com.fasterxml.jackson.core.JsonProcessingException;
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.test.ESTestCase;
import java.io.IOException;
public class TransformSerialisationTests extends ESTestCase {
public void testDeserialise_singleFieldAsArray() throws JsonProcessingException, IOException {
String json = "{\"inputs\":\"dns\", \"transform\":\"domain_split\"}";
XContentParser parser = XContentFactory.xContent(json).createParser(NamedXContentRegistry.EMPTY, json);
TransformConfig tr = TransformConfig.PARSER.apply(parser, null);
assertEquals(1, tr.getInputs().size());
assertEquals("dns", tr.getInputs().get(0));
assertEquals("domain_split", tr.getTransform());
assertEquals(2, tr.getOutputs().size());
assertEquals("subDomain", tr.getOutputs().get(0));
assertEquals("hrd", tr.getOutputs().get(1));
json = "{\"inputs\":\"dns\", \"transform\":\"domain_split\", \"outputs\":\"catted\"}";
parser = XContentFactory.xContent(json).createParser(NamedXContentRegistry.EMPTY, json);
tr = TransformConfig.PARSER.apply(parser, null);
assertEquals(1, tr.getInputs().size());
assertEquals("dns", tr.getInputs().get(0));
assertEquals("domain_split", tr.getTransform());
assertEquals(1, tr.getOutputs().size());
assertEquals("catted", tr.getOutputs().get(0));
}
public void testDeserialise_fieldsArray() throws JsonProcessingException, IOException {
String json = "{\"inputs\":[\"dns\"], \"transform\":\"domain_split\"}";
XContentParser parser = XContentFactory.xContent(json).createParser(NamedXContentRegistry.EMPTY, json);
TransformConfig tr = TransformConfig.PARSER.apply(parser, null);
assertEquals(1, tr.getInputs().size());
assertEquals("dns", tr.getInputs().get(0));
assertEquals("domain_split", tr.getTransform());
json = "{\"inputs\":[\"a\", \"b\", \"c\"], \"transform\":\"concat\", \"outputs\":[\"catted\"]}";
parser = XContentFactory.xContent(json).createParser(NamedXContentRegistry.EMPTY, json);
tr = TransformConfig.PARSER.apply(parser, null);
assertEquals(3, tr.getInputs().size());
assertEquals("a", tr.getInputs().get(0));
assertEquals("b", tr.getInputs().get(1));
assertEquals("c", tr.getInputs().get(2));
assertEquals("concat", tr.getTransform());
assertEquals(1, tr.getOutputs().size());
assertEquals("catted", tr.getOutputs().get(0));
}
}

View File

@ -1,52 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.job.config.transform;
import org.elasticsearch.test.ESTestCase;
import java.util.EnumSet;
import java.util.Set;
public class TransformTypeTests extends ESTestCase {
public void testFromString() {
Set<TransformType> all = EnumSet.allOf(TransformType.class);
for (TransformType type : all) {
assertEquals(type.prettyName(), type.toString());
TransformType created = TransformType.fromString(type.prettyName());
assertEquals(type, created);
}
}
public void testFromString_UnknownType() {
ESTestCase.expectThrows(IllegalArgumentException.class, () -> TransformType.fromString("random_type"));
}
public void testForString() {
assertEquals(TransformType.fromString("domain_split"), TransformType.DOMAIN_SPLIT);
assertEquals(TransformType.fromString("concat"), TransformType.CONCAT);
assertEquals(TransformType.fromString("extract"), TransformType.REGEX_EXTRACT);
assertEquals(TransformType.fromString("split"), TransformType.REGEX_SPLIT);
assertEquals(TransformType.fromString("exclude"), TransformType.EXCLUDE);
assertEquals(TransformType.fromString("lowercase"), TransformType.LOWERCASE);
assertEquals(TransformType.fromString("uppercase"), TransformType.UPPERCASE);
assertEquals(TransformType.fromString("trim"), TransformType.TRIM);
}
public void testValidOrdinals() {
assertEquals(0, TransformType.DOMAIN_SPLIT.ordinal());
assertEquals(1, TransformType.CONCAT.ordinal());
assertEquals(2, TransformType.REGEX_EXTRACT.ordinal());
assertEquals(3, TransformType.REGEX_SPLIT.ordinal());
assertEquals(4, TransformType.EXCLUDE.ordinal());
assertEquals(5, TransformType.LOWERCASE.ordinal());
assertEquals(6, TransformType.UPPERCASE.ordinal());
assertEquals(7, TransformType.TRIM.ordinal());
}
}
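
The pattern under test is an enum whose lower-case "pretty" name doubles as its configuration name, with fromString() as the inverse lookup; the ordinal assertions indicate the ordinals were part of a persisted format, so the constant order could not change safely. A minimal sketch of that pattern (an illustrative enum, not the removed TransformType):

import java.util.Locale;

enum PrettyNamedType {
    DOMAIN_SPLIT, CONCAT, TRIM;

    // Lower-case name used in configuration, e.g. "domain_split"
    String prettyName() {
        return name().toLowerCase(Locale.ROOT);
    }

    // Inverse of prettyName(); throws on unknown names, as the test expects
    static PrettyNamedType fromString(String prettyName) {
        for (PrettyNamedType type : values()) {
            if (type.prettyName().equals(prettyName)) {
                return type;
            }
        }
        throw new IllegalArgumentException("Unknown type: " + prettyName);
    }
}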

View File

@@ -5,78 +5,48 @@
  */
 package org.elasticsearch.xpack.ml.job.process.autodetect.writer;
-import org.apache.logging.log4j.Logger;
 import org.elasticsearch.test.ESTestCase;
 import org.elasticsearch.xpack.ml.job.config.AnalysisConfig;
 import org.elasticsearch.xpack.ml.job.config.DataDescription;
 import org.elasticsearch.xpack.ml.job.config.Detector;
-import org.elasticsearch.xpack.ml.job.config.Condition;
-import org.elasticsearch.xpack.ml.job.config.Operator;
 import org.elasticsearch.xpack.ml.job.process.autodetect.AutodetectProcess;
 import org.elasticsearch.xpack.ml.job.process.autodetect.writer.AbstractDataToProcessWriter.InputOutputMap;
 import org.elasticsearch.xpack.ml.job.process.DataCountsReporter;
-import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
-import org.elasticsearch.xpack.ml.job.config.transform.TransformConfigs;
-import org.elasticsearch.xpack.ml.job.config.transform.TransformType;
-import org.elasticsearch.xpack.ml.transforms.Concat;
-import org.elasticsearch.xpack.ml.transforms.HighestRegisteredDomain;
-import org.elasticsearch.xpack.ml.transforms.RegexSplit;
-import org.elasticsearch.xpack.ml.transforms.StringTransform;
-import org.elasticsearch.xpack.ml.transforms.Transform;
-import org.elasticsearch.xpack.ml.transforms.Transform.TransformIndex;
-import org.junit.Assert;
 import org.junit.Before;
 import org.mockito.Mockito;
 import java.io.IOException;
-import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.Collections;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
-import static org.mockito.Matchers.anyLong;
-import static org.mockito.Mockito.never;
-import static org.mockito.Mockito.times;
-import static org.mockito.Mockito.verify;
 /**
- * Testing methods of AbstractDataToProcessWriter but uses the concrete
- * instances.
- * <p>
- * Asserts that the transforms have the right input and outputs.
+ * Testing methods of AbstractDataToProcessWriter but uses the concrete instances.
  */
 public class AbstractDataToProcessWriterTests extends ESTestCase {
 private AutodetectProcess autodetectProcess;
 private DataCountsReporter dataCountsReporter;
-private Logger jobLogger;
 @Before
 public void setUpMocks() {
 autodetectProcess = Mockito.mock(AutodetectProcess.class);
 dataCountsReporter = Mockito.mock(DataCountsReporter.class);
-jobLogger = Mockito.mock(Logger.class);
 }
-public void testInputFields_MulitpleInputsSingleOutput() throws IOException {
+public void testInputFields() throws IOException {
 DataDescription.Builder dd = new DataDescription.Builder();
 dd.setTimeField("time_field");
 Detector.Builder detector = new Detector.Builder("metric", "value");
-detector.setByFieldName("host-metric");
-detector.setDetectorDescription("metric(value) by host-metric");
+detector.setByFieldName("metric");
+detector.setPartitionFieldName("host");
+detector.setDetectorDescription("metric(value) by metric partitionfield=host");
 AnalysisConfig ac = new AnalysisConfig.Builder(Arrays.asList(detector.build())).build();
-TransformConfig tc = new TransformConfig(TransformType.Names.CONCAT_NAME);
-tc.setInputs(Arrays.asList("host", "metric"));
-tc.setOutputs(Arrays.asList("host-metric"));
-TransformConfigs transforms = new TransformConfigs(Arrays.asList(tc));
 AbstractDataToProcessWriter writer =
-new CsvDataToProcessWriter(true, autodetectProcess, dd.build(), ac, transforms, dataCountsReporter, jobLogger);
+new CsvDataToProcessWriter(true, autodetectProcess, dd.build(), ac, dataCountsReporter);
 writer.writeHeader();
@@ -88,314 +58,32 @@ public class AbstractDataToProcessWriterTests extends ESTestCase {
 assertTrue(inputFields.contains("metric"));
 String[] header = { "time_field", "metric", "host", "value" };
-writer.buildTransforms(header);
+writer.buildFieldIndexMapping(header);
-List<Transform> trs = writer.postDateTransforms;
-assertEquals(1, trs.size());
-Transform tr = trs.get(0);
-List<TransformIndex> readIndexes = tr.getReadIndexes();
-assertEquals(readIndexes.get(0), new TransformIndex(0, 2));
-assertEquals(readIndexes.get(1), new TransformIndex(0, 1));
-List<TransformIndex> writeIndexes = tr.getWriteIndexes();
-assertEquals(writeIndexes.get(0), new TransformIndex(2, 1));
 Map<String, Integer> inputIndexes = writer.getInputFieldIndexes();
 assertEquals(4, inputIndexes.size());
-Assert.assertEquals(new Integer(0), inputIndexes.get("time_field"));
-Assert.assertEquals(new Integer(1), inputIndexes.get("metric"));
-Assert.assertEquals(new Integer(2), inputIndexes.get("host"));
-Assert.assertEquals(new Integer(3), inputIndexes.get("value"));
+assertEquals(new Integer(0), inputIndexes.get("time_field"));
+assertEquals(new Integer(1), inputIndexes.get("metric"));
+assertEquals(new Integer(2), inputIndexes.get("host"));
+assertEquals(new Integer(3), inputIndexes.get("value"));
 Map<String, Integer> outputIndexes = writer.getOutputFieldIndexes();
-assertEquals(4, outputIndexes.size());
-Assert.assertEquals(new Integer(0), outputIndexes.get("time_field"));
-Assert.assertEquals(new Integer(1), outputIndexes.get("host-metric"));
-Assert.assertEquals(new Integer(2), outputIndexes.get("value"));
-Assert.assertEquals(new Integer(3), outputIndexes.get(LengthEncodedWriter.CONTROL_FIELD_NAME));
+assertEquals(5, outputIndexes.size());
+assertEquals(new Integer(0), outputIndexes.get("time_field"));
+assertEquals(new Integer(1), outputIndexes.get("host"));
+assertEquals(new Integer(2), outputIndexes.get("metric"));
+assertEquals(new Integer(3), outputIndexes.get("value"));
+assertEquals(new Integer(4), outputIndexes.get(LengthEncodedWriter.CONTROL_FIELD_NAME));
 List<InputOutputMap> inOutMaps = writer.getInputOutputMap();
-assertEquals(1, inOutMaps.size());
-assertEquals(inOutMaps.get(0).inputIndex, 3);
-assertEquals(inOutMaps.get(0).outputIndex, 2);
-}
+assertEquals(4, inOutMaps.size());
+assertEquals(inOutMaps.get(0).inputIndex, 0);
+assertEquals(inOutMaps.get(0).outputIndex, 0);
+assertEquals(inOutMaps.get(1).inputIndex, 2);
+assertEquals(inOutMaps.get(1).outputIndex, 1);
+assertEquals(inOutMaps.get(2).inputIndex, 1);
+assertEquals(inOutMaps.get(2).outputIndex, 2);
+assertEquals(inOutMaps.get(3).inputIndex, 3);
+assertEquals(inOutMaps.get(3).outputIndex, 3);
-public void testInputFields_SingleInputMulitpleOutputs() throws IOException {
-DataDescription.Builder dd = new DataDescription.Builder();
-dd.setTimeField("time_field");
Detector.Builder detector = new Detector.Builder("metric", "value");
detector.setByFieldName(TransformType.DOMAIN_SPLIT.defaultOutputNames().get(0));
detector.setOverFieldName(TransformType.DOMAIN_SPLIT.defaultOutputNames().get(1));
AnalysisConfig ac = new AnalysisConfig.Builder(Arrays.asList(detector.build())).build();
TransformConfig tc = new TransformConfig(TransformType.Names.DOMAIN_SPLIT_NAME);
tc.setInputs(Arrays.asList("domain"));
TransformConfigs transforms = new TransformConfigs(Arrays.asList(tc));
AbstractDataToProcessWriter writer =
new CsvDataToProcessWriter(true, autodetectProcess, dd.build(), ac, transforms, dataCountsReporter, jobLogger);
writer.writeHeader();
Set<String> inputFields = new HashSet<>(writer.inputFields());
assertEquals(3, inputFields.size());
assertTrue(inputFields.contains("time_field"));
assertTrue(inputFields.contains("value"));
assertTrue(inputFields.contains("domain"));
String[] header = { "time_field", "domain", "value" };
writer.buildTransforms(header);
List<Transform> trs = writer.postDateTransforms;
assertEquals(1, trs.size());
Map<String, Integer> inputIndexes = writer.getInputFieldIndexes();
assertEquals(3, inputIndexes.size());
Assert.assertEquals(new Integer(0), inputIndexes.get("time_field"));
Assert.assertEquals(new Integer(1), inputIndexes.get("domain"));
Assert.assertEquals(new Integer(2), inputIndexes.get("value"));
Map<String, Integer> outputIndexes = writer.getOutputFieldIndexes();
List<String> allOutputs = new ArrayList<>(TransformType.DOMAIN_SPLIT.defaultOutputNames());
allOutputs.add("value");
Collections.sort(allOutputs); // outputs are in alphabetical order
assertEquals(5, outputIndexes.size()); // time + control field + outputs
Assert.assertEquals(new Integer(0), outputIndexes.get("time_field"));
int count = 1;
for (String f : allOutputs) {
Assert.assertEquals(new Integer(count++), outputIndexes.get(f));
}
Assert.assertEquals(new Integer(allOutputs.size() + 1), outputIndexes.get(LengthEncodedWriter.CONTROL_FIELD_NAME));
List<InputOutputMap> inOutMaps = writer.getInputOutputMap();
assertEquals(1, inOutMaps.size());
assertEquals(inOutMaps.get(0).inputIndex, 2);
assertEquals(inOutMaps.get(0).outputIndex, allOutputs.indexOf("value") + 1);
Transform tr = trs.get(0);
assertEquals(tr.getReadIndexes().get(0), new TransformIndex(0, 1));
List<TransformIndex> writeIndexes = new ArrayList<>();
int[] outIndexes = new int[TransformType.DOMAIN_SPLIT.defaultOutputNames().size()];
for (int i = 0; i < outIndexes.length; i++) {
writeIndexes.add(new TransformIndex(2, allOutputs.indexOf(TransformType.DOMAIN_SPLIT.defaultOutputNames().get(i)) + 1));
}
assertEquals(writeIndexes, tr.getWriteIndexes());
}
/**
* Only one output of the transform is used
*/
public void testInputFields_SingleInputMulitpleOutputs_OnlyOneOutputUsed() throws IOException {
DataDescription.Builder dd = new DataDescription.Builder();
dd.setTimeField("time_field");
Detector.Builder detector = new Detector.Builder("metric", "value");
detector.setByFieldName(TransformType.DOMAIN_SPLIT.defaultOutputNames().get(0));
AnalysisConfig ac = new AnalysisConfig.Builder(Arrays.asList(detector.build())).build();
TransformConfig tc = new TransformConfig(TransformType.Names.DOMAIN_SPLIT_NAME);
tc.setInputs(Arrays.asList("domain"));
TransformConfigs transforms = new TransformConfigs(Arrays.asList(tc));
AbstractDataToProcessWriter writer =
new CsvDataToProcessWriter(true, autodetectProcess, dd.build(), ac, transforms, dataCountsReporter, jobLogger);
writer.writeHeader();
Set<String> inputFields = new HashSet<>(writer.inputFields());
assertEquals(3, inputFields.size());
assertTrue(inputFields.contains("time_field"));
assertTrue(inputFields.contains("value"));
assertTrue(inputFields.contains("domain"));
String[] header = { "time_field", "domain", "value" };
writer.buildTransforms(header);
List<Transform> trs = writer.postDateTransforms;
assertEquals(1, trs.size());
Map<String, Integer> inputIndexes = writer.getInputFieldIndexes();
assertEquals(3, inputIndexes.size());
Assert.assertEquals(new Integer(0), inputIndexes.get("time_field"));
Assert.assertEquals(new Integer(1), inputIndexes.get("domain"));
Assert.assertEquals(new Integer(2), inputIndexes.get("value"));
Map<String, Integer> outputIndexes = writer.getOutputFieldIndexes();
List<String> allOutputs = new ArrayList<>();
allOutputs.add(TransformType.DOMAIN_SPLIT.defaultOutputNames().get(0));
allOutputs.add("value");
Collections.sort(allOutputs); // outputs are in alphabetical order
assertEquals(4, outputIndexes.size()); // time + control field + outputs
Assert.assertEquals(new Integer(0), outputIndexes.get("time_field"));
int count = 1;
for (String f : allOutputs) {
Assert.assertEquals(new Integer(count++), outputIndexes.get(f));
}
Assert.assertEquals(new Integer(allOutputs.size() + 1), outputIndexes.get(LengthEncodedWriter.CONTROL_FIELD_NAME));
List<InputOutputMap> inOutMaps = writer.getInputOutputMap();
assertEquals(1, inOutMaps.size());
assertEquals(inOutMaps.get(0).inputIndex, 2);
assertEquals(inOutMaps.get(0).outputIndex, allOutputs.indexOf("value") + 1);
Transform tr = trs.get(0);
assertEquals(tr.getReadIndexes().get(0), new TransformIndex(0, 1));
TransformIndex ti = new TransformIndex(2, allOutputs.indexOf(TransformType.DOMAIN_SPLIT.defaultOutputNames().get(0)) + 1);
assertEquals(tr.getWriteIndexes().get(0), ti);
}
/**
* Only one output of the transform is used
*/
public void testBuildTransforms_ChainedTransforms() throws IOException {
DataDescription.Builder dd = new DataDescription.Builder();
dd.setTimeField("datetime");
Detector.Builder detector = new Detector.Builder("metric", "value");
detector.setByFieldName(TransformType.DOMAIN_SPLIT.defaultOutputNames().get(0));
AnalysisConfig ac = new AnalysisConfig.Builder(Arrays.asList(detector.build())).build();
TransformConfig concatTc = new TransformConfig(TransformType.Names.CONCAT_NAME);
concatTc.setInputs(Arrays.asList("date", "time"));
concatTc.setOutputs(Arrays.asList("datetime"));
TransformConfig hrdTc = new TransformConfig(TransformType.Names.DOMAIN_SPLIT_NAME);
hrdTc.setInputs(Arrays.asList("domain"));
TransformConfigs transforms = new TransformConfigs(Arrays.asList(concatTc, hrdTc));
AbstractDataToProcessWriter writer =
new CsvDataToProcessWriter(true, autodetectProcess, dd.build(), ac, transforms, dataCountsReporter, jobLogger);
writer.writeHeader();
Set<String> inputFields = new HashSet<>(writer.inputFields());
assertEquals(4, inputFields.size());
assertTrue(inputFields.contains("date"));
assertTrue(inputFields.contains("time"));
assertTrue(inputFields.contains("value"));
assertTrue(inputFields.contains("domain"));
String[] header = { "date", "time", "domain", "value" };
writer.buildTransforms(header);
List<Transform> trs = writer.dateInputTransforms;
assertEquals(1, trs.size());
assertTrue(trs.get(0) instanceof Concat);
trs = writer.postDateTransforms;
assertEquals(1, trs.size());
assertTrue(trs.get(0) instanceof HighestRegisteredDomain);
Map<String, Integer> inputIndexes = writer.getInputFieldIndexes();
assertEquals(4, inputIndexes.size());
Assert.assertEquals(new Integer(0), inputIndexes.get("date"));
Assert.assertEquals(new Integer(1), inputIndexes.get("time"));
Assert.assertEquals(new Integer(2), inputIndexes.get("domain"));
Assert.assertEquals(new Integer(3), inputIndexes.get("value"));
}
/**
* The exclude transform returns fail fatal meaning the record shouldn't be
* processed.
*/
public void testApplyTransforms_transformReturnsExclude()
throws IOException {
DataDescription.Builder dd = new DataDescription.Builder();
dd.setTimeField("datetime");
Detector.Builder detector = new Detector.Builder("metric", "value");
detector.setByFieldName("metric");
AnalysisConfig ac = new AnalysisConfig.Builder(Arrays.asList(detector.build())).build();
TransformConfig excludeConfig = new TransformConfig(TransformType.EXCLUDE.prettyName());
excludeConfig.setInputs(Arrays.asList("metric"));
excludeConfig.setCondition(new Condition(Operator.MATCH, "metricA"));
TransformConfigs transforms = new TransformConfigs(Arrays.asList(excludeConfig));
AbstractDataToProcessWriter writer =
new CsvDataToProcessWriter(true, autodetectProcess, dd.build(), ac, transforms, dataCountsReporter, jobLogger);
writer.writeHeader();
String[] header = { "datetime", "metric", "value" };
writer.buildTransforms(header);
// metricA is excluded
String[] input = { "1", "metricA", "0" };
String[] output = new String[3];
assertFalse(writer.applyTransformsAndWrite(input, output, 3));
verify(autodetectProcess, never()).writeRecord(output);
verify(dataCountsReporter, never()).reportRecordWritten(anyLong(), anyLong());
// reset the call counts etc.
Mockito.reset(dataCountsReporter);
// this is ok
input = new String[] { "2", "metricB", "0" };
String[] expectedOutput = { "2", null, null };
assertTrue(writer.applyTransformsAndWrite(input, output, 3));
verify(autodetectProcess, times(1)).writeRecord(expectedOutput);
verify(dataCountsReporter, times(1)).reportRecordWritten(3, 2000);
}
public void testBuildTransforms_DateTransformsAreSorted() throws IOException {
DataDescription.Builder dd = new DataDescription.Builder();
dd.setTimeField("datetime");
Detector.Builder detector = new Detector.Builder("metric", "value");
detector.setByFieldName("type");
AnalysisConfig ac = new AnalysisConfig.Builder(Arrays.asList(detector.build())).build();
TransformConfig concatTc = new TransformConfig(TransformType.Names.CONCAT_NAME);
concatTc.setInputs(Arrays.asList("DATE", "time"));
concatTc.setOutputs(Arrays.asList("datetime"));
TransformConfig upperTc = new TransformConfig(TransformType.Names.UPPERCASE_NAME);
upperTc.setInputs(Arrays.asList("date"));
upperTc.setOutputs(Arrays.asList("DATE"));
TransformConfig splitTc = new TransformConfig(TransformType.Names.SPLIT_NAME);
splitTc.setInputs(Arrays.asList("date-somethingelse"));
splitTc.setOutputs(Arrays.asList("date"));
splitTc.setArguments(Arrays.asList("-"));
TransformConfigs transforms = new TransformConfigs(Arrays.asList(upperTc, concatTc, splitTc));
AbstractDataToProcessWriter writer =
new CsvDataToProcessWriter(true, autodetectProcess, dd.build(), ac, transforms, dataCountsReporter, jobLogger);
writer.writeHeader();
String[] header = { "date-somethingelse", "time", "type", "value" };
writer.buildTransforms(header);
// the date input transforms should be in this order
List<Transform> trs = writer.dateInputTransforms;
assertEquals(3, trs.size());
assertTrue(trs.get(0) instanceof RegexSplit);
assertTrue(trs.get(1) instanceof StringTransform);
assertTrue(trs.get(2) instanceof Concat);
 }
 }
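
With the transforms gone, the rewritten test above reduces the writer's start-up work to a header-to-output index mapping: the time field first, the analysis fields in output order, and the control field appended last. A minimal sketch of such a mapping under those assumptions (the names here are illustrative, not the real API):

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Sketch in the spirit of buildFieldIndexMapping(): for each field in the
// desired output order, record where it sits in the input header.
final class FieldIndexMapperSketch {
    static final class InputOutputMap {
        final int inputIndex;
        final int outputIndex;

        InputOutputMap(int inputIndex, int outputIndex) {
            this.inputIndex = inputIndex;
            this.outputIndex = outputIndex;
        }
    }

    static List<InputOutputMap> build(String[] header, List<String> outputOrder) {
        Map<String, Integer> headerIndex = new HashMap<>();
        for (int i = 0; i < header.length; i++) {
            headerIndex.put(header[i], i);
        }
        List<InputOutputMap> maps = new ArrayList<>();
        for (int out = 0; out < outputOrder.size(); out++) {
            Integer in = headerIndex.get(outputOrder.get(out));
            if (in != null) {
                maps.add(new InputOutputMap(in, out));
            }
        }
        return maps;
    }
}

For the header { "time_field", "metric", "host", "value" } and the output order { "time_field", "host", "metric", "value" }, this yields the pairs (0,0), (2,1), (1,2), (3,3) asserted in the test.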

View File

@@ -5,7 +5,6 @@
  */
 package org.elasticsearch.xpack.ml.job.process.autodetect.writer;
-import org.apache.logging.log4j.Logger;
 import org.elasticsearch.test.ESTestCase;
 import org.elasticsearch.xpack.ml.job.config.AnalysisConfig;
 import org.elasticsearch.xpack.ml.job.process.autodetect.state.DataCounts;
@@ -14,9 +13,6 @@ import org.elasticsearch.xpack.ml.job.config.DataDescription.DataFormat;
 import org.elasticsearch.xpack.ml.job.config.Detector;
 import org.elasticsearch.xpack.ml.job.process.autodetect.AutodetectProcess;
 import org.elasticsearch.xpack.ml.job.process.DataCountsReporter;
-import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
-import org.elasticsearch.xpack.ml.job.config.transform.TransformConfigs;
-import org.elasticsearch.xpack.ml.job.config.transform.TransformType;
 import org.junit.Before;
 import org.mockito.Mockito;
 import org.mockito.invocation.InvocationOnMock;
@@ -43,11 +39,9 @@ import static org.mockito.Mockito.when;
 public class CsvDataToProcessWriterTests extends ESTestCase {
 private AutodetectProcess autodetectProcess;
-private List<TransformConfig> transforms;
 private DataDescription.Builder dataDescription;
 private AnalysisConfig analysisConfig;
 private DataCountsReporter dataCountsReporter;
-private Logger jobLogger;
 private List<String[]> writtenRecords;
@@ -55,7 +49,6 @@ public class CsvDataToProcessWriterTests extends ESTestCase {
 public void setUpMocks() throws IOException {
 autodetectProcess = Mockito.mock(AutodetectProcess.class);
 dataCountsReporter = Mockito.mock(DataCountsReporter.class);
-jobLogger = Mockito.mock(Logger.class);
 writtenRecords = new ArrayList<>();
 doAnswer(new Answer<Void>() {
@@ -68,8 +61,6 @@
 }
 }).when(autodetectProcess).writeRecord(any(String[].class));
-transforms = new ArrayList<>();
 dataDescription = new DataDescription.Builder();
 dataDescription.setFieldDelimiter(',');
 dataDescription.setFormat(DataFormat.DELIMITED);
@@ -79,8 +70,7 @@
 analysisConfig = new AnalysisConfig.Builder(Arrays.asList(detector)).build();
 }
-public void testWrite_GivenTimeFormatIsEpochAndDataIsValid()
-throws IOException {
+public void testWrite_GivenTimeFormatIsEpochAndDataIsValid() throws IOException {
 StringBuilder input = new StringBuilder();
 input.append("time,metric,value\n");
 input.append("1,foo,1.0\n");
@@ -101,40 +91,7 @@ public class CsvDataToProcessWriterTests extends ESTestCase {
 verify(dataCountsReporter).finishReporting();
 }
-public void testWrite_GivenTransformAndEmptyField()
-throws IOException {
-TransformConfig transform = new TransformConfig("uppercase");
-transform.setInputs(Arrays.asList("value"));
-transform.setOutputs(Arrays.asList("transformed"));
-transforms.add(transform);
-Detector existingDetector = analysisConfig.getDetectors().get(0);
-Detector.Builder newDetector = new Detector.Builder(existingDetector);
-newDetector.setFieldName("transformed");
-analysisConfig.getDetectors().set(0, newDetector.build());
-StringBuilder input = new StringBuilder();
-input.append("time,metric,value\n");
-input.append("1,,foo\n");
-input.append("2,,\n");
-InputStream inputStream = createInputStream(input.toString());
-CsvDataToProcessWriter writer = createWriter();
-writer.writeHeader();
-writer.write(inputStream);
-verify(dataCountsReporter, times(1)).startNewIncrementalCount();
-List<String[]> expectedRecords = new ArrayList<>();
-// The final field is the control field
-expectedRecords.add(new String[] { "time", "transformed", "." });
-expectedRecords.add(new String[] { "1", "FOO", "" });
-expectedRecords.add(new String[] { "2", "", "" });
-assertWrittenRecordsEqualTo(expectedRecords);
-verify(dataCountsReporter).finishReporting();
-}
-public void testWrite_GivenTimeFormatIsEpochAndTimestampsAreOutOfOrder()
-throws IOException {
+public void testWrite_GivenTimeFormatIsEpochAndTimestampsAreOutOfOrder() throws IOException {
 StringBuilder input = new StringBuilder();
 input.append("time,metric,value\n");
 input.append("3,foo,3.0\n");
@@ -157,8 +114,7 @@ public class CsvDataToProcessWriterTests extends ESTestCase {
 verify(dataCountsReporter).finishReporting();
 }
-public void testWrite_GivenTimeFormatIsEpochAndAllRecordsAreOutOfOrder()
-throws IOException {
+public void testWrite_GivenTimeFormatIsEpochAndAllRecordsAreOutOfOrder() throws IOException {
 StringBuilder input = new StringBuilder();
 input.append("time,metric,value\n");
 input.append("1,foo,1.0\n");
@@ -182,8 +138,7 @@ public class CsvDataToProcessWriterTests extends ESTestCase {
 verify(dataCountsReporter).finishReporting();
 }
-public void testWrite_GivenTimeFormatIsEpochAndSomeTimestampsWithinLatencySomeOutOfOrder()
-throws IOException {
+public void testWrite_GivenTimeFormatIsEpochAndSomeTimestampsWithinLatencySomeOutOfOrder() throws IOException {
 AnalysisConfig.Builder builder = new AnalysisConfig.Builder(Arrays.asList(new Detector.Builder("metric", "value").build()));
 builder.setLatency(2L);
 analysisConfig = builder.build();
@@ -216,8 +171,7 @@ public class CsvDataToProcessWriterTests extends ESTestCase {
 verify(dataCountsReporter).finishReporting();
 }
-public void testWrite_NullByte()
-throws IOException {
+public void testWrite_NullByte() throws IOException {
 AnalysisConfig.Builder builder = new AnalysisConfig.Builder(Arrays.asList(new Detector.Builder("metric", "value").build()));
 builder.setLatency(0L);
 analysisConfig = builder.build();
@@ -225,7 +179,7 @@ public class CsvDataToProcessWriterTests extends ESTestCase {
 StringBuilder input = new StringBuilder();
 input.append("metric,value,time\n");
 input.append("foo,4.0,1\n");
-input.append("\0"); // the csv reader skips over this line
+input.append("\0"); // the csv reader treats this as a line (even though it doesn't end with \n) and skips over it
 input.append("foo,5.0,2\n");
 input.append("foo,3.0,3\n");
 input.append("bar,4.0,4\n");
@@ -245,7 +199,7 @@ public class CsvDataToProcessWriterTests extends ESTestCase {
 expectedRecords.add(new String[] { "4", "4.0", "" });
 assertWrittenRecordsEqualTo(expectedRecords);
-verify(dataCountsReporter, times(1)).reportMissingField();
+verify(dataCountsReporter, times(2)).reportMissingField();
 verify(dataCountsReporter, times(1)).reportRecordWritten(2, 1000);
 verify(dataCountsReporter, times(1)).reportRecordWritten(2, 2000);
 verify(dataCountsReporter, times(1)).reportRecordWritten(2, 3000);
@@ -270,83 +224,7 @@ public class CsvDataToProcessWriterTests extends ESTestCase {
 assertEquals(0L, counts.getInputRecordCount());
 }
-public void testWrite_GivenDateTimeFieldIsOutputOfTransform()
-throws IOException {
TransformConfig transform = new TransformConfig("concat");
transform.setInputs(Arrays.asList("date", "time-of-day"));
transform.setOutputs(Arrays.asList("datetime"));
transforms.add(transform);
dataDescription = new DataDescription.Builder();
dataDescription.setFieldDelimiter(',');
dataDescription.setTimeField("datetime");
dataDescription.setFormat(DataFormat.DELIMITED);
dataDescription.setTimeFormat("yyyy-MM-ddHH:mm:ssX");
CsvDataToProcessWriter writer = createWriter();
writer.writeHeader();
StringBuilder input = new StringBuilder();
input.append("date,time-of-day,metric,value\n");
input.append("1970-01-01,00:00:01Z,foo,5.0\n");
input.append("1970-01-01,00:00:02Z,foo,6.0\n");
InputStream inputStream = createInputStream(input.toString());
writer.write(inputStream);
verify(dataCountsReporter, times(1)).startNewIncrementalCount();
List<String[]> expectedRecords = new ArrayList<>();
// The final field is the control field
expectedRecords.add(new String[] { "datetime", "value", "." });
expectedRecords.add(new String[] { "1", "5.0", "" });
expectedRecords.add(new String[] { "2", "6.0", "" });
assertWrittenRecordsEqualTo(expectedRecords);
verify(dataCountsReporter).finishReporting();
}
public void testWrite_GivenChainedTransforms_SortsByDependencies()
throws IOException {
TransformConfig tc1 = new TransformConfig(TransformType.Names.UPPERCASE_NAME);
tc1.setInputs(Arrays.asList("dns"));
tc1.setOutputs(Arrays.asList("dns_upper"));
TransformConfig tc2 = new TransformConfig(TransformType.Names.CONCAT_NAME);
tc2.setInputs(Arrays.asList("dns1", "dns2"));
tc2.setArguments(Arrays.asList("."));
tc2.setOutputs(Arrays.asList("dns"));
transforms.add(tc1);
transforms.add(tc2);
Detector.Builder detector = new Detector.Builder("metric", "value");
detector.setByFieldName("dns_upper");
AnalysisConfig.Builder builder = new AnalysisConfig.Builder(Arrays.asList(detector.build()));
analysisConfig = builder.build();
StringBuilder input = new StringBuilder();
input.append("time,dns1,dns2,value\n");
input.append("1,www,foo.com,1.0\n");
input.append("2,www,bar.com,2.0\n");
InputStream inputStream = createInputStream(input.toString());
CsvDataToProcessWriter writer = createWriter();
writer.writeHeader();
writer.write(inputStream);
verify(dataCountsReporter, times(1)).startNewIncrementalCount();
List<String[]> expectedRecords = new ArrayList<>();
// The final field is the control field
expectedRecords.add(new String[] { "time", "dns_upper", "value", "." });
expectedRecords.add(new String[] { "1", "WWW.FOO.COM", "1.0", "" });
expectedRecords.add(new String[] { "2", "WWW.BAR.COM", "2.0", "" });
assertWrittenRecordsEqualTo(expectedRecords);
verify(dataCountsReporter).finishReporting();
}
-public void testWrite_GivenMisplacedQuoteMakesRecordExtendOverTooManyLines()
-throws IOException {
+public void testWrite_GivenMisplacedQuoteMakesRecordExtendOverTooManyLines() throws IOException {
 StringBuilder input = new StringBuilder();
 input.append("time,metric,value\n");
@@ -372,7 +250,7 @@ public class CsvDataToProcessWriterTests extends ESTestCase {
 private CsvDataToProcessWriter createWriter() {
 return new CsvDataToProcessWriter(true, autodetectProcess, dataDescription.build(), analysisConfig,
-new TransformConfigs(transforms), dataCountsReporter, jobLogger);
+dataCountsReporter);
 }
 private void assertWrittenRecordsEqualTo(List<String[]> expectedRecords) {
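
One detail that recurs in the expected records throughout these tests: every record handed to the autodetect process carries one extra trailing column, the control field, written as "." in the header row and left empty in data rows. A small sketch of that framing, assumed from the expectations above rather than taken from the writer's actual code:

import java.util.Arrays;

final class ControlFieldSketch {
    // Append the control field: "." in the header row, "" in data rows.
    static String[] withControlField(String[] fields, boolean isHeader) {
        String[] record = Arrays.copyOf(fields, fields.length + 1);
        record[fields.length] = isHeader ? "." : "";
        return record;
    }
}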

View File

@@ -5,14 +5,12 @@
  */
 package org.elasticsearch.xpack.ml.job.process.autodetect.writer;
-import org.apache.logging.log4j.Logger;
 import org.elasticsearch.test.ESTestCase;
 import org.elasticsearch.xpack.ml.job.config.AnalysisConfig;
 import org.elasticsearch.xpack.ml.job.config.DataDescription;
 import org.elasticsearch.xpack.ml.job.config.DataDescription.DataFormat;
 import org.elasticsearch.xpack.ml.job.process.autodetect.AutodetectProcess;
 import org.elasticsearch.xpack.ml.job.process.DataCountsReporter;
-import org.elasticsearch.xpack.ml.job.config.transform.TransformConfigs;
 import static org.mockito.Mockito.mock;
@@ -31,15 +29,8 @@ public class DataToProcessWriterFactoryTests extends ESTestCase {
 assertTrue(createWriter(dataDescription.build()) instanceof CsvDataToProcessWriter);
 }
-public void testCreate_GivenDataFormatIsSingleLine() {
-DataDescription.Builder dataDescription = new DataDescription.Builder();
-dataDescription.setFormat(DataFormat.SINGLE_LINE);
-assertTrue(createWriter(dataDescription.build()) instanceof SingleLineDataToProcessWriter);
-}
 private static DataToProcessWriter createWriter(DataDescription dataDescription) {
 return DataToProcessWriterFactory.create(true, mock(AutodetectProcess.class), dataDescription,
-mock(AnalysisConfig.class), mock(TransformConfigs.class), mock(DataCountsReporter.class), mock(Logger.class));
+mock(AnalysisConfig.class), mock(DataCountsReporter.class));
 }
 }
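
After this change the factory only has two formats to dispatch on: DELIMITED yields the CSV writer and JSON the JSON writer, with SINGLE_LINE gone. A hedged sketch of that shape, with stand-in types rather than the real classes and constructor arguments elided:

// Illustrative sketch of the factory's post-change dispatch.
final class WriterFactorySketch {
    enum Format { DELIMITED, JSON }

    interface Writer { }
    static final class CsvWriter implements Writer { }
    static final class JsonWriter implements Writer { }

    static Writer create(Format format) {
        switch (format) {
            case DELIMITED:
                return new CsvWriter();
            case JSON:
                return new JsonWriter();
            default:
                throw new IllegalArgumentException("Unsupported format: " + format);
        }
    }
}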

View File

@@ -1,141 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.job.process.autodetect.writer;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.ml.job.config.AnalysisConfig;
import org.elasticsearch.xpack.ml.job.config.DataDescription;
import org.elasticsearch.xpack.ml.job.config.DataDescription.DataFormat;
import org.elasticsearch.xpack.ml.job.config.Detector;
import org.elasticsearch.xpack.ml.job.process.DataCountsReporter;
import org.elasticsearch.xpack.ml.job.process.autodetect.AutodetectProcess;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfigs;
import org.elasticsearch.xpack.ml.job.config.transform.TransformType;
import org.junit.Before;
import org.mockito.Mockito;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import static org.mockito.Matchers.any;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.verify;
public class DataWithTransformsToProcessWriterTests extends ESTestCase {
private AutodetectProcess autodetectProcess;
private DataCountsReporter dataCountsReporter;
private Logger logger;
private List<String[]> writtenRecords;
@Before
public void setUpMocks() throws IOException {
autodetectProcess = Mockito.mock(AutodetectProcess.class);
dataCountsReporter = Mockito.mock(DataCountsReporter.class);
logger = Mockito.mock(Logger.class);
writtenRecords = new ArrayList<>();
doAnswer(new Answer<Void>() {
@Override
public Void answer(InvocationOnMock invocation) throws Throwable {
String[] record = (String[]) invocation.getArguments()[0];
String[] copy = Arrays.copyOf(record, record.length);
writtenRecords.add(copy);
return null;
}
}).when(autodetectProcess).writeRecord(any(String[].class));
}
public void testCsvWriteWithConcat() throws IOException {
StringBuilder input = new StringBuilder();
input.append("time,host,metric,value\n");
input.append("1,hostA,foo,3.0\n");
input.append("2,hostB,bar,2.0\n");
input.append("2,hostA,bar,2.0\n");
InputStream inputStream = createInputStream(input.toString());
AbstractDataToProcessWriter writer = createWriter(true);
writer.writeHeader();
writer.write(inputStream);
List<String[]> expectedRecords = new ArrayList<>();
// The final field is the control field
expectedRecords.add(new String[]{"time", "concat", "value", "."});
expectedRecords.add(new String[]{"1", "hostAfoo", "3.0", ""});
expectedRecords.add(new String[]{"2", "hostBbar", "2.0", ""});
expectedRecords.add(new String[]{"2", "hostAbar", "2.0", ""});
assertWrittenRecordsEqualTo(expectedRecords);
verify(dataCountsReporter).finishReporting();
}
public void testJsonWriteWithConcat() throws IOException {
StringBuilder input = new StringBuilder();
input.append("{\"time\" : 1, \"host\" : \"hostA\", \"metric\" : \"foo\", \"value\" : 3.0}\n");
input.append("{\"time\" : 2, \"host\" : \"hostB\", \"metric\" : \"bar\", \"value\" : 2.0}\n");
input.append("{\"time\" : 2, \"host\" : \"hostA\", \"metric\" : \"bar\", \"value\" : 2.0}\n");
InputStream inputStream = createInputStream(input.toString());
AbstractDataToProcessWriter writer = createWriter(false);
writer.writeHeader();
writer.write(inputStream);
List<String[]> expectedRecords = new ArrayList<>();
// The final field is the control field
expectedRecords.add(new String[]{"time", "concat", "value", "."});
expectedRecords.add(new String[]{"1", "hostAfoo", "3.0", ""});
expectedRecords.add(new String[]{"2", "hostBbar", "2.0", ""});
expectedRecords.add(new String[]{"2", "hostAbar", "2.0", ""});
assertWrittenRecordsEqualTo(expectedRecords);
verify(dataCountsReporter).finishReporting();
}
private static InputStream createInputStream(String input) {
return new ByteArrayInputStream(input.getBytes(StandardCharsets.UTF_8));
}
private AbstractDataToProcessWriter createWriter(boolean doCsv) {
DataDescription.Builder dd = new DataDescription.Builder();
dd.setFieldDelimiter(',');
dd.setFormat(doCsv ? DataFormat.DELIMITED : DataFormat.JSON);
dd.setTimeFormat(DataDescription.EPOCH);
Detector.Builder detector = new Detector.Builder("metric", "value");
detector.setByFieldName("concat");
AnalysisConfig ac = new AnalysisConfig.Builder(Arrays.asList(detector.build())).build();
TransformConfig tc = new TransformConfig(TransformType.Names.CONCAT_NAME);
tc.setInputs(Arrays.asList("host", "metric"));
TransformConfigs tcs = new TransformConfigs(Arrays.asList(tc));
if (doCsv) {
return new CsvDataToProcessWriter(true, autodetectProcess, dd.build(), ac, tcs, dataCountsReporter, logger);
} else {
return new JsonDataToProcessWriter(true, autodetectProcess, dd.build(), ac, tcs, dataCountsReporter, logger);
}
}
private void assertWrittenRecordsEqualTo(List<String[]> expectedRecords) {
assertEquals(expectedRecords.size(), writtenRecords.size());
for (int i = 0; i < expectedRecords.size(); i++) {
assertArrayEquals(expectedRecords.get(i), writtenRecords.get(i));
}
}
}
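
The concat behaviour exercised in this removed test is ordered joining of the input values, with an optional delimiter argument and an empty delimiter by default, so {"hostA", "foo"} becomes "hostAfoo". A runnable stand-in sketch:

import java.util.Arrays;
import java.util.List;

final class ConcatSketch {
    // Join values in order; the tests in this change use "" (default) and "--".
    static String concat(List<String> values, String delimiter) {
        return String.join(delimiter, values);
    }

    public static void main(String[] args) {
        System.out.println(concat(Arrays.asList("hostA", "foo"), ""));  // hostAfoo
        System.out.println(concat(Arrays.asList("b", "c", "e"), "--")); // b--c--e
    }
}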

View File

@@ -0,0 +1,25 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.job.process.autodetect.writer;
import org.elasticsearch.test.ESTestCase;
public class DateFormatDateTransformerTests extends ESTestCase {
public void testTransform_GivenValidTimestamp() throws CannotParseTimestampException {
DateFormatDateTransformer transformer = new DateFormatDateTransformer("yyyy-MM-dd HH:mm:ssXXX");
assertEquals(1388534400000L, transformer.transform("2014-01-01 00:00:00Z"));
}
public void testTransform_GivenInvalidTimestamp() throws CannotParseTimestampException {
DateFormatDateTransformer transformer = new DateFormatDateTransformer("yyyy-MM-dd HH:mm:ssXXX");
CannotParseTimestampException e = ESTestCase.expectThrows(CannotParseTimestampException.class,
() -> transformer.transform("invalid"));
assertEquals("Cannot parse date 'invalid' with format string 'yyyy-MM-dd HH:mm:ssXXX'", e.getMessage());
}
}
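
For reference, the conversion this new test pins down can be reproduced with the standard java.time API; a runnable sketch (the class name is illustrative, and this is not the transformer's actual implementation):

import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;

public final class DateFormatSketch {
    public static void main(String[] args) {
        // Same pattern as the test; "Z" parses as a zero UTC offset under XXX
        DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ssXXX");
        long epochMs = ZonedDateTime.parse("2014-01-01 00:00:00Z", formatter)
                .toInstant()
                .toEpochMilli();
        System.out.println(epochMs); // 1388534400000
    }
}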

View File

@@ -0,0 +1,31 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.job.process.autodetect.writer;
import org.elasticsearch.test.ESTestCase;
public class DoubleDateTransformerTests extends ESTestCase {
public void testTransform_GivenTimestampIsNotMilliseconds() throws CannotParseTimestampException {
DoubleDateTransformer transformer = new DoubleDateTransformer(false);
assertEquals(1000000, transformer.transform("1000"));
}
public void testTransform_GivenTimestampIsMilliseconds() throws CannotParseTimestampException {
DoubleDateTransformer transformer = new DoubleDateTransformer(true);
assertEquals(1000, transformer.transform("1000"));
}
public void testTransform_GivenTimestampIsNotValidDouble() throws CannotParseTimestampException {
DoubleDateTransformer transformer = new DoubleDateTransformer(false);
CannotParseTimestampException e = ESTestCase.expectThrows(CannotParseTimestampException.class,
() -> transformer.transform("invalid"));
assertEquals("Cannot parse timestamp 'invalid' as epoch value", e.getMessage());
}
}
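
The arithmetic these tests assert is simple: the input is parsed as a double, plain epoch-seconds input is scaled by 1000, and epoch-milliseconds input passes through unchanged. A sketch reproducing exactly the asserted values (stand-in code, not the production class):

public final class EpochSketch {
    public static void main(String[] args) {
        System.out.println(toEpochMs("1000", false)); // 1000000 (seconds scaled to ms)
        System.out.println(toEpochMs("1000", true));  // 1000 (already ms)
    }

    static long toEpochMs(String value, boolean isMilliseconds) {
        double epoch = Double.parseDouble(value); // throws for input like "invalid"
        return isMilliseconds ? (long) epoch : (long) (epoch * 1000);
    }
}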

View File

@@ -5,7 +5,6 @@
  */
 package org.elasticsearch.xpack.ml.job.process.autodetect.writer;
-import org.apache.logging.log4j.Logger;
 import org.elasticsearch.ElasticsearchParseException;
 import org.elasticsearch.test.ESTestCase;
 import org.elasticsearch.xpack.ml.job.config.AnalysisConfig;
@@ -14,9 +13,6 @@ import org.elasticsearch.xpack.ml.job.config.DataDescription.DataFormat;
 import org.elasticsearch.xpack.ml.job.config.Detector;
 import org.elasticsearch.xpack.ml.job.process.autodetect.AutodetectProcess;
 import org.elasticsearch.xpack.ml.job.process.DataCountsReporter;
-import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
-import org.elasticsearch.xpack.ml.job.config.transform.TransformConfigs;
-import org.elasticsearch.xpack.ml.job.config.transform.TransformType;
 import org.junit.Before;
 import org.mockito.Mockito;
 import org.mockito.invocation.InvocationOnMock;
@@ -41,9 +37,7 @@ public class JsonDataToProcessWriterTests extends ESTestCase {
 private AutodetectProcess autodetectProcess;
 private DataCountsReporter dataCountsReporter;
-private Logger logger;
-private List<TransformConfig> transforms;
 private DataDescription.Builder dataDescription;
 private AnalysisConfig analysisConfig;
@@ -53,7 +47,6 @@ public class JsonDataToProcessWriterTests extends ESTestCase {
 public void setUpMocks() throws IOException {
 autodetectProcess = Mockito.mock(AutodetectProcess.class);
 dataCountsReporter = Mockito.mock(DataCountsReporter.class);
-logger = Mockito.mock(Logger.class);
 writtenRecords = new ArrayList<>();
 doAnswer(new Answer<Void>() {
@@ -66,7 +59,6 @@ public class JsonDataToProcessWriterTests extends ESTestCase {
 }
 }).when(autodetectProcess).writeRecord(any(String[].class));
-transforms = new ArrayList<>();
 dataDescription = new DataDescription.Builder();
 dataDescription.setFormat(DataFormat.JSON);
@@ -284,85 +276,13 @@ public class JsonDataToProcessWriterTests extends ESTestCase {
 verify(dataCountsReporter).finishReporting();
 }
public void testWrite_GivenDateTimeFieldIsOutputOfTransform() throws Exception {
TransformConfig transform = new TransformConfig("concat");
transform.setInputs(Arrays.asList("date", "time-of-day"));
transform.setOutputs(Arrays.asList("datetime"));
transforms.add(transform);
dataDescription = new DataDescription.Builder();
dataDescription.setFieldDelimiter(',');
dataDescription.setTimeField("datetime");
dataDescription.setFormat(DataFormat.DELIMITED);
dataDescription.setTimeFormat("yyyy-MM-ddHH:mm:ssX");
JsonDataToProcessWriter writer = createWriter();
writer.writeHeader();
StringBuilder input = new StringBuilder();
input.append("{\"date\":\"1970-01-01\", \"time-of-day\":\"00:00:01Z\", \"value\":\"5.0\"}");
input.append("{\"date\":\"1970-01-01\", \"time-of-day\":\"00:00:02Z\", \"value\":\"6.0\"}");
InputStream inputStream = createInputStream(input.toString());
writer.write(inputStream);
verify(dataCountsReporter, times(1)).startNewIncrementalCount();
List<String[]> expectedRecords = new ArrayList<>();
// The final field is the control field
expectedRecords.add(new String[]{"datetime", "value", "."});
expectedRecords.add(new String[]{"1", "5.0", ""});
expectedRecords.add(new String[]{"2", "6.0", ""});
assertWrittenRecordsEqualTo(expectedRecords);
verify(dataCountsReporter).finishReporting();
}
public void testWrite_GivenChainedTransforms_SortsByDependencies() throws Exception {
TransformConfig tc1 = new TransformConfig(TransformType.Names.UPPERCASE_NAME);
tc1.setInputs(Arrays.asList("dns"));
tc1.setOutputs(Arrays.asList("dns_upper"));
TransformConfig tc2 = new TransformConfig(TransformType.Names.CONCAT_NAME);
tc2.setInputs(Arrays.asList("dns1", "dns2"));
tc2.setArguments(Arrays.asList("."));
tc2.setOutputs(Arrays.asList("dns"));
transforms.add(tc1);
transforms.add(tc2);
Detector.Builder detector = new Detector.Builder("metric", "value");
detector.setByFieldName("dns_upper");
AnalysisConfig.Builder builder = new AnalysisConfig.Builder(Arrays.asList(detector.build()));
analysisConfig = builder.build();
StringBuilder input = new StringBuilder();
input.append("{\"time\":\"1\", \"dns1\":\"www\", \"dns2\":\"foo.com\", \"value\":\"1.0\"}");
input.append("{\"time\":\"2\", \"dns1\":\"www\", \"dns2\":\"bar.com\", \"value\":\"2.0\"}");
InputStream inputStream = createInputStream(input.toString());
JsonDataToProcessWriter writer = createWriter();
writer.writeHeader();
writer.write(inputStream);
verify(dataCountsReporter, times(1)).startNewIncrementalCount();
List<String[]> expectedRecords = new ArrayList<>();
// The final field is the control field
expectedRecords.add(new String[]{"time", "dns_upper", "value", "."});
expectedRecords.add(new String[]{"1", "WWW.FOO.COM", "1.0", ""});
expectedRecords.add(new String[]{"2", "WWW.BAR.COM", "2.0", ""});
assertWrittenRecordsEqualTo(expectedRecords);
verify(dataCountsReporter).finishReporting();
}
 private static InputStream createInputStream(String input) {
 return new ByteArrayInputStream(input.getBytes(StandardCharsets.UTF_8));
 }
 private JsonDataToProcessWriter createWriter() {
 return new JsonDataToProcessWriter(true, autodetectProcess, dataDescription.build(), analysisConfig,
-new TransformConfigs(transforms), dataCountsReporter, logger);
+dataCountsReporter);
 }
 private void assertWrittenRecordsEqualTo(List<String[]> expectedRecords) {

View File

@@ -1,181 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.job.process.autodetect.writer;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.ml.job.config.AnalysisConfig;
import org.elasticsearch.xpack.ml.job.config.DataDescription;
import org.elasticsearch.xpack.ml.job.config.DataDescription.DataFormat;
import org.elasticsearch.xpack.ml.job.config.Detector;
import org.elasticsearch.xpack.ml.job.process.DataCountsReporter;
import org.elasticsearch.xpack.ml.job.process.autodetect.AutodetectProcess;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfigs;
import org.junit.Before;
import org.mockito.Mockito;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import static org.mockito.Matchers.any;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.verifyNoMoreInteractions;
public class SingleLineDataToProcessWriterTests extends ESTestCase {
private AutodetectProcess autodetectProcess;
private DataDescription.Builder dataDescription;
private AnalysisConfig analysisConfig;
private List<TransformConfig> transformConfigs;
private DataCountsReporter dataCountsReporter;
private List<String[]> writtenRecords;
@Before
public void setUpMocks() throws IOException {
autodetectProcess = Mockito.mock(AutodetectProcess.class);
dataCountsReporter = Mockito.mock(DataCountsReporter.class);
writtenRecords = new ArrayList<>();
doAnswer(new Answer<Void>() {
@Override
public Void answer(InvocationOnMock invocation) throws Throwable {
String[] record = (String[]) invocation.getArguments()[0];
String[] copy = Arrays.copyOf(record, record.length);
writtenRecords.add(copy);
return null;
}
}).when(autodetectProcess).writeRecord(any(String[].class));
dataDescription = new DataDescription.Builder();
dataDescription.setFieldDelimiter(',');
dataDescription.setFormat(DataFormat.SINGLE_LINE);
dataDescription.setTimeFormat("yyyy-MM-dd HH:mm:ssX");
Detector.Builder detector = new Detector.Builder("count", null);
detector.setByFieldName("message");
analysisConfig = new AnalysisConfig.Builder(Arrays.asList(detector.build())).build();
transformConfigs = new ArrayList<>();
}
public void testWrite_GivenDataIsValid() throws Exception {
TransformConfig transformConfig = new TransformConfig("extract");
transformConfig.setInputs(Arrays.asList("raw"));
transformConfig.setOutputs(Arrays.asList("time", "message"));
transformConfig.setArguments(Arrays.asList("(.{20}) (.*)"));
transformConfigs.add(transformConfig);
StringBuilder input = new StringBuilder();
input.append("2015-04-29 10:00:00Z This is message 1\n");
input.append("2015-04-29 11:00:00Z This is message 2\r");
input.append("2015-04-29 12:00:00Z This is message 3\r\n");
InputStream inputStream = createInputStream(input.toString());
SingleLineDataToProcessWriter writer = createWriter();
writer.writeHeader();
writer.write(inputStream);
verify(dataCountsReporter, times(1)).getLatestRecordTime();
verify(dataCountsReporter, times(1)).startNewIncrementalCount();
verify(dataCountsReporter, times(1)).setAnalysedFieldsPerRecord(1);
verify(dataCountsReporter, times(1)).reportRecordWritten(1, 1430301600000L);
verify(dataCountsReporter, times(1)).reportRecordWritten(1, 1430305200000L);
verify(dataCountsReporter, times(1)).reportRecordWritten(1, 1430308800000L);
verify(dataCountsReporter, times(1)).incrementalStats();
List<String[]> expectedRecords = new ArrayList<>();
// The final field is the control field
expectedRecords.add(new String[]{"time", "message", "."});
expectedRecords.add(new String[]{"1430301600", "This is message 1", ""});
expectedRecords.add(new String[]{"1430305200", "This is message 2", ""});
expectedRecords.add(new String[]{"1430308800", "This is message 3", ""});
assertWrittenRecordsEqualTo(expectedRecords);
verify(dataCountsReporter).finishReporting();
verifyNoMoreInteractions(dataCountsReporter);
}
public void testWrite_GivenDataContainsInvalidRecords() throws Exception {
TransformConfig transformConfig = new TransformConfig("extract");
transformConfig.setInputs(Arrays.asList("raw"));
transformConfig.setOutputs(Arrays.asList("time", "message"));
transformConfig.setArguments(Arrays.asList("(.{20}) (.*)"));
transformConfigs.add(transformConfig);
StringBuilder input = new StringBuilder();
input.append("2015-04-29 10:00:00Z This is message 1\n");
input.append("No transform\n");
input.append("Transform can apply but no date to be parsed\n");
input.append("\n");
input.append("2015-04-29 12:00:00Z This is message 3\n");
InputStream inputStream = createInputStream(input.toString());
SingleLineDataToProcessWriter writer = createWriter();
writer.writeHeader();
writer.write(inputStream);
verify(dataCountsReporter, times(1)).getLatestRecordTime();
verify(dataCountsReporter, times(1)).startNewIncrementalCount();
verify(dataCountsReporter, times(1)).setAnalysedFieldsPerRecord(1);
verify(dataCountsReporter, times(1)).reportRecordWritten(1, 1430301600000L);
verify(dataCountsReporter, times(1)).reportRecordWritten(1, 1430308800000L);
verify(dataCountsReporter, times(3)).reportDateParseError(1);
verify(dataCountsReporter, times(1)).incrementalStats();
List<String[]> expectedRecords = new ArrayList<>();
// The final field is the control field
expectedRecords.add(new String[]{"time", "message", "."});
expectedRecords.add(new String[]{"1430301600", "This is message 1", ""});
expectedRecords.add(new String[]{"1430308800", "This is message 3", ""});
assertWrittenRecordsEqualTo(expectedRecords);
verify(dataCountsReporter).finishReporting();
verifyNoMoreInteractions(dataCountsReporter);
}
public void testWrite_GivenNoTransforms() throws Exception {
StringBuilder input = new StringBuilder();
input.append("2015-04-29 10:00:00Z This is message 1\n");
InputStream inputStream = createInputStream(input.toString());
SingleLineDataToProcessWriter writer = createWriter();
writer.writeHeader();
writer.write(inputStream);
verify(dataCountsReporter, times(1)).startNewIncrementalCount();
verify(dataCountsReporter, times(1)).setAnalysedFieldsPerRecord(1);
verify(dataCountsReporter, times(1)).reportDateParseError(1);
verify(dataCountsReporter, times(1)).incrementalStats();
List<String[]> expectedRecords = new ArrayList<>();
// The final field is the control field
expectedRecords.add(new String[]{"time", "message", "."});
assertWrittenRecordsEqualTo(expectedRecords);
verify(dataCountsReporter).getLatestRecordTime();
verify(dataCountsReporter).finishReporting();
verifyNoMoreInteractions(dataCountsReporter);
}
private static InputStream createInputStream(String input) {
return new ByteArrayInputStream(input.getBytes(StandardCharsets.UTF_8));
}
private SingleLineDataToProcessWriter createWriter() {
return new SingleLineDataToProcessWriter(true, autodetectProcess, dataDescription.build(),
analysisConfig, new TransformConfigs(transformConfigs), dataCountsReporter, Mockito.mock(Logger.class));
}
private void assertWrittenRecordsEqualTo(List<String[]> expectedRecords) {
assertEquals(expectedRecords.size(), writtenRecords.size());
for (int i = 0; i < expectedRecords.size(); i++) {
assertArrayEquals(expectedRecords.get(i), writtenRecords.get(i));
}
}
}
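
For readers cross-checking the expected epoch values in these tests, a minimal java.time sketch (illustrative only, not part of the writer) of the timestamp conversion being asserted:

// Illustrative only: how "2015-04-29 10:00:00Z" maps to the "1430301600" written
// record and the 1430301600000L reported to dataCountsReporter.
java.time.format.DateTimeFormatter formatter =
        java.time.format.DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ssX");
java.time.Instant instant = java.time.Instant.from(formatter.parse("2015-04-29 10:00:00Z"));
long epochSeconds = instant.getEpochSecond(); // 1430301600
long epochMillis = instant.toEpochMilli();    // 1430301600000L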

View File

@ -1,97 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms;
import static org.elasticsearch.xpack.ml.transforms.TransformTestUtils.createIndexArray;
import static org.mockito.Mockito.mock;
import java.util.List;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformIndex;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformResult;
public class ConcatTests extends ESTestCase {
public void testMultipleInputs() throws TransformException {
List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(0, 1), new TransformIndex(0, 2), new TransformIndex(0, 4));
List<TransformIndex> writeIndexes = createIndexArray(new TransformIndex(2, 1));
Concat concat = new Concat(readIndexes, writeIndexes, mock(Logger.class));
String[] input = {"a", "b", "c", "d", "e"};
String[] scratch = {};
String[] output = new String[2];
String[][] readWriteArea = {input, scratch, output};
assertEquals(TransformResult.OK, concat.transform(readWriteArea));
assertNull(output[0]);
assertEquals("bce", output[1]);
}
public void testWithDelimiter() throws TransformException {
List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(0, 1), new TransformIndex(0, 2), new TransformIndex(0, 4));
List<TransformIndex> writeIndexes = createIndexArray(new TransformIndex(2, 1));
Concat concat = new Concat("--", readIndexes, writeIndexes, mock(Logger.class));
String[] input = {"a", "b", "c", "d", "e"};
String[] scratch = {};
String[] output = new String[2];
String[][] readWriteArea = {input, scratch, output};
assertEquals(TransformResult.OK, concat.transform(readWriteArea));
assertNull(output[0]);
assertEquals("b--c--e", output[1]);
}
public void testZeroInputs() throws TransformException {
List<TransformIndex> readIndexes = createIndexArray();
List<TransformIndex> writeIndexes = createIndexArray(new TransformIndex(2, 0));
Concat concat = new Concat(readIndexes, writeIndexes, mock(Logger.class));
String[] input = {"a", "b", "c", "d", "e"};
String[] scratch = {};
String[] output = new String[1];
String[][] readWriteArea = {input, scratch, output};
assertEquals(TransformResult.OK, concat.transform(readWriteArea));
assertEquals("", output[0]);
}
public void testNoOutput() throws TransformException {
List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(0, 1), new TransformIndex(0, 2), new TransformIndex(0, 3));
List<TransformIndex> writeIndexes = createIndexArray();
Concat concat = new Concat(readIndexes, writeIndexes, mock(Logger.class));
String[] input = {"a", "b", "c", "d", "e"};
String[] scratch = {};
String[] output = new String[1];
String[][] readWriteArea = {input, scratch, output};
assertEquals(TransformResult.FAIL, concat.transform(readWriteArea));
assertNull(output[0]);
}
public void testScratchAreaInputs() throws TransformException {
List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(0, 1), new TransformIndex(0, 2),
new TransformIndex(1, 0), new TransformIndex(1, 2));
List<TransformIndex> writeIndexes = createIndexArray(new TransformIndex(1, 4));
Concat concat = new Concat(readIndexes, writeIndexes, mock(Logger.class));
String[] input = {"a", "b", "c", "d", "e"};
String[] scratch = {"a", "b", "c", "d", null};
String[] output = new String[1];
String[][] readWriteArea = {input, scratch, output};
assertEquals(TransformResult.OK, concat.transform(readWriteArea));
assertEquals("bcac", scratch[4]);
}
}
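
The behaviour these tests pin down can be reduced to a short sketch (hypothetical helper, not the shipped Concat): values are read from (array, index) positions in the read/write area and joined with an optional delimiter.

// Hypothetical reduction of Concat: join the values at the read indexes.
static String concat(String delimiter, String[][] readWriteArea, int[][] readIndexes) {
    StringBuilder joined = new StringBuilder();
    for (int i = 0; i < readIndexes.length; i++) {
        if (i > 0) {
            joined.append(delimiter);
        }
        joined.append(readWriteArea[readIndexes[i][0]][readIndexes[i][1]]);
    }
    return joined.toString();
}
// concat("--", readWriteArea, new int[][]{{0, 1}, {0, 2}, {0, 4}}) yields "b--c--e"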

View File

@ -1,272 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
import org.elasticsearch.xpack.ml.job.config.transform.TransformType;
public class DependencySorterTests extends ESTestCase {
public void testFindDependencies_GivenNoDependencies() {
List<TransformConfig> transforms = new ArrayList<>();
List<TransformConfig> deps = DependencySorter.findDependencies("metricField", transforms);
assertEquals(0, deps.size());
}
public void testFindDependencies_Given1Dependency() {
List<TransformConfig> transforms = new ArrayList<>();
List<String> inputs = Arrays.asList("ina", "inb");
List<String> outputs = Arrays.asList("ab");
TransformConfig concat = createConcatTransform(inputs, outputs);
transforms.add(concat);
List<String> inputs2 = Arrays.asList("inc", "ind");
List<String> outputs2 = Arrays.asList("cd");
TransformConfig concat2 = createConcatTransform(inputs2, outputs2);
transforms.add(concat2);
List<TransformConfig> deps = DependencySorter.findDependencies("cd", transforms);
assertEquals(1, deps.size());
assertEquals(deps.get(0), concat2);
}
public void testFindDependencies_Given2Dependencies() {
List<TransformConfig> transforms = new ArrayList<>();
List<String> inputs = Arrays.asList("ina", "inb");
List<String> outputs = Arrays.asList("ab");
TransformConfig concat = createConcatTransform(inputs, outputs);
transforms.add(concat);
List<String> inputs2 = Arrays.asList("inc", "ind");
List<String> outputs2 = Arrays.asList("cd");
TransformConfig concat2 = createConcatTransform(inputs2, outputs2);
transforms.add(concat2);
List<TransformConfig> deps = DependencySorter.findDependencies(Arrays.asList("cd", "ab"),
transforms);
assertEquals(2, deps.size());
assertTrue(deps.contains(concat));
assertTrue(deps.contains(concat2));
}
public void testFindDependencies_GivenChainOfDependencies() {
List<TransformConfig> transforms = new ArrayList<>();
List<String> inputs = Arrays.asList("ina", "inb");
List<String> outputs = Arrays.asList("ab");
TransformConfig concat = createConcatTransform(inputs, outputs);
transforms.add(concat);
List<String> inputs2 = Arrays.asList("ab", "inc");
List<String> outputs2 = Arrays.asList("abc");
TransformConfig dependentConcat = createConcatTransform(inputs2, outputs2);
transforms.add(dependentConcat);
List<TransformConfig> deps = DependencySorter.findDependencies("abc",
transforms);
assertEquals(2, deps.size());
assertEquals(concat, deps.get(0));
assertEquals(dependentConcat, deps.get(1));
}
/**
 * Two separate inputs, each with a chain of dependencies, one of which is shared.
 */
public void testFindDependencies_Given2ChainsAndSharedDependencies() {
List<TransformConfig> transforms = new ArrayList<>();
List<String> inputs2 = Arrays.asList("ab", "inc");
List<String> outputs2 = Arrays.asList("abc");
TransformConfig dependentConcat1 = createConcatTransform(inputs2, outputs2);
transforms.add(dependentConcat1);
List<String> inputs3 = Arrays.asList("ab", "ind");
List<String> outputs3 = Arrays.asList("abd");
TransformConfig dependentConcat2 = createConcatTransform(inputs3, outputs3);
transforms.add(dependentConcat2);
List<String> inputs = Arrays.asList("ina", "inb");
List<String> outputs = Arrays.asList("ab");
TransformConfig concat = createConcatTransform(inputs, outputs);
transforms.add(concat);
List<TransformConfig> deps = DependencySorter.findDependencies(Arrays.asList("abc", "abd"),
transforms);
assertEquals(3, deps.size());
assertEquals(concat, deps.get(0));
assertEquals(dependentConcat1, deps.get(1));
assertEquals(dependentConcat2, deps.get(2));
}
public void testSortByDependency_NoDependencies() {
List<TransformConfig> transforms = new ArrayList<>();
TransformConfig concat = createConcatTransform(Arrays.asList("ina", "inb"),
Arrays.asList("ab"));
transforms.add(concat);
TransformConfig hrd1 = createHrdTransform(Arrays.asList("dns"),
Arrays.asList("subdomain", "hrd"));
transforms.add(hrd1);
TransformConfig hrd2 = createHrdTransform(Arrays.asList("dns2"),
Arrays.asList("subdomain"));
transforms.add(hrd2);
List<TransformConfig> orderedDeps = DependencySorter.sortByDependency(transforms);
assertEquals(transforms.size(), orderedDeps.size());
}
public void testSortByDependency_SingleChain() {
List<TransformConfig> transforms = new ArrayList<>();
// Chain of 3 dependencies
TransformConfig chain1Hrd = createHrdTransform(Arrays.asList("ab"),
Arrays.asList("subdomain", "hrd"));
transforms.add(chain1Hrd);
TransformConfig chain1Concat = createConcatTransform(Arrays.asList("ina", "inb"),
Arrays.asList("ab"));
transforms.add(chain1Concat);
TransformConfig chain1Concat2 = createConcatTransform(Arrays.asList("subdomain", "port"),
Arrays.asList());
transforms.add(chain1Concat2);
List<TransformConfig> orderedDeps = DependencySorter.sortByDependency(transforms);
assertEquals(transforms.size(), orderedDeps.size());
int chain1ConcatIndex = orderedDeps.indexOf(chain1Concat);
assertEquals(0, chain1ConcatIndex);
int chain1HrdIndex = orderedDeps.indexOf(chain1Hrd);
assertEquals(1, chain1HrdIndex);
int chain1Concat2Index = orderedDeps.indexOf(chain1Concat2);
assertEquals(2, chain1Concat2Index);
assertTrue(chain1ConcatIndex < chain1HrdIndex);
assertTrue(chain1HrdIndex < chain1Concat2Index);
}
public void testSortByDependency_3ChainsInOrder() {
List<TransformConfig> transforms = new ArrayList<>();
// Chain of 1
TransformConfig noChainHrd = createHrdTransform(Arrays.asList("dns"),
Arrays.asList("subdomain"));
transforms.add(noChainHrd);
// Chain of 2 dependencies
TransformConfig chain1Concat = createConcatTransform(Arrays.asList("ina", "inb"),
Arrays.asList("ab"));
transforms.add(chain1Concat);
TransformConfig chain1Hrd = createHrdTransform(Arrays.asList("ab"),
Arrays.asList("subdomain", "hrd"));
transforms.add(chain1Hrd);
// Chain of 2 dependencies
TransformConfig chain2Concat2 = createConcatTransform(Arrays.asList("cd", "ine"),
Arrays.asList("cde"));
transforms.add(chain2Concat2);
TransformConfig chain2Concat = createConcatTransform(Arrays.asList("inc", "ind"),
Arrays.asList("cd"));
transforms.add(chain2Concat);
List<TransformConfig> orderedDeps = DependencySorter.sortByDependency(transforms);
assertEquals(transforms.size(), orderedDeps.size());
int chain1ConcatIndex = orderedDeps.indexOf(chain1Concat);
assertTrue(chain1ConcatIndex >= 0);
int chain1HrdIndex = orderedDeps.indexOf(chain1Hrd);
assertTrue(chain1HrdIndex >= 1);
assertTrue(chain1ConcatIndex < chain1HrdIndex);
int chain2ConcatIndex = orderedDeps.indexOf(chain2Concat);
assertTrue(chain2ConcatIndex >= 0);
int chain2Concat2Index = orderedDeps.indexOf(chain2Concat2);
assertTrue(chain2Concat2Index >= 1);
assertTrue(chain2ConcatIndex < chain2Concat2Index);
}
public void testSortByDependency_3ChainsOutOfOrder() {
List<TransformConfig> transforms = new ArrayList<>();
TransformConfig chain1Hrd = createHrdTransform(Arrays.asList("ab"),
Arrays.asList("subdomain", "hrd"));
transforms.add(chain1Hrd);
TransformConfig chain2Concat2 = createConcatTransform(Arrays.asList("cd", "ine"),
Arrays.asList("cde"));
transforms.add(chain2Concat2);
TransformConfig chain1Concat = createConcatTransform(Arrays.asList("ina", "inb"),
Arrays.asList("ab"));
transforms.add(chain1Concat);
TransformConfig noChainHrd = createHrdTransform(Arrays.asList("dns"),
Arrays.asList("subdomain"));
transforms.add(noChainHrd);
TransformConfig chain2Concat = createConcatTransform(Arrays.asList("inc", "ind"),
Arrays.asList("cd"));
transforms.add(chain2Concat);
List<TransformConfig> orderedDeps = DependencySorter.sortByDependency(transforms);
assertEquals(transforms.size(), orderedDeps.size());
int chain1ConcatIndex = orderedDeps.indexOf(chain1Concat);
assertTrue(chain1ConcatIndex >= 0);
int chain1HrdIndex = orderedDeps.indexOf(chain1Hrd);
assertTrue(chain1HrdIndex >= 0);
assertTrue(chain1ConcatIndex < chain1HrdIndex);
int chain2ConcatIndex = orderedDeps.indexOf(chain2Concat);
assertTrue(chain2ConcatIndex >= 0);
int chain2Concat2Index = orderedDeps.indexOf(chain2Concat2);
assertTrue(chain2Concat2Index >= 0);
assertTrue(chain2ConcatIndex < chain2Concat2Index);
}
private TransformConfig createConcatTransform(List<String> inputs, List<String> outputs) {
TransformConfig concat = new TransformConfig(TransformType.CONCAT.prettyName());
concat.setInputs(inputs);
concat.setOutputs(outputs);
return concat;
}
private TransformConfig createHrdTransform(List<String> inputs, List<String> outputs) {
TransformConfig concat = new TransformConfig(TransformType.DOMAIN_SPLIT.prettyName());
concat.setInputs(inputs);
concat.setOutputs(outputs);
return concat;
}
}
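
The ordering property these tests assert is a topological sort over transform inputs and outputs. A minimal depth-first sketch (hypothetical, not the shipped DependencySorter; it assumes java.util imports, ignores cycle handling, and uses the TransformConfig getInputs/getOutputs getters seen elsewhere in these tests):

// Hypothetical sketch: order transforms so producers come before consumers.
static List<TransformConfig> sortByDependencySketch(List<TransformConfig> transforms) {
    List<TransformConfig> ordered = new ArrayList<>();
    Set<TransformConfig> visited = new HashSet<>();
    for (TransformConfig transform : transforms) {
        visit(transform, transforms, visited, ordered);
    }
    return ordered;
}
private static void visit(TransformConfig transform, List<TransformConfig> all,
        Set<TransformConfig> visited, List<TransformConfig> ordered) {
    if (!visited.add(transform)) {
        return;
    }
    for (TransformConfig candidate : all) {
        // A candidate must run first if any of its outputs feeds this transform's inputs.
        if (!Collections.disjoint(candidate.getOutputs(), transform.getInputs())) {
            visit(candidate, all, visited, ordered);
        }
    }
    ordered.add(transform);
}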

View File

@ -1,115 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms;
import static org.elasticsearch.xpack.ml.transforms.TransformTestUtils.createIndexArray;
import static org.mockito.Mockito.mock;
import java.util.List;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.ml.job.config.Condition;
import org.elasticsearch.xpack.ml.job.config.Operator;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformIndex;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformResult;
public class ExcludeFilterNumericTests extends ESTestCase {
public void testEq() throws TransformException {
ExcludeFilterNumeric transform = createTransform(Operator.EQ, "5.0");
String[] input = {"5"};
String[] scratch = {};
String[] output = {};
String[][] readWriteArea = {input, scratch, output};
assertEquals(TransformResult.EXCLUDE, transform.transform(readWriteArea));
input[0] = "5.10000";
assertEquals(TransformResult.OK, transform.transform(readWriteArea));
}
public void testGT() throws TransformException {
ExcludeFilterNumeric transform = createTransform(Operator.GT, "10.000");
String[] input = {"100"};
String[] scratch = {};
String[] output = {};
String[][] readWriteArea = {input, scratch, output};
assertEquals(TransformResult.EXCLUDE, transform.transform(readWriteArea));
input[0] = "1.0";
assertEquals(TransformResult.OK, transform.transform(readWriteArea));
}
public void testGTE() throws TransformException {
ExcludeFilterNumeric transform = createTransform(Operator.GTE, "10.000");
String[] input = {"100"};
String[] scratch = {};
String[] output = {};
String[][] readWriteArea = {input, scratch, output};
assertEquals(TransformResult.EXCLUDE, transform.transform(readWriteArea));
input[0] = "10";
assertEquals(TransformResult.EXCLUDE, transform.transform(readWriteArea));
input[0] = "9.5";
assertEquals(TransformResult.OK, transform.transform(readWriteArea));
}
public void testLT() throws TransformException {
ExcludeFilterNumeric transform = createTransform(Operator.LT, "2000");
String[] input = {"100.2"};
String[] scratch = {};
String[] output = {};
String[][] readWriteArea = {input, scratch, output};
assertEquals(TransformResult.EXCLUDE, transform.transform(readWriteArea));
input[0] = "2005.0000";
assertEquals(TransformResult.OK, transform.transform(readWriteArea));
}
public void testLTE() throws TransformException {
ExcludeFilterNumeric transform = createTransform(Operator.LTE, "2000");
String[] input = {"100.2"};
String[] scratch = {};
String[] output = {};
String[][] readWriteArea = {input, scratch, output};
assertEquals(TransformResult.EXCLUDE, transform.transform(readWriteArea));
input[0] = "2000.0000";
assertEquals(TransformResult.EXCLUDE, transform.transform(readWriteArea));
input[0] = "9000.5";
assertEquals(TransformResult.OK, transform.transform(readWriteArea));
}
private ExcludeFilterNumeric createTransform(Operator op, String filterValue) {
Condition condition = new Condition(op, filterValue);
List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(0, 0));
List<TransformIndex> writeIndexes = createIndexArray();
return new ExcludeFilterNumeric(condition, readIndexes, writeIndexes, mock(Logger.class));
}
}
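
Stripped of the transform plumbing, the numeric exclude check these tests cover is a parsed comparison against the condition's filter value (hypothetical reduction, not the shipped class):

// Hypothetical reduction: EXCLUDE when the operator holds for the parsed value.
static boolean excludes(Operator op, double filterValue, String field) {
    double value = Double.parseDouble(field);
    switch (op) {
        case EQ:  return value == filterValue;
        case GT:  return value > filterValue;
        case GTE: return value >= filterValue;
        case LT:  return value < filterValue;
        case LTE: return value <= filterValue;
        default:  return false;
    }
}
// excludes(Operator.GTE, 10.0, "10") is true, matching the GTE test above.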

View File

@ -1,116 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms;
import static org.elasticsearch.xpack.ml.transforms.TransformTestUtils.createIndexArray;
import static org.mockito.Mockito.mock;
import java.util.List;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.ml.job.config.Condition;
import org.elasticsearch.xpack.ml.job.config.Operator;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformIndex;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformResult;
public class ExcludeFilterTests extends ESTestCase {
public void testTransform_matches() throws TransformException {
List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(0, 0));
List<TransformIndex> writeIndexes = createIndexArray();
Condition cond = new Condition(Operator.MATCH, "cat");
ExcludeFilterRegex transform = new ExcludeFilterRegex(cond, readIndexes, writeIndexes, mock(Logger.class));
String[] input = {"cat"};
String[] scratch = {};
String[] output = {};
String[][] readWriteArea = {input, scratch, output};
assertEquals(TransformResult.EXCLUDE, transform.transform(readWriteArea));
}
public void testTransform_noMatches() throws TransformException {
List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(0, 0));
List<TransformIndex> writeIndexes = createIndexArray();
Condition cond = new Condition(Operator.MATCH, "boat");
ExcludeFilterRegex transform = new ExcludeFilterRegex(cond, readIndexes, writeIndexes, mock(Logger.class));
String[] input = {"cat"};
String[] scratch = {};
String[] output = {};
String[][] readWriteArea = {input, scratch, output};
assertEquals(TransformResult.OK, transform.transform(readWriteArea));
}
public void testTransform_matchesRegex() throws TransformException {
List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(0, 0));
List<TransformIndex> writeIndexes = createIndexArray();
Condition cond = new Condition(Operator.MATCH, "metric[0-9]+");
ExcludeFilterRegex transform = new ExcludeFilterRegex(cond, readIndexes, writeIndexes, mock(Logger.class));
String[] input = {"metric01"};
String[] scratch = {};
String[] output = new String[3];
String[][] readWriteArea = {input, scratch, output};
assertEquals(TransformResult.EXCLUDE, transform.transform(readWriteArea));
readWriteArea[0] = new String[]{"metric02-A"};
assertEquals(TransformResult.OK, transform.transform(readWriteArea));
}
public void testTransform_matchesMultipleInputs() throws TransformException {
List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(0, 0),
new TransformIndex(0, 1),
new TransformIndex(0, 2));
List<TransformIndex> writeIndexes = createIndexArray();
Condition cond = new Condition(Operator.MATCH, "boat");
ExcludeFilterRegex transform = new ExcludeFilterRegex(cond, readIndexes, writeIndexes, mock(Logger.class));
String[] input = {"cat", "hat", "boat"};
String[] scratch = {};
String[] output = {};
String[][] readWriteArea = {input, scratch, output};
assertEquals(TransformResult.EXCLUDE, transform.transform(readWriteArea));
}
public void testTransform() throws TransformException {
List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(0, 0));
List<TransformIndex> writeIndexes = createIndexArray();
Condition cond = new Condition(Operator.MATCH, "^(?!latency\\.total).*$");
ExcludeFilterRegex transform = new ExcludeFilterRegex(cond, readIndexes, writeIndexes, mock(Logger.class));
String[] input = {"utilization.total"};
String[] scratch = {};
String[] output = new String[3];
String[][] readWriteArea = {input, scratch, output};
TransformResult tr = transform.transform(readWriteArea);
assertEquals(TransformResult.EXCLUDE, tr);
readWriteArea[0] = new String[]{"latency.total"};
tr = transform.transform(readWriteArea);
assertEquals(TransformResult.OK, tr);
}
}
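
The regex variant reduces to full-string matching with java.util.regex; MATCH must match the whole input, which is why "metric02-A" is not excluded above:

// Illustrative: Pattern.matches anchors to the whole string, like Operator.MATCH.
boolean excluded = java.util.regex.Pattern.matches("metric[0-9]+", "metric01");  // true -> EXCLUDE
boolean kept = !java.util.regex.Pattern.matches("metric[0-9]+", "metric02-A");   // true -> OK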

View File

@ -1,448 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms;
import org.elasticsearch.test.ESTestCase;
// TODO Reimplement
public class HighestRegisteredDomainTests extends ESTestCase {
// private void checkHighestRegisteredDomain(String fullName, String
// registeredNameExpected)
// {
// InternetDomainName effectiveTLD = InternetDomainName.from(fullName);
//
// effectiveTLD = effectiveTLD.topPrivateDomain();
// assertTrue(effectiveTLD.isTopPrivateDomain());
// String registeredName = effectiveTLD.toString();
//
// assertEquals(registeredNameExpected, registeredName);
// }
//
// private void checkIsPublicSuffix(String suffix)
// {
// InternetDomainName effectiveTLD = InternetDomainName.from(suffix);
// assertTrue(effectiveTLD.isPublicSuffix());
// }
//
// private void testDomainSplit(String subDomainExpected,
// String domainExpected, String hostName)
// {
// HighestRegisteredDomain.DomainSplit split =
// HighestRegisteredDomain.lookup(hostName);
//
// assertEquals(subDomainExpected, split.getSubDomain());
// assertEquals(domainExpected, split.getHighestRegisteredDomain());
// }
//
// @Test
// public void testDomainSplit()
// {
// testDomainSplit("", "", "");
// testDomainSplit("", "", ".");
//
// // Test cases from
// https://github.com/john-kurkowski/tldextract/tree/master/tldextract/tests
// testDomainSplit("www", "google.com", "www.google.com");
// testDomainSplit("www.maps", "google.co.uk", "www.maps.google.co.uk");
// testDomainSplit("www", "theregister.co.uk", "www.theregister.co.uk");
// testDomainSplit("", "gmail.com", "gmail.com");
// testDomainSplit("media.forums", "theregister.co.uk",
// "media.forums.theregister.co.uk");
// testDomainSplit("www", "www.com", "www.www.com");
// testDomainSplit("", "www.com", "www.com");
// testDomainSplit("", "internalunlikelyhostname",
// "internalunlikelyhostname");
// testDomainSplit("internalunlikelyhostname", "bizarre",
// "internalunlikelyhostname.bizarre");
// testDomainSplit("", "internalunlikelyhostname.info",
// "internalunlikelyhostname.info"); // .info is a valid TLD
// testDomainSplit("internalunlikelyhostname", "information",
// "internalunlikelyhostname.information");
// testDomainSplit("", "216.22.0.192", "216.22.0.192");
// testDomainSplit("", "::1", "::1");
// testDomainSplit("", "FE80:0000:0000:0000:0202:B3FF:FE1E:8329",
// "FE80:0000:0000:0000:0202:B3FF:FE1E:8329");
// testDomainSplit("216.22", "project.coop", "216.22.project.coop");
// testDomainSplit("www", "xn--h1alffa9f.xn--p1ai",
// "www.xn--h1alffa9f.xn--p1ai");
// testDomainSplit("", "", "");
// testDomainSplit("www", "parliament.uk", "www.parliament.uk");
// testDomainSplit("www", "parliament.co.uk", "www.parliament.co.uk");
// testDomainSplit("www.a", "cgs.act.edu.au", "www.a.cgs.act.edu.au");
// testDomainSplit("www", "google.com.au", "www.google.com.au");
// testDomainSplit("www", "metp.net.cn", "www.metp.net.cn");
// testDomainSplit("www", "waiterrant.blogspot.com",
// "www.waiterrant.blogspot.com");
//
// testDomainSplit("", "kittens.blogspot.co.uk", "kittens.blogspot.co.uk");
// testDomainSplit("", "ml.s3.amazonaws.com",
// "ml.s3.amazonaws.com");
// testDomainSplit("daves_bucket", "ml.s3.amazonaws.com",
// "daves_bucket.ml.s3.amazonaws.com");
//
// testDomainSplit("example", "example", "example.example");
// testDomainSplit("b.example", "example", "b.example.example");
// testDomainSplit("a.b.example", "example", "a.b.example.example");
//
// testDomainSplit("example", "local", "example.local");
// testDomainSplit("b.example", "local", "b.example.local");
// testDomainSplit("a.b.example", "local", "a.b.example.local");
//
// testDomainSplit("r192494180984795-1-1041782-channel-live.ums",
// "ustream.tv", "r192494180984795-1-1041782-channel-live.ums.ustream.tv");
//
// testDomainSplit("192.168.62.9", "prelert.com",
// "192.168.62.9.prelert.com");
//
// // These are not valid DNS names
// testDomainSplit("kerberos.http.192.168", "62.222",
// "kerberos.http.192.168.62.222");
// testDomainSplit("192.168", "62.9\143\127", "192.168.62.9\143\127");
// }
//
// @Test
// public void testTooLongDnsName()
// {
// // no part of the DNS name can be longer than 63 octets
// String dnsLongerThan254Chars =
// "davesbucketdavesbucketdavesbucketdavesbucketdavesbucketdaves.bucketdavesbucketdavesbucketdavesbucketdavesbucketdaves.bucketdav
// esbucketdavesbucketdavesbucketdavesbucket.davesbucketdavesbucketdavesbucketdavesbucket.davesbucketdavesbucket.ml.s3.a
// mazonaws.com";
// String hrd = "ml.s3.amazonaws.com";
// testDomainSplit(dnsLongerThan254Chars.substring(0,
// dnsLongerThan254Chars.length() - (hrd.length() + 1)),
// hrd, dnsLongerThan254Chars);
//
// // this one needs sanitising
// dnsLongerThan254Chars =
// "_davesbucketdavesbucketdavesbucketdavesbucket-davesbucketdaves.-bucketdavesbucketdavesbucketdavesbucketdavesbucketdaves.bucket
// davesbucketdavesbucketdavesbucketdavesbucket.davesbucketdavesbucketdavesbucketdavesbucket.davesbucketdavesbucket.ml.s3.ama
// zonaws.com";
// hrd = "ml.s3.amazonaws.com";
// testDomainSplit(dnsLongerThan254Chars.substring(0,
// dnsLongerThan254Chars.length() - (hrd.length() + 1)),
// hrd, dnsLongerThan254Chars);
//
// String bad =
// "0u1aof\209\1945\188hI4\236\197\205J\244\188\247\223\190F\2135\229gVE7\230i\215\231\205Qzay\225UJ\192pw\216\231\204\194\216\
// 193QV4g\196\207Whpvx.fVxl\194BjA\245kbYk\211XG\235\198\218B\252\219\225S\197\217I\2538n\229\244\213\252\215Ly\226NW\242\248\
// 244Q\220\245\221c\207\189\205Hxq5\224\240.\189Jt4\243\245t\244\198\199p\210\1987r\2050L\239sR0M\190w\238\223\234L\226\2242D\233
// \210\206\195h\199\206tA\214J\192C\224\191b\188\201\251\198M\244h\206.\198\242l\2114\191JBU\198h\207\215w\243\228R\1924\242\208\19
// 1CV\208p\197gDW\198P\217\195X\191Fp\196\197J\193\245\2070\196zH\197\243\253g\239.adz.beacon.base.net";
// hrd = "base.net";
// testDomainSplit(bad.substring(0, bad.length() - (hrd.length() +1)), hrd,
// bad);
// }
//
// @Test
// public void testDomainSplit_SanitisedDomains()
// {
// testDomainSplit("_example", "local", "_example.local");
// testDomainSplit("www._maps", "google.co.uk", "www._maps.google.co.uk");
// testDomainSplit("-forum", "theregister.co.uk",
// "-forum.theregister.co.uk");
//
// testDomainSplit("www._yourmp", "parliament.uk",
// "www._yourmp.parliament.uk");
// testDomainSplit("www.-a", "cgs.act.edu.au", "www.-a.cgs.act.edu.au");
//
// testDomainSplit("", "-foundation.org", "-foundation.org");
// testDomainSplit("www", "-foundation.org", "www.-foundation.org");
// testDomainSplit("", "_nfsv4idmapdomain", "_nfsv4idmapdomain");
// testDomainSplit("_nfsv4idmapdomain", "prelert.com",
// "_nfsv4idmapdomain.prelert.com");
//
// testDomainSplit("lb._dns-sd._udp.0.123.168", "192.in-addr.arpa",
// "lb._dns-sd._udp.0.123.168.192.in-addr.arpa");
// testDomainSplit("_kerberos._http.192.168", "62.222",
// "_kerberos._http.192.168.62.222");
// }
//
// @Test
// public void testHighestRegisteredDomainCases()
// {
// // Any copyright is dedicated to the Public Domain.
// // http://creativecommons.org/publicdomain/zero/1.0/
//
// // Domain parts starting with _ aren't valid
// assertFalse(InternetDomainName.isValid("_nfsv4idmapdomain.prelert.com"));
//
// // Mixed case.
// checkIsPublicSuffix("COM");
// checkHighestRegisteredDomain("example.COM", "example.com");
// checkHighestRegisteredDomain("WwW.example.COM", "example.com");
//
// // These pass steve's test but fail here. Example isn't a valid
// (declared, not active) TLD
//// checkIsPublicSuffix("example");
//// checkTopLevelDomain("example.example", "example.example");
//// checkTopLevelDomain("b.example.example", "example.example");
//// checkTopLevelDomain("a.b.example.example", "example.example");
//
// // Listed, but non-Internet, TLD.
// // checkIsPublicSuffix("local"); // These pass Steve's tests but not
// public suffix here
// //checkIsPublicSuffix("example.local", "");
// //checkIsPublicSuffix("b.example.local", "");
// //checkIsPublicSuffix("a.b.example.local", "");
//
// // TLD with only 1 rule.
// checkIsPublicSuffix("biz");
// checkHighestRegisteredDomain("domain.biz", "domain.biz");
// checkHighestRegisteredDomain("b.domain.biz", "domain.biz");
// checkHighestRegisteredDomain("a.b.domain.biz", "domain.biz");
// // TLD with some 2-level rules.
// // checkPublicSuffix("com", "");
// checkHighestRegisteredDomain("example.com", "example.com");
// checkHighestRegisteredDomain("b.example.com", "example.com");
// checkHighestRegisteredDomain("a.b.example.com", "example.com");
// checkIsPublicSuffix("uk.com");
// checkHighestRegisteredDomain("example.uk.com", "example.uk.com");
// checkHighestRegisteredDomain("b.example.uk.com", "example.uk.com");
// checkHighestRegisteredDomain("a.b.example.uk.com", "example.uk.com");
// checkHighestRegisteredDomain("test.ac", "test.ac");
// // TLD with only 1 (wildcard) rule.
//
// // cy passes Steve's test but is not considered a valid TLD here
// // gov.cy is.
// checkIsPublicSuffix("gov.cy");
// checkHighestRegisteredDomain("c.gov.cy", "c.gov.cy"); // changed to pass
// test - inserted .gov, .net
// checkHighestRegisteredDomain("b.c.net.cy", "c.net.cy");
// checkHighestRegisteredDomain("a.b.c.net.cy", "c.net.cy");
//
// // More complex TLD.
// checkIsPublicSuffix("jp"); // jp is valid because you can have any 2nd
// level domain
// checkIsPublicSuffix("ac.jp");
// checkIsPublicSuffix("kyoto.jp");
// checkIsPublicSuffix("c.kobe.jp");
// checkIsPublicSuffix("ide.kyoto.jp");
// checkHighestRegisteredDomain("test.jp", "test.jp");
// checkHighestRegisteredDomain("www.test.jp", "test.jp");
// checkHighestRegisteredDomain("test.ac.jp", "test.ac.jp");
// checkHighestRegisteredDomain("www.test.ac.jp", "test.ac.jp");
// checkHighestRegisteredDomain("test.kyoto.jp", "test.kyoto.jp");
// checkHighestRegisteredDomain("b.ide.kyoto.jp", "b.ide.kyoto.jp");
// checkHighestRegisteredDomain("a.b.ide.kyoto.jp", "b.ide.kyoto.jp");
// checkHighestRegisteredDomain("b.c.kobe.jp", "b.c.kobe.jp");
// checkHighestRegisteredDomain("a.b.c.kobe.jp", "b.c.kobe.jp");
// checkHighestRegisteredDomain("city.kobe.jp", "city.kobe.jp");
// checkHighestRegisteredDomain("www.city.kobe.jp", "city.kobe.jp");
//
//
// // TLD with a wildcard rule and exceptions.
//// checkIsPublicSuffix("ck"); // Passes Steve's test but is not considered
// a valid TLD here
//// checkIsPublicSuffix("test.ck");
//// checkTopLevelDomain("b.test.ck", "b.test.ck");
//// checkTopLevelDomain("a.b.test.ck", "b.test.ck");
//// checkTopLevelDomain("www.ck", "www.ck");
//// checkTopLevelDomain("www.www.ck", "www.ck");
//
// // US K12.
// checkIsPublicSuffix("us");
// checkIsPublicSuffix("ak.us");
// checkIsPublicSuffix("k12.ak.us");
// checkHighestRegisteredDomain("test.us", "test.us");
// checkHighestRegisteredDomain("www.test.us", "test.us");
// checkHighestRegisteredDomain("test.ak.us", "test.ak.us");
// checkHighestRegisteredDomain("www.test.ak.us", "test.ak.us");
// checkHighestRegisteredDomain("test.k12.ak.us", "test.k12.ak.us");
// checkHighestRegisteredDomain("www.test.k12.ak.us", "test.k12.ak.us");
//
// // IDN labels.
// checkIsPublicSuffix("公司.cn");
// checkIsPublicSuffix("中国");
// checkHighestRegisteredDomain("食狮.com.cn", "食狮.com.cn");
// checkHighestRegisteredDomain("食狮.公司.cn", "食狮.公司.cn");
// checkHighestRegisteredDomain("www.食狮.公司.cn", "食狮.公司.cn");
// checkHighestRegisteredDomain("shishi.公司.cn", "shishi.公司.cn");
// checkHighestRegisteredDomain("食狮.中国", "食狮.中国");
// checkHighestRegisteredDomain("www.食狮.中国", "食狮.中国");
// checkHighestRegisteredDomain("shishi.中国", "shishi.中国");
//
// // Same as above, but punycoded.
// checkIsPublicSuffix("xn--55qx5d.cn");
// checkIsPublicSuffix("xn--fiqs8s");
// checkHighestRegisteredDomain("xn--85x722f.com.cn", "xn--85x722f.com.cn");
// checkHighestRegisteredDomain("xn--85x722f.xn--55qx5d.cn",
// "xn--85x722f.xn--55qx5d.cn");
// checkHighestRegisteredDomain("www.xn--85x722f.xn--55qx5d.cn",
// "xn--85x722f.xn--55qx5d.cn");
// checkHighestRegisteredDomain("shishi.xn--55qx5d.cn",
// "shishi.xn--55qx5d.cn");
// checkHighestRegisteredDomain("xn--85x722f.xn--fiqs8s",
// "xn--85x722f.xn--fiqs8s");
// checkHighestRegisteredDomain("www.xn--85x722f.xn--fiqs8s",
// "xn--85x722f.xn--fiqs8s");
// checkHighestRegisteredDomain("shishi.xn--fiqs8s", "shishi.xn--fiqs8s");
// }
//
// @Test
// public void testSanitiseDomainName()
// {
// String ok_domain = "nfsv4idmapdomain.prelert.com";
// assertTrue(InternetDomainName.isValid(ok_domain));
// assertTrue(HighestRegisteredDomain.sanitiseDomainName(ok_domain) ==
// ok_domain);
// ok_domain = "nfsv4idmapdomain\u3002ml\uFF0Ecom";
// assertTrue(InternetDomainName.isValid(ok_domain));
// assertTrue(HighestRegisteredDomain.sanitiseDomainName(ok_domain) ==
// ok_domain);
// ok_domain = "www.test.ac\uFF61jp";
// assertTrue(InternetDomainName.isValid(ok_domain));
// assertTrue(HighestRegisteredDomain.sanitiseDomainName(ok_domain) ==
// ok_domain);
// ok_domain = "xn--85x722f.com.cn";
// assertTrue(InternetDomainName.isValid(ok_domain));
// assertTrue(HighestRegisteredDomain.sanitiseDomainName(ok_domain) ==
// ok_domain);
// ok_domain = "x_n--85x722f.com.cn";
// assertTrue(InternetDomainName.isValid(ok_domain));
// assertTrue(HighestRegisteredDomain.sanitiseDomainName(ok_domain) ==
// ok_domain);
// ok_domain = "食狮.com.cn";
// assertTrue(InternetDomainName.isValid(ok_domain));
// assertTrue(HighestRegisteredDomain.sanitiseDomainName(ok_domain) ==
// ok_domain);
//
// String bad_domain = "_nfsv4idmapdomain.prelert.com";
// assertFalse(InternetDomainName.isValid(bad_domain));
// String sanitisedDomain =
// HighestRegisteredDomain.sanitiseDomainName(bad_domain);
// assertTrue(sanitisedDomain != ok_domain);
// assertEquals("p_nfsv4idmapdomain.pprelert.com", sanitisedDomain);
// assertEquals(bad_domain,
// HighestRegisteredDomain.desanitise(sanitisedDomain));
//
// bad_domain = "_www.test.ac\uFF61jp";
// assertFalse(InternetDomainName.isValid(bad_domain));
// sanitisedDomain = HighestRegisteredDomain.sanitiseDomainName(bad_domain);
// assertTrue(sanitisedDomain != ok_domain);
// assertEquals(HighestRegisteredDomain.replaceDots("p_www.test.ac\uFF61jp"),
// sanitisedDomain);
// assertEquals(HighestRegisteredDomain.replaceDots(bad_domain),
// HighestRegisteredDomain.desanitise(sanitisedDomain));
//
// bad_domain = "_xn--85x722f.com.cn";
// assertFalse(InternetDomainName.isValid(bad_domain));
// sanitisedDomain = HighestRegisteredDomain.sanitiseDomainName(bad_domain);
// assertTrue(sanitisedDomain != ok_domain);
// assertEquals("p_xn--85x722f.com.cn", sanitisedDomain);
// assertEquals(bad_domain,
// HighestRegisteredDomain.desanitise(sanitisedDomain));
//
// bad_domain = "-foundation.org";
// assertFalse(InternetDomainName.isValid(bad_domain));
// sanitisedDomain = HighestRegisteredDomain.sanitiseDomainName(bad_domain);
// assertTrue(sanitisedDomain != ok_domain);
// assertEquals("p-foundation.org", sanitisedDomain);
// assertEquals(bad_domain,
// HighestRegisteredDomain.desanitise(sanitisedDomain));
// }
//
// /**
// * Get sub domain only
// * @throws TransformException
// */
// @Test
// public void testTransform_SingleOutput() throws TransformException
// {
// List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(0,
// 2));
// List<TransformIndex> writeIndexes = createIndexArray(new
// TransformIndex(2, 0));
//
// HighestRegisteredDomain transform = new HighestRegisteredDomain(
// readIndexes, writeIndexes, mock(Logger.class));
//
// String [] input = {"", "", "www.test.ac.jp"};
// String [] scratch = {};
// String [] output = new String [2];
// String [][] readWriteArea = {input, scratch, output};
//
// transform.transform(readWriteArea);
// assertEquals("www", output[0]);
// assertNull(output[1]);
//
// input[2] = "a.b.domain.biz";
// transform.transform(readWriteArea);
// assertEquals("a.b", output[0]);
// assertNull(output[1]);
// }
//
//
//
// @Test
// public void testTransform_AllOutputs() throws TransformException
// {
// List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(0,
// 2));
// List<TransformIndex> writeIndexes = createIndexArray(new
// TransformIndex(2, 0), new TransformIndex(2, 1));
//
// HighestRegisteredDomain transform = new HighestRegisteredDomain(
// readIndexes, writeIndexes, mock(Logger.class));
//
//
// String [] input = {"", "", "www.test.ac.jp"};
// String [] scratch = {};
// String [] output = new String [2];
// String [][] readWriteArea = {input, scratch, output};
//
// transform.transform(readWriteArea);
// assertEquals("www", output[0]);
// assertEquals("test.ac.jp", output[1]);
//
// input[2] = "a.b.domain.biz";
// transform.transform(readWriteArea);
// assertEquals("a.b", output[0]);
// assertEquals("domain.biz", output[1]);
// }
//
// @Test
// public void testTransformTrimWhiteSpace() throws TransformException
// {
// List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(1,
// 2));
// List<TransformIndex> writeIndexes = createIndexArray(new
// TransformIndex(2, 0), new TransformIndex(2, 1));
//
// HighestRegisteredDomain transform = new HighestRegisteredDomain(
// readIndexes, writeIndexes, mock(Logger.class));
//
// String [] input = {};
// String [] scratch = {"", "", " time.apple.com "};
// String [] output = new String [2];
// String [][] readWriteArea = {input, scratch, output};
//
// transform.transform(readWriteArea);
// assertEquals("time", output[0]);
// assertEquals("apple.com", output[1]);
// }
//
// @Test
// public void testTransform_WriteToScratch() throws TransformException
// {
// List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(1,
// 2));
// List<TransformIndex> writeIndexes = createIndexArray(new
// TransformIndex(2, 0), new TransformIndex(2, 1));
//
// HighestRegisteredDomain transform = new HighestRegisteredDomain(
// readIndexes, writeIndexes, mock(Logger.class));
//
// String [] input = {};
// String [] scratch = {"", "", " time.apple.com "};
// String [] output = new String [2];
// String [][] readWriteArea = {input, scratch, output};
//
// assertEquals(TransformResult.OK, transform.transform(readWriteArea));
// assertEquals("time", output[0]);
// assertEquals("apple.com", output[1]);
// }
}
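
The commented-out tests above drove Guava's InternetDomainName; a minimal sketch of the split they verified (assumes Guava on the classpath and skips the sanitisation the real transform performed):

import com.google.common.net.InternetDomainName;

// "www.maps.google.co.uk" -> sub-domain "www.maps", registered domain "google.co.uk"
String host = "www.maps.google.co.uk";
InternetDomainName name = InternetDomainName.from(host);
String registeredDomain = name.topPrivateDomain().toString();                    // "google.co.uk"
String subDomain = host.substring(0, host.length() - registeredDomain.length() - 1); // "www.maps"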

View File

@ -1,40 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms;
import static org.elasticsearch.xpack.ml.transforms.TransformTestUtils.createIndexArray;
import static org.mockito.Mockito.mock;
import java.util.List;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformIndex;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformResult;
public class RegexExtractTests extends ESTestCase {
public void testTransform() throws TransformException {
List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(0, 0));
List<TransformIndex> writeIndexes = createIndexArray(new TransformIndex(2, 0),
new TransformIndex(2, 1), new TransformIndex(2, 2));
String regex = "Tag=\"Windfarm ([0-9]+)\\.Turbine ([0-9]+)\\.(.*)\"";
RegexExtract transform = new RegexExtract(regex, readIndexes, writeIndexes, mock(Logger.class));
String[] input = {"Tag=\"Windfarm 04.Turbine 06.Temperature\""};
String[] scratch = {};
String[] output = new String[3];
String[][] readWriteArea = {input, scratch, output};
assertEquals(TransformResult.OK, transform.transform(readWriteArea));
assertEquals("04", output[0]);
assertEquals("06", output[1]);
assertEquals("Temperature", output[2]);
}
}
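
What RegexExtract verifies here is plain capture-group extraction; the equivalent in bare java.util.regex:

// Illustrative: the three capture groups become the three output fields.
java.util.regex.Matcher matcher = java.util.regex.Pattern
        .compile("Tag=\"Windfarm ([0-9]+)\\.Turbine ([0-9]+)\\.(.*)\"")
        .matcher("Tag=\"Windfarm 04.Turbine 06.Temperature\"");
if (matcher.matches()) {
    String windfarm = matcher.group(1);    // "04"
    String turbine = matcher.group(2);     // "06"
    String measurement = matcher.group(3); // "Temperature"
}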

View File

@ -1,54 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms;
import static org.elasticsearch.xpack.ml.transforms.TransformTestUtils.createIndexArray;
import static org.mockito.Mockito.mock;
import java.util.List;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformIndex;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformResult;
public class RegexSplitTests extends ESTestCase {
public void testTransform() throws TransformException {
List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(0, 0));
List<TransformIndex> writeIndexes = createIndexArray(new TransformIndex(2, 0),
new TransformIndex(2, 1), new TransformIndex(2, 2));
String regex = ":";
RegexSplit transform = new RegexSplit(regex, readIndexes, writeIndexes, mock(Logger.class));
String[] input = {"A:B:C"};
String[] scratch = {};
String[] output = new String[3];
String[][] readWriteArea = {input, scratch, output};
assertEquals(TransformResult.OK, transform.transform(readWriteArea));
assertArrayEquals(readWriteArea[2], new String[]{"A", "B", "C"});
readWriteArea[0] = new String[]{"A:B:C:D"};
readWriteArea[2] = new String[]{"", "", ""};
assertEquals(TransformResult.OK, transform.transform(readWriteArea));
assertArrayEquals(readWriteArea[2], new String[]{"A", "B", "C"});
readWriteArea[0] = new String[]{"A"};
readWriteArea[2] = new String[]{""};
assertEquals(TransformResult.OK, transform.transform(readWriteArea));
assertArrayEquals(readWriteArea[2], new String[]{"A"});
readWriteArea[0] = new String[]{""};
readWriteArea[2] = new String[]{""};
assertEquals(TransformResult.OK, transform.transform(readWriteArea));
assertArrayEquals(readWriteArea[2], new String[]{""});
}
}
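
The split semantics asserted above match String.split, with surplus tokens dropped because the writer only has as many output slots as write indexes:

// Illustrative: "A:B:C:D" produces four tokens but only three output slots exist.
String[] tokens = "A:B:C:D".split(":");
String[] output = new String[3];
for (int i = 0; i < output.length && i < tokens.length; i++) {
    output[i] = tokens[i];
}
// output is {"A", "B", "C"}; for input "" the single empty token "" is kept.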

View File

@ -1,169 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformIndex;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformResult;
import java.util.List;
import java.util.Locale;
import static org.elasticsearch.xpack.ml.transforms.TransformTestUtils.createIndexArray;
import static org.mockito.Mockito.mock;
public class StringTransformTests extends ESTestCase {
public void testUpperCaseTransform_GivenZeroInputs() throws TransformException {
List<TransformIndex> readIndexes = createIndexArray();
List<TransformIndex> writeIndexes = createIndexArray(new TransformIndex(2, 1));
ESTestCase.expectThrows(IllegalArgumentException.class,
() -> StringTransform.createUpperCase(readIndexes, writeIndexes, mock(Logger.class)));
}
public void testUpperCaseTransform_GivenTwoInputs() throws TransformException {
List<TransformIndex> readIndexes = createIndexArray(
new TransformIndex(0, 0), new TransformIndex(0, 1));
List<TransformIndex> writeIndexes = createIndexArray(new TransformIndex(2, 1));
ESTestCase.expectThrows(IllegalArgumentException.class,
() -> StringTransform.createUpperCase(readIndexes, writeIndexes, mock(Logger.class)));
}
public void testUpperCaseTransform_GivenZeroOutputs() throws TransformException {
List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(0, 1));
List<TransformIndex> writeIndexes = createIndexArray();
ESTestCase.expectThrows(IllegalArgumentException.class,
() -> StringTransform.createUpperCase(readIndexes, writeIndexes, mock(Logger.class)));
}
public void testUpperCaseTransform_GivenTwoOutputs() throws TransformException {
List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(0, 1));
List<TransformIndex> writeIndexes = createIndexArray(
new TransformIndex(1, 1), new TransformIndex(1, 2));
ESTestCase.expectThrows(IllegalArgumentException.class,
() -> StringTransform.createUpperCase(readIndexes, writeIndexes, mock(Logger.class)));
}
public void testUpperCaseTransform_GivenSingleInputAndSingleOutput() throws TransformException {
List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(0, 1));
List<TransformIndex> writeIndexes = createIndexArray(new TransformIndex(2, 0));
StringTransform upperCase = StringTransform.createUpperCase(readIndexes, writeIndexes,
mock(Logger.class));
String[] input = {"aa", "aBcD", "cc", "dd", "ee"};
String[] scratch = {};
String[] output = new String[1];
String[][] readWriteArea = {input, scratch, output};
assertEquals(TransformResult.OK, upperCase.transform(readWriteArea));
assertEquals("aBcD".toUpperCase(Locale.ROOT), output[0]);
}
public void testLowerCaseTransform_GivenZeroInputs() throws TransformException {
List<TransformIndex> readIndexes = createIndexArray();
List<TransformIndex> writeIndexes = createIndexArray(new TransformIndex(2, 1));
ESTestCase.expectThrows(IllegalArgumentException.class,
() -> StringTransform.createLowerCase(readIndexes, writeIndexes, mock(Logger.class)));
}
public void testLowerCaseTransform_GivenTwoInputs() throws TransformException {
List<TransformIndex> readIndexes = createIndexArray(
new TransformIndex(0, 0), new TransformIndex(0, 1));
List<TransformIndex> writeIndexes = createIndexArray(new TransformIndex(2, 1));
ESTestCase.expectThrows(IllegalArgumentException.class,
() -> StringTransform.createLowerCase(readIndexes, writeIndexes, mock(Logger.class)));
}
public void testLowerCaseTransform_GivenZeroOutputs() throws TransformException {
List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(0, 1));
List<TransformIndex> writeIndexes = createIndexArray();
ESTestCase.expectThrows(IllegalArgumentException.class,
() -> StringTransform.createLowerCase(readIndexes, writeIndexes, mock(Logger.class)));
}
public void testLowerCaseTransform_GivenTwoOutputs() throws TransformException {
List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(0, 1));
List<TransformIndex> writeIndexes = createIndexArray(
new TransformIndex(1, 1), new TransformIndex(1, 2));
ESTestCase.expectThrows(IllegalArgumentException.class,
() -> StringTransform.createLowerCase(readIndexes, writeIndexes, mock(Logger.class)));
}
public void testLowerCaseTransform_GivenSingleInputAndSingleOutput() throws TransformException {
List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(0, 1));
List<TransformIndex> writeIndexes = createIndexArray(new TransformIndex(2, 0));
StringTransform lowerCase = StringTransform.createLowerCase(readIndexes, writeIndexes,
mock(Logger.class));
String[] input = {"aa", "AbCde", "cc", "dd", "ee"};
String[] scratch = {};
String[] output = new String[1];
String[][] readWriteArea = {input, scratch, output};
assertEquals(TransformResult.OK, lowerCase.transform(readWriteArea));
assertEquals("AbCde".toLowerCase(Locale.ROOT), output[0]);
}
public void testTrimTransform_GivenZeroInputs() throws TransformException {
List<TransformIndex> readIndexes = createIndexArray();
List<TransformIndex> writeIndexes = createIndexArray(new TransformIndex(2, 1));
ESTestCase.expectThrows(IllegalArgumentException.class,
() -> StringTransform.createTrim(readIndexes, writeIndexes, mock(Logger.class)));
}
public void testTrimTransform_GivenTwoInputs() throws TransformException {
List<TransformIndex> readIndexes = createIndexArray(
new TransformIndex(0, 0), new TransformIndex(0, 1));
List<TransformIndex> writeIndexes = createIndexArray(new TransformIndex(2, 1));
ESTestCase.expectThrows(IllegalArgumentException.class,
() -> StringTransform.createTrim(readIndexes, writeIndexes, mock(Logger.class)));
}
public void testTrimTransform_GivenZeroOutputs() throws TransformException {
List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(0, 1));
List<TransformIndex> writeIndexes = createIndexArray();
ESTestCase.expectThrows(IllegalArgumentException.class,
() -> StringTransform.createTrim(readIndexes, writeIndexes, mock(Logger.class)));
}
public void testTrimTransform_GivenTwoOutputs() throws TransformException {
List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(0, 1));
List<TransformIndex> writeIndexes = createIndexArray(
new TransformIndex(1, 1), new TransformIndex(1, 2));
ESTestCase.expectThrows(IllegalArgumentException.class,
() -> StringTransform.createTrim(readIndexes, writeIndexes, mock(Logger.class)));
}
public void testTrimTransform_GivenSingleInputAndSingleOutput() throws TransformException {
List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(0, 1));
List<TransformIndex> writeIndexes = createIndexArray(new TransformIndex(2, 0));
StringTransform trim = StringTransform.createTrim(readIndexes, writeIndexes,
mock(Logger.class));
String[] input = {" a ", "\t b ", " c", "d", "e"};
String[] scratch = {};
String[] output = new String[1];
String[][] readWriteArea = {input, scratch, output};
assertEquals(TransformResult.OK, trim.transform(readWriteArea));
assertEquals("\t b".trim(), output[0]);
}
}
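
All three factory methods enforce the same arity rule the negative tests document, equivalent to a guard of this shape (sketch, not the shipped code):

// Sketch of the guard implied by the Given{Zero,Two}{Inputs,Outputs} tests above.
if (readIndexes.size() != 1 || writeIndexes.size() != 1) {
    throw new IllegalArgumentException(
            "String transforms require exactly one input and one output");
}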

View File

@ -1,127 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms;
import static org.mockito.Mockito.mock;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.ml.job.config.Condition;
import org.elasticsearch.xpack.ml.job.config.Operator;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
import org.elasticsearch.xpack.ml.job.config.transform.TransformType;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformIndex;
public class TransformFactoryTests extends ESTestCase {
public void testIndexesMapping() {
TransformConfig conf = new TransformConfig(TransformType.CONCAT.prettyName());
conf.setInputs(Arrays.asList("field1", "field2"));
conf.setOutputs(Arrays.asList("concatted"));
Map<String, Integer> inputMap = new HashMap<>();
inputMap.put("field1", 5);
inputMap.put("field2", 3);
Map<String, Integer> scratchMap = new HashMap<>();
Map<String, Integer> outputMap = new HashMap<>();
outputMap.put("concatted", 2);
Transform tr = new TransformFactory().create(conf, inputMap, scratchMap,
outputMap, mock(Logger.class));
assertTrue(tr instanceof Concat);
List<TransformIndex> inputIndexes = tr.getReadIndexes();
assertEquals(inputIndexes.get(0), new TransformIndex(0, 5));
assertEquals(inputIndexes.get(1), new TransformIndex(0, 3));
List<TransformIndex> outputIndexes = tr.getWriteIndexes();
assertEquals(outputIndexes.get(0), new TransformIndex(2, 2));
}
public void testConcatWithOptionalArgs() {
TransformConfig conf = new TransformConfig(TransformType.CONCAT.prettyName());
conf.setInputs(Arrays.asList("field1", "field2"));
conf.setOutputs(Arrays.asList("concatted"));
Map<String, Integer> inputMap = new HashMap<>();
inputMap.put("field1", 5);
inputMap.put("field2", 3);
Map<String, Integer> scratchMap = new HashMap<>();
Map<String, Integer> outputMap = new HashMap<>();
outputMap.put("concatted", 2);
Transform tr = new TransformFactory().create(conf, inputMap, scratchMap,
outputMap, mock(Logger.class));
assertTrue(tr instanceof Concat);
assertEquals("", ((Concat) tr).getDelimiter());
conf.setArguments(Arrays.asList("delimiter"));
tr = new TransformFactory().create(conf, inputMap, scratchMap,
outputMap, mock(Logger.class));
assertTrue(tr instanceof Concat);
assertEquals("delimiter", ((Concat) tr).getDelimiter());
}
public void testAllTypesCreated() {
EnumSet<TransformType> all = EnumSet.allOf(TransformType.class);
Map<String, Integer> inputIndexes = new HashMap<>();
Map<String, Integer> scratchMap = new HashMap<>();
Map<String, Integer> outputIndexes = new HashMap<>();
for (TransformType type : all) {
TransformConfig conf = TransformTestUtils.createValidTransform(type);
conf.getInputs().stream().forEach(input -> inputIndexes.put(input, 0));
conf.getOutputs().stream().forEach(output -> outputIndexes.put(output, 0));
// throws IllegalArgumentException if it doesn't handle the type
new TransformFactory().create(conf, inputIndexes, scratchMap,
outputIndexes, mock(Logger.class));
}
}
public void testExcludeTransformsCreated() {
Map<String, Integer> inputIndexes = new HashMap<>();
Map<String, Integer> scratchMap = new HashMap<>();
Map<String, Integer> outputIndexes = new HashMap<>();
TransformConfig conf = new TransformConfig(TransformType.EXCLUDE.prettyName());
conf.setInputs(new ArrayList<>());
conf.setOutputs(new ArrayList<>());
conf.setCondition(new Condition(Operator.LT, "2000"));
ExcludeFilterNumeric numericTransform =
(ExcludeFilterNumeric) new TransformFactory().create(conf, inputIndexes,
scratchMap, outputIndexes, mock(Logger.class));
assertEquals(Operator.LT, numericTransform.getCondition().getOperator());
assertEquals(2000, numericTransform.filterValue(), 0.0000001);
conf.setCondition(new Condition(Operator.MATCH, "aaaaa"));
ExcludeFilterRegex regexTransform =
(ExcludeFilterRegex) new TransformFactory().create(conf, inputIndexes,
scratchMap, outputIndexes, mock(Logger.class));
assertEquals(Operator.MATCH, regexTransform.getCondition().getOperator());
assertEquals("aaaaa", regexTransform.getCondition().getValue());
}
}
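
testAllTypesCreated implies the factory is a type-keyed dispatch. A hedged sketch of that shape, using only constructors that appear in these tests (the HighestRegisteredDomain constructor is taken from the commented-out tests above; the real create() also resolves field names to TransformIndex positions via the input/scratch/output maps):

// Hypothetical sketch of the factory's dispatch; arguments are simplified.
static Transform createSketch(TransformType type, String regex, Condition condition,
        List<TransformIndex> reads, List<TransformIndex> writes, Logger logger) {
    switch (type) {
        case CONCAT:
            return new Concat(reads, writes, logger);
        case REGEX_EXTRACT:
            return new RegexExtract(regex, reads, writes, logger);
        case REGEX_SPLIT:
            return new RegexSplit(regex, reads, writes, logger);
        case EXCLUDE:
            // Numeric vs regex exclude is chosen by the condition's operator,
            // as testExcludeTransformsCreated shows.
            return condition.getOperator() == Operator.MATCH
                    ? new ExcludeFilterRegex(condition, reads, writes, logger)
                    : new ExcludeFilterNumeric(condition, reads, writes, logger);
        case LOWERCASE:
            return StringTransform.createLowerCase(reads, writes, logger);
        case UPPERCASE:
            return StringTransform.createUpperCase(reads, writes, logger);
        case TRIM:
            return StringTransform.createTrim(reads, writes, logger);
        case DOMAIN_SPLIT:
            return new HighestRegisteredDomain(reads, writes, logger);
        default:
            throw new IllegalArgumentException("Unhandled transform type: " + type);
    }
}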

View File

@ -1,83 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms;
import java.util.ArrayList;
import java.util.List;
import java.util.function.BiFunction;
import org.elasticsearch.xpack.ml.job.config.Condition;
import org.elasticsearch.xpack.ml.job.config.Operator;
import org.elasticsearch.xpack.ml.job.config.transform.IntRange;
import org.elasticsearch.xpack.ml.job.config.transform.TransformConfig;
import org.elasticsearch.xpack.ml.job.config.transform.TransformType;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformIndex;
public final class TransformTestUtils {
private TransformTestUtils() {
}
public static List<TransformIndex> createIndexArray(TransformIndex... indexs) {
List<TransformIndex> result = new ArrayList<>();
for (TransformIndex i : indexs) {
result.add(i);
}
return result;
}
public static TransformConfig createValidTransform(TransformType type) {
List<String> inputs = createValidArgs(type.arityRange(), type,
(arg, t) -> Integer.toString(arg));
List<String> args = createValidArgs(type.argumentsRange(), type,
TransformTestUtils::createValidArgument);
List<String> outputs = createValidArgs(type.outputsRange(), type,
(arg, t) -> Integer.toString(arg));
Condition condition = null;
if (type.hasCondition()) {
condition = new Condition(Operator.EQ, "100");
}
TransformConfig tr = new TransformConfig(type.toString());
tr.setInputs(inputs);
tr.setArguments(args);
tr.setOutputs(outputs);
tr.setCondition(condition);
return tr;
}
private static List<String> createValidArgs(IntRange range, TransformType type,
BiFunction<Integer, TransformType, String> argumentCreator) {
List<String> args = new ArrayList<>();
int validCount = getValidCount(range);
for (int arg = 0; arg < validCount; ++arg) {
args.add(argumentCreator.apply(arg, type));
}
return args;
}
private static String createValidArgument(int argNumber, TransformType type) {
switch (type) {
case REGEX_EXTRACT:
return Integer.toString(argNumber) + ".Foo ([0-9]+)";
case CONCAT:
case DOMAIN_SPLIT:
case EXCLUDE:
case LOWERCASE:
case REGEX_SPLIT:
case TRIM:
case UPPERCASE:
return Integer.toString(argNumber);
default:
throw new IllegalArgumentException();
}
}
private static int getValidCount(IntRange range) {
return range.hasUpperBound() ? range.upper() : range.lower();
}
}

View File

@ -1,121 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms.date;

import org.apache.logging.log4j.Logger;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformIndex;
import org.elasticsearch.xpack.ml.transforms.TransformException;

import java.util.Collections;
import java.util.List;

import static org.elasticsearch.xpack.ml.transforms.TransformTestUtils.createIndexArray;
import static org.mockito.Mockito.mock;

public class DateFormatTransformTests extends ESTestCase {

    public void testTransform_GivenValidTimestamp() throws TransformException {
        List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(0, 0));
        List<TransformIndex> writeIndexes = createIndexArray(new TransformIndex(2, 0));

        DateFormatTransform transformer = new DateFormatTransform("yyyy-MM-dd HH:mm:ss.SSSXXX",
                readIndexes, writeIndexes, mock(Logger.class));

        String[] input = {"2014-01-01 13:42:56.500Z"};
        String[] scratch = {};
        String[] output = new String[1];
        String[][] readWriteArea = {input, scratch, output};

        transformer.transform(readWriteArea);

        assertEquals(1388583776500L, transformer.epochMs());
        assertEquals("1388583776", output[0]);
    }

    public void testTransform_GivenInvalidFormat() throws TransformException {
        IllegalArgumentException e = ESTestCase.expectThrows(IllegalArgumentException.class,
                () -> new DateFormatTransform("yyyy-MM HH:mm:ss", Collections.emptyList(),
                        Collections.emptyList(), mock(Logger.class)));
        assertEquals("Timestamp cannot be derived from pattern: yyyy-MM HH:mm:ss", e.getMessage());
    }

    public void testTransform_GivenInvalidTimestamp() throws TransformException {
        List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(0, 0));
        List<TransformIndex> writeIndexes = createIndexArray(new TransformIndex(2, 0));

        DateFormatTransform transformer = new DateFormatTransform("yyyy-MM-dd HH:mm:ss",
                readIndexes, writeIndexes, mock(Logger.class));

        String[] input = {"invalid"};
        String[] scratch = {};
        String[] output = new String[1];
        String[][] readWriteArea = {input, scratch, output};

        ParseTimestampException e = ESTestCase.expectThrows(ParseTimestampException.class,
                () -> transformer.transform(readWriteArea));
        assertEquals("Cannot parse date 'invalid' with format string 'yyyy-MM-dd HH:mm:ss'",
                e.getMessage());
    }

    public void testTransform_GivenNull() throws TransformException {
        List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(1, 0));
        List<TransformIndex> writeIndexes = createIndexArray(new TransformIndex(2, 0));

        DateFormatTransform transformer = new DateFormatTransform("yyyy-MM-dd HH:mm:ss",
                readIndexes, writeIndexes, mock(Logger.class));

        String[] input = {};
        String[] scratch = {null};
        String[] output = new String[1];
        String[][] readWriteArea = {input, scratch, output};

        ESTestCase.expectThrows(ParseTimestampException.class,
                () -> transformer.transform(readWriteArea));
    }

    public void testTransform_GivenBadFormat() throws TransformException {
        List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(0, 0));
        List<TransformIndex> writeIndexes = createIndexArray(new TransformIndex(2, 0));

        ESTestCase.expectThrows(IllegalArgumentException.class,
                () -> new DateFormatTransform("e", readIndexes, writeIndexes, mock(Logger.class)));
    }

    public void testTransform_FromScratchArea() throws TransformException {
        List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(1, 0));
        List<TransformIndex> writeIndexes = createIndexArray(new TransformIndex(2, 0));

        DateFormatTransform transformer = new DateFormatTransform("yyyy-MM-dd HH:mm:ssXXX",
                readIndexes, writeIndexes, mock(Logger.class));

        String[] input = {};
        String[] scratch = {"2014-01-01 00:00:00Z"};
        String[] output = new String[1];
        String[][] readWriteArea = {input, scratch, output};

        transformer.transform(readWriteArea);

        assertEquals(1388534400000L, transformer.epochMs());
        assertEquals("1388534400", output[0]);
    }

    public void testTransform_WithBrackets() throws TransformException {
        List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(0, 0));
        List<TransformIndex> writeIndexes = createIndexArray(new TransformIndex(2, 0));

        DateFormatTransform transformer = new DateFormatTransform("'['yyyy-MM-dd HH:mm:ssX']'",
                readIndexes, writeIndexes, mock(Logger.class));

        String[] input = {"[2014-06-23 00:00:00Z]"};
        String[] scratch = {};
        String[] output = new String[1];
        String[][] readWriteArea = {input, scratch, output};

        transformer.transform(readWriteArea);
    }
}
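
All of these tests address cells of a three-row readWriteArea through TransformIndex(array, index) pairs: row 0 holds the raw input record, row 1 the scratch area for intermediate values, and row 2 the output record. A minimal sketch of that addressing scheme follows; it is an illustration rather than the shipped Transform code, and the field names array and index are assumptions.

    // Illustration of how a TransformIndex(array, index) pair resolves
    // against the read/write area used in the tests above.
    // readWriteArea[0] = input, readWriteArea[1] = scratch, readWriteArea[2] = output.
    static String read(String[][] readWriteArea, TransformIndex idx) {
        return readWriteArea[idx.array][idx.index];
    }

    static void write(String[][] readWriteArea, TransformIndex idx, String value) {
        readWriteArea[idx.array][idx.index] = value;
    }

So a transform built with readIndexes = [(1, 0)] and writeIndexes = [(2, 0)], as in testTransform_FromScratchArea, reads scratch[0] and writes output[0].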

View File

@ -1,89 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.transforms.date;

import static org.elasticsearch.xpack.ml.transforms.TransformTestUtils.createIndexArray;
import static org.mockito.Mockito.mock;

import java.util.List;

import org.apache.logging.log4j.Logger;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.ml.transforms.Transform.TransformIndex;
import org.elasticsearch.xpack.ml.transforms.TransformException;

public class DoubleDateTransformTests extends ESTestCase {

    public void testTransform_GivenTimestampIsNotMilliseconds() throws TransformException {
        List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(0, 0));
        List<TransformIndex> writeIndexes = createIndexArray(new TransformIndex(2, 0));

        DoubleDateTransform transformer = new DoubleDateTransform(false,
                readIndexes, writeIndexes, mock(Logger.class));

        String[] input = {"1000"};
        String[] scratch = {};
        String[] output = new String[1];
        String[][] readWriteArea = {input, scratch, output};

        transformer.transform(readWriteArea);

        assertEquals(1000000, transformer.epochMs());
        assertEquals("1000", output[0]);
    }

    public void testTransform_GivenTimestampIsMilliseconds() throws TransformException {
        List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(0, 0));
        List<TransformIndex> writeIndexes = createIndexArray(new TransformIndex(2, 0));

        DoubleDateTransform transformer = new DoubleDateTransform(true,
                readIndexes, writeIndexes, mock(Logger.class));

        String[] input = {"1000"};
        String[] scratch = {};
        String[] output = new String[1];
        String[][] readWriteArea = {input, scratch, output};

        transformer.transform(readWriteArea);

        assertEquals(1000, transformer.epochMs());
        assertEquals("1", output[0]);
    }

    public void testTransform_GivenTimestampIsNotValidDouble() throws TransformException {
        List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(0, 0));
        List<TransformIndex> writeIndexes = createIndexArray(new TransformIndex(2, 0));

        DoubleDateTransform transformer = new DoubleDateTransform(false,
                readIndexes, writeIndexes, mock(Logger.class));

        String[] input = {"invalid"};
        String[] scratch = {};
        String[] output = new String[1];
        String[][] readWriteArea = {input, scratch, output};

        ParseTimestampException e = ESTestCase.expectThrows(ParseTimestampException.class,
                () -> transformer.transform(readWriteArea));
        assertEquals("Cannot parse timestamp 'invalid' as epoch value", e.getMessage());
    }

    public void testTransform_InputFromScratchArea() throws TransformException {
        List<TransformIndex> readIndexes = createIndexArray(new TransformIndex(1, 0));
        List<TransformIndex> writeIndexes = createIndexArray(new TransformIndex(2, 0));

        DoubleDateTransform transformer = new DoubleDateTransform(false,
                readIndexes, writeIndexes, mock(Logger.class));

        String[] input = {};
        String[] scratch = {"1000"};
        String[] output = new String[1];
        String[][] readWriteArea = {input, scratch, output};

        transformer.transform(readWriteArea);
    }
}
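
Taken together, the assertions pin down the boolean constructor argument: epochMs() always reports milliseconds, while the value written to the output row is always epoch seconds. A hedged reconstruction of that normalisation follows; the helper name is hypothetical and this is not the shipped implementation.

    // Reconstructed from the assertions above: a seconds input is scaled up
    // to milliseconds, a milliseconds input is passed through unchanged.
    static long normaliseToEpochMs(String field, boolean isMillisecond) {
        double value = Double.parseDouble(field);   // "invalid" raises an exception
        return isMillisecond ? (long) value : (long) (value * 1000);
    }

    // input "1000", isMillisecond == false -> epochMs() == 1000000, output "1000"
    // input "1000", isMillisecond == true  -> epochMs() == 1000,    output "1"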

View File

@ -1,14 +0,0 @@
{
  "xpack.ml.validate_transform": {
    "methods": [ "POST" ],
    "url": {
      "path": "/_xpack/ml/_validate/transform",
      "paths": [ "/_xpack/ml/_validate/transform" ],
      "params": {}
    },
    "body": {
      "description" : "The transform",
      "required" : true
    }
  }
}

View File

@ -1,14 +0,0 @@
{
  "xpack.ml.validate_transforms": {
    "methods": [ "POST" ],
    "url": {
      "path": "/_xpack/ml/_validate/transforms",
      "paths": [ "/_xpack/ml/_validate/transforms" ],
      "params": {}
    },
    "body": {
      "description" : "The transforms",
      "required" : true
    }
  }
}

View File

@ -1,63 +0,0 @@
---
"Test valid transform":
  - do:
      xpack.ml.validate_transform:
        body: >
          {
            "transform": "concat",
            "inputs": [ "one", "two" ],
            "outputs": [ "oneplustwo" ]
          }
  - match: { acknowledged: true }

---
"Test invalid transform":
  - do:
      catch: /Transform type concat expected \[2‥\+∞\) input\(s\), got 1/
      xpack.ml.validate_transform:
        body: >
          {
            "transform": "concat",
            "inputs": [ "justone" ],
            "outputs": [ "stilljustone" ]
          }

---
"Test valid transforms":
  - do:
      xpack.ml.validate_transforms:
        body: >
          {
            "transforms": [
              {
                "transform": "concat",
                "inputs": [ "one", "two" ],
                "outputs": [ "oneplustwo" ]
              },
              {
                "transform": "domain_split",
                "inputs": [ "domain" ],
                "outputs": [ "sub_domain", "highest_registered_domain" ]
              }
            ]
          }
  - match: { acknowledged: true }

---
"Test invalid transforms":
  - do:
      catch: /Transform type concat with inputs \[one, two\] has a circular dependency/
      xpack.ml.validate_transforms:
        body: >
          {
            "transforms": [
              {
                "transform": "concat",
                "inputs": [ "one", "two" ],
                "outputs": [ "three" ]
              },
              {
                "transform": "concat",
                "inputs": [ "two", "three" ],
                "outputs": [ "one" ]
              }
            ]
          }
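
The final case rejects a cycle: each concat's outputs feed the other's inputs. A minimal sketch of one way to detect such a cycle with a depth-first search over field dependencies; the helper names and the use of the TransformConfig getters seen earlier are assumptions, not the shipped validator.

    // Hypothetical cycle check: add an edge from every input field to every
    // output field, then DFS with a "visiting" set to spot a back-edge.
    static boolean hasCircularDependency(List<TransformConfig> transforms) {
        Map<String, Set<String>> edges = new HashMap<>();
        for (TransformConfig t : transforms) {
            for (String in : t.getInputs()) {
                edges.computeIfAbsent(in, k -> new HashSet<>()).addAll(t.getOutputs());
            }
        }
        Set<String> visiting = new HashSet<>();
        Set<String> done = new HashSet<>();
        for (String field : edges.keySet()) {
            if (findsCycle(field, edges, visiting, done)) {
                return true;
            }
        }
        return false;
    }

    static boolean findsCycle(String field, Map<String, Set<String>> edges,
                              Set<String> visiting, Set<String> done) {
        if (visiting.contains(field)) {
            return true;                              // back-edge: cycle found
        }
        if (done.contains(field)) {
            return false;
        }
        visiting.add(field);
        for (String next : edges.getOrDefault(field, Collections.emptySet())) {
            if (findsCycle(next, edges, visiting, done)) {
                return true;
            }
        }
        visiting.remove(field);
        done.add(field);
        return false;
    }

For the failing example the edges are one -> three, two -> {one, three}, and three -> one, so the walk one -> three -> one is the cycle the error message reports.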