[HLRC][ML] Add ML find file structure API (#35833)

Relates to #29827
David Roberts 2018-11-23 06:58:05 +00:00 committed by GitHub
parent 813e053f62
commit 3c059ee057
14 changed files with 1786 additions and 0 deletions
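
In short, the new API lets a client post the raw bytes of a sample (log lines, CSV, NDJSON, XML) and get back a FileStructure describing how Elasticsearch would ingest it. A minimal sketch of a synchronous call using the methods added in this commit (the host, port and sample path below are illustrative assumptions, not part of the change):

    import org.apache.http.HttpHost;
    import org.elasticsearch.client.RequestOptions;
    import org.elasticsearch.client.RestClient;
    import org.elasticsearch.client.RestHighLevelClient;
    import org.elasticsearch.client.ml.FindFileStructureRequest;
    import org.elasticsearch.client.ml.FindFileStructureResponse;
    import org.elasticsearch.client.ml.filestructurefinder.FileStructure;

    import java.nio.file.Files;
    import java.nio.file.Paths;

    public class FindFileStructureExample {
        public static void main(String[] args) throws Exception {
            // Assumed local cluster; adjust the host/port for a real deployment
            try (RestHighLevelClient client = new RestHighLevelClient(
                    RestClient.builder(new HttpHost("localhost", 9200, "http")))) {

                FindFileStructureRequest request = new FindFileStructureRequest();
                // The sample is the only mandatory part of the request
                request.setSample(Files.readAllBytes(Paths.get("/tmp/sample.log")));
                request.setLinesToSample(500); // optional override
                request.setExplain(true);      // also return an explanation of the decisions made

                FindFileStructureResponse response =
                    client.machineLearning().findFileStructure(request, RequestOptions.DEFAULT);

                FileStructure structure = response.getFileStructure();
                System.out.println("format:   " + structure.getFormat());
                System.out.println("charset:  " + structure.getCharset());
                System.out.println("mappings: " + structure.getMappings());
            }
        }
    }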


@@ -36,6 +36,7 @@ import org.elasticsearch.client.ml.DeleteFilterRequest;
import org.elasticsearch.client.ml.DeleteForecastRequest;
import org.elasticsearch.client.ml.DeleteJobRequest;
import org.elasticsearch.client.ml.DeleteModelSnapshotRequest;
import org.elasticsearch.client.ml.FindFileStructureRequest;
import org.elasticsearch.client.ml.FlushJobRequest;
import org.elasticsearch.client.ml.ForecastJobRequest;
import org.elasticsearch.client.ml.GetBucketsRequest;
@@ -70,6 +71,7 @@ import org.elasticsearch.client.ml.UpdateModelSnapshotRequest;
import org.elasticsearch.client.ml.job.util.PageParams;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.xcontent.XContentType;
import java.io.IOException;
@@ -648,4 +650,65 @@ final class MLRequestConverters {
Request request = new Request(HttpDelete.METHOD_NAME, endpoint);
return request;
}
static Request findFileStructure(FindFileStructureRequest findFileStructureRequest) {
String endpoint = new EndpointBuilder()
.addPathPartAsIs("_xpack")
.addPathPartAsIs("ml")
.addPathPartAsIs("find_file_structure")
.build();
Request request = new Request(HttpPost.METHOD_NAME, endpoint);
RequestConverters.Params params = new RequestConverters.Params(request);
if (findFileStructureRequest.getLinesToSample() != null) {
params.putParam(FindFileStructureRequest.LINES_TO_SAMPLE.getPreferredName(),
findFileStructureRequest.getLinesToSample().toString());
}
if (findFileStructureRequest.getTimeout() != null) {
params.putParam(FindFileStructureRequest.TIMEOUT.getPreferredName(), findFileStructureRequest.getTimeout().toString());
}
if (findFileStructureRequest.getCharset() != null) {
params.putParam(FindFileStructureRequest.CHARSET.getPreferredName(), findFileStructureRequest.getCharset());
}
if (findFileStructureRequest.getFormat() != null) {
params.putParam(FindFileStructureRequest.FORMAT.getPreferredName(), findFileStructureRequest.getFormat().toString());
}
if (findFileStructureRequest.getColumnNames() != null) {
params.putParam(FindFileStructureRequest.COLUMN_NAMES.getPreferredName(),
Strings.collectionToCommaDelimitedString(findFileStructureRequest.getColumnNames()));
}
if (findFileStructureRequest.getHasHeaderRow() != null) {
params.putParam(FindFileStructureRequest.HAS_HEADER_ROW.getPreferredName(),
findFileStructureRequest.getHasHeaderRow().toString());
}
if (findFileStructureRequest.getDelimiter() != null) {
params.putParam(FindFileStructureRequest.DELIMITER.getPreferredName(),
findFileStructureRequest.getDelimiter().toString());
}
if (findFileStructureRequest.getQuote() != null) {
params.putParam(FindFileStructureRequest.QUOTE.getPreferredName(), findFileStructureRequest.getQuote().toString());
}
if (findFileStructureRequest.getShouldTrimFields() != null) {
params.putParam(FindFileStructureRequest.SHOULD_TRIM_FIELDS.getPreferredName(),
findFileStructureRequest.getShouldTrimFields().toString());
}
if (findFileStructureRequest.getGrokPattern() != null) {
params.putParam(FindFileStructureRequest.GROK_PATTERN.getPreferredName(), findFileStructureRequest.getGrokPattern());
}
if (findFileStructureRequest.getTimestampFormat() != null) {
params.putParam(FindFileStructureRequest.TIMESTAMP_FORMAT.getPreferredName(), findFileStructureRequest.getTimestampFormat());
}
if (findFileStructureRequest.getTimestampField() != null) {
params.putParam(FindFileStructureRequest.TIMESTAMP_FIELD.getPreferredName(), findFileStructureRequest.getTimestampField());
}
if (findFileStructureRequest.getExplain() != null) {
params.putParam(FindFileStructureRequest.EXPLAIN.getPreferredName(), findFileStructureRequest.getExplain().toString());
}
BytesReference sample = findFileStructureRequest.getSample();
BytesRef source = sample.toBytesRef();
HttpEntity byteEntity = new ByteArrayEntity(source.bytes, source.offset, source.length, createContentType(XContentType.JSON));
request.setEntity(byteEntity);
return request;
}
}


@@ -31,6 +31,8 @@ import org.elasticsearch.client.ml.DeleteForecastRequest;
import org.elasticsearch.client.ml.DeleteJobRequest;
import org.elasticsearch.client.ml.DeleteJobResponse;
import org.elasticsearch.client.ml.DeleteModelSnapshotRequest;
import org.elasticsearch.client.ml.FindFileStructureRequest;
import org.elasticsearch.client.ml.FindFileStructureResponse;
import org.elasticsearch.client.ml.FlushJobRequest;
import org.elasticsearch.client.ml.FlushJobResponse;
import org.elasticsearch.client.ml.ForecastJobRequest;
@@ -1711,4 +1713,45 @@ public final class MachineLearningClient {
listener,
Collections.emptySet());
}
/**
* Finds the structure of a file
* <p>
* For additional info
* see <a href="http://www.elastic.co/guide/en/elasticsearch/reference/current/ml-find-file-structure.html">
* ML Find File Structure documentation</a>
*
* @param request The find file structure request
* @param options Additional request options (e.g. headers), use {@link RequestOptions#DEFAULT} if nothing needs to be customized
* @return the response containing details of the file structure
* @throws IOException when there is a serialization issue sending the request or receiving the response
*/
public FindFileStructureResponse findFileStructure(FindFileStructureRequest request, RequestOptions options) throws IOException {
return restHighLevelClient.performRequestAndParseEntity(request,
MLRequestConverters::findFileStructure,
options,
FindFileStructureResponse::fromXContent,
Collections.emptySet());
}
/**
* Finds the structure of a file asynchronously and notifies the listener on completion
* <p>
* For additional info
* see <a href="http://www.elastic.co/guide/en/elasticsearch/reference/current/ml-find-file-structure.html">
* ML Find File Structure documentation</a>
*
* @param request The find file structure request
* @param options Additional request options (e.g. headers), use {@link RequestOptions#DEFAULT} if nothing needs to be customized
* @param listener Listener to be notified upon request completion
*/
public void findFileStructureAsync(FindFileStructureRequest request, RequestOptions options,
ActionListener<FindFileStructureResponse> listener) {
restHighLevelClient.performRequestAsyncAndParseEntity(request,
MLRequestConverters::findFileStructure,
options,
FindFileStructureResponse::fromXContent,
listener,
Collections.emptySet());
}
}
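
The asynchronous variant follows the same listener pattern as the rest of the ML client. A brief sketch, assuming the request has already been populated as in the synchronous example above (the logging in the listener is illustrative):

    import org.elasticsearch.action.ActionListener;
    import org.elasticsearch.client.RequestOptions;
    import org.elasticsearch.client.RestHighLevelClient;
    import org.elasticsearch.client.ml.FindFileStructureRequest;
    import org.elasticsearch.client.ml.FindFileStructureResponse;

    public class FindFileStructureAsyncExample {
        // Sends the request without blocking; the listener is notified on completion or failure
        static void findStructureAsync(RestHighLevelClient client, FindFileStructureRequest request) {
            client.machineLearning().findFileStructureAsync(request, RequestOptions.DEFAULT,
                new ActionListener<FindFileStructureResponse>() {
                    @Override
                    public void onResponse(FindFileStructureResponse response) {
                        System.out.println("detected format: " + response.getFileStructure().getFormat());
                    }

                    @Override
                    public void onFailure(Exception e) {
                        e.printStackTrace();
                    }
                });
        }
    }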


@@ -0,0 +1,302 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.client.ml;
import org.elasticsearch.client.Validatable;
import org.elasticsearch.client.ValidationException;
import org.elasticsearch.client.ml.filestructurefinder.FileStructure;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
public class FindFileStructureRequest implements Validatable, ToXContent {
public static final ParseField LINES_TO_SAMPLE = new ParseField("lines_to_sample");
public static final ParseField TIMEOUT = new ParseField("timeout");
public static final ParseField CHARSET = FileStructure.CHARSET;
public static final ParseField FORMAT = FileStructure.FORMAT;
public static final ParseField COLUMN_NAMES = FileStructure.COLUMN_NAMES;
public static final ParseField HAS_HEADER_ROW = FileStructure.HAS_HEADER_ROW;
public static final ParseField DELIMITER = FileStructure.DELIMITER;
public static final ParseField QUOTE = FileStructure.QUOTE;
public static final ParseField SHOULD_TRIM_FIELDS = FileStructure.SHOULD_TRIM_FIELDS;
public static final ParseField GROK_PATTERN = FileStructure.GROK_PATTERN;
// This one is plural in FileStructure, but singular in FileStructureOverrides
public static final ParseField TIMESTAMP_FORMAT = new ParseField("timestamp_format");
public static final ParseField TIMESTAMP_FIELD = FileStructure.TIMESTAMP_FIELD;
public static final ParseField EXPLAIN = new ParseField("explain");
private Integer linesToSample;
private TimeValue timeout;
private String charset;
private FileStructure.Format format;
private List<String> columnNames;
private Boolean hasHeaderRow;
private Character delimiter;
private Character quote;
private Boolean shouldTrimFields;
private String grokPattern;
private String timestampFormat;
private String timestampField;
private Boolean explain;
private BytesReference sample;
public FindFileStructureRequest() {
}
public Integer getLinesToSample() {
return linesToSample;
}
public void setLinesToSample(Integer linesToSample) {
this.linesToSample = linesToSample;
}
public TimeValue getTimeout() {
return timeout;
}
public void setTimeout(TimeValue timeout) {
this.timeout = timeout;
}
public String getCharset() {
return charset;
}
public void setCharset(String charset) {
this.charset = (charset == null || charset.isEmpty()) ? null : charset;
}
public FileStructure.Format getFormat() {
return format;
}
public void setFormat(FileStructure.Format format) {
this.format = format;
}
public void setFormat(String format) {
this.format = (format == null || format.isEmpty()) ? null : FileStructure.Format.fromString(format);
}
public List<String> getColumnNames() {
return columnNames;
}
public void setColumnNames(List<String> columnNames) {
this.columnNames = (columnNames == null || columnNames.isEmpty()) ? null : columnNames;
}
public void setColumnNames(String[] columnNames) {
this.columnNames = (columnNames == null || columnNames.length == 0) ? null : Arrays.asList(columnNames);
}
public Boolean getHasHeaderRow() {
return hasHeaderRow;
}
public void setHasHeaderRow(Boolean hasHeaderRow) {
this.hasHeaderRow = hasHeaderRow;
}
public Character getDelimiter() {
return delimiter;
}
public void setDelimiter(Character delimiter) {
this.delimiter = delimiter;
}
public void setDelimiter(String delimiter) {
if (delimiter == null || delimiter.isEmpty()) {
this.delimiter = null;
} else if (delimiter.length() == 1) {
this.delimiter = delimiter.charAt(0);
} else {
throw new IllegalArgumentException(DELIMITER.getPreferredName() + " must be a single character");
}
}
public Character getQuote() {
return quote;
}
public void setQuote(Character quote) {
this.quote = quote;
}
public void setQuote(String quote) {
if (quote == null || quote.isEmpty()) {
this.quote = null;
} else if (quote.length() == 1) {
this.quote = quote.charAt(0);
} else {
throw new IllegalArgumentException(QUOTE.getPreferredName() + " must be a single character");
}
}
public Boolean getShouldTrimFields() {
return shouldTrimFields;
}
public void setShouldTrimFields(Boolean shouldTrimFields) {
this.shouldTrimFields = shouldTrimFields;
}
public String getGrokPattern() {
return grokPattern;
}
public void setGrokPattern(String grokPattern) {
this.grokPattern = (grokPattern == null || grokPattern.isEmpty()) ? null : grokPattern;
}
public String getTimestampFormat() {
return timestampFormat;
}
public void setTimestampFormat(String timestampFormat) {
this.timestampFormat = (timestampFormat == null || timestampFormat.isEmpty()) ? null : timestampFormat;
}
public String getTimestampField() {
return timestampField;
}
public void setTimestampField(String timestampField) {
this.timestampField = (timestampField == null || timestampField.isEmpty()) ? null : timestampField;
}
public Boolean getExplain() {
return explain;
}
public void setExplain(Boolean explain) {
this.explain = explain;
}
public BytesReference getSample() {
return sample;
}
public void setSample(byte[] sample) {
this.sample = new BytesArray(sample);
}
public void setSample(BytesReference sample) {
this.sample = Objects.requireNonNull(sample);
}
@Override
public Optional<ValidationException> validate() {
ValidationException validationException = new ValidationException();
if (sample == null || sample.length() == 0) {
validationException.addValidationError("sample must be specified");
}
return validationException.validationErrors().isEmpty() ? Optional.empty() : Optional.of(validationException);
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
if (linesToSample != null) {
builder.field(LINES_TO_SAMPLE.getPreferredName(), linesToSample);
}
if (timeout != null) {
builder.field(TIMEOUT.getPreferredName(), timeout);
}
if (charset != null) {
builder.field(CHARSET.getPreferredName(), charset);
}
if (format != null) {
builder.field(FORMAT.getPreferredName(), format);
}
if (columnNames != null) {
builder.field(COLUMN_NAMES.getPreferredName(), columnNames);
}
if (hasHeaderRow != null) {
builder.field(HAS_HEADER_ROW.getPreferredName(), hasHeaderRow);
}
if (delimiter != null) {
builder.field(DELIMITER.getPreferredName(), delimiter.toString());
}
if (quote != null) {
builder.field(QUOTE.getPreferredName(), quote.toString());
}
if (shouldTrimFields != null) {
builder.field(SHOULD_TRIM_FIELDS.getPreferredName(), shouldTrimFields);
}
if (grokPattern != null) {
builder.field(GROK_PATTERN.getPreferredName(), grokPattern);
}
if (timestampFormat != null) {
builder.field(TIMESTAMP_FORMAT.getPreferredName(), timestampFormat);
}
if (timestampField != null) {
builder.field(TIMESTAMP_FIELD.getPreferredName(), timestampField);
}
if (explain != null) {
builder.field(EXPLAIN.getPreferredName(), explain);
}
// Sample is not included in the X-Content representation
return builder;
}
@Override
public int hashCode() {
return Objects.hash(linesToSample, timeout, charset, format, columnNames, hasHeaderRow, delimiter, quote, shouldTrimFields,
grokPattern, timestampFormat, timestampField, explain, sample);
}
@Override
public boolean equals(Object other) {
if (this == other) {
return true;
}
if (other == null || getClass() != other.getClass()) {
return false;
}
FindFileStructureRequest that = (FindFileStructureRequest) other;
return Objects.equals(this.linesToSample, that.linesToSample) &&
Objects.equals(this.timeout, that.timeout) &&
Objects.equals(this.charset, that.charset) &&
Objects.equals(this.format, that.format) &&
Objects.equals(this.columnNames, that.columnNames) &&
Objects.equals(this.hasHeaderRow, that.hasHeaderRow) &&
Objects.equals(this.delimiter, that.delimiter) &&
Objects.equals(this.quote, that.quote) &&
Objects.equals(this.shouldTrimFields, that.shouldTrimFields) &&
Objects.equals(this.grokPattern, that.grokPattern) &&
Objects.equals(this.timestampFormat, that.timestampFormat) &&
Objects.equals(this.timestampField, that.timestampField) &&
Objects.equals(this.explain, that.explain) &&
Objects.equals(this.sample, that.sample);
}
}
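
For a quick sense of how the optional overrides above combine, a sketch of a request describing a delimited sample (the sample text, column names and delimiter are made-up illustration values):

    import org.elasticsearch.client.ml.FindFileStructureRequest;
    import org.elasticsearch.client.ml.filestructurefinder.FileStructure;

    import java.nio.charset.StandardCharsets;
    import java.util.Arrays;

    public class DelimitedRequestExample {
        static FindFileStructureRequest delimitedRequest() {
            FindFileStructureRequest request = new FindFileStructureRequest();
            request.setSample("1|foo|2.5\n2|bar|3.5\n".getBytes(StandardCharsets.UTF_8));
            request.setFormat(FileStructure.Format.DELIMITED); // the String overload accepts "delimited" too
            request.setColumnNames(Arrays.asList("id", "name", "value"));
            request.setDelimiter("|");        // must be a single character
            request.setHasHeaderRow(false);
            request.setShouldTrimFields(true);
            return request;
        }
    }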


@@ -0,0 +1,70 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.client.ml;
import org.elasticsearch.client.ml.filestructurefinder.FileStructure;
import org.elasticsearch.common.xcontent.ToXContentObject;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import java.io.IOException;
import java.util.Objects;
public class FindFileStructureResponse implements ToXContentObject {
private final FileStructure fileStructure;
FindFileStructureResponse(FileStructure fileStructure) {
this.fileStructure = Objects.requireNonNull(fileStructure);
}
public static FindFileStructureResponse fromXContent(XContentParser parser) throws IOException {
return new FindFileStructureResponse(FileStructure.PARSER.parse(parser, null).build());
}
public FileStructure getFileStructure() {
return fileStructure;
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
fileStructure.toXContent(builder, params);
return builder;
}
@Override
public int hashCode() {
return Objects.hash(fileStructure);
}
@Override
public boolean equals(Object other) {
if (this == other) {
return true;
}
if (other == null || getClass() != other.getClass()) {
return false;
}
FindFileStructureResponse that = (FindFileStructureResponse) other;
return Objects.equals(fileStructure, that.fileStructure);
}
}


@@ -0,0 +1,166 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.client.ml.filestructurefinder;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.xcontent.ConstructingObjectParser;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.ToXContentObject;
import org.elasticsearch.common.xcontent.XContentBuilder;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Objects;
public class FieldStats implements ToXContentObject {
public static final ParseField COUNT = new ParseField("count");
public static final ParseField CARDINALITY = new ParseField("cardinality");
public static final ParseField MIN_VALUE = new ParseField("min_value");
public static final ParseField MAX_VALUE = new ParseField("max_value");
public static final ParseField MEAN_VALUE = new ParseField("mean_value");
public static final ParseField MEDIAN_VALUE = new ParseField("median_value");
public static final ParseField TOP_HITS = new ParseField("top_hits");
@SuppressWarnings("unchecked")
public static final ConstructingObjectParser<FieldStats, Void> PARSER = new ConstructingObjectParser<>("field_stats", true,
a -> new FieldStats((long) a[0], (int) a[1], (Double) a[2], (Double) a[3], (Double) a[4], (Double) a[5],
(List<Map<String, Object>>) a[6]));
static {
PARSER.declareLong(ConstructingObjectParser.constructorArg(), COUNT);
PARSER.declareInt(ConstructingObjectParser.constructorArg(), CARDINALITY);
PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), MIN_VALUE);
PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), MAX_VALUE);
PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), MEAN_VALUE);
PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), MEDIAN_VALUE);
PARSER.declareObjectArray(ConstructingObjectParser.optionalConstructorArg(), (p, c) -> p.mapOrdered(), TOP_HITS);
}
private final long count;
private final int cardinality;
private final Double minValue;
private final Double maxValue;
private final Double meanValue;
private final Double medianValue;
private final List<Map<String, Object>> topHits;
FieldStats(long count, int cardinality, Double minValue, Double maxValue, Double meanValue, Double medianValue,
List<Map<String, Object>> topHits) {
this.count = count;
this.cardinality = cardinality;
this.minValue = minValue;
this.maxValue = maxValue;
this.meanValue = meanValue;
this.medianValue = medianValue;
this.topHits = (topHits == null) ? Collections.emptyList() : Collections.unmodifiableList(topHits);
}
public long getCount() {
return count;
}
public int getCardinality() {
return cardinality;
}
public Double getMinValue() {
return minValue;
}
public Double getMaxValue() {
return maxValue;
}
public Double getMeanValue() {
return meanValue;
}
public Double getMedianValue() {
return medianValue;
}
public List<Map<String, Object>> getTopHits() {
return topHits;
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params params) throws IOException {
builder.startObject();
builder.field(COUNT.getPreferredName(), count);
builder.field(CARDINALITY.getPreferredName(), cardinality);
if (minValue != null) {
builder.field(MIN_VALUE.getPreferredName(), toIntegerIfInteger(minValue));
}
if (maxValue != null) {
builder.field(MAX_VALUE.getPreferredName(), toIntegerIfInteger(maxValue));
}
if (meanValue != null) {
builder.field(MEAN_VALUE.getPreferredName(), toIntegerIfInteger(meanValue));
}
if (medianValue != null) {
builder.field(MEDIAN_VALUE.getPreferredName(), toIntegerIfInteger(medianValue));
}
if (topHits.isEmpty() == false) {
builder.field(TOP_HITS.getPreferredName(), topHits);
}
builder.endObject();
return builder;
}
static Number toIntegerIfInteger(double d) {
if (d >= Integer.MIN_VALUE && d <= Integer.MAX_VALUE && Double.compare(d, StrictMath.rint(d)) == 0) {
return (int) d;
}
return d;
}
@Override
public int hashCode() {
return Objects.hash(count, cardinality, minValue, maxValue, meanValue, medianValue, topHits);
}
@Override
public boolean equals(Object other) {
if (this == other) {
return true;
}
if (other == null || getClass() != other.getClass()) {
return false;
}
FieldStats that = (FieldStats) other;
return this.count == that.count &&
this.cardinality == that.cardinality &&
Objects.equals(this.minValue, that.minValue) &&
Objects.equals(this.maxValue, that.maxValue) &&
Objects.equals(this.meanValue, that.meanValue) &&
Objects.equals(this.medianValue, that.medianValue) &&
Objects.equals(this.topHits, that.topHits);
}
}


@@ -0,0 +1,516 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.client.ml.filestructurefinder;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.xcontent.ObjectParser;
import org.elasticsearch.common.xcontent.ToXContentObject;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.SortedMap;
import java.util.TreeMap;
/**
* Stores the file format determined by Machine Learning.
*/
public class FileStructure implements ToXContentObject {
public enum Format {
NDJSON, XML, DELIMITED, SEMI_STRUCTURED_TEXT;
public static Format fromString(String name) {
return valueOf(name.trim().toUpperCase(Locale.ROOT));
}
@Override
public String toString() {
return name().toLowerCase(Locale.ROOT);
}
}
public static final ParseField NUM_LINES_ANALYZED = new ParseField("num_lines_analyzed");
public static final ParseField NUM_MESSAGES_ANALYZED = new ParseField("num_messages_analyzed");
public static final ParseField SAMPLE_START = new ParseField("sample_start");
public static final ParseField CHARSET = new ParseField("charset");
public static final ParseField HAS_BYTE_ORDER_MARKER = new ParseField("has_byte_order_marker");
public static final ParseField FORMAT = new ParseField("format");
public static final ParseField MULTILINE_START_PATTERN = new ParseField("multiline_start_pattern");
public static final ParseField EXCLUDE_LINES_PATTERN = new ParseField("exclude_lines_pattern");
public static final ParseField COLUMN_NAMES = new ParseField("column_names");
public static final ParseField HAS_HEADER_ROW = new ParseField("has_header_row");
public static final ParseField DELIMITER = new ParseField("delimiter");
public static final ParseField QUOTE = new ParseField("quote");
public static final ParseField SHOULD_TRIM_FIELDS = new ParseField("should_trim_fields");
public static final ParseField GROK_PATTERN = new ParseField("grok_pattern");
public static final ParseField TIMESTAMP_FIELD = new ParseField("timestamp_field");
public static final ParseField JODA_TIMESTAMP_FORMATS = new ParseField("joda_timestamp_formats");
public static final ParseField JAVA_TIMESTAMP_FORMATS = new ParseField("java_timestamp_formats");
public static final ParseField NEED_CLIENT_TIMEZONE = new ParseField("need_client_timezone");
public static final ParseField MAPPINGS = new ParseField("mappings");
public static final ParseField INGEST_PIPELINE = new ParseField("ingest_pipeline");
public static final ParseField FIELD_STATS = new ParseField("field_stats");
public static final ParseField EXPLANATION = new ParseField("explanation");
public static final ObjectParser<Builder, Void> PARSER = new ObjectParser<>("file_structure", true, Builder::new);
static {
PARSER.declareInt(Builder::setNumLinesAnalyzed, NUM_LINES_ANALYZED);
PARSER.declareInt(Builder::setNumMessagesAnalyzed, NUM_MESSAGES_ANALYZED);
PARSER.declareString(Builder::setSampleStart, SAMPLE_START);
PARSER.declareString(Builder::setCharset, CHARSET);
PARSER.declareBoolean(Builder::setHasByteOrderMarker, HAS_BYTE_ORDER_MARKER);
PARSER.declareString((p, c) -> p.setFormat(Format.fromString(c)), FORMAT);
PARSER.declareString(Builder::setMultilineStartPattern, MULTILINE_START_PATTERN);
PARSER.declareString(Builder::setExcludeLinesPattern, EXCLUDE_LINES_PATTERN);
PARSER.declareStringArray(Builder::setColumnNames, COLUMN_NAMES);
PARSER.declareBoolean(Builder::setHasHeaderRow, HAS_HEADER_ROW);
PARSER.declareString((p, c) -> p.setDelimiter(c.charAt(0)), DELIMITER);
PARSER.declareString((p, c) -> p.setQuote(c.charAt(0)), QUOTE);
PARSER.declareBoolean(Builder::setShouldTrimFields, SHOULD_TRIM_FIELDS);
PARSER.declareString(Builder::setGrokPattern, GROK_PATTERN);
PARSER.declareString(Builder::setTimestampField, TIMESTAMP_FIELD);
PARSER.declareStringArray(Builder::setJodaTimestampFormats, JODA_TIMESTAMP_FORMATS);
PARSER.declareStringArray(Builder::setJavaTimestampFormats, JAVA_TIMESTAMP_FORMATS);
PARSER.declareBoolean(Builder::setNeedClientTimezone, NEED_CLIENT_TIMEZONE);
PARSER.declareObject(Builder::setMappings, (p, c) -> new TreeMap<>(p.map()), MAPPINGS);
PARSER.declareObject(Builder::setIngestPipeline, (p, c) -> p.mapOrdered(), INGEST_PIPELINE);
PARSER.declareObject(Builder::setFieldStats, (p, c) -> {
Map<String, FieldStats> fieldStats = new TreeMap<>();
while (p.nextToken() == XContentParser.Token.FIELD_NAME) {
fieldStats.put(p.currentName(), FieldStats.PARSER.apply(p, c));
}
return fieldStats;
}, FIELD_STATS);
PARSER.declareStringArray(Builder::setExplanation, EXPLANATION);
}
private final int numLinesAnalyzed;
private final int numMessagesAnalyzed;
private final String sampleStart;
private final String charset;
private final Boolean hasByteOrderMarker;
private final Format format;
private final String multilineStartPattern;
private final String excludeLinesPattern;
private final List<String> columnNames;
private final Boolean hasHeaderRow;
private final Character delimiter;
private final Character quote;
private final Boolean shouldTrimFields;
private final String grokPattern;
private final List<String> jodaTimestampFormats;
private final List<String> javaTimestampFormats;
private final String timestampField;
private final boolean needClientTimezone;
private final SortedMap<String, Object> mappings;
private final Map<String, Object> ingestPipeline;
private final SortedMap<String, FieldStats> fieldStats;
private final List<String> explanation;
private FileStructure(int numLinesAnalyzed, int numMessagesAnalyzed, String sampleStart, String charset, Boolean hasByteOrderMarker,
Format format, String multilineStartPattern, String excludeLinesPattern, List<String> columnNames,
Boolean hasHeaderRow, Character delimiter, Character quote, Boolean shouldTrimFields, String grokPattern,
String timestampField, List<String> jodaTimestampFormats, List<String> javaTimestampFormats,
boolean needClientTimezone, Map<String, Object> mappings, Map<String, Object> ingestPipeline,
Map<String, FieldStats> fieldStats, List<String> explanation) {
this.numLinesAnalyzed = numLinesAnalyzed;
this.numMessagesAnalyzed = numMessagesAnalyzed;
this.sampleStart = Objects.requireNonNull(sampleStart);
this.charset = Objects.requireNonNull(charset);
this.hasByteOrderMarker = hasByteOrderMarker;
this.format = Objects.requireNonNull(format);
this.multilineStartPattern = multilineStartPattern;
this.excludeLinesPattern = excludeLinesPattern;
this.columnNames = (columnNames == null) ? null : Collections.unmodifiableList(new ArrayList<>(columnNames));
this.hasHeaderRow = hasHeaderRow;
this.delimiter = delimiter;
this.quote = quote;
this.shouldTrimFields = shouldTrimFields;
this.grokPattern = grokPattern;
this.timestampField = timestampField;
this.jodaTimestampFormats =
(jodaTimestampFormats == null) ? null : Collections.unmodifiableList(new ArrayList<>(jodaTimestampFormats));
this.javaTimestampFormats =
(javaTimestampFormats == null) ? null : Collections.unmodifiableList(new ArrayList<>(javaTimestampFormats));
this.needClientTimezone = needClientTimezone;
this.mappings = Collections.unmodifiableSortedMap(new TreeMap<>(mappings));
this.ingestPipeline = (ingestPipeline == null) ? null : Collections.unmodifiableMap(new LinkedHashMap<>(ingestPipeline));
this.fieldStats = Collections.unmodifiableSortedMap(new TreeMap<>(fieldStats));
this.explanation = (explanation == null) ? null : Collections.unmodifiableList(new ArrayList<>(explanation));
}
public int getNumLinesAnalyzed() {
return numLinesAnalyzed;
}
public int getNumMessagesAnalyzed() {
return numMessagesAnalyzed;
}
public String getSampleStart() {
return sampleStart;
}
public String getCharset() {
return charset;
}
public Boolean getHasByteOrderMarker() {
return hasByteOrderMarker;
}
public Format getFormat() {
return format;
}
public String getMultilineStartPattern() {
return multilineStartPattern;
}
public String getExcludeLinesPattern() {
return excludeLinesPattern;
}
public List<String> getColumnNames() {
return columnNames;
}
public Boolean getHasHeaderRow() {
return hasHeaderRow;
}
public Character getDelimiter() {
return delimiter;
}
public Character getQuote() {
return quote;
}
public Boolean getShouldTrimFields() {
return shouldTrimFields;
}
public String getGrokPattern() {
return grokPattern;
}
public String getTimestampField() {
return timestampField;
}
public List<String> getJodaTimestampFormats() {
return jodaTimestampFormats;
}
public List<String> getJavaTimestampFormats() {
return javaTimestampFormats;
}
public boolean needClientTimezone() {
return needClientTimezone;
}
public SortedMap<String, Object> getMappings() {
return mappings;
}
public Map<String, Object> getIngestPipeline() {
return ingestPipeline;
}
public SortedMap<String, FieldStats> getFieldStats() {
return fieldStats;
}
public List<String> getExplanation() {
return explanation;
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
builder.field(NUM_LINES_ANALYZED.getPreferredName(), numLinesAnalyzed);
builder.field(NUM_MESSAGES_ANALYZED.getPreferredName(), numMessagesAnalyzed);
builder.field(SAMPLE_START.getPreferredName(), sampleStart);
builder.field(CHARSET.getPreferredName(), charset);
if (hasByteOrderMarker != null) {
builder.field(HAS_BYTE_ORDER_MARKER.getPreferredName(), hasByteOrderMarker.booleanValue());
}
builder.field(FORMAT.getPreferredName(), format);
if (multilineStartPattern != null && multilineStartPattern.isEmpty() == false) {
builder.field(MULTILINE_START_PATTERN.getPreferredName(), multilineStartPattern);
}
if (excludeLinesPattern != null && excludeLinesPattern.isEmpty() == false) {
builder.field(EXCLUDE_LINES_PATTERN.getPreferredName(), excludeLinesPattern);
}
if (columnNames != null && columnNames.isEmpty() == false) {
builder.field(COLUMN_NAMES.getPreferredName(), columnNames);
}
if (hasHeaderRow != null) {
builder.field(HAS_HEADER_ROW.getPreferredName(), hasHeaderRow.booleanValue());
}
if (delimiter != null) {
builder.field(DELIMITER.getPreferredName(), String.valueOf(delimiter));
}
if (quote != null) {
builder.field(QUOTE.getPreferredName(), String.valueOf(quote));
}
if (shouldTrimFields != null) {
builder.field(SHOULD_TRIM_FIELDS.getPreferredName(), shouldTrimFields.booleanValue());
}
if (grokPattern != null && grokPattern.isEmpty() == false) {
builder.field(GROK_PATTERN.getPreferredName(), grokPattern);
}
if (timestampField != null && timestampField.isEmpty() == false) {
builder.field(TIMESTAMP_FIELD.getPreferredName(), timestampField);
}
if (jodaTimestampFormats != null && jodaTimestampFormats.isEmpty() == false) {
builder.field(JODA_TIMESTAMP_FORMATS.getPreferredName(), jodaTimestampFormats);
}
if (javaTimestampFormats != null && javaTimestampFormats.isEmpty() == false) {
builder.field(JAVA_TIMESTAMP_FORMATS.getPreferredName(), javaTimestampFormats);
}
builder.field(NEED_CLIENT_TIMEZONE.getPreferredName(), needClientTimezone);
builder.field(MAPPINGS.getPreferredName(), mappings);
if (ingestPipeline != null) {
builder.field(INGEST_PIPELINE.getPreferredName(), ingestPipeline);
}
if (fieldStats.isEmpty() == false) {
builder.startObject(FIELD_STATS.getPreferredName());
for (Map.Entry<String, FieldStats> entry : fieldStats.entrySet()) {
builder.field(entry.getKey(), entry.getValue());
}
builder.endObject();
}
if (explanation != null && explanation.isEmpty() == false) {
builder.field(EXPLANATION.getPreferredName(), explanation);
}
builder.endObject();
return builder;
}
@Override
public int hashCode() {
return Objects.hash(numLinesAnalyzed, numMessagesAnalyzed, sampleStart, charset, hasByteOrderMarker, format,
multilineStartPattern, excludeLinesPattern, columnNames, hasHeaderRow, delimiter, quote, shouldTrimFields, grokPattern,
timestampField, jodaTimestampFormats, javaTimestampFormats, needClientTimezone, mappings, fieldStats, explanation);
}
@Override
public boolean equals(Object other) {
if (this == other) {
return true;
}
if (other == null || getClass() != other.getClass()) {
return false;
}
FileStructure that = (FileStructure) other;
return this.numLinesAnalyzed == that.numLinesAnalyzed &&
this.numMessagesAnalyzed == that.numMessagesAnalyzed &&
Objects.equals(this.sampleStart, that.sampleStart) &&
Objects.equals(this.charset, that.charset) &&
Objects.equals(this.hasByteOrderMarker, that.hasByteOrderMarker) &&
Objects.equals(this.format, that.format) &&
Objects.equals(this.multilineStartPattern, that.multilineStartPattern) &&
Objects.equals(this.excludeLinesPattern, that.excludeLinesPattern) &&
Objects.equals(this.columnNames, that.columnNames) &&
Objects.equals(this.hasHeaderRow, that.hasHeaderRow) &&
Objects.equals(this.delimiter, that.delimiter) &&
Objects.equals(this.quote, that.quote) &&
Objects.equals(this.shouldTrimFields, that.shouldTrimFields) &&
Objects.equals(this.grokPattern, that.grokPattern) &&
Objects.equals(this.timestampField, that.timestampField) &&
Objects.equals(this.jodaTimestampFormats, that.jodaTimestampFormats) &&
Objects.equals(this.javaTimestampFormats, that.javaTimestampFormats) &&
this.needClientTimezone == that.needClientTimezone &&
Objects.equals(this.mappings, that.mappings) &&
Objects.equals(this.fieldStats, that.fieldStats) &&
Objects.equals(this.explanation, that.explanation);
}
public static class Builder {
private int numLinesAnalyzed;
private int numMessagesAnalyzed;
private String sampleStart;
private String charset;
private Boolean hasByteOrderMarker;
private Format format;
private String multilineStartPattern;
private String excludeLinesPattern;
private List<String> columnNames;
private Boolean hasHeaderRow;
private Character delimiter;
private Character quote;
private Boolean shouldTrimFields;
private String grokPattern;
private String timestampField;
private List<String> jodaTimestampFormats;
private List<String> javaTimestampFormats;
private boolean needClientTimezone;
private Map<String, Object> mappings = Collections.emptyMap();
private Map<String, Object> ingestPipeline;
private Map<String, FieldStats> fieldStats = Collections.emptyMap();
private List<String> explanation;
Builder() {
this(Format.SEMI_STRUCTURED_TEXT);
}
Builder(Format format) {
setFormat(format);
}
Builder setNumLinesAnalyzed(int numLinesAnalyzed) {
this.numLinesAnalyzed = numLinesAnalyzed;
return this;
}
Builder setNumMessagesAnalyzed(int numMessagesAnalyzed) {
this.numMessagesAnalyzed = numMessagesAnalyzed;
return this;
}
Builder setSampleStart(String sampleStart) {
this.sampleStart = Objects.requireNonNull(sampleStart);
return this;
}
Builder setCharset(String charset) {
this.charset = Objects.requireNonNull(charset);
return this;
}
Builder setHasByteOrderMarker(Boolean hasByteOrderMarker) {
this.hasByteOrderMarker = hasByteOrderMarker;
return this;
}
Builder setFormat(Format format) {
this.format = Objects.requireNonNull(format);
return this;
}
Builder setMultilineStartPattern(String multilineStartPattern) {
this.multilineStartPattern = multilineStartPattern;
return this;
}
Builder setExcludeLinesPattern(String excludeLinesPattern) {
this.excludeLinesPattern = excludeLinesPattern;
return this;
}
Builder setColumnNames(List<String> columnNames) {
this.columnNames = columnNames;
return this;
}
Builder setHasHeaderRow(Boolean hasHeaderRow) {
this.hasHeaderRow = hasHeaderRow;
return this;
}
Builder setDelimiter(Character delimiter) {
this.delimiter = delimiter;
return this;
}
Builder setQuote(Character quote) {
this.quote = quote;
return this;
}
Builder setShouldTrimFields(Boolean shouldTrimFields) {
this.shouldTrimFields = shouldTrimFields;
return this;
}
Builder setGrokPattern(String grokPattern) {
this.grokPattern = grokPattern;
return this;
}
Builder setTimestampField(String timestampField) {
this.timestampField = timestampField;
return this;
}
Builder setJodaTimestampFormats(List<String> jodaTimestampFormats) {
this.jodaTimestampFormats = jodaTimestampFormats;
return this;
}
Builder setJavaTimestampFormats(List<String> javaTimestampFormats) {
this.javaTimestampFormats = javaTimestampFormats;
return this;
}
Builder setNeedClientTimezone(boolean needClientTimezone) {
this.needClientTimezone = needClientTimezone;
return this;
}
Builder setMappings(Map<String, Object> mappings) {
this.mappings = Objects.requireNonNull(mappings);
return this;
}
Builder setIngestPipeline(Map<String, Object> ingestPipeline) {
this.ingestPipeline = ingestPipeline;
return this;
}
Builder setFieldStats(Map<String, FieldStats> fieldStats) {
this.fieldStats = Objects.requireNonNull(fieldStats);
return this;
}
Builder setExplanation(List<String> explanation) {
this.explanation = explanation;
return this;
}
public FileStructure build() {
return new FileStructure(numLinesAnalyzed, numMessagesAnalyzed, sampleStart, charset, hasByteOrderMarker, format,
multilineStartPattern, excludeLinesPattern, columnNames, hasHeaderRow, delimiter, quote, shouldTrimFields, grokPattern,
timestampField, jodaTimestampFormats, javaTimestampFormats, needClientTimezone, mappings, ingestPipeline, fieldStats,
explanation);
}
}
}
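
Everything the builder produces is exposed through read-only getters, so a caller typically just pulls the pieces it needs off the parsed structure. A short sketch, assuming the response came from a findFileStructure call as above:

    import org.elasticsearch.client.ml.FindFileStructureResponse;
    import org.elasticsearch.client.ml.filestructurefinder.FieldStats;
    import org.elasticsearch.client.ml.filestructurefinder.FileStructure;

    import java.util.Map;

    public class FileStructureSummary {
        static void printSummary(FindFileStructureResponse response) {
            FileStructure structure = response.getFileStructure();
            System.out.println(structure.getNumMessagesAnalyzed() + " messages analysed as " + structure.getFormat());
            if (structure.getTimestampField() != null) {
                System.out.println("timestamp field " + structure.getTimestampField()
                    + ", formats " + structure.getJavaTimestampFormats()
                    + ", needs client timezone: " + structure.needClientTimezone());
            }
            // Per-field statistics gathered from the sample
            for (Map.Entry<String, FieldStats> entry : structure.getFieldStats().entrySet()) {
                FieldStats stats = entry.getValue();
                System.out.println(entry.getKey() + ": count=" + stats.getCount() + ", cardinality=" + stats.getCardinality());
            }
            // Suggested index mappings and, where applicable, an ingest pipeline to apply at index time
            System.out.println("mappings: " + structure.getMappings());
            if (structure.getIngestPipeline() != null) {
                System.out.println("ingest pipeline: " + structure.getIngestPipeline());
            }
        }
    }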


@@ -32,6 +32,8 @@ import org.elasticsearch.client.ml.DeleteFilterRequest;
import org.elasticsearch.client.ml.DeleteForecastRequest;
import org.elasticsearch.client.ml.DeleteJobRequest;
import org.elasticsearch.client.ml.DeleteModelSnapshotRequest;
import org.elasticsearch.client.ml.FindFileStructureRequest;
import org.elasticsearch.client.ml.FindFileStructureRequestTests;
import org.elasticsearch.client.ml.FlushJobRequest;
import org.elasticsearch.client.ml.ForecastJobRequest;
import org.elasticsearch.client.ml.GetBucketsRequest;
@@ -69,6 +71,7 @@ import org.elasticsearch.client.ml.calendars.ScheduledEvent;
import org.elasticsearch.client.ml.calendars.ScheduledEventTests;
import org.elasticsearch.client.ml.datafeed.DatafeedConfig;
import org.elasticsearch.client.ml.datafeed.DatafeedConfigTests;
import org.elasticsearch.client.ml.filestructurefinder.FileStructure;
import org.elasticsearch.client.ml.job.config.AnalysisConfig;
import org.elasticsearch.client.ml.job.config.Detector;
import org.elasticsearch.client.ml.job.config.Job;
@@ -87,6 +90,7 @@ import org.elasticsearch.test.ESTestCase;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
@@ -715,6 +719,85 @@ public class MLRequestConvertersTests extends ESTestCase {
assertNull(request.getEntity());
}
public void testFindFileStructure() throws Exception {
String sample = randomAlphaOfLength(randomIntBetween(1000, 2000));
FindFileStructureRequest findFileStructureRequest = FindFileStructureRequestTests.createTestRequestWithoutSample();
findFileStructureRequest.setSample(sample.getBytes(StandardCharsets.UTF_8));
Request request = MLRequestConverters.findFileStructure(findFileStructureRequest);
assertEquals(HttpPost.METHOD_NAME, request.getMethod());
assertEquals("/_xpack/ml/find_file_structure", request.getEndpoint());
if (findFileStructureRequest.getLinesToSample() != null) {
assertEquals(findFileStructureRequest.getLinesToSample(), Integer.valueOf(request.getParameters().get("lines_to_sample")));
} else {
assertNull(request.getParameters().get("lines_to_sample"));
}
if (findFileStructureRequest.getTimeout() != null) {
assertEquals(findFileStructureRequest.getTimeout().toString(), request.getParameters().get("timeout"));
} else {
assertNull(request.getParameters().get("timeout"));
}
if (findFileStructureRequest.getCharset() != null) {
assertEquals(findFileStructureRequest.getCharset(), request.getParameters().get("charset"));
} else {
assertNull(request.getParameters().get("charset"));
}
if (findFileStructureRequest.getFormat() != null) {
assertEquals(findFileStructureRequest.getFormat(), FileStructure.Format.fromString(request.getParameters().get("format")));
} else {
assertNull(request.getParameters().get("format"));
}
if (findFileStructureRequest.getColumnNames() != null) {
assertEquals(findFileStructureRequest.getColumnNames(),
Arrays.asList(Strings.splitStringByCommaToArray(request.getParameters().get("column_names"))));
} else {
assertNull(request.getParameters().get("column_names"));
}
if (findFileStructureRequest.getHasHeaderRow() != null) {
assertEquals(findFileStructureRequest.getHasHeaderRow(), Boolean.valueOf(request.getParameters().get("has_header_row")));
} else {
assertNull(request.getParameters().get("has_header_row"));
}
if (findFileStructureRequest.getDelimiter() != null) {
assertEquals(findFileStructureRequest.getDelimiter().toString(), request.getParameters().get("delimiter"));
} else {
assertNull(request.getParameters().get("delimiter"));
}
if (findFileStructureRequest.getQuote() != null) {
assertEquals(findFileStructureRequest.getQuote().toString(), request.getParameters().get("quote"));
} else {
assertNull(request.getParameters().get("quote"));
}
if (findFileStructureRequest.getShouldTrimFields() != null) {
assertEquals(findFileStructureRequest.getShouldTrimFields(),
Boolean.valueOf(request.getParameters().get("should_trim_fields")));
} else {
assertNull(request.getParameters().get("should_trim_fields"));
}
if (findFileStructureRequest.getGrokPattern() != null) {
assertEquals(findFileStructureRequest.getGrokPattern(), request.getParameters().get("grok_pattern"));
} else {
assertNull(request.getParameters().get("grok_pattern"));
}
if (findFileStructureRequest.getTimestampFormat() != null) {
assertEquals(findFileStructureRequest.getTimestampFormat(), request.getParameters().get("timestamp_format"));
} else {
assertNull(request.getParameters().get("timestamp_format"));
}
if (findFileStructureRequest.getTimestampField() != null) {
assertEquals(findFileStructureRequest.getTimestampField(), request.getParameters().get("timestamp_field"));
} else {
assertNull(request.getParameters().get("timestamp_field"));
}
if (findFileStructureRequest.getExplain() != null) {
assertEquals(findFileStructureRequest.getExplain(), Boolean.valueOf(request.getParameters().get("explain")));
} else {
assertNull(request.getParameters().get("explain"));
}
assertEquals(sample, requestEntityToString(request));
}
private static Job createValidJob(String jobId) {
AnalysisConfig.Builder analysisConfig = AnalysisConfig.builder(Collections.singletonList(
Detector.builder().setFunction("count").build()));


@@ -38,6 +38,8 @@ import org.elasticsearch.client.ml.DeleteForecastRequest;
import org.elasticsearch.client.ml.DeleteJobRequest;
import org.elasticsearch.client.ml.DeleteJobResponse;
import org.elasticsearch.client.ml.DeleteModelSnapshotRequest;
import org.elasticsearch.client.ml.FindFileStructureRequest;
import org.elasticsearch.client.ml.FindFileStructureResponse;
import org.elasticsearch.client.ml.FlushJobRequest;
import org.elasticsearch.client.ml.FlushJobResponse;
import org.elasticsearch.client.ml.ForecastJobRequest;
@@ -94,6 +96,7 @@ import org.elasticsearch.client.ml.datafeed.DatafeedConfig;
import org.elasticsearch.client.ml.datafeed.DatafeedState;
import org.elasticsearch.client.ml.datafeed.DatafeedStats;
import org.elasticsearch.client.ml.datafeed.DatafeedUpdate;
import org.elasticsearch.client.ml.filestructurefinder.FileStructure;
import org.elasticsearch.client.ml.job.config.AnalysisConfig;
import org.elasticsearch.client.ml.job.config.DataDescription;
import org.elasticsearch.client.ml.job.config.Detector;
@@ -110,11 +113,13 @@ import org.elasticsearch.rest.RestStatus;
import org.junit.After;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
@@ -1306,4 +1311,43 @@ public class MachineLearningIT extends ESRestHighLevelClientTestCase {
assertEquals(snapshotId, model.getSnapshotId());
}
}
public void testFindFileStructure() throws IOException {
String sample = "{\"logger\":\"controller\",\"timestamp\":1478261151445,\"level\":\"INFO\"," +
"\"pid\":42,\"thread\":\"0x7fff7d2a8000\",\"message\":\"message 1\",\"class\":\"ml\"," +
"\"method\":\"core::SomeNoiseMaker\",\"file\":\"Noisemaker.cc\",\"line\":333}\n" +
"{\"logger\":\"controller\",\"timestamp\":1478261151445," +
"\"level\":\"INFO\",\"pid\":42,\"thread\":\"0x7fff7d2a8000\",\"message\":\"message 2\",\"class\":\"ml\"," +
"\"method\":\"core::SomeNoiseMaker\",\"file\":\"Noisemaker.cc\",\"line\":333}\n";
MachineLearningClient machineLearningClient = highLevelClient().machineLearning();
FindFileStructureRequest request = new FindFileStructureRequest();
request.setSample(sample.getBytes(StandardCharsets.UTF_8));
FindFileStructureResponse response =
execute(request, machineLearningClient::findFileStructure, machineLearningClient::findFileStructureAsync);
FileStructure structure = response.getFileStructure();
assertEquals(2, structure.getNumLinesAnalyzed());
assertEquals(2, structure.getNumMessagesAnalyzed());
assertEquals(sample, structure.getSampleStart());
assertEquals(FileStructure.Format.NDJSON, structure.getFormat());
assertEquals(StandardCharsets.UTF_8.displayName(Locale.ROOT), structure.getCharset());
assertFalse(structure.getHasByteOrderMarker());
assertNull(structure.getMultilineStartPattern());
assertNull(structure.getExcludeLinesPattern());
assertNull(structure.getColumnNames());
assertNull(structure.getHasHeaderRow());
assertNull(structure.getDelimiter());
assertNull(structure.getQuote());
assertNull(structure.getShouldTrimFields());
assertNull(structure.getGrokPattern());
assertEquals(Collections.singletonList("UNIX_MS"), structure.getJavaTimestampFormats());
assertEquals(Collections.singletonList("UNIX_MS"), structure.getJodaTimestampFormats());
assertEquals("timestamp", structure.getTimestampField());
assertFalse(structure.needClientTimezone());
}
}


@@ -44,6 +44,8 @@ import org.elasticsearch.client.ml.DeleteForecastRequest;
import org.elasticsearch.client.ml.DeleteJobRequest;
import org.elasticsearch.client.ml.DeleteJobResponse;
import org.elasticsearch.client.ml.DeleteModelSnapshotRequest;
import org.elasticsearch.client.ml.FindFileStructureRequest;
import org.elasticsearch.client.ml.FindFileStructureResponse;
import org.elasticsearch.client.ml.FlushJobRequest;
import org.elasticsearch.client.ml.FlushJobResponse;
import org.elasticsearch.client.ml.ForecastJobRequest;
@@ -110,6 +112,7 @@ import org.elasticsearch.client.ml.datafeed.DatafeedConfig;
import org.elasticsearch.client.ml.datafeed.DatafeedStats;
import org.elasticsearch.client.ml.datafeed.DatafeedUpdate;
import org.elasticsearch.client.ml.datafeed.DelayedDataCheckConfig;
import org.elasticsearch.client.ml.filestructurefinder.FileStructure;
import org.elasticsearch.client.ml.job.config.AnalysisConfig;
import org.elasticsearch.client.ml.job.config.AnalysisLimits;
import org.elasticsearch.client.ml.job.config.DataDescription;
@@ -140,6 +143,9 @@ import org.elasticsearch.tasks.TaskId;
import org.junit.After;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
@@ -1730,6 +1736,68 @@ public class MlClientDocumentationIT extends ESRestHighLevelClientTestCase {
}
}
public void testFindFileStructure() throws Exception {
RestHighLevelClient client = highLevelClient();
Path anInterestingFile = createTempFile();
String contents = "{\"logger\":\"controller\",\"timestamp\":1478261151445,\"level\":\"INFO\"," +
"\"pid\":42,\"thread\":\"0x7fff7d2a8000\",\"message\":\"message 1\",\"class\":\"ml\"," +
"\"method\":\"core::SomeNoiseMaker\",\"file\":\"Noisemaker.cc\",\"line\":333}\n" +
"{\"logger\":\"controller\",\"timestamp\":1478261151445," +
"\"level\":\"INFO\",\"pid\":42,\"thread\":\"0x7fff7d2a8000\",\"message\":\"message 2\",\"class\":\"ml\"," +
"\"method\":\"core::SomeNoiseMaker\",\"file\":\"Noisemaker.cc\",\"line\":333}\n";
Files.write(anInterestingFile, Collections.singleton(contents), StandardCharsets.UTF_8);
{
// tag::find-file-structure-request
FindFileStructureRequest findFileStructureRequest = new FindFileStructureRequest(); // <1>
findFileStructureRequest.setSample(Files.readAllBytes(anInterestingFile)); // <2>
// end::find-file-structure-request
// tag::find-file-structure-request-options
findFileStructureRequest.setLinesToSample(500); // <1>
findFileStructureRequest.setExplain(true); // <2>
// end::find-file-structure-request-options
// tag::find-file-structure-execute
FindFileStructureResponse findFileStructureResponse =
client.machineLearning().findFileStructure(findFileStructureRequest, RequestOptions.DEFAULT);
// end::find-file-structure-execute
// tag::find-file-structure-response
FileStructure structure = findFileStructureResponse.getFileStructure(); // <1>
// end::find-file-structure-response
assertEquals(2, structure.getNumLinesAnalyzed());
}
{
// tag::find-file-structure-execute-listener
ActionListener<FindFileStructureResponse> listener = new ActionListener<FindFileStructureResponse>() {
@Override
public void onResponse(FindFileStructureResponse findFileStructureResponse) {
// <1>
}
@Override
public void onFailure(Exception e) {
// <2>
}
};
// end::find-file-structure-execute-listener
FindFileStructureRequest findFileStructureRequest = new FindFileStructureRequest();
findFileStructureRequest.setSample(Files.readAllBytes(anInterestingFile));
// Replace the empty listener by a blocking listener in test
final CountDownLatch latch = new CountDownLatch(1);
listener = new LatchedActionListener<>(listener, latch);
// tag::find-file-structure-execute-async
client.machineLearning().findFileStructureAsync(findFileStructureRequest, RequestOptions.DEFAULT, listener); // <1>
// end::find-file-structure-execute-async
assertTrue(latch.await(30L, TimeUnit.SECONDS));
}
}
public void testGetInfluencers() throws IOException, InterruptedException {
RestHighLevelClient client = highLevelClient();


@@ -0,0 +1,114 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.client.ml;
import org.elasticsearch.client.ml.filestructurefinder.FileStructure;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.ObjectParser;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.test.AbstractXContentTestCase;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.Arrays;
public class FindFileStructureRequestTests extends AbstractXContentTestCase<FindFileStructureRequest> {
private static final ObjectParser<FindFileStructureRequest, Void> PARSER =
new ObjectParser<>("find_file_structure_request", FindFileStructureRequest::new);
static {
PARSER.declareInt(FindFileStructureRequest::setLinesToSample, FindFileStructureRequest.LINES_TO_SAMPLE);
PARSER.declareString((p, c) -> p.setTimeout(TimeValue.parseTimeValue(c, FindFileStructureRequest.TIMEOUT.getPreferredName())),
FindFileStructureRequest.TIMEOUT);
PARSER.declareString(FindFileStructureRequest::setCharset, FindFileStructureRequest.CHARSET);
PARSER.declareString(FindFileStructureRequest::setFormat, FindFileStructureRequest.FORMAT);
PARSER.declareStringArray(FindFileStructureRequest::setColumnNames, FindFileStructureRequest.COLUMN_NAMES);
PARSER.declareBoolean(FindFileStructureRequest::setHasHeaderRow, FindFileStructureRequest.HAS_HEADER_ROW);
PARSER.declareString(FindFileStructureRequest::setDelimiter, FindFileStructureRequest.DELIMITER);
PARSER.declareString(FindFileStructureRequest::setQuote, FindFileStructureRequest.QUOTE);
PARSER.declareBoolean(FindFileStructureRequest::setShouldTrimFields, FindFileStructureRequest.SHOULD_TRIM_FIELDS);
PARSER.declareString(FindFileStructureRequest::setGrokPattern, FindFileStructureRequest.GROK_PATTERN);
PARSER.declareString(FindFileStructureRequest::setTimestampFormat, FindFileStructureRequest.TIMESTAMP_FORMAT);
PARSER.declareString(FindFileStructureRequest::setTimestampField, FindFileStructureRequest.TIMESTAMP_FIELD);
PARSER.declareBoolean(FindFileStructureRequest::setExplain, FindFileStructureRequest.EXPLAIN);
// Sample is not included in the X-Content representation
}
@Override
protected FindFileStructureRequest doParseInstance(XContentParser parser) throws IOException {
return PARSER.apply(parser, null);
}
@Override
protected boolean supportsUnknownFields() {
return false;
}
@Override
protected FindFileStructureRequest createTestInstance() {
return createTestRequestWithoutSample();
}
public static FindFileStructureRequest createTestRequestWithoutSample() {
FindFileStructureRequest findFileStructureRequest = new FindFileStructureRequest();
if (randomBoolean()) {
findFileStructureRequest.setLinesToSample(randomIntBetween(1000, 2000));
}
if (randomBoolean()) {
findFileStructureRequest.setTimeout(TimeValue.timeValueSeconds(randomIntBetween(10, 20)));
}
if (randomBoolean()) {
findFileStructureRequest.setCharset(Charset.defaultCharset().toString());
}
if (randomBoolean()) {
findFileStructureRequest.setFormat(randomFrom(FileStructure.Format.values()));
}
if (randomBoolean()) {
findFileStructureRequest.setColumnNames(Arrays.asList(generateRandomStringArray(10, 10, false, false)));
}
if (randomBoolean()) {
findFileStructureRequest.setHasHeaderRow(randomBoolean());
}
if (randomBoolean()) {
findFileStructureRequest.setDelimiter(randomAlphaOfLength(1));
}
if (randomBoolean()) {
findFileStructureRequest.setQuote(randomAlphaOfLength(1));
}
if (randomBoolean()) {
findFileStructureRequest.setShouldTrimFields(randomBoolean());
}
if (randomBoolean()) {
findFileStructureRequest.setGrokPattern(randomAlphaOfLength(100));
}
if (randomBoolean()) {
findFileStructureRequest.setTimestampFormat(randomAlphaOfLength(10));
}
if (randomBoolean()) {
findFileStructureRequest.setTimestampField(randomAlphaOfLength(10));
}
if (randomBoolean()) {
findFileStructureRequest.setExplain(randomBoolean());
}
return findFileStructureRequest;
}
}

View File

@ -0,0 +1,49 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.client.ml;
import org.elasticsearch.client.ml.filestructurefinder.FileStructureTests;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.test.AbstractXContentTestCase;
import java.io.IOException;
import java.util.function.Predicate;
public class FindFileStructureResponseTests extends AbstractXContentTestCase<FindFileStructureResponse> {
@Override
protected FindFileStructureResponse createTestInstance() {
return new FindFileStructureResponse(FileStructureTests.createTestFileStructure());
}
@Override
protected FindFileStructureResponse doParseInstance(XContentParser parser) throws IOException {
return FindFileStructureResponse.fromXContent(parser);
}
@Override
protected boolean supportsUnknownFields() {
return true;
}
@Override
protected Predicate<String> getRandomFieldsExcludeFilter() {
return field -> !field.isEmpty();
}
}

View File

@ -0,0 +1,88 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.client.ml.filestructurefinder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.test.AbstractXContentTestCase;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Predicate;
public class FieldStatsTests extends AbstractXContentTestCase<FieldStats> {
@Override
protected FieldStats createTestInstance() {
return createTestFieldStats();
}
static FieldStats createTestFieldStats() {
long count = randomIntBetween(1, 100000);
int cardinality = randomIntBetween(1, (int) count);
Double minValue = null;
Double maxValue = null;
Double meanValue = null;
Double medianValue = null;
boolean isMetric = randomBoolean();
if (isMetric) {
if (randomBoolean()) {
minValue = randomDouble();
maxValue = randomDouble();
} else {
minValue = (double) randomInt();
maxValue = (double) randomInt();
}
meanValue = randomDouble();
medianValue = randomDouble();
}
List<Map<String, Object>> topHits = new ArrayList<>();
for (int i = 0; i < Math.min(10, cardinality); ++i) {
Map<String, Object> topHit = new LinkedHashMap<>();
if (isMetric) {
topHit.put("value", randomBoolean() ? randomDouble() : (double) randomInt());
} else {
topHit.put("value", randomAlphaOfLength(20));
}
topHit.put("count", randomIntBetween(1, cardinality));
topHits.add(topHit);
}
return new FieldStats(count, cardinality, minValue, maxValue, meanValue, medianValue, topHits);
}
@Override
protected FieldStats doParseInstance(XContentParser parser) {
return FieldStats.PARSER.apply(parser, null);
}
@Override
protected boolean supportsUnknownFields() {
return true;
}
@Override
protected Predicate<String> getRandomFieldsExcludeFilter() {
return field -> field.contains(FieldStats.TOP_HITS.getPreferredName());
}
}

View File

@ -0,0 +1,127 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.client.ml.filestructurefinder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.test.AbstractXContentTestCase;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.Collections;
import java.util.EnumSet;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;
import java.util.function.Predicate;
public class FileStructureTests extends AbstractXContentTestCase<FileStructure> {
@Override
protected FileStructure createTestInstance() {
return createTestFileStructure();
}
public static FileStructure createTestFileStructure() {
FileStructure.Format format = randomFrom(EnumSet.allOf(FileStructure.Format.class));
FileStructure.Builder builder = new FileStructure.Builder(format);
int numLinesAnalyzed = randomIntBetween(2, 10000);
builder.setNumLinesAnalyzed(numLinesAnalyzed);
int numMessagesAnalyzed = randomIntBetween(1, numLinesAnalyzed);
builder.setNumMessagesAnalyzed(numMessagesAnalyzed);
builder.setSampleStart(randomAlphaOfLength(1000));
String charset = randomFrom(Charset.availableCharsets().keySet());
builder.setCharset(charset);
if (charset.toUpperCase(Locale.ROOT).startsWith("UTF")) {
builder.setHasByteOrderMarker(randomBoolean());
}
if (numMessagesAnalyzed < numLinesAnalyzed) {
builder.setMultilineStartPattern(randomAlphaOfLength(100));
}
if (randomBoolean()) {
builder.setExcludeLinesPattern(randomAlphaOfLength(100));
}
if (format == FileStructure.Format.DELIMITED) {
builder.setColumnNames(Arrays.asList(generateRandomStringArray(10, 10, false, false)));
builder.setHasHeaderRow(randomBoolean());
builder.setDelimiter(randomFrom(',', '\t', ';', '|'));
builder.setQuote(randomFrom('"', '\''));
}
if (format == FileStructure.Format.SEMI_STRUCTURED_TEXT) {
builder.setGrokPattern(randomAlphaOfLength(100));
}
if (format == FileStructure.Format.SEMI_STRUCTURED_TEXT || randomBoolean()) {
builder.setTimestampField(randomAlphaOfLength(10));
builder.setJodaTimestampFormats(Arrays.asList(generateRandomStringArray(3, 20, false, false)));
builder.setJavaTimestampFormats(Arrays.asList(generateRandomStringArray(3, 20, false, false)));
builder.setNeedClientTimezone(randomBoolean());
}
Map<String, Object> mappings = new TreeMap<>();
for (String field : generateRandomStringArray(5, 20, false, false)) {
mappings.put(field, Collections.singletonMap(randomAlphaOfLength(5), randomAlphaOfLength(10)));
}
builder.setMappings(mappings);
if (randomBoolean()) {
Map<String, Object> ingestPipeline = new LinkedHashMap<>();
for (String field : generateRandomStringArray(5, 20, false, false)) {
ingestPipeline.put(field, Collections.singletonMap(randomAlphaOfLength(5), randomAlphaOfLength(10)));
}
builder.setIngestPipeline(ingestPipeline);
}
if (randomBoolean()) {
Map<String, FieldStats> fieldStats = new TreeMap<>();
for (String field : generateRandomStringArray(5, 20, false, false)) {
fieldStats.put(field, FieldStatsTests.createTestFieldStats());
}
builder.setFieldStats(fieldStats);
}
builder.setExplanation(Arrays.asList(generateRandomStringArray(10, 150, false, false)));
return builder.build();
}
@Override
protected FileStructure doParseInstance(XContentParser parser) {
return FileStructure.PARSER.apply(parser, null).build();
}
@Override
protected boolean supportsUnknownFields() {
return true;
}
@Override
protected Predicate<String> getRandomFieldsExcludeFilter() {
// unknown fields are only guaranteed to be ignored at the top level - below this several data
// structures (e.g. mappings, ingest pipeline, field stats) will preserve arbitrary fields
return field -> !field.isEmpty();
}
}

View File

@ -0,0 +1,53 @@
--
:api: find-file-structure
:request: FindFileStructureRequest
:response: FindFileStructureResponse
--
[id="{upid}-{api}"]
=== Find File Structure API
The Find File Structure API can be used to find the structure of a text file,
together with other information that will be useful when importing its contents
into an {es} index. It accepts a +{request}+ object and responds with a
+{response}+ object.
[id="{upid}-{api}-request"]
==== Find File Structure Request
A sample from the beginning of the file (or the entire file contents if
it's small) must be added to the +{request}+ object using the
`FindFileStructureRequest#setSample` method.
["source","java",subs="attributes,callouts,macros"]
--------------------------------------------------
include-tagged::{doc-tests-file}[{api}-request]
--------------------------------------------------
<1> Create a new `FindFileStructureRequest` object
<2> Add the contents of `anInterestingFile` to the request
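
A minimal sketch of the equivalent calls, where `anInterestingFile` is a
`java.nio.file.Path` pointing at the file to analyse:

["source","java"]
--------------------------------------------------
// Sketch only: read the sample bytes from a Path and attach them to the request
FindFileStructureRequest findFileStructureRequest = new FindFileStructureRequest();
findFileStructureRequest.setSample(Files.readAllBytes(anInterestingFile));
--------------------------------------------------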
==== Optional Arguments
The following arguments are optional.
["source","java",subs="attributes,callouts,macros"]
--------------------------------------------------
include-tagged::{doc-tests-file}[{api}-request-options]
--------------------------------------------------
<1> Set the maximum number of lines to sample (the entire sample will be
used if it contains fewer lines)
<2> Request that an explanation of the analysis be returned in the response
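
As a rough sketch, the two optional arguments shown above amount to calls like
the following (the values `500` and `true` are purely illustrative):

["source","java"]
--------------------------------------------------
// Illustrative values only; any positive line count and either boolean are valid
findFileStructureRequest.setLinesToSample(500);
findFileStructureRequest.setExplain(true);
--------------------------------------------------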
include::../execution.asciidoc[]
[id="{upid}-{api}-response"]
==== Find File Structure Response
A +{response}+ contains information about the file structure,
as well as mappings and an ingest pipeline that could be used
to index the contents into {es}.
["source","java",subs="attributes,callouts,macros"]
--------------------------------------------------
include-tagged::{doc-tests-file}[{api}-response]
--------------------------------------------------
<1> The `FileStructure` object contains the structure information
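
As a minimal sketch, and assuming the response exposes the parsed structure
through accessors such as `getFileStructure()` (names shown here for
illustration), the detected structure and its suggested mappings can be read
back like this:

["source","java"]
--------------------------------------------------
// Sketch only: accessor names are assumed for illustration
FileStructure structure = findFileStructureResponse.getFileStructure();
String charset = structure.getCharset();                 // detected character set
Map<String, Object> mappings = structure.getMappings();  // suggested index mappings
--------------------------------------------------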