parent
813e053f62
commit
3c059ee057
|
@ -36,6 +36,7 @@ import org.elasticsearch.client.ml.DeleteFilterRequest;
|
|||
import org.elasticsearch.client.ml.DeleteForecastRequest;
|
||||
import org.elasticsearch.client.ml.DeleteJobRequest;
|
||||
import org.elasticsearch.client.ml.DeleteModelSnapshotRequest;
|
||||
import org.elasticsearch.client.ml.FindFileStructureRequest;
|
||||
import org.elasticsearch.client.ml.FlushJobRequest;
|
||||
import org.elasticsearch.client.ml.ForecastJobRequest;
|
||||
import org.elasticsearch.client.ml.GetBucketsRequest;
|
||||
|
@ -70,6 +71,7 @@ import org.elasticsearch.client.ml.UpdateModelSnapshotRequest;
|
|||
import org.elasticsearch.client.ml.job.util.PageParams;
|
||||
import org.elasticsearch.common.Strings;
|
||||
import org.elasticsearch.common.bytes.BytesReference;
|
||||
import org.elasticsearch.common.xcontent.XContentType;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
|
@ -648,4 +650,65 @@ final class MLRequestConverters {
|
|||
Request request = new Request(HttpDelete.METHOD_NAME, endpoint);
|
||||
return request;
|
||||
}
|
||||
|
||||
static Request findFileStructure(FindFileStructureRequest findFileStructureRequest) {
|
||||
String endpoint = new EndpointBuilder()
|
||||
.addPathPartAsIs("_xpack")
|
||||
.addPathPartAsIs("ml")
|
||||
.addPathPartAsIs("find_file_structure")
|
||||
.build();
|
||||
Request request = new Request(HttpPost.METHOD_NAME, endpoint);
|
||||
|
||||
RequestConverters.Params params = new RequestConverters.Params(request);
|
||||
if (findFileStructureRequest.getLinesToSample() != null) {
|
||||
params.putParam(FindFileStructureRequest.LINES_TO_SAMPLE.getPreferredName(),
|
||||
findFileStructureRequest.getLinesToSample().toString());
|
||||
}
|
||||
if (findFileStructureRequest.getTimeout() != null) {
|
||||
params.putParam(FindFileStructureRequest.TIMEOUT.getPreferredName(), findFileStructureRequest.getTimeout().toString());
|
||||
}
|
||||
if (findFileStructureRequest.getCharset() != null) {
|
||||
params.putParam(FindFileStructureRequest.CHARSET.getPreferredName(), findFileStructureRequest.getCharset());
|
||||
}
|
||||
if (findFileStructureRequest.getFormat() != null) {
|
||||
params.putParam(FindFileStructureRequest.FORMAT.getPreferredName(), findFileStructureRequest.getFormat().toString());
|
||||
}
|
||||
if (findFileStructureRequest.getColumnNames() != null) {
|
||||
params.putParam(FindFileStructureRequest.COLUMN_NAMES.getPreferredName(),
|
||||
Strings.collectionToCommaDelimitedString(findFileStructureRequest.getColumnNames()));
|
||||
}
|
||||
if (findFileStructureRequest.getHasHeaderRow() != null) {
|
||||
params.putParam(FindFileStructureRequest.HAS_HEADER_ROW.getPreferredName(),
|
||||
findFileStructureRequest.getHasHeaderRow().toString());
|
||||
}
|
||||
if (findFileStructureRequest.getDelimiter() != null) {
|
||||
params.putParam(FindFileStructureRequest.DELIMITER.getPreferredName(),
|
||||
findFileStructureRequest.getDelimiter().toString());
|
||||
}
|
||||
if (findFileStructureRequest.getQuote() != null) {
|
||||
params.putParam(FindFileStructureRequest.QUOTE.getPreferredName(), findFileStructureRequest.getQuote().toString());
|
||||
}
|
||||
if (findFileStructureRequest.getShouldTrimFields() != null) {
|
||||
params.putParam(FindFileStructureRequest.SHOULD_TRIM_FIELDS.getPreferredName(),
|
||||
findFileStructureRequest.getShouldTrimFields().toString());
|
||||
}
|
||||
if (findFileStructureRequest.getGrokPattern() != null) {
|
||||
params.putParam(FindFileStructureRequest.GROK_PATTERN.getPreferredName(), findFileStructureRequest.getGrokPattern());
|
||||
}
|
||||
if (findFileStructureRequest.getTimestampFormat() != null) {
|
||||
params.putParam(FindFileStructureRequest.TIMESTAMP_FORMAT.getPreferredName(), findFileStructureRequest.getTimestampFormat());
|
||||
}
|
||||
if (findFileStructureRequest.getTimestampField() != null) {
|
||||
params.putParam(FindFileStructureRequest.TIMESTAMP_FIELD.getPreferredName(), findFileStructureRequest.getTimestampField());
|
||||
}
|
||||
if (findFileStructureRequest.getExplain() != null) {
|
||||
params.putParam(FindFileStructureRequest.EXPLAIN.getPreferredName(), findFileStructureRequest.getExplain().toString());
|
||||
}
|
||||
|
||||
BytesReference sample = findFileStructureRequest.getSample();
|
||||
BytesRef source = sample.toBytesRef();
|
||||
HttpEntity byteEntity = new ByteArrayEntity(source.bytes, source.offset, source.length, createContentType(XContentType.JSON));
|
||||
request.setEntity(byteEntity);
|
||||
return request;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -31,6 +31,8 @@ import org.elasticsearch.client.ml.DeleteForecastRequest;
|
|||
import org.elasticsearch.client.ml.DeleteJobRequest;
|
||||
import org.elasticsearch.client.ml.DeleteJobResponse;
|
||||
import org.elasticsearch.client.ml.DeleteModelSnapshotRequest;
|
||||
import org.elasticsearch.client.ml.FindFileStructureRequest;
|
||||
import org.elasticsearch.client.ml.FindFileStructureResponse;
|
||||
import org.elasticsearch.client.ml.FlushJobRequest;
|
||||
import org.elasticsearch.client.ml.FlushJobResponse;
|
||||
import org.elasticsearch.client.ml.ForecastJobRequest;
|
||||
|
@ -1711,4 +1713,45 @@ public final class MachineLearningClient {
|
|||
listener,
|
||||
Collections.emptySet());
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the structure of a file
|
||||
* <p>
|
||||
* For additional info
|
||||
* see <a href="http://www.elastic.co/guide/en/elasticsearch/reference/current/ml-find-file-structure.html">
|
||||
* ML Find File Structure documentation</a>
|
||||
*
|
||||
* @param request The find file structure request
|
||||
* @param options Additional request options (e.g. headers), use {@link RequestOptions#DEFAULT} if nothing needs to be customized
|
||||
* @return the response containing details of the file structure
|
||||
* @throws IOException when there is a serialization issue sending the request or receiving the response
|
||||
*/
|
||||
public FindFileStructureResponse findFileStructure(FindFileStructureRequest request, RequestOptions options) throws IOException {
|
||||
return restHighLevelClient.performRequestAndParseEntity(request,
|
||||
MLRequestConverters::findFileStructure,
|
||||
options,
|
||||
FindFileStructureResponse::fromXContent,
|
||||
Collections.emptySet());
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the structure of a file asynchronously and notifies the listener on completion
|
||||
* <p>
|
||||
* For additional info
|
||||
* see <a href="http://www.elastic.co/guide/en/elasticsearch/reference/current/ml-find-file-structure.html">
|
||||
* ML Find File Structure documentation</a>
|
||||
*
|
||||
* @param request The find file structure request
|
||||
* @param options Additional request options (e.g. headers), use {@link RequestOptions#DEFAULT} if nothing needs to be customized
|
||||
* @param listener Listener to be notified upon request completion
|
||||
*/
|
||||
public void findFileStructureAsync(FindFileStructureRequest request, RequestOptions options,
|
||||
ActionListener<FindFileStructureResponse> listener) {
|
||||
restHighLevelClient.performRequestAsyncAndParseEntity(request,
|
||||
MLRequestConverters::findFileStructure,
|
||||
options,
|
||||
FindFileStructureResponse::fromXContent,
|
||||
listener,
|
||||
Collections.emptySet());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,302 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package org.elasticsearch.client.ml;
|
||||
|
||||
import org.elasticsearch.client.Validatable;
|
||||
import org.elasticsearch.client.ValidationException;
|
||||
import org.elasticsearch.client.ml.filestructurefinder.FileStructure;
|
||||
import org.elasticsearch.common.ParseField;
|
||||
import org.elasticsearch.common.bytes.BytesArray;
|
||||
import org.elasticsearch.common.bytes.BytesReference;
|
||||
import org.elasticsearch.common.unit.TimeValue;
|
||||
import org.elasticsearch.common.xcontent.ToXContent;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
|
||||
public class FindFileStructureRequest implements Validatable, ToXContent {
|
||||
|
||||
public static final ParseField LINES_TO_SAMPLE = new ParseField("lines_to_sample");
|
||||
public static final ParseField TIMEOUT = new ParseField("timeout");
|
||||
public static final ParseField CHARSET = FileStructure.CHARSET;
|
||||
public static final ParseField FORMAT = FileStructure.FORMAT;
|
||||
public static final ParseField COLUMN_NAMES = FileStructure.COLUMN_NAMES;
|
||||
public static final ParseField HAS_HEADER_ROW = FileStructure.HAS_HEADER_ROW;
|
||||
public static final ParseField DELIMITER = FileStructure.DELIMITER;
|
||||
public static final ParseField QUOTE = FileStructure.QUOTE;
|
||||
public static final ParseField SHOULD_TRIM_FIELDS = FileStructure.SHOULD_TRIM_FIELDS;
|
||||
public static final ParseField GROK_PATTERN = FileStructure.GROK_PATTERN;
|
||||
// This one is plural in FileStructure, but singular in FileStructureOverrides
|
||||
public static final ParseField TIMESTAMP_FORMAT = new ParseField("timestamp_format");
|
||||
public static final ParseField TIMESTAMP_FIELD = FileStructure.TIMESTAMP_FIELD;
|
||||
public static final ParseField EXPLAIN = new ParseField("explain");
|
||||
|
||||
private Integer linesToSample;
|
||||
private TimeValue timeout;
|
||||
private String charset;
|
||||
private FileStructure.Format format;
|
||||
private List<String> columnNames;
|
||||
private Boolean hasHeaderRow;
|
||||
private Character delimiter;
|
||||
private Character quote;
|
||||
private Boolean shouldTrimFields;
|
||||
private String grokPattern;
|
||||
private String timestampFormat;
|
||||
private String timestampField;
|
||||
private Boolean explain;
|
||||
private BytesReference sample;
|
||||
|
||||
public FindFileStructureRequest() {
|
||||
}
|
||||
|
||||
public Integer getLinesToSample() {
|
||||
return linesToSample;
|
||||
}
|
||||
|
||||
public void setLinesToSample(Integer linesToSample) {
|
||||
this.linesToSample = linesToSample;
|
||||
}
|
||||
|
||||
public TimeValue getTimeout() {
|
||||
return timeout;
|
||||
}
|
||||
|
||||
public void setTimeout(TimeValue timeout) {
|
||||
this.timeout = timeout;
|
||||
}
|
||||
|
||||
public String getCharset() {
|
||||
return charset;
|
||||
}
|
||||
|
||||
public void setCharset(String charset) {
|
||||
this.charset = (charset == null || charset.isEmpty()) ? null : charset;
|
||||
}
|
||||
|
||||
public FileStructure.Format getFormat() {
|
||||
return format;
|
||||
}
|
||||
|
||||
public void setFormat(FileStructure.Format format) {
|
||||
this.format = format;
|
||||
}
|
||||
|
||||
public void setFormat(String format) {
|
||||
this.format = (format == null || format.isEmpty()) ? null : FileStructure.Format.fromString(format);
|
||||
}
|
||||
|
||||
public List<String> getColumnNames() {
|
||||
return columnNames;
|
||||
}
|
||||
|
||||
public void setColumnNames(List<String> columnNames) {
|
||||
this.columnNames = (columnNames == null || columnNames.isEmpty()) ? null : columnNames;
|
||||
}
|
||||
|
||||
public void setColumnNames(String[] columnNames) {
|
||||
this.columnNames = (columnNames == null || columnNames.length == 0) ? null : Arrays.asList(columnNames);
|
||||
}
|
||||
|
||||
public Boolean getHasHeaderRow() {
|
||||
return hasHeaderRow;
|
||||
}
|
||||
|
||||
public void setHasHeaderRow(Boolean hasHeaderRow) {
|
||||
this.hasHeaderRow = hasHeaderRow;
|
||||
}
|
||||
|
||||
public Character getDelimiter() {
|
||||
return delimiter;
|
||||
}
|
||||
|
||||
public void setDelimiter(Character delimiter) {
|
||||
this.delimiter = delimiter;
|
||||
}
|
||||
|
||||
public void setDelimiter(String delimiter) {
|
||||
if (delimiter == null || delimiter.isEmpty()) {
|
||||
this.delimiter = null;
|
||||
} else if (delimiter.length() == 1) {
|
||||
this.delimiter = delimiter.charAt(0);
|
||||
} else {
|
||||
throw new IllegalArgumentException(DELIMITER.getPreferredName() + " must be a single character");
|
||||
}
|
||||
}
|
||||
|
||||
public Character getQuote() {
|
||||
return quote;
|
||||
}
|
||||
|
||||
public void setQuote(Character quote) {
|
||||
this.quote = quote;
|
||||
}
|
||||
|
||||
public void setQuote(String quote) {
|
||||
if (quote == null || quote.isEmpty()) {
|
||||
this.quote = null;
|
||||
} else if (quote.length() == 1) {
|
||||
this.quote = quote.charAt(0);
|
||||
} else {
|
||||
throw new IllegalArgumentException(QUOTE.getPreferredName() + " must be a single character");
|
||||
}
|
||||
}
|
||||
|
||||
public Boolean getShouldTrimFields() {
|
||||
return shouldTrimFields;
|
||||
}
|
||||
|
||||
public void setShouldTrimFields(Boolean shouldTrimFields) {
|
||||
this.shouldTrimFields = shouldTrimFields;
|
||||
}
|
||||
|
||||
public String getGrokPattern() {
|
||||
return grokPattern;
|
||||
}
|
||||
|
||||
public void setGrokPattern(String grokPattern) {
|
||||
this.grokPattern = (grokPattern == null || grokPattern.isEmpty()) ? null : grokPattern;
|
||||
}
|
||||
|
||||
public String getTimestampFormat() {
|
||||
return timestampFormat;
|
||||
}
|
||||
|
||||
public void setTimestampFormat(String timestampFormat) {
|
||||
this.timestampFormat = (timestampFormat == null || timestampFormat.isEmpty()) ? null : timestampFormat;
|
||||
}
|
||||
|
||||
public String getTimestampField() {
|
||||
return timestampField;
|
||||
}
|
||||
|
||||
public void setTimestampField(String timestampField) {
|
||||
this.timestampField = (timestampField == null || timestampField.isEmpty()) ? null : timestampField;
|
||||
}
|
||||
|
||||
public Boolean getExplain() {
|
||||
return explain;
|
||||
}
|
||||
|
||||
public void setExplain(Boolean explain) {
|
||||
this.explain = explain;
|
||||
}
|
||||
|
||||
public BytesReference getSample() {
|
||||
return sample;
|
||||
}
|
||||
|
||||
public void setSample(byte[] sample) {
|
||||
this.sample = new BytesArray(sample);
|
||||
}
|
||||
|
||||
public void setSample(BytesReference sample) {
|
||||
this.sample = Objects.requireNonNull(sample);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Optional<ValidationException> validate() {
|
||||
ValidationException validationException = new ValidationException();
|
||||
if (sample == null || sample.length() == 0) {
|
||||
validationException.addValidationError("sample must be specified");
|
||||
}
|
||||
return validationException.validationErrors().isEmpty() ? Optional.empty() : Optional.of(validationException);
|
||||
}
|
||||
|
||||
@Override
|
||||
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
|
||||
|
||||
if (linesToSample != null) {
|
||||
builder.field(LINES_TO_SAMPLE.getPreferredName(), linesToSample);
|
||||
}
|
||||
if (timeout != null) {
|
||||
builder.field(TIMEOUT.getPreferredName(), timeout);
|
||||
}
|
||||
if (charset != null) {
|
||||
builder.field(CHARSET.getPreferredName(), charset);
|
||||
}
|
||||
if (format != null) {
|
||||
builder.field(FORMAT.getPreferredName(), format);
|
||||
}
|
||||
if (columnNames != null) {
|
||||
builder.field(COLUMN_NAMES.getPreferredName(), columnNames);
|
||||
}
|
||||
if (hasHeaderRow != null) {
|
||||
builder.field(HAS_HEADER_ROW.getPreferredName(), hasHeaderRow);
|
||||
}
|
||||
if (delimiter != null) {
|
||||
builder.field(DELIMITER.getPreferredName(), delimiter.toString());
|
||||
}
|
||||
if (quote != null) {
|
||||
builder.field(QUOTE.getPreferredName(), quote.toString());
|
||||
}
|
||||
if (shouldTrimFields != null) {
|
||||
builder.field(SHOULD_TRIM_FIELDS.getPreferredName(), shouldTrimFields);
|
||||
}
|
||||
if (grokPattern != null) {
|
||||
builder.field(GROK_PATTERN.getPreferredName(), grokPattern);
|
||||
}
|
||||
if (timestampFormat != null) {
|
||||
builder.field(TIMESTAMP_FORMAT.getPreferredName(), timestampFormat);
|
||||
}
|
||||
if (timestampField != null) {
|
||||
builder.field(TIMESTAMP_FIELD.getPreferredName(), timestampField);
|
||||
}
|
||||
if (explain != null) {
|
||||
builder.field(EXPLAIN.getPreferredName(), explain);
|
||||
}
|
||||
// Sample is not included in the X-Content representation
|
||||
return builder;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(linesToSample, timeout, charset, format, columnNames, hasHeaderRow, delimiter, grokPattern, timestampFormat,
|
||||
timestampField, explain, sample);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
|
||||
if (this == other) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (other == null || getClass() != other.getClass()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
FindFileStructureRequest that = (FindFileStructureRequest) other;
|
||||
return Objects.equals(this.linesToSample, that.linesToSample) &&
|
||||
Objects.equals(this.timeout, that.timeout) &&
|
||||
Objects.equals(this.charset, that.charset) &&
|
||||
Objects.equals(this.format, that.format) &&
|
||||
Objects.equals(this.columnNames, that.columnNames) &&
|
||||
Objects.equals(this.hasHeaderRow, that.hasHeaderRow) &&
|
||||
Objects.equals(this.delimiter, that.delimiter) &&
|
||||
Objects.equals(this.grokPattern, that.grokPattern) &&
|
||||
Objects.equals(this.timestampFormat, that.timestampFormat) &&
|
||||
Objects.equals(this.timestampField, that.timestampField) &&
|
||||
Objects.equals(this.explain, that.explain) &&
|
||||
Objects.equals(this.sample, that.sample);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,70 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package org.elasticsearch.client.ml;
|
||||
|
||||
import org.elasticsearch.client.ml.filestructurefinder.FileStructure;
|
||||
import org.elasticsearch.common.xcontent.ToXContentObject;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.common.xcontent.XContentParser;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Objects;
|
||||
|
||||
public class FindFileStructureResponse implements ToXContentObject {
|
||||
|
||||
private final FileStructure fileStructure;
|
||||
|
||||
FindFileStructureResponse(FileStructure fileStructure) {
|
||||
this.fileStructure = Objects.requireNonNull(fileStructure);
|
||||
}
|
||||
|
||||
public static FindFileStructureResponse fromXContent(XContentParser parser) throws IOException {
|
||||
return new FindFileStructureResponse(FileStructure.PARSER.parse(parser, null).build());
|
||||
}
|
||||
|
||||
public FileStructure getFileStructure() {
|
||||
return fileStructure;
|
||||
}
|
||||
|
||||
@Override
|
||||
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
|
||||
fileStructure.toXContent(builder, params);
|
||||
return builder;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(fileStructure);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
|
||||
if (this == other) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (other == null || getClass() != other.getClass()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
FindFileStructureResponse that = (FindFileStructureResponse) other;
|
||||
return Objects.equals(fileStructure, that.fileStructure);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,166 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package org.elasticsearch.client.ml.filestructurefinder;
|
||||
|
||||
import org.elasticsearch.common.ParseField;
|
||||
import org.elasticsearch.common.xcontent.ConstructingObjectParser;
|
||||
import org.elasticsearch.common.xcontent.ToXContent;
|
||||
import org.elasticsearch.common.xcontent.ToXContentObject;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
|
||||
public class FieldStats implements ToXContentObject {
|
||||
|
||||
public static final ParseField COUNT = new ParseField("count");
|
||||
public static final ParseField CARDINALITY = new ParseField("cardinality");
|
||||
public static final ParseField MIN_VALUE = new ParseField("min_value");
|
||||
public static final ParseField MAX_VALUE = new ParseField("max_value");
|
||||
public static final ParseField MEAN_VALUE = new ParseField("mean_value");
|
||||
public static final ParseField MEDIAN_VALUE = new ParseField("median_value");
|
||||
public static final ParseField TOP_HITS = new ParseField("top_hits");
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
public static final ConstructingObjectParser<FieldStats, Void> PARSER = new ConstructingObjectParser<>("field_stats", true,
|
||||
a -> new FieldStats((long) a[0], (int) a[1], (Double) a[2], (Double) a[3], (Double) a[4], (Double) a[5],
|
||||
(List<Map<String, Object>>) a[6]));
|
||||
|
||||
static {
|
||||
PARSER.declareLong(ConstructingObjectParser.constructorArg(), COUNT);
|
||||
PARSER.declareInt(ConstructingObjectParser.constructorArg(), CARDINALITY);
|
||||
PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), MIN_VALUE);
|
||||
PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), MAX_VALUE);
|
||||
PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), MEAN_VALUE);
|
||||
PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), MEDIAN_VALUE);
|
||||
PARSER.declareObjectArray(ConstructingObjectParser.optionalConstructorArg(), (p, c) -> p.mapOrdered(), TOP_HITS);
|
||||
}
|
||||
|
||||
private final long count;
|
||||
private final int cardinality;
|
||||
private final Double minValue;
|
||||
private final Double maxValue;
|
||||
private final Double meanValue;
|
||||
private final Double medianValue;
|
||||
private final List<Map<String, Object>> topHits;
|
||||
|
||||
FieldStats(long count, int cardinality, Double minValue, Double maxValue, Double meanValue, Double medianValue,
|
||||
List<Map<String, Object>> topHits) {
|
||||
this.count = count;
|
||||
this.cardinality = cardinality;
|
||||
this.minValue = minValue;
|
||||
this.maxValue = maxValue;
|
||||
this.meanValue = meanValue;
|
||||
this.medianValue = medianValue;
|
||||
this.topHits = (topHits == null) ? Collections.emptyList() : Collections.unmodifiableList(topHits);
|
||||
}
|
||||
|
||||
public long getCount() {
|
||||
return count;
|
||||
}
|
||||
|
||||
public int getCardinality() {
|
||||
return cardinality;
|
||||
}
|
||||
|
||||
public Double getMinValue() {
|
||||
return minValue;
|
||||
}
|
||||
|
||||
public Double getMaxValue() {
|
||||
return maxValue;
|
||||
}
|
||||
|
||||
public Double getMeanValue() {
|
||||
return meanValue;
|
||||
}
|
||||
|
||||
public Double getMedianValue() {
|
||||
return medianValue;
|
||||
}
|
||||
|
||||
public List<Map<String, Object>> getTopHits() {
|
||||
return topHits;
|
||||
}
|
||||
|
||||
@Override
|
||||
public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params params) throws IOException {
|
||||
|
||||
builder.startObject();
|
||||
builder.field(COUNT.getPreferredName(), count);
|
||||
builder.field(CARDINALITY.getPreferredName(), cardinality);
|
||||
if (minValue != null) {
|
||||
builder.field(MIN_VALUE.getPreferredName(), toIntegerIfInteger(minValue));
|
||||
}
|
||||
if (maxValue != null) {
|
||||
builder.field(MAX_VALUE.getPreferredName(), toIntegerIfInteger(maxValue));
|
||||
}
|
||||
if (meanValue != null) {
|
||||
builder.field(MEAN_VALUE.getPreferredName(), toIntegerIfInteger(meanValue));
|
||||
}
|
||||
if (medianValue != null) {
|
||||
builder.field(MEDIAN_VALUE.getPreferredName(), toIntegerIfInteger(medianValue));
|
||||
}
|
||||
if (topHits.isEmpty() == false) {
|
||||
builder.field(TOP_HITS.getPreferredName(), topHits);
|
||||
}
|
||||
builder.endObject();
|
||||
|
||||
return builder;
|
||||
}
|
||||
|
||||
static Number toIntegerIfInteger(double d) {
|
||||
|
||||
if (d >= Integer.MIN_VALUE && d <= Integer.MAX_VALUE && Double.compare(d, StrictMath.rint(d)) == 0) {
|
||||
return (int) d;
|
||||
}
|
||||
|
||||
return d;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
|
||||
return Objects.hash(count, cardinality, minValue, maxValue, meanValue, medianValue, topHits);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
|
||||
if (this == other) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (other == null || getClass() != other.getClass()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
FieldStats that = (FieldStats) other;
|
||||
return this.count == that.count &&
|
||||
this.cardinality == that.cardinality &&
|
||||
Objects.equals(this.minValue, that.minValue) &&
|
||||
Objects.equals(this.maxValue, that.maxValue) &&
|
||||
Objects.equals(this.meanValue, that.meanValue) &&
|
||||
Objects.equals(this.medianValue, that.medianValue) &&
|
||||
Objects.equals(this.topHits, that.topHits);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,516 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package org.elasticsearch.client.ml.filestructurefinder;
|
||||
|
||||
import org.elasticsearch.common.ParseField;
|
||||
import org.elasticsearch.common.xcontent.ObjectParser;
|
||||
import org.elasticsearch.common.xcontent.ToXContentObject;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.common.xcontent.XContentParser;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.SortedMap;
|
||||
import java.util.TreeMap;
|
||||
|
||||
/**
|
||||
* Stores the file format determined by Machine Learning.
|
||||
*/
|
||||
public class FileStructure implements ToXContentObject {
|
||||
|
||||
/**
 * The file formats the structure finder can recognize. Serialized over the
 * wire as the lower-case enum name.
 */
public enum Format {

    NDJSON,
    XML,
    DELIMITED,
    SEMI_STRUCTURED_TEXT;

    /**
     * Looks up a format by name, ignoring case and surrounding whitespace.
     * @throws IllegalArgumentException if the name matches no format
     */
    public static Format fromString(String name) {
        String normalized = name.trim().toUpperCase(Locale.ROOT);
        return valueOf(normalized);
    }

    @Override
    public String toString() {
        return name().toLowerCase(Locale.ROOT);
    }
}
|
||||
|
||||
public static final ParseField NUM_LINES_ANALYZED = new ParseField("num_lines_analyzed");
|
||||
public static final ParseField NUM_MESSAGES_ANALYZED = new ParseField("num_messages_analyzed");
|
||||
public static final ParseField SAMPLE_START = new ParseField("sample_start");
|
||||
public static final ParseField CHARSET = new ParseField("charset");
|
||||
public static final ParseField HAS_BYTE_ORDER_MARKER = new ParseField("has_byte_order_marker");
|
||||
public static final ParseField FORMAT = new ParseField("format");
|
||||
public static final ParseField MULTILINE_START_PATTERN = new ParseField("multiline_start_pattern");
|
||||
public static final ParseField EXCLUDE_LINES_PATTERN = new ParseField("exclude_lines_pattern");
|
||||
public static final ParseField COLUMN_NAMES = new ParseField("column_names");
|
||||
public static final ParseField HAS_HEADER_ROW = new ParseField("has_header_row");
|
||||
public static final ParseField DELIMITER = new ParseField("delimiter");
|
||||
public static final ParseField QUOTE = new ParseField("quote");
|
||||
public static final ParseField SHOULD_TRIM_FIELDS = new ParseField("should_trim_fields");
|
||||
public static final ParseField GROK_PATTERN = new ParseField("grok_pattern");
|
||||
public static final ParseField TIMESTAMP_FIELD = new ParseField("timestamp_field");
|
||||
public static final ParseField JODA_TIMESTAMP_FORMATS = new ParseField("joda_timestamp_formats");
|
||||
public static final ParseField JAVA_TIMESTAMP_FORMATS = new ParseField("java_timestamp_formats");
|
||||
public static final ParseField NEED_CLIENT_TIMEZONE = new ParseField("need_client_timezone");
|
||||
public static final ParseField MAPPINGS = new ParseField("mappings");
|
||||
public static final ParseField INGEST_PIPELINE = new ParseField("ingest_pipeline");
|
||||
public static final ParseField FIELD_STATS = new ParseField("field_stats");
|
||||
public static final ParseField EXPLANATION = new ParseField("explanation");
|
||||
|
||||
public static final ObjectParser<Builder, Void> PARSER = new ObjectParser<>("file_structure", true, Builder::new);
|
||||
|
||||
static {
|
||||
PARSER.declareInt(Builder::setNumLinesAnalyzed, NUM_LINES_ANALYZED);
|
||||
PARSER.declareInt(Builder::setNumMessagesAnalyzed, NUM_MESSAGES_ANALYZED);
|
||||
PARSER.declareString(Builder::setSampleStart, SAMPLE_START);
|
||||
PARSER.declareString(Builder::setCharset, CHARSET);
|
||||
PARSER.declareBoolean(Builder::setHasByteOrderMarker, HAS_BYTE_ORDER_MARKER);
|
||||
PARSER.declareString((p, c) -> p.setFormat(Format.fromString(c)), FORMAT);
|
||||
PARSER.declareString(Builder::setMultilineStartPattern, MULTILINE_START_PATTERN);
|
||||
PARSER.declareString(Builder::setExcludeLinesPattern, EXCLUDE_LINES_PATTERN);
|
||||
PARSER.declareStringArray(Builder::setColumnNames, COLUMN_NAMES);
|
||||
PARSER.declareBoolean(Builder::setHasHeaderRow, HAS_HEADER_ROW);
|
||||
PARSER.declareString((p, c) -> p.setDelimiter(c.charAt(0)), DELIMITER);
|
||||
PARSER.declareString((p, c) -> p.setQuote(c.charAt(0)), QUOTE);
|
||||
PARSER.declareBoolean(Builder::setShouldTrimFields, SHOULD_TRIM_FIELDS);
|
||||
PARSER.declareString(Builder::setGrokPattern, GROK_PATTERN);
|
||||
PARSER.declareString(Builder::setTimestampField, TIMESTAMP_FIELD);
|
||||
PARSER.declareStringArray(Builder::setJodaTimestampFormats, JODA_TIMESTAMP_FORMATS);
|
||||
PARSER.declareStringArray(Builder::setJavaTimestampFormats, JAVA_TIMESTAMP_FORMATS);
|
||||
PARSER.declareBoolean(Builder::setNeedClientTimezone, NEED_CLIENT_TIMEZONE);
|
||||
PARSER.declareObject(Builder::setMappings, (p, c) -> new TreeMap<>(p.map()), MAPPINGS);
|
||||
PARSER.declareObject(Builder::setIngestPipeline, (p, c) -> p.mapOrdered(), INGEST_PIPELINE);
|
||||
PARSER.declareObject(Builder::setFieldStats, (p, c) -> {
|
||||
Map<String, FieldStats> fieldStats = new TreeMap<>();
|
||||
while (p.nextToken() == XContentParser.Token.FIELD_NAME) {
|
||||
fieldStats.put(p.currentName(), FieldStats.PARSER.apply(p, c));
|
||||
}
|
||||
return fieldStats;
|
||||
}, FIELD_STATS);
|
||||
PARSER.declareStringArray(Builder::setExplanation, EXPLANATION);
|
||||
}
|
||||
|
||||
private final int numLinesAnalyzed;
|
||||
private final int numMessagesAnalyzed;
|
||||
private final String sampleStart;
|
||||
private final String charset;
|
||||
private final Boolean hasByteOrderMarker;
|
||||
private final Format format;
|
||||
private final String multilineStartPattern;
|
||||
private final String excludeLinesPattern;
|
||||
private final List<String> columnNames;
|
||||
private final Boolean hasHeaderRow;
|
||||
private final Character delimiter;
|
||||
private final Character quote;
|
||||
private final Boolean shouldTrimFields;
|
||||
private final String grokPattern;
|
||||
private final List<String> jodaTimestampFormats;
|
||||
private final List<String> javaTimestampFormats;
|
||||
private final String timestampField;
|
||||
private final boolean needClientTimezone;
|
||||
private final SortedMap<String, Object> mappings;
|
||||
private final Map<String, Object> ingestPipeline;
|
||||
private final SortedMap<String, FieldStats> fieldStats;
|
||||
private final List<String> explanation;
|
||||
|
||||
private FileStructure(int numLinesAnalyzed, int numMessagesAnalyzed, String sampleStart, String charset, Boolean hasByteOrderMarker,
|
||||
Format format, String multilineStartPattern, String excludeLinesPattern, List<String> columnNames,
|
||||
Boolean hasHeaderRow, Character delimiter, Character quote, Boolean shouldTrimFields, String grokPattern,
|
||||
String timestampField, List<String> jodaTimestampFormats, List<String> javaTimestampFormats,
|
||||
boolean needClientTimezone, Map<String, Object> mappings, Map<String, Object> ingestPipeline,
|
||||
Map<String, FieldStats> fieldStats, List<String> explanation) {
|
||||
|
||||
this.numLinesAnalyzed = numLinesAnalyzed;
|
||||
this.numMessagesAnalyzed = numMessagesAnalyzed;
|
||||
this.sampleStart = Objects.requireNonNull(sampleStart);
|
||||
this.charset = Objects.requireNonNull(charset);
|
||||
this.hasByteOrderMarker = hasByteOrderMarker;
|
||||
this.format = Objects.requireNonNull(format);
|
||||
this.multilineStartPattern = multilineStartPattern;
|
||||
this.excludeLinesPattern = excludeLinesPattern;
|
||||
this.columnNames = (columnNames == null) ? null : Collections.unmodifiableList(new ArrayList<>(columnNames));
|
||||
this.hasHeaderRow = hasHeaderRow;
|
||||
this.delimiter = delimiter;
|
||||
this.quote = quote;
|
||||
this.shouldTrimFields = shouldTrimFields;
|
||||
this.grokPattern = grokPattern;
|
||||
this.timestampField = timestampField;
|
||||
this.jodaTimestampFormats =
|
||||
(jodaTimestampFormats == null) ? null : Collections.unmodifiableList(new ArrayList<>(jodaTimestampFormats));
|
||||
this.javaTimestampFormats =
|
||||
(javaTimestampFormats == null) ? null : Collections.unmodifiableList(new ArrayList<>(javaTimestampFormats));
|
||||
this.needClientTimezone = needClientTimezone;
|
||||
this.mappings = Collections.unmodifiableSortedMap(new TreeMap<>(mappings));
|
||||
this.ingestPipeline = (ingestPipeline == null) ? null : Collections.unmodifiableMap(new LinkedHashMap<>(ingestPipeline));
|
||||
this.fieldStats = Collections.unmodifiableSortedMap(new TreeMap<>(fieldStats));
|
||||
this.explanation = (explanation == null) ? null : Collections.unmodifiableList(new ArrayList<>(explanation));
|
||||
}
|
||||
|
||||
public int getNumLinesAnalyzed() {
|
||||
return numLinesAnalyzed;
|
||||
}
|
||||
|
||||
public int getNumMessagesAnalyzed() {
|
||||
return numMessagesAnalyzed;
|
||||
}
|
||||
|
||||
public String getSampleStart() {
|
||||
return sampleStart;
|
||||
}
|
||||
|
||||
public String getCharset() {
|
||||
return charset;
|
||||
}
|
||||
|
||||
public Boolean getHasByteOrderMarker() {
|
||||
return hasByteOrderMarker;
|
||||
}
|
||||
|
||||
public Format getFormat() {
|
||||
return format;
|
||||
}
|
||||
|
||||
public String getMultilineStartPattern() {
|
||||
return multilineStartPattern;
|
||||
}
|
||||
|
||||
public String getExcludeLinesPattern() {
|
||||
return excludeLinesPattern;
|
||||
}
|
||||
|
||||
public List<String> getColumnNames() {
|
||||
return columnNames;
|
||||
}
|
||||
|
||||
public Boolean getHasHeaderRow() {
|
||||
return hasHeaderRow;
|
||||
}
|
||||
|
||||
public Character getDelimiter() {
|
||||
return delimiter;
|
||||
}
|
||||
|
||||
public Character getQuote() {
|
||||
return quote;
|
||||
}
|
||||
|
||||
public Boolean getShouldTrimFields() {
|
||||
return shouldTrimFields;
|
||||
}
|
||||
|
||||
public String getGrokPattern() {
|
||||
return grokPattern;
|
||||
}
|
||||
|
||||
public String getTimestampField() {
|
||||
return timestampField;
|
||||
}
|
||||
|
||||
public List<String> getJodaTimestampFormats() {
|
||||
return jodaTimestampFormats;
|
||||
}
|
||||
|
||||
public List<String> getJavaTimestampFormats() {
|
||||
return javaTimestampFormats;
|
||||
}
|
||||
|
||||
public boolean needClientTimezone() {
|
||||
return needClientTimezone;
|
||||
}
|
||||
|
||||
public SortedMap<String, Object> getMappings() {
|
||||
return mappings;
|
||||
}
|
||||
|
||||
public Map<String, Object> getIngestPipeline() {
|
||||
return ingestPipeline;
|
||||
}
|
||||
|
||||
public SortedMap<String, FieldStats> getFieldStats() {
|
||||
return fieldStats;
|
||||
}
|
||||
|
||||
public List<String> getExplanation() {
|
||||
return explanation;
|
||||
}
|
||||
|
||||
@Override
|
||||
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
|
||||
|
||||
builder.startObject();
|
||||
builder.field(NUM_LINES_ANALYZED.getPreferredName(), numLinesAnalyzed);
|
||||
builder.field(NUM_MESSAGES_ANALYZED.getPreferredName(), numMessagesAnalyzed);
|
||||
builder.field(SAMPLE_START.getPreferredName(), sampleStart);
|
||||
builder.field(CHARSET.getPreferredName(), charset);
|
||||
if (hasByteOrderMarker != null) {
|
||||
builder.field(HAS_BYTE_ORDER_MARKER.getPreferredName(), hasByteOrderMarker.booleanValue());
|
||||
}
|
||||
builder.field(FORMAT.getPreferredName(), format);
|
||||
if (multilineStartPattern != null && multilineStartPattern.isEmpty() == false) {
|
||||
builder.field(MULTILINE_START_PATTERN.getPreferredName(), multilineStartPattern);
|
||||
}
|
||||
if (excludeLinesPattern != null && excludeLinesPattern.isEmpty() == false) {
|
||||
builder.field(EXCLUDE_LINES_PATTERN.getPreferredName(), excludeLinesPattern);
|
||||
}
|
||||
if (columnNames != null && columnNames.isEmpty() == false) {
|
||||
builder.field(COLUMN_NAMES.getPreferredName(), columnNames);
|
||||
}
|
||||
if (hasHeaderRow != null) {
|
||||
builder.field(HAS_HEADER_ROW.getPreferredName(), hasHeaderRow.booleanValue());
|
||||
}
|
||||
if (delimiter != null) {
|
||||
builder.field(DELIMITER.getPreferredName(), String.valueOf(delimiter));
|
||||
}
|
||||
if (quote != null) {
|
||||
builder.field(QUOTE.getPreferredName(), String.valueOf(quote));
|
||||
}
|
||||
if (shouldTrimFields != null) {
|
||||
builder.field(SHOULD_TRIM_FIELDS.getPreferredName(), shouldTrimFields.booleanValue());
|
||||
}
|
||||
if (grokPattern != null && grokPattern.isEmpty() == false) {
|
||||
builder.field(GROK_PATTERN.getPreferredName(), grokPattern);
|
||||
}
|
||||
if (timestampField != null && timestampField.isEmpty() == false) {
|
||||
builder.field(TIMESTAMP_FIELD.getPreferredName(), timestampField);
|
||||
}
|
||||
if (jodaTimestampFormats != null && jodaTimestampFormats.isEmpty() == false) {
|
||||
builder.field(JODA_TIMESTAMP_FORMATS.getPreferredName(), jodaTimestampFormats);
|
||||
}
|
||||
if (javaTimestampFormats != null && javaTimestampFormats.isEmpty() == false) {
|
||||
builder.field(JAVA_TIMESTAMP_FORMATS.getPreferredName(), javaTimestampFormats);
|
||||
}
|
||||
builder.field(NEED_CLIENT_TIMEZONE.getPreferredName(), needClientTimezone);
|
||||
builder.field(MAPPINGS.getPreferredName(), mappings);
|
||||
if (ingestPipeline != null) {
|
||||
builder.field(INGEST_PIPELINE.getPreferredName(), ingestPipeline);
|
||||
}
|
||||
if (fieldStats.isEmpty() == false) {
|
||||
builder.startObject(FIELD_STATS.getPreferredName());
|
||||
for (Map.Entry<String, FieldStats> entry : fieldStats.entrySet()) {
|
||||
builder.field(entry.getKey(), entry.getValue());
|
||||
}
|
||||
builder.endObject();
|
||||
}
|
||||
if (explanation != null && explanation.isEmpty() == false) {
|
||||
builder.field(EXPLANATION.getPreferredName(), explanation);
|
||||
}
|
||||
builder.endObject();
|
||||
|
||||
return builder;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
|
||||
return Objects.hash(numLinesAnalyzed, numMessagesAnalyzed, sampleStart, charset, hasByteOrderMarker, format,
|
||||
multilineStartPattern, excludeLinesPattern, columnNames, hasHeaderRow, delimiter, quote, shouldTrimFields, grokPattern,
|
||||
timestampField, jodaTimestampFormats, javaTimestampFormats, needClientTimezone, mappings, fieldStats, explanation);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
|
||||
if (this == other) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (other == null || getClass() != other.getClass()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
FileStructure that = (FileStructure) other;
|
||||
return this.numLinesAnalyzed == that.numLinesAnalyzed &&
|
||||
this.numMessagesAnalyzed == that.numMessagesAnalyzed &&
|
||||
Objects.equals(this.sampleStart, that.sampleStart) &&
|
||||
Objects.equals(this.charset, that.charset) &&
|
||||
Objects.equals(this.hasByteOrderMarker, that.hasByteOrderMarker) &&
|
||||
Objects.equals(this.format, that.format) &&
|
||||
Objects.equals(this.multilineStartPattern, that.multilineStartPattern) &&
|
||||
Objects.equals(this.excludeLinesPattern, that.excludeLinesPattern) &&
|
||||
Objects.equals(this.columnNames, that.columnNames) &&
|
||||
Objects.equals(this.hasHeaderRow, that.hasHeaderRow) &&
|
||||
Objects.equals(this.delimiter, that.delimiter) &&
|
||||
Objects.equals(this.quote, that.quote) &&
|
||||
Objects.equals(this.shouldTrimFields, that.shouldTrimFields) &&
|
||||
Objects.equals(this.grokPattern, that.grokPattern) &&
|
||||
Objects.equals(this.timestampField, that.timestampField) &&
|
||||
Objects.equals(this.jodaTimestampFormats, that.jodaTimestampFormats) &&
|
||||
Objects.equals(this.javaTimestampFormats, that.javaTimestampFormats) &&
|
||||
this.needClientTimezone == that.needClientTimezone &&
|
||||
Objects.equals(this.mappings, that.mappings) &&
|
||||
Objects.equals(this.fieldStats, that.fieldStats) &&
|
||||
Objects.equals(this.explanation, that.explanation);
|
||||
}
|
||||
|
||||
public static class Builder {
|
||||
|
||||
private int numLinesAnalyzed;
|
||||
private int numMessagesAnalyzed;
|
||||
private String sampleStart;
|
||||
private String charset;
|
||||
private Boolean hasByteOrderMarker;
|
||||
private Format format;
|
||||
private String multilineStartPattern;
|
||||
private String excludeLinesPattern;
|
||||
private List<String> columnNames;
|
||||
private Boolean hasHeaderRow;
|
||||
private Character delimiter;
|
||||
private Character quote;
|
||||
private Boolean shouldTrimFields;
|
||||
private String grokPattern;
|
||||
private String timestampField;
|
||||
private List<String> jodaTimestampFormats;
|
||||
private List<String> javaTimestampFormats;
|
||||
private boolean needClientTimezone;
|
||||
private Map<String, Object> mappings = Collections.emptyMap();
|
||||
private Map<String, Object> ingestPipeline;
|
||||
private Map<String, FieldStats> fieldStats = Collections.emptyMap();
|
||||
private List<String> explanation;
|
||||
|
||||
Builder() {
|
||||
this(Format.SEMI_STRUCTURED_TEXT);
|
||||
}
|
||||
|
||||
Builder(Format format) {
|
||||
setFormat(format);
|
||||
}
|
||||
|
||||
Builder setNumLinesAnalyzed(int numLinesAnalyzed) {
|
||||
this.numLinesAnalyzed = numLinesAnalyzed;
|
||||
return this;
|
||||
}
|
||||
|
||||
Builder setNumMessagesAnalyzed(int numMessagesAnalyzed) {
|
||||
this.numMessagesAnalyzed = numMessagesAnalyzed;
|
||||
return this;
|
||||
}
|
||||
|
||||
Builder setSampleStart(String sampleStart) {
|
||||
this.sampleStart = Objects.requireNonNull(sampleStart);
|
||||
return this;
|
||||
}
|
||||
|
||||
Builder setCharset(String charset) {
|
||||
this.charset = Objects.requireNonNull(charset);
|
||||
return this;
|
||||
}
|
||||
|
||||
Builder setHasByteOrderMarker(Boolean hasByteOrderMarker) {
|
||||
this.hasByteOrderMarker = hasByteOrderMarker;
|
||||
return this;
|
||||
}
|
||||
|
||||
Builder setFormat(Format format) {
|
||||
this.format = Objects.requireNonNull(format);
|
||||
return this;
|
||||
}
|
||||
|
||||
Builder setMultilineStartPattern(String multilineStartPattern) {
|
||||
this.multilineStartPattern = multilineStartPattern;
|
||||
return this;
|
||||
}
|
||||
|
||||
Builder setExcludeLinesPattern(String excludeLinesPattern) {
|
||||
this.excludeLinesPattern = excludeLinesPattern;
|
||||
return this;
|
||||
}
|
||||
|
||||
Builder setColumnNames(List<String> columnNames) {
|
||||
this.columnNames = columnNames;
|
||||
return this;
|
||||
}
|
||||
|
||||
Builder setHasHeaderRow(Boolean hasHeaderRow) {
|
||||
this.hasHeaderRow = hasHeaderRow;
|
||||
return this;
|
||||
}
|
||||
|
||||
Builder setDelimiter(Character delimiter) {
|
||||
this.delimiter = delimiter;
|
||||
return this;
|
||||
}
|
||||
|
||||
Builder setQuote(Character quote) {
|
||||
this.quote = quote;
|
||||
return this;
|
||||
}
|
||||
|
||||
Builder setShouldTrimFields(Boolean shouldTrimFields) {
|
||||
this.shouldTrimFields = shouldTrimFields;
|
||||
return this;
|
||||
}
|
||||
|
||||
Builder setGrokPattern(String grokPattern) {
|
||||
this.grokPattern = grokPattern;
|
||||
return this;
|
||||
}
|
||||
|
||||
Builder setTimestampField(String timestampField) {
|
||||
this.timestampField = timestampField;
|
||||
return this;
|
||||
}
|
||||
|
||||
Builder setJodaTimestampFormats(List<String> jodaTimestampFormats) {
|
||||
this.jodaTimestampFormats = jodaTimestampFormats;
|
||||
return this;
|
||||
}
|
||||
|
||||
Builder setJavaTimestampFormats(List<String> javaTimestampFormats) {
|
||||
this.javaTimestampFormats = javaTimestampFormats;
|
||||
return this;
|
||||
}
|
||||
|
||||
Builder setNeedClientTimezone(boolean needClientTimezone) {
|
||||
this.needClientTimezone = needClientTimezone;
|
||||
return this;
|
||||
}
|
||||
|
||||
Builder setMappings(Map<String, Object> mappings) {
|
||||
this.mappings = Objects.requireNonNull(mappings);
|
||||
return this;
|
||||
}
|
||||
|
||||
Builder setIngestPipeline(Map<String, Object> ingestPipeline) {
|
||||
this.ingestPipeline = ingestPipeline;
|
||||
return this;
|
||||
}
|
||||
|
||||
Builder setFieldStats(Map<String, FieldStats> fieldStats) {
|
||||
this.fieldStats = Objects.requireNonNull(fieldStats);
|
||||
return this;
|
||||
}
|
||||
|
||||
Builder setExplanation(List<String> explanation) {
|
||||
this.explanation = explanation;
|
||||
return this;
|
||||
}
|
||||
|
||||
public FileStructure build() {
|
||||
|
||||
return new FileStructure(numLinesAnalyzed, numMessagesAnalyzed, sampleStart, charset, hasByteOrderMarker, format,
|
||||
multilineStartPattern, excludeLinesPattern, columnNames, hasHeaderRow, delimiter, quote, shouldTrimFields, grokPattern,
|
||||
timestampField, jodaTimestampFormats, javaTimestampFormats, needClientTimezone, mappings, ingestPipeline, fieldStats,
|
||||
explanation);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -32,6 +32,8 @@ import org.elasticsearch.client.ml.DeleteFilterRequest;
|
|||
import org.elasticsearch.client.ml.DeleteForecastRequest;
|
||||
import org.elasticsearch.client.ml.DeleteJobRequest;
|
||||
import org.elasticsearch.client.ml.DeleteModelSnapshotRequest;
|
||||
import org.elasticsearch.client.ml.FindFileStructureRequest;
|
||||
import org.elasticsearch.client.ml.FindFileStructureRequestTests;
|
||||
import org.elasticsearch.client.ml.FlushJobRequest;
|
||||
import org.elasticsearch.client.ml.ForecastJobRequest;
|
||||
import org.elasticsearch.client.ml.GetBucketsRequest;
|
||||
|
@ -69,6 +71,7 @@ import org.elasticsearch.client.ml.calendars.ScheduledEvent;
|
|||
import org.elasticsearch.client.ml.calendars.ScheduledEventTests;
|
||||
import org.elasticsearch.client.ml.datafeed.DatafeedConfig;
|
||||
import org.elasticsearch.client.ml.datafeed.DatafeedConfigTests;
|
||||
import org.elasticsearch.client.ml.filestructurefinder.FileStructure;
|
||||
import org.elasticsearch.client.ml.job.config.AnalysisConfig;
|
||||
import org.elasticsearch.client.ml.job.config.Detector;
|
||||
import org.elasticsearch.client.ml.job.config.Job;
|
||||
|
@ -87,6 +90,7 @@ import org.elasticsearch.test.ESTestCase;
|
|||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
|
@ -715,6 +719,85 @@ public class MLRequestConvertersTests extends ESTestCase {
|
|||
assertNull(request.getEntity());
|
||||
}
|
||||
|
||||
public void testFindFileStructure() throws Exception {
|
||||
|
||||
String sample = randomAlphaOfLength(randomIntBetween(1000, 2000));
|
||||
FindFileStructureRequest findFileStructureRequest = FindFileStructureRequestTests.createTestRequestWithoutSample();
|
||||
findFileStructureRequest.setSample(sample.getBytes(StandardCharsets.UTF_8));
|
||||
Request request = MLRequestConverters.findFileStructure(findFileStructureRequest);
|
||||
|
||||
assertEquals(HttpPost.METHOD_NAME, request.getMethod());
|
||||
assertEquals("/_xpack/ml/find_file_structure", request.getEndpoint());
|
||||
if (findFileStructureRequest.getLinesToSample() != null) {
|
||||
assertEquals(findFileStructureRequest.getLinesToSample(), Integer.valueOf(request.getParameters().get("lines_to_sample")));
|
||||
} else {
|
||||
assertNull(request.getParameters().get("lines_to_sample"));
|
||||
}
|
||||
if (findFileStructureRequest.getTimeout() != null) {
|
||||
assertEquals(findFileStructureRequest.getTimeout().toString(), request.getParameters().get("timeout"));
|
||||
} else {
|
||||
assertNull(request.getParameters().get("timeout"));
|
||||
}
|
||||
if (findFileStructureRequest.getCharset() != null) {
|
||||
assertEquals(findFileStructureRequest.getCharset(), request.getParameters().get("charset"));
|
||||
} else {
|
||||
assertNull(request.getParameters().get("charset"));
|
||||
}
|
||||
if (findFileStructureRequest.getFormat() != null) {
|
||||
assertEquals(findFileStructureRequest.getFormat(), FileStructure.Format.fromString(request.getParameters().get("format")));
|
||||
} else {
|
||||
assertNull(request.getParameters().get("format"));
|
||||
}
|
||||
if (findFileStructureRequest.getColumnNames() != null) {
|
||||
assertEquals(findFileStructureRequest.getColumnNames(),
|
||||
Arrays.asList(Strings.splitStringByCommaToArray(request.getParameters().get("column_names"))));
|
||||
} else {
|
||||
assertNull(request.getParameters().get("column_names"));
|
||||
}
|
||||
if (findFileStructureRequest.getHasHeaderRow() != null) {
|
||||
assertEquals(findFileStructureRequest.getHasHeaderRow(), Boolean.valueOf(request.getParameters().get("has_header_row")));
|
||||
} else {
|
||||
assertNull(request.getParameters().get("has_header_row"));
|
||||
}
|
||||
if (findFileStructureRequest.getDelimiter() != null) {
|
||||
assertEquals(findFileStructureRequest.getDelimiter().toString(), request.getParameters().get("delimiter"));
|
||||
} else {
|
||||
assertNull(request.getParameters().get("delimiter"));
|
||||
}
|
||||
if (findFileStructureRequest.getQuote() != null) {
|
||||
assertEquals(findFileStructureRequest.getQuote().toString(), request.getParameters().get("quote"));
|
||||
} else {
|
||||
assertNull(request.getParameters().get("quote"));
|
||||
}
|
||||
if (findFileStructureRequest.getShouldTrimFields() != null) {
|
||||
assertEquals(findFileStructureRequest.getShouldTrimFields(),
|
||||
Boolean.valueOf(request.getParameters().get("should_trim_fields")));
|
||||
} else {
|
||||
assertNull(request.getParameters().get("should_trim_fields"));
|
||||
}
|
||||
if (findFileStructureRequest.getGrokPattern() != null) {
|
||||
assertEquals(findFileStructureRequest.getGrokPattern(), request.getParameters().get("grok_pattern"));
|
||||
} else {
|
||||
assertNull(request.getParameters().get("grok_pattern"));
|
||||
}
|
||||
if (findFileStructureRequest.getTimestampFormat() != null) {
|
||||
assertEquals(findFileStructureRequest.getTimestampFormat(), request.getParameters().get("timestamp_format"));
|
||||
} else {
|
||||
assertNull(request.getParameters().get("timestamp_format"));
|
||||
}
|
||||
if (findFileStructureRequest.getTimestampField() != null) {
|
||||
assertEquals(findFileStructureRequest.getTimestampField(), request.getParameters().get("timestamp_field"));
|
||||
} else {
|
||||
assertNull(request.getParameters().get("timestamp_field"));
|
||||
}
|
||||
if (findFileStructureRequest.getExplain() != null) {
|
||||
assertEquals(findFileStructureRequest.getExplain(), Boolean.valueOf(request.getParameters().get("explain")));
|
||||
} else {
|
||||
assertNull(request.getParameters().get("explain"));
|
||||
}
|
||||
assertEquals(sample, requestEntityToString(request));
|
||||
}
|
||||
|
||||
private static Job createValidJob(String jobId) {
|
||||
AnalysisConfig.Builder analysisConfig = AnalysisConfig.builder(Collections.singletonList(
|
||||
Detector.builder().setFunction("count").build()));
|
||||
|
|
|
@ -38,6 +38,8 @@ import org.elasticsearch.client.ml.DeleteForecastRequest;
|
|||
import org.elasticsearch.client.ml.DeleteJobRequest;
|
||||
import org.elasticsearch.client.ml.DeleteJobResponse;
|
||||
import org.elasticsearch.client.ml.DeleteModelSnapshotRequest;
|
||||
import org.elasticsearch.client.ml.FindFileStructureRequest;
|
||||
import org.elasticsearch.client.ml.FindFileStructureResponse;
|
||||
import org.elasticsearch.client.ml.FlushJobRequest;
|
||||
import org.elasticsearch.client.ml.FlushJobResponse;
|
||||
import org.elasticsearch.client.ml.ForecastJobRequest;
|
||||
|
@ -94,6 +96,7 @@ import org.elasticsearch.client.ml.datafeed.DatafeedConfig;
|
|||
import org.elasticsearch.client.ml.datafeed.DatafeedState;
|
||||
import org.elasticsearch.client.ml.datafeed.DatafeedStats;
|
||||
import org.elasticsearch.client.ml.datafeed.DatafeedUpdate;
|
||||
import org.elasticsearch.client.ml.filestructurefinder.FileStructure;
|
||||
import org.elasticsearch.client.ml.job.config.AnalysisConfig;
|
||||
import org.elasticsearch.client.ml.job.config.DataDescription;
|
||||
import org.elasticsearch.client.ml.job.config.Detector;
|
||||
|
@ -110,11 +113,13 @@ import org.elasticsearch.rest.RestStatus;
|
|||
import org.junit.After;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.stream.Collectors;
|
||||
|
@ -1306,4 +1311,43 @@ public class MachineLearningIT extends ESRestHighLevelClientTestCase {
|
|||
assertEquals(snapshotId, model.getSnapshotId());
|
||||
}
|
||||
}
|
||||
|
||||
public void testFindFileStructure() throws IOException {
|
||||
|
||||
String sample = "{\"logger\":\"controller\",\"timestamp\":1478261151445,\"level\":\"INFO\"," +
|
||||
"\"pid\":42,\"thread\":\"0x7fff7d2a8000\",\"message\":\"message 1\",\"class\":\"ml\"," +
|
||||
"\"method\":\"core::SomeNoiseMaker\",\"file\":\"Noisemaker.cc\",\"line\":333}\n" +
|
||||
"{\"logger\":\"controller\",\"timestamp\":1478261151445," +
|
||||
"\"level\":\"INFO\",\"pid\":42,\"thread\":\"0x7fff7d2a8000\",\"message\":\"message 2\",\"class\":\"ml\"," +
|
||||
"\"method\":\"core::SomeNoiseMaker\",\"file\":\"Noisemaker.cc\",\"line\":333}\n";
|
||||
|
||||
MachineLearningClient machineLearningClient = highLevelClient().machineLearning();
|
||||
|
||||
FindFileStructureRequest request = new FindFileStructureRequest();
|
||||
request.setSample(sample.getBytes(StandardCharsets.UTF_8));
|
||||
|
||||
FindFileStructureResponse response =
|
||||
execute(request, machineLearningClient::findFileStructure, machineLearningClient::findFileStructureAsync);
|
||||
|
||||
FileStructure structure = response.getFileStructure();
|
||||
|
||||
assertEquals(2, structure.getNumLinesAnalyzed());
|
||||
assertEquals(2, structure.getNumMessagesAnalyzed());
|
||||
assertEquals(sample, structure.getSampleStart());
|
||||
assertEquals(FileStructure.Format.NDJSON, structure.getFormat());
|
||||
assertEquals(StandardCharsets.UTF_8.displayName(Locale.ROOT), structure.getCharset());
|
||||
assertFalse(structure.getHasByteOrderMarker());
|
||||
assertNull(structure.getMultilineStartPattern());
|
||||
assertNull(structure.getExcludeLinesPattern());
|
||||
assertNull(structure.getColumnNames());
|
||||
assertNull(structure.getHasHeaderRow());
|
||||
assertNull(structure.getDelimiter());
|
||||
assertNull(structure.getQuote());
|
||||
assertNull(structure.getShouldTrimFields());
|
||||
assertNull(structure.getGrokPattern());
|
||||
assertEquals(Collections.singletonList("UNIX_MS"), structure.getJavaTimestampFormats());
|
||||
assertEquals(Collections.singletonList("UNIX_MS"), structure.getJodaTimestampFormats());
|
||||
assertEquals("timestamp", structure.getTimestampField());
|
||||
assertFalse(structure.needClientTimezone());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -44,6 +44,8 @@ import org.elasticsearch.client.ml.DeleteForecastRequest;
|
|||
import org.elasticsearch.client.ml.DeleteJobRequest;
|
||||
import org.elasticsearch.client.ml.DeleteJobResponse;
|
||||
import org.elasticsearch.client.ml.DeleteModelSnapshotRequest;
|
||||
import org.elasticsearch.client.ml.FindFileStructureRequest;
|
||||
import org.elasticsearch.client.ml.FindFileStructureResponse;
|
||||
import org.elasticsearch.client.ml.FlushJobRequest;
|
||||
import org.elasticsearch.client.ml.FlushJobResponse;
|
||||
import org.elasticsearch.client.ml.ForecastJobRequest;
|
||||
|
@ -110,6 +112,7 @@ import org.elasticsearch.client.ml.datafeed.DatafeedConfig;
|
|||
import org.elasticsearch.client.ml.datafeed.DatafeedStats;
|
||||
import org.elasticsearch.client.ml.datafeed.DatafeedUpdate;
|
||||
import org.elasticsearch.client.ml.datafeed.DelayedDataCheckConfig;
|
||||
import org.elasticsearch.client.ml.filestructurefinder.FileStructure;
|
||||
import org.elasticsearch.client.ml.job.config.AnalysisConfig;
|
||||
import org.elasticsearch.client.ml.job.config.AnalysisLimits;
|
||||
import org.elasticsearch.client.ml.job.config.DataDescription;
|
||||
|
@ -140,6 +143,9 @@ import org.elasticsearch.tasks.TaskId;
|
|||
import org.junit.After;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.Date;
|
||||
|
@ -1730,6 +1736,68 @@ public class MlClientDocumentationIT extends ESRestHighLevelClientTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
public void testFindFileStructure() throws Exception {
|
||||
RestHighLevelClient client = highLevelClient();
|
||||
|
||||
Path anInterestingFile = createTempFile();
|
||||
String contents = "{\"logger\":\"controller\",\"timestamp\":1478261151445,\"level\":\"INFO\"," +
|
||||
"\"pid\":42,\"thread\":\"0x7fff7d2a8000\",\"message\":\"message 1\",\"class\":\"ml\"," +
|
||||
"\"method\":\"core::SomeNoiseMaker\",\"file\":\"Noisemaker.cc\",\"line\":333}\n" +
|
||||
"{\"logger\":\"controller\",\"timestamp\":1478261151445," +
|
||||
"\"level\":\"INFO\",\"pid\":42,\"thread\":\"0x7fff7d2a8000\",\"message\":\"message 2\",\"class\":\"ml\"," +
|
||||
"\"method\":\"core::SomeNoiseMaker\",\"file\":\"Noisemaker.cc\",\"line\":333}\n";
|
||||
Files.write(anInterestingFile, Collections.singleton(contents), StandardCharsets.UTF_8);
|
||||
|
||||
{
|
||||
// tag::find-file-structure-request
|
||||
FindFileStructureRequest findFileStructureRequest = new FindFileStructureRequest(); // <1>
|
||||
findFileStructureRequest.setSample(Files.readAllBytes(anInterestingFile)); // <2>
|
||||
// end::find-file-structure-request
|
||||
|
||||
// tag::find-file-structure-request-options
|
||||
findFileStructureRequest.setLinesToSample(500); // <1>
|
||||
findFileStructureRequest.setExplain(true); // <2>
|
||||
// end::find-file-structure-request-options
|
||||
|
||||
// tag::find-file-structure-execute
|
||||
FindFileStructureResponse findFileStructureResponse =
|
||||
client.machineLearning().findFileStructure(findFileStructureRequest, RequestOptions.DEFAULT);
|
||||
// end::find-file-structure-execute
|
||||
|
||||
// tag::find-file-structure-response
|
||||
FileStructure structure = findFileStructureResponse.getFileStructure(); // <1>
|
||||
// end::find-file-structure-response
|
||||
assertEquals(2, structure.getNumLinesAnalyzed());
|
||||
}
|
||||
{
|
||||
// tag::find-file-structure-execute-listener
|
||||
ActionListener<FindFileStructureResponse> listener = new ActionListener<FindFileStructureResponse>() {
|
||||
@Override
|
||||
public void onResponse(FindFileStructureResponse findFileStructureResponse) {
|
||||
// <1>
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onFailure(Exception e) {
|
||||
// <2>
|
||||
}
|
||||
};
|
||||
// end::find-file-structure-execute-listener
|
||||
FindFileStructureRequest findFileStructureRequest = new FindFileStructureRequest();
|
||||
findFileStructureRequest.setSample(Files.readAllBytes(anInterestingFile));
|
||||
|
||||
// Replace the empty listener by a blocking listener in test
|
||||
final CountDownLatch latch = new CountDownLatch(1);
|
||||
listener = new LatchedActionListener<>(listener, latch);
|
||||
|
||||
// tag::find-file-structure-execute-async
|
||||
client.machineLearning().findFileStructureAsync(findFileStructureRequest, RequestOptions.DEFAULT, listener); // <1>
|
||||
// end::find-file-structure-execute-async
|
||||
|
||||
assertTrue(latch.await(30L, TimeUnit.SECONDS));
|
||||
}
|
||||
}
|
||||
|
||||
public void testGetInfluencers() throws IOException, InterruptedException {
|
||||
RestHighLevelClient client = highLevelClient();
|
||||
|
||||
|
|
|
@ -0,0 +1,114 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package org.elasticsearch.client.ml;
|
||||
|
||||
import org.elasticsearch.client.ml.filestructurefinder.FileStructure;
|
||||
import org.elasticsearch.common.unit.TimeValue;
|
||||
import org.elasticsearch.common.xcontent.ObjectParser;
|
||||
import org.elasticsearch.common.xcontent.XContentParser;
|
||||
import org.elasticsearch.test.AbstractXContentTestCase;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.Arrays;
|
||||
|
||||
public class FindFileStructureRequestTests extends AbstractXContentTestCase<FindFileStructureRequest> {
|
||||
|
||||
private static final ObjectParser<FindFileStructureRequest, Void> PARSER =
|
||||
new ObjectParser<>("find_file_structure_request", FindFileStructureRequest::new);
|
||||
|
||||
static {
|
||||
PARSER.declareInt(FindFileStructureRequest::setLinesToSample, FindFileStructureRequest.LINES_TO_SAMPLE);
|
||||
PARSER.declareString((p, c) -> p.setTimeout(TimeValue.parseTimeValue(c, FindFileStructureRequest.TIMEOUT.getPreferredName())),
|
||||
FindFileStructureRequest.TIMEOUT);
|
||||
PARSER.declareString(FindFileStructureRequest::setCharset, FindFileStructureRequest.CHARSET);
|
||||
PARSER.declareString(FindFileStructureRequest::setFormat, FindFileStructureRequest.FORMAT);
|
||||
PARSER.declareStringArray(FindFileStructureRequest::setColumnNames, FindFileStructureRequest.COLUMN_NAMES);
|
||||
PARSER.declareBoolean(FindFileStructureRequest::setHasHeaderRow, FindFileStructureRequest.HAS_HEADER_ROW);
|
||||
PARSER.declareString(FindFileStructureRequest::setDelimiter, FindFileStructureRequest.DELIMITER);
|
||||
PARSER.declareString(FindFileStructureRequest::setQuote, FindFileStructureRequest.QUOTE);
|
||||
PARSER.declareBoolean(FindFileStructureRequest::setShouldTrimFields, FindFileStructureRequest.SHOULD_TRIM_FIELDS);
|
||||
PARSER.declareString(FindFileStructureRequest::setGrokPattern, FindFileStructureRequest.GROK_PATTERN);
|
||||
PARSER.declareString(FindFileStructureRequest::setTimestampFormat, FindFileStructureRequest.TIMESTAMP_FORMAT);
|
||||
PARSER.declareString(FindFileStructureRequest::setTimestampField, FindFileStructureRequest.TIMESTAMP_FIELD);
|
||||
PARSER.declareBoolean(FindFileStructureRequest::setExplain, FindFileStructureRequest.EXPLAIN);
|
||||
// Sample is not included in the X-Content representation
|
||||
}
|
||||
|
||||
@Override
|
||||
protected FindFileStructureRequest doParseInstance(XContentParser parser) throws IOException {
|
||||
return PARSER.apply(parser, null);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean supportsUnknownFields() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected FindFileStructureRequest createTestInstance() {
|
||||
return createTestRequestWithoutSample();
|
||||
}
|
||||
|
||||
public static FindFileStructureRequest createTestRequestWithoutSample() {
|
||||
|
||||
FindFileStructureRequest findFileStructureRequest = new FindFileStructureRequest();
|
||||
if (randomBoolean()) {
|
||||
findFileStructureRequest.setLinesToSample(randomIntBetween(1000, 2000));
|
||||
}
|
||||
if (randomBoolean()) {
|
||||
findFileStructureRequest.setTimeout(TimeValue.timeValueSeconds(randomIntBetween(10, 20)));
|
||||
}
|
||||
if (randomBoolean()) {
|
||||
findFileStructureRequest.setCharset(Charset.defaultCharset().toString());
|
||||
}
|
||||
if (randomBoolean()) {
|
||||
findFileStructureRequest.setFormat(randomFrom(FileStructure.Format.values()));
|
||||
}
|
||||
if (randomBoolean()) {
|
||||
findFileStructureRequest.setColumnNames(Arrays.asList(generateRandomStringArray(10, 10, false, false)));
|
||||
}
|
||||
if (randomBoolean()) {
|
||||
findFileStructureRequest.setHasHeaderRow(randomBoolean());
|
||||
}
|
||||
if (randomBoolean()) {
|
||||
findFileStructureRequest.setDelimiter(randomAlphaOfLength(1));
|
||||
}
|
||||
if (randomBoolean()) {
|
||||
findFileStructureRequest.setQuote(randomAlphaOfLength(1));
|
||||
}
|
||||
if (randomBoolean()) {
|
||||
findFileStructureRequest.setShouldTrimFields(randomBoolean());
|
||||
}
|
||||
if (randomBoolean()) {
|
||||
findFileStructureRequest.setGrokPattern(randomAlphaOfLength(100));
|
||||
}
|
||||
if (randomBoolean()) {
|
||||
findFileStructureRequest.setTimestampFormat(randomAlphaOfLength(10));
|
||||
}
|
||||
if (randomBoolean()) {
|
||||
findFileStructureRequest.setTimestampField(randomAlphaOfLength(10));
|
||||
}
|
||||
if (randomBoolean()) {
|
||||
findFileStructureRequest.setExplain(randomBoolean());
|
||||
}
|
||||
|
||||
return findFileStructureRequest;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,49 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package org.elasticsearch.client.ml;
|
||||
|
||||
import org.elasticsearch.client.ml.filestructurefinder.FileStructureTests;
|
||||
import org.elasticsearch.common.xcontent.XContentParser;
|
||||
import org.elasticsearch.test.AbstractXContentTestCase;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.function.Predicate;
|
||||
|
||||
public class FindFileStructureResponseTests extends AbstractXContentTestCase<FindFileStructureResponse> {
|
||||
|
||||
@Override
|
||||
protected FindFileStructureResponse createTestInstance() {
|
||||
return new FindFileStructureResponse(FileStructureTests.createTestFileStructure());
|
||||
}
|
||||
|
||||
@Override
|
||||
protected FindFileStructureResponse doParseInstance(XContentParser parser) throws IOException {
|
||||
return FindFileStructureResponse.fromXContent(parser);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean supportsUnknownFields() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<String> getRandomFieldsExcludeFilter() {
|
||||
return field -> !field.isEmpty();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,88 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package org.elasticsearch.client.ml.filestructurefinder;
|
||||
|
||||
import org.elasticsearch.common.xcontent.XContentParser;
|
||||
import org.elasticsearch.test.AbstractXContentTestCase;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.function.Predicate;
|
||||
|
||||
public class FieldStatsTests extends AbstractXContentTestCase<FieldStats> {
|
||||
|
||||
@Override
|
||||
protected FieldStats createTestInstance() {
|
||||
return createTestFieldStats();
|
||||
}
|
||||
|
||||
static FieldStats createTestFieldStats() {
|
||||
|
||||
long count = randomIntBetween(1, 100000);
|
||||
int cardinality = randomIntBetween(1, (int) count);
|
||||
|
||||
Double minValue = null;
|
||||
Double maxValue = null;
|
||||
Double meanValue = null;
|
||||
Double medianValue = null;
|
||||
boolean isMetric = randomBoolean();
|
||||
if (isMetric) {
|
||||
if (randomBoolean()) {
|
||||
minValue = randomDouble();
|
||||
maxValue = randomDouble();
|
||||
} else {
|
||||
minValue = (double) randomInt();
|
||||
maxValue = (double) randomInt();
|
||||
}
|
||||
meanValue = randomDouble();
|
||||
medianValue = randomDouble();
|
||||
}
|
||||
|
||||
List<Map<String, Object>> topHits = new ArrayList<>();
|
||||
for (int i = 0; i < Math.min(10, cardinality); ++i) {
|
||||
Map<String, Object> topHit = new LinkedHashMap<>();
|
||||
if (isMetric) {
|
||||
topHit.put("value", randomBoolean() ? randomDouble() : (double) randomInt());
|
||||
} else {
|
||||
topHit.put("value", randomAlphaOfLength(20));
|
||||
}
|
||||
topHit.put("count", randomIntBetween(1, cardinality));
|
||||
topHits.add(topHit);
|
||||
}
|
||||
|
||||
return new FieldStats(count, cardinality, minValue, maxValue, meanValue, medianValue, topHits);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected FieldStats doParseInstance(XContentParser parser) {
|
||||
return FieldStats.PARSER.apply(parser, null);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean supportsUnknownFields() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<String> getRandomFieldsExcludeFilter() {
|
||||
return field -> field.contains(FieldStats.TOP_HITS.getPreferredName());
|
||||
}
|
||||
}
|
|
@ -0,0 +1,127 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package org.elasticsearch.client.ml.filestructurefinder;
|
||||
|
||||
import org.elasticsearch.common.xcontent.XContentParser;
|
||||
import org.elasticsearch.test.AbstractXContentTestCase;
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.EnumSet;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
import java.util.function.Predicate;
|
||||
|
||||
public class FileStructureTests extends AbstractXContentTestCase<FileStructure> {
|
||||
|
||||
@Override
|
||||
protected FileStructure createTestInstance() {
|
||||
return createTestFileStructure();
|
||||
}
|
||||
|
||||
public static FileStructure createTestFileStructure() {
|
||||
|
||||
FileStructure.Format format = randomFrom(EnumSet.allOf(FileStructure.Format.class));
|
||||
|
||||
FileStructure.Builder builder = new FileStructure.Builder(format);
|
||||
|
||||
int numLinesAnalyzed = randomIntBetween(2, 10000);
|
||||
builder.setNumLinesAnalyzed(numLinesAnalyzed);
|
||||
int numMessagesAnalyzed = randomIntBetween(1, numLinesAnalyzed);
|
||||
builder.setNumMessagesAnalyzed(numMessagesAnalyzed);
|
||||
builder.setSampleStart(randomAlphaOfLength(1000));
|
||||
|
||||
String charset = randomFrom(Charset.availableCharsets().keySet());
|
||||
builder.setCharset(charset);
|
||||
if (charset.toUpperCase(Locale.ROOT).startsWith("UTF")) {
|
||||
builder.setHasByteOrderMarker(randomBoolean());
|
||||
}
|
||||
|
||||
if (numMessagesAnalyzed < numLinesAnalyzed) {
|
||||
builder.setMultilineStartPattern(randomAlphaOfLength(100));
|
||||
}
|
||||
if (randomBoolean()) {
|
||||
builder.setExcludeLinesPattern(randomAlphaOfLength(100));
|
||||
}
|
||||
|
||||
if (format == FileStructure.Format.DELIMITED) {
|
||||
builder.setColumnNames(Arrays.asList(generateRandomStringArray(10, 10, false, false)));
|
||||
builder.setHasHeaderRow(randomBoolean());
|
||||
builder.setDelimiter(randomFrom(',', '\t', ';', '|'));
|
||||
builder.setQuote(randomFrom('"', '\''));
|
||||
}
|
||||
|
||||
if (format == FileStructure.Format.SEMI_STRUCTURED_TEXT) {
|
||||
builder.setGrokPattern(randomAlphaOfLength(100));
|
||||
}
|
||||
|
||||
if (format == FileStructure.Format.SEMI_STRUCTURED_TEXT || randomBoolean()) {
|
||||
builder.setTimestampField(randomAlphaOfLength(10));
|
||||
builder.setJodaTimestampFormats(Arrays.asList(generateRandomStringArray(3, 20, false, false)));
|
||||
builder.setJavaTimestampFormats(Arrays.asList(generateRandomStringArray(3, 20, false, false)));
|
||||
builder.setNeedClientTimezone(randomBoolean());
|
||||
}
|
||||
|
||||
Map<String, Object> mappings = new TreeMap<>();
|
||||
for (String field : generateRandomStringArray(5, 20, false, false)) {
|
||||
mappings.put(field, Collections.singletonMap(randomAlphaOfLength(5), randomAlphaOfLength(10)));
|
||||
}
|
||||
builder.setMappings(mappings);
|
||||
|
||||
if (randomBoolean()) {
|
||||
Map<String, Object> ingestPipeline = new LinkedHashMap<>();
|
||||
for (String field : generateRandomStringArray(5, 20, false, false)) {
|
||||
ingestPipeline.put(field, Collections.singletonMap(randomAlphaOfLength(5), randomAlphaOfLength(10)));
|
||||
}
|
||||
builder.setMappings(ingestPipeline);
|
||||
}
|
||||
|
||||
if (randomBoolean()) {
|
||||
Map<String, FieldStats> fieldStats = new TreeMap<>();
|
||||
for (String field : generateRandomStringArray(5, 20, false, false)) {
|
||||
fieldStats.put(field, FieldStatsTests.createTestFieldStats());
|
||||
}
|
||||
builder.setFieldStats(fieldStats);
|
||||
}
|
||||
|
||||
builder.setExplanation(Arrays.asList(generateRandomStringArray(10, 150, false, false)));
|
||||
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected FileStructure doParseInstance(XContentParser parser) {
|
||||
return FileStructure.PARSER.apply(parser, null).build();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean supportsUnknownFields() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<String> getRandomFieldsExcludeFilter() {
|
||||
// unknown fields are only guaranteed to be ignored at the top level - below this several data
|
||||
// structures (e.g. mappings, ingest pipeline, field stats) will preserve arbitrary fields
|
||||
return field -> !field.isEmpty();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,53 @@
|
|||
--
|
||||
:api: find-file-structure
|
||||
:request: FindFileStructureRequest
|
||||
:response: FindFileStructureResponse
|
||||
--
|
||||
[id="{upid}-{api}"]
|
||||
=== Find File Structure API
|
||||
|
||||
The Find File Structure API determines the structure of a text file,
along with other information that will be useful when importing its
contents into an {es} index. It accepts a +{request}+ object and
responds with a +{response}+ object.
|
||||
|
||||
[id="{upid}-{api}-request"]
|
||||
==== Find File Structure Request
|
||||
|
||||
A sample from the beginning of the file (or the entire file contents if
|
||||
it's small) must be added to the +{request}+ object using the
|
||||
`FindFileStructureRequest#setSample` method.
|
||||
|
||||
["source","java",subs="attributes,callouts,macros"]
|
||||
--------------------------------------------------
|
||||
include-tagged::{doc-tests-file}[{api}-request]
|
||||
--------------------------------------------------
|
||||
<1> Create a new `FindFileStructureRequest` object
|
||||
<2> Add the contents of `anInterestingFile` to the request
|
||||
|
||||
==== Optional Arguments
|
||||
|
||||
The following arguments are optional.
|
||||
|
||||
["source","java",subs="attributes,callouts,macros"]
|
||||
--------------------------------------------------
|
||||
include-tagged::{doc-tests-file}[{api}-request-options]
|
||||
--------------------------------------------------
|
||||
<1> Set the maximum number of lines to sample (the entire sample will be
|
||||
used if it contains fewer lines)
|
||||
<2> Request that an explanation of the analysis be returned in the response
|
||||
|
||||
include::../execution.asciidoc[]
|
||||
|
||||
[id="{upid}-{api}-response"]
|
||||
==== Find File Structure Response
|
||||
|
||||
A +{response}+ contains information about the file structure,
|
||||
as well as mappings and an ingest pipeline that could be used
|
||||
to index the contents into {es}.
|
||||
|
||||
["source","java",subs="attributes,callouts,macros"]
|
||||
--------------------------------------------------
|
||||
include-tagged::{doc-tests-file}[{api}-response]
|
||||
--------------------------------------------------
|
||||
<1> The `FileStructure` object contains the structure information
|
Loading…
Reference in New Issue