[ML] Change default data_description to JSON (elastic/x-pack-elasticsearch#707)

relates elastic/x-pack-elasticsearch#674

Original commit: elastic/x-pack-elasticsearch@efc5f4a269
This commit is contained in:
Dimitris Athanasiou 2017-03-16 11:48:57 +00:00 committed by GitHub
parent 803654d9bf
commit 6c9727c2db
7 changed files with 84 additions and 38 deletions

View File

@ -123,11 +123,10 @@ public class DataDescription extends ToXContentToBytes implements Writeable {
private final DataFormat dataFormat;
private final String timeFieldName;
private final String timeFormat;
private final char fieldDelimiter;
private final char quoteCharacter;
private final Character fieldDelimiter;
private final Character quoteCharacter;
public static final ObjectParser<Builder, Void> PARSER =
new ObjectParser<>(DATA_DESCRIPTION_FIELD.getPreferredName(), Builder::new);
public static final ObjectParser<Builder, Void> PARSER = new ObjectParser<>(DATA_DESCRIPTION_FIELD.getPreferredName(), Builder::new);
static {
PARSER.declareString(Builder::setFormat, FORMAT_FIELD);
@ -137,7 +136,8 @@ public class DataDescription extends ToXContentToBytes implements Writeable {
PARSER.declareField(Builder::setQuoteCharacter, DataDescription::extractChar, QUOTE_CHARACTER_FIELD, ValueType.STRING);
}
public DataDescription(DataFormat dataFormat, String timeFieldName, String timeFormat, char fieldDelimiter, char quoteCharacter) {
public DataDescription(DataFormat dataFormat, String timeFieldName, String timeFormat, Character fieldDelimiter,
Character quoteCharacter) {
this.dataFormat = dataFormat;
this.timeFieldName = timeFieldName;
this.timeFormat = timeFormat;
@ -149,8 +149,8 @@ public class DataDescription extends ToXContentToBytes implements Writeable {
dataFormat = DataFormat.readFromStream(in);
timeFieldName = in.readString();
timeFormat = in.readString();
fieldDelimiter = (char) in.read();
quoteCharacter = (char) in.read();
fieldDelimiter = in.readBoolean() ? (char) in.read() : null;
quoteCharacter = in.readBoolean() ? (char) in.read() : null;
}
@Override
@ -158,25 +158,41 @@ public class DataDescription extends ToXContentToBytes implements Writeable {
dataFormat.writeTo(out);
out.writeString(timeFieldName);
out.writeString(timeFormat);
out.write(fieldDelimiter);
out.write(quoteCharacter);
if (fieldDelimiter != null) {
out.writeBoolean(true);
out.write(fieldDelimiter);
} else {
out.writeBoolean(false);
}
if (quoteCharacter != null) {
out.writeBoolean(true);
out.write(quoteCharacter);
} else {
out.writeBoolean(false);
}
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
builder.field(FORMAT_FIELD.getPreferredName(), dataFormat);
if (dataFormat != DataFormat.JSON) {
builder.field(FORMAT_FIELD.getPreferredName(), dataFormat);
}
builder.field(TIME_FIELD_NAME_FIELD.getPreferredName(), timeFieldName);
builder.field(TIME_FORMAT_FIELD.getPreferredName(), timeFormat);
builder.field(FIELD_DELIMITER_FIELD.getPreferredName(), String.valueOf(fieldDelimiter));
builder.field(QUOTE_CHARACTER_FIELD.getPreferredName(), String.valueOf(quoteCharacter));
if (fieldDelimiter != null) {
builder.field(FIELD_DELIMITER_FIELD.getPreferredName(), String.valueOf(fieldDelimiter));
}
if (quoteCharacter != null) {
builder.field(QUOTE_CHARACTER_FIELD.getPreferredName(), String.valueOf(quoteCharacter));
}
builder.endObject();
return builder;
}
/**
* The format of the data to be processed.
* Defaults to {@link DataDescription.DataFormat#DELIMITED}
* Defaults to {@link DataDescription.DataFormat#JSON}
*
* @return The data format
*/
@ -196,8 +212,8 @@ public class DataDescription extends ToXContentToBytes implements Writeable {
/**
* Either {@value #EPOCH}, {@value #EPOCH_MS} or a SimpleDateTime format string.
* If not set (is <code>null</code> or an empty string) or set to
* {@value #EPOCH} (the default) then the date is assumed to be in
* seconds from the epoch.
* {@value #EPOCH_MS} (the default) then the date is assumed to be in
* milliseconds from the epoch.
*
* @return A String if set or <code>null</code>
*/
@ -209,21 +225,21 @@ public class DataDescription extends ToXContentToBytes implements Writeable {
* If the data is in a delineated format with a header e.g. csv or tsv
* this is the delimiter character used. This is only applicable if
* {@linkplain #getFormat()} is {@link DataDescription.DataFormat#DELIMITED}.
* The default value is {@value #DEFAULT_DELIMITER}
* The default value for delimited format is {@value #DEFAULT_DELIMITER}.
*
* @return A char
*/
public char getFieldDelimiter() {
public Character getFieldDelimiter() {
return fieldDelimiter;
}
/**
* The quote character used in delineated formats.
* Defaults to {@value #DEFAULT_QUOTE_CHAR}
* The default value for delimited format is {@value #DEFAULT_QUOTE_CHAR}.
*
* @return The delineated format quote character
*/
public char getQuoteCharacter() {
public Character getQuoteCharacter() {
return quoteCharacter;
}
@ -236,8 +252,7 @@ public class DataDescription extends ToXContentToBytes implements Writeable {
* @return True if the data should be transformed.
*/
public boolean transform() {
return dataFormat == DataFormat.JSON ||
isTransformTime();
return dataFormat == DataFormat.JSON || isTransformTime();
}
/**
@ -260,7 +275,7 @@ public class DataDescription extends ToXContentToBytes implements Writeable {
return EPOCH_MS.equals(timeFormat);
}
private static char extractChar(XContentParser parser) throws IOException {
private static Character extractChar(XContentParser parser) throws IOException {
if (parser.currentToken() == XContentParser.Token.VALUE_STRING) {
String charStr = parser.text();
if (charStr.length() != 1) {
@ -287,7 +302,7 @@ public class DataDescription extends ToXContentToBytes implements Writeable {
DataDescription that = (DataDescription) other;
return this.dataFormat == that.dataFormat &&
this.quoteCharacter == that.quoteCharacter &&
Objects.equals(this.quoteCharacter, that.quoteCharacter) &&
Objects.equals(this.timeFieldName, that.timeFieldName) &&
Objects.equals(this.timeFormat, that.timeFormat) &&
Objects.equals(this.fieldDelimiter, that.fieldDelimiter);
@ -295,17 +310,16 @@ public class DataDescription extends ToXContentToBytes implements Writeable {
@Override
public int hashCode() {
return Objects.hash(dataFormat, quoteCharacter, timeFieldName,
timeFormat, fieldDelimiter);
return Objects.hash(dataFormat, quoteCharacter, timeFieldName, timeFormat, fieldDelimiter);
}
public static class Builder {
private DataFormat dataFormat = DataFormat.DELIMITED;
private DataFormat dataFormat = DataFormat.JSON;
private String timeFieldName = DEFAULT_TIME_FIELD;
private String timeFormat = EPOCH;
private char fieldDelimiter = DEFAULT_DELIMITER;
private char quoteCharacter = DEFAULT_QUOTE_CHAR;
private String timeFormat = EPOCH_MS;
private Character fieldDelimiter;
private Character quoteCharacter;
public void setFormat(DataFormat format) {
dataFormat = ExceptionsHelper.requireNonNull(format, FORMAT_FIELD.getPreferredName() + " must not be null");
@ -335,18 +349,24 @@ public class DataDescription extends ToXContentToBytes implements Writeable {
timeFormat = format;
}
public void setFieldDelimiter(char delimiter) {
public void setFieldDelimiter(Character delimiter) {
fieldDelimiter = delimiter;
}
public void setQuoteCharacter(char value) {
public void setQuoteCharacter(Character value) {
quoteCharacter = value;
}
public DataDescription build() {
return new DataDescription(dataFormat, timeFieldName, timeFormat, fieldDelimiter,quoteCharacter);
if (dataFormat == DataFormat.DELIMITED) {
if (fieldDelimiter == null) {
fieldDelimiter = DEFAULT_DELIMITER;
}
if (quoteCharacter == null) {
quoteCharacter = DEFAULT_QUOTE_CHAR;
}
}
return new DataDescription(dataFormat, timeFieldName, timeFormat, fieldDelimiter, quoteCharacter);
}
}
}

View File

@ -57,10 +57,10 @@ public class ValidateJobConfigActionRequestTests extends AbstractStreamableXCont
if (randomBoolean()) {
dataDescription.setFormat(DataDescription.DataFormat.DELIMITED);
if (randomBoolean()) {
dataDescription.setFieldDelimiter(';');
dataDescription.setFieldDelimiter(new Character(';'));
}
if (randomBoolean()) {
dataDescription.setQuoteCharacter('\'');
dataDescription.setQuoteCharacter(new Character('\''));
}
} else {
dataDescription.setFormat(DataDescription.DataFormat.JSON);

View File

@ -16,10 +16,34 @@ import org.elasticsearch.xpack.ml.job.messages.Messages;
import org.elasticsearch.xpack.ml.support.AbstractSerializingTestCase;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.Matchers.nullValue;
import static org.hamcrest.core.Is.is;
public class DataDescriptionTests extends AbstractSerializingTestCase<DataDescription> {
public void testDefault() {
DataDescription dataDescription = new DataDescription.Builder().build();
assertThat(dataDescription.getFormat(), equalTo(DataFormat.JSON));
assertThat(dataDescription.getTimeField(), equalTo("time"));
assertThat(dataDescription.getTimeFormat(), equalTo("epoch_ms"));
assertThat(dataDescription.getFieldDelimiter(), is(nullValue()));
assertThat(dataDescription.getQuoteCharacter(), is(nullValue()));
}
public void testDefaultDelimited() {
DataDescription.Builder dataDescriptionBuilder = new DataDescription.Builder();
dataDescriptionBuilder.setFormat(DataFormat.DELIMITED);
DataDescription dataDescription = dataDescriptionBuilder.build();
assertThat(dataDescription.getFormat(), equalTo(DataFormat.DELIMITED));
assertThat(dataDescription.getTimeField(), equalTo("time"));
assertThat(dataDescription.getTimeFormat(), equalTo("epoch_ms"));
assertThat(dataDescription.getFieldDelimiter(), is('\t'));
assertThat(dataDescription.getQuoteCharacter(), is('"'));
}
public void testVerify_GivenValidFormat() {
DataDescription.Builder description = new DataDescription.Builder();
description.setTimeFormat("epoch");

View File

@ -46,6 +46,7 @@ public class ProcessCtrlTests extends ESTestCase {
job.setAnalysisConfig(acBuilder);
DataDescription.Builder dd = new DataDescription.Builder();
dd.setFormat(DataDescription.DataFormat.DELIMITED);
dd.setFieldDelimiter('|');
dd.setTimeField("tf");
job.setDataDescription(dd);

View File

@ -356,6 +356,7 @@ public class AutodetectProcessManagerTests extends ESTestCase {
private Job createJobDetails(String jobId) {
DataDescription.Builder dd = new DataDescription.Builder();
dd.setFormat(DataDescription.DataFormat.DELIMITED);
dd.setTimeFormat("epoch");
dd.setFieldDelimiter(',');
Detector d = new Detector.Builder("metric", "value").build();

View File

@ -62,8 +62,8 @@ public class CsvDataToProcessWriterTests extends ESTestCase {
}).when(autodetectProcess).writeRecord(any(String[].class));
dataDescription = new DataDescription.Builder();
dataDescription.setFieldDelimiter(',');
dataDescription.setFormat(DataFormat.DELIMITED);
dataDescription.setFieldDelimiter(',');
dataDescription.setTimeFormat(DataDescription.EPOCH);
Detector detector = new Detector.Builder("metric", "value").build();

View File

@ -31,7 +31,7 @@ public class CsvParserTests extends ESTestCase {
InputStream inputStream = new ByteArrayInputStream(data.getBytes(StandardCharsets.UTF_8));
CsvPreference csvPref = new CsvPreference.Builder(
DataDescription.DEFAULT_QUOTE_CHAR,
'"',
',',
new String(new char[]{DataDescription.LINE_ENDING})).build();