[ML] Refactor delimited file structure detection (#33233)
Two changes: (1) use the term "delimited" rather than "separated values"; (2) use a single factory class with arguments to specify the delimiter and identification constraints. This change makes it easier to add support for other delimiter characters.
This commit is contained in:
parent
73eb4cbbbe
commit
7345878d33
|
@ -1,35 +0,0 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License;
|
||||
* you may not use this file except in compliance with the Elastic License.
|
||||
*/
|
||||
package org.elasticsearch.xpack.ml.logstructurefinder;
|
||||
|
||||
import org.supercsv.prefs.CsvPreference;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
public class CsvLogStructureFinderFactory implements LogStructureFinderFactory {
|
||||
|
||||
/**
|
||||
* Rules are:
|
||||
* - The file must be valid CSV
|
||||
* - It must contain at least two complete records
|
||||
* - There must be at least two fields per record (otherwise files with no commas could be treated as CSV!)
|
||||
* - Every CSV record except the last must have the same number of fields
|
||||
* The reason the last record is allowed to have fewer fields than the others is that
|
||||
* it could have been truncated when the file was sampled.
|
||||
*/
|
||||
@Override
|
||||
public boolean canCreateFromSample(List<String> explanation, String sample) {
|
||||
return SeparatedValuesLogStructureFinder.canCreateFromSample(explanation, sample, 2, CsvPreference.EXCEL_PREFERENCE, "CSV");
|
||||
}
|
||||
|
||||
@Override
|
||||
public LogStructureFinder createFromSample(List<String> explanation, String sample, String charsetName, Boolean hasByteOrderMarker)
|
||||
throws IOException {
|
||||
return SeparatedValuesLogStructureFinder.makeSeparatedValuesLogStructureFinder(explanation, sample, charsetName, hasByteOrderMarker,
|
||||
CsvPreference.EXCEL_PREFERENCE, false);
|
||||
}
|
||||
}
|
|
@ -29,17 +29,16 @@ import java.util.regex.Pattern;
|
|||
import java.util.stream.Collectors;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
public class SeparatedValuesLogStructureFinder implements LogStructureFinder {
|
||||
public class DelimitedLogStructureFinder implements LogStructureFinder {
|
||||
|
||||
private static final int MAX_LEVENSHTEIN_COMPARISONS = 100;
|
||||
|
||||
private final List<String> sampleMessages;
|
||||
private final LogStructure structure;
|
||||
|
||||
static SeparatedValuesLogStructureFinder makeSeparatedValuesLogStructureFinder(List<String> explanation, String sample,
|
||||
String charsetName, Boolean hasByteOrderMarker,
|
||||
CsvPreference csvPreference, boolean trimFields)
|
||||
throws IOException {
|
||||
static DelimitedLogStructureFinder makeDelimitedLogStructureFinder(List<String> explanation, String sample, String charsetName,
|
||||
Boolean hasByteOrderMarker, CsvPreference csvPreference,
|
||||
boolean trimFields) throws IOException {
|
||||
|
||||
Tuple<List<List<String>>, List<Integer>> parsed = readRows(sample, csvPreference);
|
||||
List<List<String>> rows = parsed.v1();
|
||||
|
@ -73,13 +72,14 @@ public class SeparatedValuesLogStructureFinder implements LogStructureFinder {
|
|||
String preamble = Pattern.compile("\n").splitAsStream(sample).limit(lineNumbers.get(1)).collect(Collectors.joining("\n", "", "\n"));
|
||||
|
||||
char delimiter = (char) csvPreference.getDelimiterChar();
|
||||
LogStructure.Builder structureBuilder = new LogStructure.Builder(LogStructure.Format.fromSeparator(delimiter))
|
||||
LogStructure.Builder structureBuilder = new LogStructure.Builder(LogStructure.Format.DELIMITED)
|
||||
.setCharset(charsetName)
|
||||
.setHasByteOrderMarker(hasByteOrderMarker)
|
||||
.setSampleStart(preamble)
|
||||
.setNumLinesAnalyzed(lineNumbers.get(lineNumbers.size() - 1))
|
||||
.setNumMessagesAnalyzed(sampleRecords.size())
|
||||
.setHasHeaderRow(isHeaderInFile)
|
||||
.setDelimiter(delimiter)
|
||||
.setInputFields(Arrays.stream(headerWithNamedBlanks).collect(Collectors.toList()));
|
||||
|
||||
if (trimFields) {
|
||||
|
@ -131,10 +131,10 @@ public class SeparatedValuesLogStructureFinder implements LogStructureFinder {
|
|||
.setExplanation(explanation)
|
||||
.build();
|
||||
|
||||
return new SeparatedValuesLogStructureFinder(sampleMessages, structure);
|
||||
return new DelimitedLogStructureFinder(sampleMessages, structure);
|
||||
}
|
||||
|
||||
private SeparatedValuesLogStructureFinder(List<String> sampleMessages, LogStructure structure) {
|
||||
private DelimitedLogStructureFinder(List<String> sampleMessages, LogStructure structure) {
|
||||
this.sampleMessages = Collections.unmodifiableList(sampleMessages);
|
||||
this.structure = structure;
|
||||
}
|
|
@ -0,0 +1,57 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License;
|
||||
* you may not use this file except in compliance with the Elastic License.
|
||||
*/
|
||||
package org.elasticsearch.xpack.ml.logstructurefinder;
|
||||
|
||||
import org.supercsv.prefs.CsvPreference;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
|
||||
public class DelimitedLogStructureFinderFactory implements LogStructureFinderFactory {
|
||||
|
||||
private final CsvPreference csvPreference;
|
||||
private final int minFieldsPerRow;
|
||||
private final boolean trimFields;
|
||||
|
||||
DelimitedLogStructureFinderFactory(char delimiter, int minFieldsPerRow, boolean trimFields) {
|
||||
csvPreference = new CsvPreference.Builder('"', delimiter, "\n").build();
|
||||
this.minFieldsPerRow = minFieldsPerRow;
|
||||
this.trimFields = trimFields;
|
||||
}
|
||||
|
||||
/**
|
||||
* Rules are:
|
||||
* - It must contain at least two complete records
|
||||
* - There must be a minimum number of fields per record (otherwise files with no commas could be treated as CSV!)
|
||||
* - Every record except the last must have the same number of fields
|
||||
* The reason the last record is allowed to have fewer fields than the others is that
|
||||
* it could have been truncated when the file was sampled.
|
||||
*/
|
||||
@Override
|
||||
public boolean canCreateFromSample(List<String> explanation, String sample) {
|
||||
String formatName;
|
||||
switch ((char) csvPreference.getDelimiterChar()) {
|
||||
case ',':
|
||||
formatName = "CSV";
|
||||
break;
|
||||
case '\t':
|
||||
formatName = "TSV";
|
||||
break;
|
||||
default:
|
||||
formatName = Character.getName(csvPreference.getDelimiterChar()).toLowerCase(Locale.ROOT) + " delimited values";
|
||||
break;
|
||||
}
|
||||
return DelimitedLogStructureFinder.canCreateFromSample(explanation, sample, minFieldsPerRow, csvPreference, formatName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public LogStructureFinder createFromSample(List<String> explanation, String sample, String charsetName, Boolean hasByteOrderMarker)
|
||||
throws IOException {
|
||||
return DelimitedLogStructureFinder.makeDelimitedLogStructureFinder(explanation, sample, charsetName, hasByteOrderMarker,
|
||||
csvPreference, trimFields);
|
||||
}
|
||||
}
|
|
@ -27,37 +27,14 @@ public class LogStructure implements ToXContentObject {
|
|||
|
||||
public enum Format {
|
||||
|
||||
JSON, XML, CSV, TSV, SEMI_COLON_SEPARATED_VALUES, PIPE_SEPARATED_VALUES, SEMI_STRUCTURED_TEXT;
|
||||
|
||||
public Character separator() {
|
||||
switch (this) {
|
||||
case JSON:
|
||||
case XML:
|
||||
return null;
|
||||
case CSV:
|
||||
return ',';
|
||||
case TSV:
|
||||
return '\t';
|
||||
case SEMI_COLON_SEPARATED_VALUES:
|
||||
return ';';
|
||||
case PIPE_SEPARATED_VALUES:
|
||||
return '|';
|
||||
case SEMI_STRUCTURED_TEXT:
|
||||
return null;
|
||||
default:
|
||||
throw new IllegalStateException("enum value [" + this + "] missing from switch.");
|
||||
}
|
||||
}
|
||||
JSON, XML, DELIMITED, SEMI_STRUCTURED_TEXT;
|
||||
|
||||
public boolean supportsNesting() {
|
||||
switch (this) {
|
||||
case JSON:
|
||||
case XML:
|
||||
return true;
|
||||
case CSV:
|
||||
case TSV:
|
||||
case SEMI_COLON_SEPARATED_VALUES:
|
||||
case PIPE_SEPARATED_VALUES:
|
||||
case DELIMITED:
|
||||
case SEMI_STRUCTURED_TEXT:
|
||||
return false;
|
||||
default:
|
||||
|
@ -69,10 +46,7 @@ public class LogStructure implements ToXContentObject {
|
|||
switch (this) {
|
||||
case JSON:
|
||||
case XML:
|
||||
case CSV:
|
||||
case TSV:
|
||||
case SEMI_COLON_SEPARATED_VALUES:
|
||||
case PIPE_SEPARATED_VALUES:
|
||||
case DELIMITED:
|
||||
return true;
|
||||
case SEMI_STRUCTURED_TEXT:
|
||||
return false;
|
||||
|
@ -85,10 +59,7 @@ public class LogStructure implements ToXContentObject {
|
|||
switch (this) {
|
||||
case JSON:
|
||||
case XML:
|
||||
case CSV:
|
||||
case TSV:
|
||||
case SEMI_COLON_SEPARATED_VALUES:
|
||||
case PIPE_SEPARATED_VALUES:
|
||||
case DELIMITED:
|
||||
return false;
|
||||
case SEMI_STRUCTURED_TEXT:
|
||||
return true;
|
||||
|
@ -97,38 +68,6 @@ public class LogStructure implements ToXContentObject {
|
|||
}
|
||||
}
|
||||
|
||||
public boolean isSeparatedValues() {
|
||||
switch (this) {
|
||||
case JSON:
|
||||
case XML:
|
||||
return false;
|
||||
case CSV:
|
||||
case TSV:
|
||||
case SEMI_COLON_SEPARATED_VALUES:
|
||||
case PIPE_SEPARATED_VALUES:
|
||||
return true;
|
||||
case SEMI_STRUCTURED_TEXT:
|
||||
return false;
|
||||
default:
|
||||
throw new IllegalStateException("enum value [" + this + "] missing from switch.");
|
||||
}
|
||||
}
|
||||
|
||||
public static Format fromSeparator(char separator) {
|
||||
switch (separator) {
|
||||
case ',':
|
||||
return CSV;
|
||||
case '\t':
|
||||
return TSV;
|
||||
case ';':
|
||||
return SEMI_COLON_SEPARATED_VALUES;
|
||||
case '|':
|
||||
return PIPE_SEPARATED_VALUES;
|
||||
default:
|
||||
throw new IllegalArgumentException("No known format has separator [" + separator + "]");
|
||||
}
|
||||
}
|
||||
|
||||
public static Format fromString(String name) {
|
||||
return valueOf(name.trim().toUpperCase(Locale.ROOT));
|
||||
}
|
||||
|
@ -149,7 +88,7 @@ public class LogStructure implements ToXContentObject {
|
|||
static final ParseField EXCLUDE_LINES_PATTERN = new ParseField("exclude_lines_pattern");
|
||||
static final ParseField INPUT_FIELDS = new ParseField("input_fields");
|
||||
static final ParseField HAS_HEADER_ROW = new ParseField("has_header_row");
|
||||
static final ParseField SEPARATOR = new ParseField("separator");
|
||||
static final ParseField DELIMITER = new ParseField("delimiter");
|
||||
static final ParseField SHOULD_TRIM_FIELDS = new ParseField("should_trim_fields");
|
||||
static final ParseField GROK_PATTERN = new ParseField("grok_pattern");
|
||||
static final ParseField TIMESTAMP_FIELD = new ParseField("timestamp_field");
|
||||
|
@ -171,7 +110,7 @@ public class LogStructure implements ToXContentObject {
|
|||
PARSER.declareString(Builder::setExcludeLinesPattern, EXCLUDE_LINES_PATTERN);
|
||||
PARSER.declareStringArray(Builder::setInputFields, INPUT_FIELDS);
|
||||
PARSER.declareBoolean(Builder::setHasHeaderRow, HAS_HEADER_ROW);
|
||||
PARSER.declareString((p, c) -> p.setSeparator(c.charAt(0)), SEPARATOR);
|
||||
PARSER.declareString((p, c) -> p.setDelimiter(c.charAt(0)), DELIMITER);
|
||||
PARSER.declareBoolean(Builder::setShouldTrimFields, SHOULD_TRIM_FIELDS);
|
||||
PARSER.declareString(Builder::setGrokPattern, GROK_PATTERN);
|
||||
PARSER.declareString(Builder::setTimestampField, TIMESTAMP_FIELD);
|
||||
|
@ -191,7 +130,7 @@ public class LogStructure implements ToXContentObject {
|
|||
private final String excludeLinesPattern;
|
||||
private final List<String> inputFields;
|
||||
private final Boolean hasHeaderRow;
|
||||
private final Character separator;
|
||||
private final Character delimiter;
|
||||
private final Boolean shouldTrimFields;
|
||||
private final String grokPattern;
|
||||
private final List<String> timestampFormats;
|
||||
|
@ -202,7 +141,7 @@ public class LogStructure implements ToXContentObject {
|
|||
|
||||
public LogStructure(int numLinesAnalyzed, int numMessagesAnalyzed, String sampleStart, String charset, Boolean hasByteOrderMarker,
|
||||
Format format, String multilineStartPattern, String excludeLinesPattern, List<String> inputFields,
|
||||
Boolean hasHeaderRow, Character separator, Boolean shouldTrimFields, String grokPattern, String timestampField,
|
||||
Boolean hasHeaderRow, Character delimiter, Boolean shouldTrimFields, String grokPattern, String timestampField,
|
||||
List<String> timestampFormats, boolean needClientTimezone, Map<String, Object> mappings,
|
||||
List<String> explanation) {
|
||||
|
||||
|
@ -216,7 +155,7 @@ public class LogStructure implements ToXContentObject {
|
|||
this.excludeLinesPattern = excludeLinesPattern;
|
||||
this.inputFields = (inputFields == null) ? null : Collections.unmodifiableList(new ArrayList<>(inputFields));
|
||||
this.hasHeaderRow = hasHeaderRow;
|
||||
this.separator = separator;
|
||||
this.delimiter = delimiter;
|
||||
this.shouldTrimFields = shouldTrimFields;
|
||||
this.grokPattern = grokPattern;
|
||||
this.timestampField = timestampField;
|
||||
|
@ -266,8 +205,8 @@ public class LogStructure implements ToXContentObject {
|
|||
return hasHeaderRow;
|
||||
}
|
||||
|
||||
public Character getSeparator() {
|
||||
return separator;
|
||||
public Character getDelimiter() {
|
||||
return delimiter;
|
||||
}
|
||||
|
||||
public Boolean getShouldTrimFields() {
|
||||
|
@ -322,8 +261,8 @@ public class LogStructure implements ToXContentObject {
|
|||
if (hasHeaderRow != null) {
|
||||
builder.field(HAS_HEADER_ROW.getPreferredName(), hasHeaderRow.booleanValue());
|
||||
}
|
||||
if (separator != null) {
|
||||
builder.field(SEPARATOR.getPreferredName(), String.valueOf(separator));
|
||||
if (delimiter != null) {
|
||||
builder.field(DELIMITER.getPreferredName(), String.valueOf(delimiter));
|
||||
}
|
||||
if (shouldTrimFields != null) {
|
||||
builder.field(SHOULD_TRIM_FIELDS.getPreferredName(), shouldTrimFields.booleanValue());
|
||||
|
@ -349,7 +288,7 @@ public class LogStructure implements ToXContentObject {
|
|||
public int hashCode() {
|
||||
|
||||
return Objects.hash(numLinesAnalyzed, numMessagesAnalyzed, sampleStart, charset, hasByteOrderMarker, format,
|
||||
multilineStartPattern, excludeLinesPattern, inputFields, hasHeaderRow, separator, shouldTrimFields, grokPattern, timestampField,
|
||||
multilineStartPattern, excludeLinesPattern, inputFields, hasHeaderRow, delimiter, shouldTrimFields, grokPattern, timestampField,
|
||||
timestampFormats, needClientTimezone, mappings, explanation);
|
||||
}
|
||||
|
||||
|
@ -376,7 +315,7 @@ public class LogStructure implements ToXContentObject {
|
|||
Objects.equals(this.excludeLinesPattern, that.excludeLinesPattern) &&
|
||||
Objects.equals(this.inputFields, that.inputFields) &&
|
||||
Objects.equals(this.hasHeaderRow, that.hasHeaderRow) &&
|
||||
Objects.equals(this.separator, that.separator) &&
|
||||
Objects.equals(this.delimiter, that.delimiter) &&
|
||||
Objects.equals(this.shouldTrimFields, that.shouldTrimFields) &&
|
||||
Objects.equals(this.grokPattern, that.grokPattern) &&
|
||||
Objects.equals(this.timestampField, that.timestampField) &&
|
||||
|
@ -397,7 +336,7 @@ public class LogStructure implements ToXContentObject {
|
|||
private String excludeLinesPattern;
|
||||
private List<String> inputFields;
|
||||
private Boolean hasHeaderRow;
|
||||
private Character separator;
|
||||
private Character delimiter;
|
||||
private Boolean shouldTrimFields;
|
||||
private String grokPattern;
|
||||
private String timestampField;
|
||||
|
@ -441,7 +380,6 @@ public class LogStructure implements ToXContentObject {
|
|||
|
||||
public Builder setFormat(Format format) {
|
||||
this.format = Objects.requireNonNull(format);
|
||||
this.separator = format.separator();
|
||||
return this;
|
||||
}
|
||||
|
||||
|
@ -465,13 +403,13 @@ public class LogStructure implements ToXContentObject {
|
|||
return this;
|
||||
}
|
||||
|
||||
public Builder setShouldTrimFields(Boolean shouldTrimFields) {
|
||||
this.shouldTrimFields = shouldTrimFields;
|
||||
public Builder setDelimiter(Character delimiter) {
|
||||
this.delimiter = delimiter;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder setSeparator(Character separator) {
|
||||
this.separator = separator;
|
||||
public Builder setShouldTrimFields(Boolean shouldTrimFields) {
|
||||
this.shouldTrimFields = shouldTrimFields;
|
||||
return this;
|
||||
}
|
||||
|
||||
|
@ -542,28 +480,22 @@ public class LogStructure implements ToXContentObject {
|
|||
if (hasHeaderRow != null) {
|
||||
throw new IllegalArgumentException("Has header row may not be specified for [" + format + "] structures.");
|
||||
}
|
||||
if (separator != null) {
|
||||
throw new IllegalArgumentException("Separator may not be specified for [" + format + "] structures.");
|
||||
if (delimiter != null) {
|
||||
throw new IllegalArgumentException("Delimiter may not be specified for [" + format + "] structures.");
|
||||
}
|
||||
if (grokPattern != null) {
|
||||
throw new IllegalArgumentException("Grok pattern may not be specified for [" + format + "] structures.");
|
||||
}
|
||||
break;
|
||||
case CSV:
|
||||
case TSV:
|
||||
case SEMI_COLON_SEPARATED_VALUES:
|
||||
case PIPE_SEPARATED_VALUES:
|
||||
case DELIMITED:
|
||||
if (inputFields == null || inputFields.isEmpty()) {
|
||||
throw new IllegalArgumentException("Input fields must be specified for [" + format + "] structures.");
|
||||
}
|
||||
if (hasHeaderRow == null) {
|
||||
throw new IllegalArgumentException("Has header row must be specified for [" + format + "] structures.");
|
||||
}
|
||||
Character expectedSeparator = format.separator();
|
||||
assert expectedSeparator != null;
|
||||
if (expectedSeparator.equals(separator) == false) {
|
||||
throw new IllegalArgumentException("Separator must be [" + expectedSeparator + "] for [" + format +
|
||||
"] structures.");
|
||||
if (delimiter == null) {
|
||||
throw new IllegalArgumentException("Delimiter must be specified for [" + format + "] structures.");
|
||||
}
|
||||
if (grokPattern != null) {
|
||||
throw new IllegalArgumentException("Grok pattern may not be specified for [" + format + "] structures.");
|
||||
|
@ -576,8 +508,8 @@ public class LogStructure implements ToXContentObject {
|
|||
if (hasHeaderRow != null) {
|
||||
throw new IllegalArgumentException("Has header row may not be specified for [" + format + "] structures.");
|
||||
}
|
||||
if (separator != null) {
|
||||
throw new IllegalArgumentException("Separator may not be specified for [" + format + "] structures.");
|
||||
if (delimiter != null) {
|
||||
throw new IllegalArgumentException("Delimiter may not be specified for [" + format + "] structures.");
|
||||
}
|
||||
if (shouldTrimFields != null) {
|
||||
throw new IllegalArgumentException("Should trim fields may not be specified for [" + format + "] structures.");
|
||||
|
@ -607,7 +539,7 @@ public class LogStructure implements ToXContentObject {
|
|||
}
|
||||
|
||||
return new LogStructure(numLinesAnalyzed, numMessagesAnalyzed, sampleStart, charset, hasByteOrderMarker, format,
|
||||
multilineStartPattern, excludeLinesPattern, inputFields, hasHeaderRow, separator, shouldTrimFields, grokPattern,
|
||||
multilineStartPattern, excludeLinesPattern, inputFields, hasHeaderRow, delimiter, shouldTrimFields, grokPattern,
|
||||
timestampField, timestampFormats, needClientTimezone, mappings, explanation);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -69,10 +69,10 @@ public final class LogStructureFinderManager {
|
|||
new JsonLogStructureFinderFactory(),
|
||||
new XmlLogStructureFinderFactory(),
|
||||
// ND-JSON will often also be valid (although utterly weird) CSV, so JSON must come before CSV
|
||||
new CsvLogStructureFinderFactory(),
|
||||
new TsvLogStructureFinderFactory(),
|
||||
new SemiColonSeparatedValuesLogStructureFinderFactory(),
|
||||
new PipeSeparatedValuesLogStructureFinderFactory(),
|
||||
new DelimitedLogStructureFinderFactory(',', 2, false),
|
||||
new DelimitedLogStructureFinderFactory('\t', 2, false),
|
||||
new DelimitedLogStructureFinderFactory(';', 4, false),
|
||||
new DelimitedLogStructureFinderFactory('|', 5, true),
|
||||
new TextLogStructureFinderFactory()
|
||||
));
|
||||
|
||||
|
|
|
@ -21,12 +21,12 @@ import java.util.TreeMap;
|
|||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
final class LogStructureUtils {
|
||||
public final class LogStructureUtils {
|
||||
|
||||
static final String DEFAULT_TIMESTAMP_FIELD = "@timestamp";
|
||||
static final String MAPPING_TYPE_SETTING = "type";
|
||||
static final String MAPPING_FORMAT_SETTING = "format";
|
||||
static final String MAPPING_PROPERTIES_SETTING = "properties";
|
||||
public static final String DEFAULT_TIMESTAMP_FIELD = "@timestamp";
|
||||
public static final String MAPPING_TYPE_SETTING = "type";
|
||||
public static final String MAPPING_FORMAT_SETTING = "format";
|
||||
public static final String MAPPING_PROPERTIES_SETTING = "properties";
|
||||
|
||||
// NUMBER Grok pattern doesn't support scientific notation, so we extend it
|
||||
private static final Grok NUMBER_GROK = new Grok(Grok.getBuiltinPatterns(), "^%{NUMBER}(?:[eE][+-]?[0-3]?[0-9]{1,2})?$");
|
||||
|
|
|
@ -1,38 +0,0 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License;
|
||||
* you may not use this file except in compliance with the Elastic License.
|
||||
*/
|
||||
package org.elasticsearch.xpack.ml.logstructurefinder;
|
||||
|
||||
import org.supercsv.prefs.CsvPreference;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
public class PipeSeparatedValuesLogStructureFinderFactory implements LogStructureFinderFactory {
|
||||
|
||||
private static final CsvPreference PIPE_PREFERENCE = new CsvPreference.Builder('"', '|', "\n").build();
|
||||
|
||||
/**
|
||||
* Rules are:
|
||||
* - The file must be valid pipe (<code>|</code>) separated values
|
||||
* - It must contain at least two complete records
|
||||
* - There must be at least five fields per record (otherwise files with coincidental
|
||||
* or no pipe characters could be treated as pipe separated)
|
||||
* - Every pipe separated value record except the last must have the same number of fields
|
||||
* The reason the last record is allowed to have fewer fields than the others is that
|
||||
* it could have been truncated when the file was sampled.
|
||||
*/
|
||||
@Override
|
||||
public boolean canCreateFromSample(List<String> explanation, String sample) {
|
||||
return SeparatedValuesLogStructureFinder.canCreateFromSample(explanation, sample, 5, PIPE_PREFERENCE, "pipe separated values");
|
||||
}
|
||||
|
||||
@Override
|
||||
public LogStructureFinder createFromSample(List<String> explanation, String sample, String charsetName, Boolean hasByteOrderMarker)
|
||||
throws IOException {
|
||||
return SeparatedValuesLogStructureFinder.makeSeparatedValuesLogStructureFinder(explanation, sample, charsetName, hasByteOrderMarker,
|
||||
PIPE_PREFERENCE, true);
|
||||
}
|
||||
}
|
|
@ -1,37 +0,0 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License;
|
||||
* you may not use this file except in compliance with the Elastic License.
|
||||
*/
|
||||
package org.elasticsearch.xpack.ml.logstructurefinder;
|
||||
|
||||
import org.supercsv.prefs.CsvPreference;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
public class SemiColonSeparatedValuesLogStructureFinderFactory implements LogStructureFinderFactory {
|
||||
|
||||
/**
|
||||
* Rules are:
|
||||
* - The file must be valid semi-colon separated values
|
||||
* - It must contain at least two complete records
|
||||
* - There must be at least four fields per record (otherwise files with coincidental
|
||||
* or no semi-colons could be treated as semi-colon separated)
|
||||
* - Every semi-colon separated value record except the last must have the same number of fields
|
||||
* The reason the last record is allowed to have fewer fields than the others is that
|
||||
* it could have been truncated when the file was sampled.
|
||||
*/
|
||||
@Override
|
||||
public boolean canCreateFromSample(List<String> explanation, String sample) {
|
||||
return SeparatedValuesLogStructureFinder.canCreateFromSample(explanation, sample, 4,
|
||||
CsvPreference.EXCEL_NORTH_EUROPE_PREFERENCE, "semi-colon separated values");
|
||||
}
|
||||
|
||||
@Override
|
||||
public LogStructureFinder createFromSample(List<String> explanation, String sample, String charsetName, Boolean hasByteOrderMarker)
|
||||
throws IOException {
|
||||
return SeparatedValuesLogStructureFinder.makeSeparatedValuesLogStructureFinder(explanation, sample, charsetName, hasByteOrderMarker,
|
||||
CsvPreference.EXCEL_NORTH_EUROPE_PREFERENCE, false);
|
||||
}
|
||||
}
|
|
@ -23,13 +23,13 @@ public class TsvLogStructureFinderFactory implements LogStructureFinderFactory {
|
|||
*/
|
||||
@Override
|
||||
public boolean canCreateFromSample(List<String> explanation, String sample) {
|
||||
return SeparatedValuesLogStructureFinder.canCreateFromSample(explanation, sample, 2, CsvPreference.TAB_PREFERENCE, "TSV");
|
||||
return DelimitedLogStructureFinder.canCreateFromSample(explanation, sample, 2, CsvPreference.TAB_PREFERENCE, "TSV");
|
||||
}
|
||||
|
||||
@Override
|
||||
public LogStructureFinder createFromSample(List<String> explanation, String sample, String charsetName, Boolean hasByteOrderMarker)
|
||||
throws IOException {
|
||||
return SeparatedValuesLogStructureFinder.makeSeparatedValuesLogStructureFinder(explanation, sample, charsetName, hasByteOrderMarker,
|
||||
return DelimitedLogStructureFinder.makeDelimitedLogStructureFinder(explanation, sample, charsetName, hasByteOrderMarker,
|
||||
CsvPreference.TAB_PREFERENCE, false);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,38 +0,0 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License;
|
||||
* you may not use this file except in compliance with the Elastic License.
|
||||
*/
|
||||
package org.elasticsearch.xpack.ml.logstructurefinder;
|
||||
|
||||
public class CsvLogStructureFinderFactoryTests extends LogStructureTestCase {
|
||||
|
||||
private LogStructureFinderFactory factory = new CsvLogStructureFinderFactory();
|
||||
|
||||
// No need to check JSON or XML because they come earlier in the order we check formats
|
||||
|
||||
public void testCanCreateFromSampleGivenCsv() {
|
||||
|
||||
assertTrue(factory.canCreateFromSample(explanation, CSV_SAMPLE));
|
||||
}
|
||||
|
||||
public void testCanCreateFromSampleGivenTsv() {
|
||||
|
||||
assertFalse(factory.canCreateFromSample(explanation, TSV_SAMPLE));
|
||||
}
|
||||
|
||||
public void testCanCreateFromSampleGivenSemiColonSeparatedValues() {
|
||||
|
||||
assertFalse(factory.canCreateFromSample(explanation, SEMI_COLON_SEPARATED_VALUES_SAMPLE));
|
||||
}
|
||||
|
||||
public void testCanCreateFromSampleGivenPipeSeparatedValues() {
|
||||
|
||||
assertFalse(factory.canCreateFromSample(explanation, PIPE_SEPARATED_VALUES_SAMPLE));
|
||||
}
|
||||
|
||||
public void testCanCreateFromSampleGivenText() {
|
||||
|
||||
assertFalse(factory.canCreateFromSample(explanation, TEXT_SAMPLE));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,93 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License;
|
||||
* you may not use this file except in compliance with the Elastic License.
|
||||
*/
|
||||
package org.elasticsearch.xpack.ml.logstructurefinder;
|
||||
|
||||
public class DelimitedLogStructureFinderFactoryTests extends LogStructureTestCase {
|
||||
|
||||
private LogStructureFinderFactory csvFactory = new DelimitedLogStructureFinderFactory(',', 2, false);
|
||||
private LogStructureFinderFactory tsvFactory = new DelimitedLogStructureFinderFactory('\t', 2, false);
|
||||
private LogStructureFinderFactory semiColonDelimitedfactory = new DelimitedLogStructureFinderFactory(';', 4, false);
|
||||
private LogStructureFinderFactory pipeDelimitedFactory = new DelimitedLogStructureFinderFactory('|', 5, true);
|
||||
|
||||
// CSV - no need to check JSON or XML because they come earlier in the order we check formats
|
||||
|
||||
public void testCanCreateCsvFromSampleGivenCsv() {
|
||||
|
||||
assertTrue(csvFactory.canCreateFromSample(explanation, CSV_SAMPLE));
|
||||
}
|
||||
|
||||
public void testCanCreateCsvFromSampleGivenTsv() {
|
||||
|
||||
assertFalse(csvFactory.canCreateFromSample(explanation, TSV_SAMPLE));
|
||||
}
|
||||
|
||||
public void testCanCreateCsvFromSampleGivenSemiColonDelimited() {
|
||||
|
||||
assertFalse(csvFactory.canCreateFromSample(explanation, SEMI_COLON_DELIMITED_SAMPLE));
|
||||
}
|
||||
|
||||
public void testCanCreateCsvFromSampleGivenPipeDelimited() {
|
||||
|
||||
assertFalse(csvFactory.canCreateFromSample(explanation, PIPE_DELIMITED_SAMPLE));
|
||||
}
|
||||
|
||||
public void testCanCreateCsvFromSampleGivenText() {
|
||||
|
||||
assertFalse(csvFactory.canCreateFromSample(explanation, TEXT_SAMPLE));
|
||||
}
|
||||
|
||||
// TSV - no need to check JSON, XML or CSV because they come earlier in the order we check formats
|
||||
|
||||
public void testCanCreateTsvFromSampleGivenTsv() {
|
||||
|
||||
assertTrue(tsvFactory.canCreateFromSample(explanation, TSV_SAMPLE));
|
||||
}
|
||||
|
||||
public void testCanCreateTsvFromSampleGivenSemiColonDelimited() {
|
||||
|
||||
assertFalse(tsvFactory.canCreateFromSample(explanation, SEMI_COLON_DELIMITED_SAMPLE));
|
||||
}
|
||||
|
||||
public void testCanCreateTsvFromSampleGivenPipeDelimited() {
|
||||
|
||||
assertFalse(tsvFactory.canCreateFromSample(explanation, PIPE_DELIMITED_SAMPLE));
|
||||
}
|
||||
|
||||
public void testCanCreateTsvFromSampleGivenText() {
|
||||
|
||||
assertFalse(tsvFactory.canCreateFromSample(explanation, TEXT_SAMPLE));
|
||||
}
|
||||
|
||||
// Semi-colon delimited - no need to check JSON, XML, CSV or TSV because they come earlier in the order we check formats
|
||||
|
||||
public void testCanCreateSemiColonDelimitedFromSampleGivenSemiColonDelimited() {
|
||||
|
||||
assertTrue(semiColonDelimitedfactory.canCreateFromSample(explanation, SEMI_COLON_DELIMITED_SAMPLE));
|
||||
}
|
||||
|
||||
public void testCanCreateSemiColonDelimitedFromSampleGivenPipeDelimited() {
|
||||
|
||||
assertFalse(semiColonDelimitedfactory.canCreateFromSample(explanation, PIPE_DELIMITED_SAMPLE));
|
||||
}
|
||||
|
||||
public void testCanCreateSemiColonDelimitedFromSampleGivenText() {
|
||||
|
||||
assertFalse(semiColonDelimitedfactory.canCreateFromSample(explanation, TEXT_SAMPLE));
|
||||
}
|
||||
|
||||
// Pipe delimited - no need to check JSON, XML, CSV, TSV or semi-colon delimited
|
||||
// values because they come earlier in the order we check formats
|
||||
|
||||
public void testCanCreatePipeDelimitedFromSampleGivenPipeDelimited() {
|
||||
|
||||
assertTrue(pipeDelimitedFactory.canCreateFromSample(explanation, PIPE_DELIMITED_SAMPLE));
|
||||
}
|
||||
|
||||
public void testCanCreatePipeDelimitedFromSampleGivenText() {
|
||||
|
||||
assertFalse(pipeDelimitedFactory.canCreateFromSample(explanation, TEXT_SAMPLE));
|
||||
}
|
||||
}
|
|
@ -12,27 +12,27 @@ import java.io.IOException;
|
|||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
|
||||
import static org.elasticsearch.xpack.ml.logstructurefinder.SeparatedValuesLogStructureFinder.levenshteinFieldwiseCompareRows;
|
||||
import static org.elasticsearch.xpack.ml.logstructurefinder.SeparatedValuesLogStructureFinder.levenshteinDistance;
|
||||
import static org.elasticsearch.xpack.ml.logstructurefinder.DelimitedLogStructureFinder.levenshteinFieldwiseCompareRows;
|
||||
import static org.elasticsearch.xpack.ml.logstructurefinder.DelimitedLogStructureFinder.levenshteinDistance;
|
||||
import static org.hamcrest.Matchers.arrayContaining;
|
||||
|
||||
public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase {
|
||||
public class DelimitedLogStructureFinderTests extends LogStructureTestCase {
|
||||
|
||||
private LogStructureFinderFactory factory = new CsvLogStructureFinderFactory();
|
||||
private LogStructureFinderFactory csvFactory = new DelimitedLogStructureFinderFactory(',', 2, false);
|
||||
|
||||
public void testCreateConfigsGivenCompleteCsv() throws Exception {
|
||||
String sample = "time,message\n" +
|
||||
"2018-05-17T13:41:23,hello\n" +
|
||||
"2018-05-17T13:41:32,hello again\n";
|
||||
assertTrue(factory.canCreateFromSample(explanation, sample));
|
||||
assertTrue(csvFactory.canCreateFromSample(explanation, sample));
|
||||
|
||||
String charset = randomFrom(POSSIBLE_CHARSETS);
|
||||
Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
|
||||
LogStructureFinder structureFinder = factory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
|
||||
LogStructureFinder structureFinder = csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
|
||||
|
||||
LogStructure structure = structureFinder.getStructure();
|
||||
|
||||
assertEquals(LogStructure.Format.CSV, structure.getFormat());
|
||||
assertEquals(LogStructure.Format.DELIMITED, structure.getFormat());
|
||||
assertEquals(charset, structure.getCharset());
|
||||
if (hasByteOrderMarker == null) {
|
||||
assertNull(structure.getHasByteOrderMarker());
|
||||
|
@ -41,7 +41,7 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
|
|||
}
|
||||
assertEquals("^\"?time\"?,\"?message\"?", structure.getExcludeLinesPattern());
|
||||
assertEquals("^\"?\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}", structure.getMultilineStartPattern());
|
||||
assertEquals(Character.valueOf(','), structure.getSeparator());
|
||||
assertEquals(Character.valueOf(','), structure.getDelimiter());
|
||||
assertTrue(structure.getHasHeaderRow());
|
||||
assertNull(structure.getShouldTrimFields());
|
||||
assertEquals(Arrays.asList("time", "message"), structure.getInputFields());
|
||||
|
@ -55,15 +55,15 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
|
|||
"\"hello\n" +
|
||||
"world\",2018-05-17T13:41:23,1\n" +
|
||||
"\"hello again\n"; // note that this last record is truncated
|
||||
assertTrue(factory.canCreateFromSample(explanation, sample));
|
||||
assertTrue(csvFactory.canCreateFromSample(explanation, sample));
|
||||
|
||||
String charset = randomFrom(POSSIBLE_CHARSETS);
|
||||
Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
|
||||
LogStructureFinder structureFinder = factory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
|
||||
LogStructureFinder structureFinder = csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
|
||||
|
||||
LogStructure structure = structureFinder.getStructure();
|
||||
|
||||
assertEquals(LogStructure.Format.CSV, structure.getFormat());
|
||||
assertEquals(LogStructure.Format.DELIMITED, structure.getFormat());
|
||||
assertEquals(charset, structure.getCharset());
|
||||
if (hasByteOrderMarker == null) {
|
||||
assertNull(structure.getHasByteOrderMarker());
|
||||
|
@ -72,7 +72,7 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
|
|||
}
|
||||
assertEquals("^\"?message\"?,\"?time\"?,\"?count\"?", structure.getExcludeLinesPattern());
|
||||
assertEquals("^.*?,\"?\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}", structure.getMultilineStartPattern());
|
||||
assertEquals(Character.valueOf(','), structure.getSeparator());
|
||||
assertEquals(Character.valueOf(','), structure.getDelimiter());
|
||||
assertTrue(structure.getHasHeaderRow());
|
||||
assertNull(structure.getShouldTrimFields());
|
||||
assertEquals(Arrays.asList("message", "time", "count"), structure.getInputFields());
|
||||
|
@ -88,15 +88,15 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
|
|||
"2,2016-12-31 15:15:01,2016-12-31 15:15:09,1,.00,1,N,264,264,2,1,0,0.5,0,0,0.3,1.8,,\n" +
|
||||
"1,2016-12-01 00:00:01,2016-12-01 00:10:22,1,1.60,1,N,163,143,2,9,0.5,0.5,0,0,0.3,10.3,,\n" +
|
||||
"1,2016-12-01 00:00:01,2016-12-01 00:11:01,1,1.40,1,N,164,229,1,9,0.5,0.5,2.05,0,0.3,12.35,,\n";
|
||||
assertTrue(factory.canCreateFromSample(explanation, sample));
|
||||
assertTrue(csvFactory.canCreateFromSample(explanation, sample));
|
||||
|
||||
String charset = randomFrom(POSSIBLE_CHARSETS);
|
||||
Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
|
||||
LogStructureFinder structureFinder = factory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
|
||||
LogStructureFinder structureFinder = csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
|
||||
|
||||
LogStructure structure = structureFinder.getStructure();
|
||||
|
||||
assertEquals(LogStructure.Format.CSV, structure.getFormat());
|
||||
assertEquals(LogStructure.Format.DELIMITED, structure.getFormat());
|
||||
assertEquals(charset, structure.getCharset());
|
||||
if (hasByteOrderMarker == null) {
|
||||
assertNull(structure.getHasByteOrderMarker());
|
||||
|
@ -108,7 +108,7 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
|
|||
"\"?extra\"?,\"?mta_tax\"?,\"?tip_amount\"?,\"?tolls_amount\"?,\"?improvement_surcharge\"?,\"?total_amount\"?,\"?\"?,\"?\"?",
|
||||
structure.getExcludeLinesPattern());
|
||||
assertEquals("^.*?,\"?\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}", structure.getMultilineStartPattern());
|
||||
assertEquals(Character.valueOf(','), structure.getSeparator());
|
||||
assertEquals(Character.valueOf(','), structure.getDelimiter());
|
||||
assertTrue(structure.getHasHeaderRow());
|
||||
assertNull(structure.getShouldTrimFields());
|
||||
assertEquals(Arrays.asList("VendorID", "tpep_pickup_datetime", "tpep_dropoff_datetime", "passenger_count", "trip_distance",
|
||||
|
@ -126,15 +126,15 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
|
|||
"2,2016-12-31 15:15:01,2016-12-31 15:15:09,1,.00,1,N,264,264,2,1,0,0.5,0,0,0.3,1.8,,\n" +
|
||||
"1,2016-12-01 00:00:01,2016-12-01 00:10:22,1,1.60,1,N,163,143,2,9,0.5,0.5,0,0,0.3,10.3,,\n" +
|
||||
"1,2016-12-01 00:00:01,2016-12-01 00:11:01,1,1.40,1,N,164,229,1,9,0.5,0.5,2.05,0,0.3,12.35,,\n";
|
||||
assertTrue(factory.canCreateFromSample(explanation, sample));
|
||||
assertTrue(csvFactory.canCreateFromSample(explanation, sample));
|
||||
|
||||
String charset = randomFrom(POSSIBLE_CHARSETS);
|
||||
Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
|
||||
LogStructureFinder structureFinder = factory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
|
||||
LogStructureFinder structureFinder = csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
|
||||
|
||||
LogStructure structure = structureFinder.getStructure();
|
||||
|
||||
assertEquals(LogStructure.Format.CSV, structure.getFormat());
|
||||
assertEquals(LogStructure.Format.DELIMITED, structure.getFormat());
|
||||
assertEquals(charset, structure.getCharset());
|
||||
if (hasByteOrderMarker == null) {
|
||||
assertNull(structure.getHasByteOrderMarker());
|
||||
|
@ -146,7 +146,7 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
|
|||
"\"?extra\"?,\"?mta_tax\"?,\"?tip_amount\"?,\"?tolls_amount\"?,\"?improvement_surcharge\"?,\"?total_amount\"?",
|
||||
structure.getExcludeLinesPattern());
|
||||
assertEquals("^.*?,\"?\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}", structure.getMultilineStartPattern());
|
||||
assertEquals(Character.valueOf(','), structure.getSeparator());
|
||||
assertEquals(Character.valueOf(','), structure.getDelimiter());
|
||||
assertTrue(structure.getHasHeaderRow());
|
||||
assertNull(structure.getShouldTrimFields());
|
||||
assertEquals(Arrays.asList("VendorID", "tpep_pickup_datetime", "tpep_dropoff_datetime", "passenger_count", "trip_distance",
|
||||
|
@ -161,15 +161,15 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
|
|||
String sample = "\"pos_id\",\"trip_id\",\"latitude\",\"longitude\",\"altitude\",\"timestamp\"\n" +
|
||||
"\"1\",\"3\",\"4703.7815\",\"1527.4713\",\"359.9\",\"2017-01-19 16:19:04.742113\"\n" +
|
||||
"\"2\",\"3\",\"4703.7815\",\"1527.4714\",\"359.9\",\"2017-01-19 16:19:05.741890\"\n";
|
||||
assertTrue(factory.canCreateFromSample(explanation, sample));
|
||||
assertTrue(csvFactory.canCreateFromSample(explanation, sample));
|
||||
|
||||
String charset = randomFrom(POSSIBLE_CHARSETS);
|
||||
Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
|
||||
LogStructureFinder structureFinder = factory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
|
||||
LogStructureFinder structureFinder = csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
|
||||
|
||||
LogStructure structure = structureFinder.getStructure();
|
||||
|
||||
assertEquals(LogStructure.Format.CSV, structure.getFormat());
|
||||
assertEquals(LogStructure.Format.DELIMITED, structure.getFormat());
|
||||
assertEquals(charset, structure.getCharset());
|
||||
if (hasByteOrderMarker == null) {
|
||||
assertNull(structure.getHasByteOrderMarker());
|
||||
|
@ -179,7 +179,7 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
|
|||
assertEquals("^\"?pos_id\"?,\"?trip_id\"?,\"?latitude\"?,\"?longitude\"?,\"?altitude\"?,\"?timestamp\"?",
|
||||
structure.getExcludeLinesPattern());
|
||||
assertNull(structure.getMultilineStartPattern());
|
||||
assertEquals(Character.valueOf(','), structure.getSeparator());
|
||||
assertEquals(Character.valueOf(','), structure.getDelimiter());
|
||||
assertTrue(structure.getHasHeaderRow());
|
||||
assertNull(structure.getShouldTrimFields());
|
||||
assertEquals(Arrays.asList("pos_id", "trip_id", "latitude", "longitude", "altitude", "timestamp"), structure.getInputFields());
|
||||
|
@ -195,8 +195,8 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
|
|||
"2014-06-23 00:00:01Z,JBU,877.5927,farequote\n" +
|
||||
"2014-06-23 00:00:01Z,KLM,1355.4812,farequote\n";
|
||||
|
||||
Tuple<Boolean, String[]> header = SeparatedValuesLogStructureFinder.findHeaderFromSample(explanation,
|
||||
SeparatedValuesLogStructureFinder.readRows(withHeader, CsvPreference.EXCEL_PREFERENCE).v1());
|
||||
Tuple<Boolean, String[]> header = DelimitedLogStructureFinder.findHeaderFromSample(explanation,
|
||||
DelimitedLogStructureFinder.readRows(withHeader, CsvPreference.EXCEL_PREFERENCE).v1());
|
||||
|
||||
assertTrue(header.v1());
|
||||
assertThat(header.v2(), arrayContaining("time", "airline", "responsetime", "sourcetype"));
|
||||
|
@ -208,8 +208,8 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
|
|||
"2014-06-23 00:00:01Z,JBU,877.5927,farequote\n" +
|
||||
"2014-06-23 00:00:01Z,KLM,1355.4812,farequote\n";
|
||||
|
||||
Tuple<Boolean, String[]> header = SeparatedValuesLogStructureFinder.findHeaderFromSample(explanation,
|
||||
SeparatedValuesLogStructureFinder.readRows(withoutHeader, CsvPreference.EXCEL_PREFERENCE).v1());
|
||||
Tuple<Boolean, String[]> header = DelimitedLogStructureFinder.findHeaderFromSample(explanation,
|
||||
DelimitedLogStructureFinder.readRows(withoutHeader, CsvPreference.EXCEL_PREFERENCE).v1());
|
||||
|
||||
assertFalse(header.v1());
|
||||
assertThat(header.v2(), arrayContaining("column1", "column2", "column3", "column4"));
|
||||
|
@ -251,43 +251,43 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
|
|||
|
||||
public void testLineHasUnescapedQuote() {
|
||||
|
||||
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a,b,c", CsvPreference.EXCEL_PREFERENCE));
|
||||
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\",b,c", CsvPreference.EXCEL_PREFERENCE));
|
||||
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a,b\",c", CsvPreference.EXCEL_PREFERENCE));
|
||||
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a,b,c\"", CsvPreference.EXCEL_PREFERENCE));
|
||||
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a,\"b\",c", CsvPreference.EXCEL_PREFERENCE));
|
||||
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a,b,\"c\"", CsvPreference.EXCEL_PREFERENCE));
|
||||
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a,\"b\"\"\",c", CsvPreference.EXCEL_PREFERENCE));
|
||||
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a,b,\"c\"\"\"", CsvPreference.EXCEL_PREFERENCE));
|
||||
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"\"\"a\",b,c", CsvPreference.EXCEL_PREFERENCE));
|
||||
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\"\"\",b,c", CsvPreference.EXCEL_PREFERENCE));
|
||||
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a,\"\"b\",c", CsvPreference.EXCEL_PREFERENCE));
|
||||
assertTrue(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("between\"words,b,c", CsvPreference.EXCEL_PREFERENCE));
|
||||
assertTrue(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("x and \"y\",b,c", CsvPreference.EXCEL_PREFERENCE));
|
||||
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a,b,c", CsvPreference.EXCEL_PREFERENCE));
|
||||
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a\",b,c", CsvPreference.EXCEL_PREFERENCE));
|
||||
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a,b\",c", CsvPreference.EXCEL_PREFERENCE));
|
||||
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a,b,c\"", CsvPreference.EXCEL_PREFERENCE));
|
||||
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a,\"b\",c", CsvPreference.EXCEL_PREFERENCE));
|
||||
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a,b,\"c\"", CsvPreference.EXCEL_PREFERENCE));
|
||||
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a,\"b\"\"\",c", CsvPreference.EXCEL_PREFERENCE));
|
||||
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a,b,\"c\"\"\"", CsvPreference.EXCEL_PREFERENCE));
|
||||
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"\"\"a\",b,c", CsvPreference.EXCEL_PREFERENCE));
|
||||
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a\"\"\",b,c", CsvPreference.EXCEL_PREFERENCE));
|
||||
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a,\"\"b\",c", CsvPreference.EXCEL_PREFERENCE));
|
||||
assertTrue(DelimitedLogStructureFinder.lineHasUnescapedQuote("between\"words,b,c", CsvPreference.EXCEL_PREFERENCE));
|
||||
assertTrue(DelimitedLogStructureFinder.lineHasUnescapedQuote("x and \"y\",b,c", CsvPreference.EXCEL_PREFERENCE));
|
||||
|
||||
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a\tb\tc", CsvPreference.TAB_PREFERENCE));
|
||||
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\"\tb\tc", CsvPreference.TAB_PREFERENCE));
|
||||
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\tb\"\tc", CsvPreference.TAB_PREFERENCE));
|
||||
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\tb\tc\"", CsvPreference.TAB_PREFERENCE));
|
||||
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a\t\"b\"\tc", CsvPreference.TAB_PREFERENCE));
|
||||
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a\tb\t\"c\"", CsvPreference.TAB_PREFERENCE));
|
||||
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a\t\"b\"\"\"\tc", CsvPreference.TAB_PREFERENCE));
|
||||
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a\tb\t\"c\"\"\"", CsvPreference.TAB_PREFERENCE));
|
||||
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"\"\"a\"\tb\tc", CsvPreference.TAB_PREFERENCE));
|
||||
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\"\"\"\tb\tc", CsvPreference.TAB_PREFERENCE));
|
||||
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\t\"\"b\"\tc", CsvPreference.TAB_PREFERENCE));
|
||||
assertTrue(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("between\"words\tb\tc", CsvPreference.TAB_PREFERENCE));
|
||||
assertTrue(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("x and \"y\"\tb\tc", CsvPreference.TAB_PREFERENCE));
|
||||
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a\tb\tc", CsvPreference.TAB_PREFERENCE));
|
||||
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a\"\tb\tc", CsvPreference.TAB_PREFERENCE));
|
||||
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a\tb\"\tc", CsvPreference.TAB_PREFERENCE));
|
||||
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a\tb\tc\"", CsvPreference.TAB_PREFERENCE));
|
||||
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a\t\"b\"\tc", CsvPreference.TAB_PREFERENCE));
|
||||
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a\tb\t\"c\"", CsvPreference.TAB_PREFERENCE));
|
||||
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a\t\"b\"\"\"\tc", CsvPreference.TAB_PREFERENCE));
|
||||
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a\tb\t\"c\"\"\"", CsvPreference.TAB_PREFERENCE));
|
||||
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"\"\"a\"\tb\tc", CsvPreference.TAB_PREFERENCE));
|
||||
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a\"\"\"\tb\tc", CsvPreference.TAB_PREFERENCE));
|
||||
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a\t\"\"b\"\tc", CsvPreference.TAB_PREFERENCE));
|
||||
assertTrue(DelimitedLogStructureFinder.lineHasUnescapedQuote("between\"words\tb\tc", CsvPreference.TAB_PREFERENCE));
|
||||
assertTrue(DelimitedLogStructureFinder.lineHasUnescapedQuote("x and \"y\"\tb\tc", CsvPreference.TAB_PREFERENCE));
|
||||
}
|
||||
|
||||
public void testRowContainsDuplicateNonEmptyValues() {
|
||||
|
||||
assertFalse(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Collections.singletonList("a")));
|
||||
assertFalse(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Collections.singletonList("")));
|
||||
assertFalse(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "b", "c")));
|
||||
assertTrue(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "b", "a")));
|
||||
assertTrue(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "b", "b")));
|
||||
assertFalse(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "", "")));
|
||||
assertFalse(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("", "a", "")));
|
||||
assertFalse(DelimitedLogStructureFinder.rowContainsDuplicateNonEmptyValues(Collections.singletonList("a")));
|
||||
assertFalse(DelimitedLogStructureFinder.rowContainsDuplicateNonEmptyValues(Collections.singletonList("")));
|
||||
assertFalse(DelimitedLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "b", "c")));
|
||||
assertTrue(DelimitedLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "b", "a")));
|
||||
assertTrue(DelimitedLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "b", "b")));
|
||||
assertFalse(DelimitedLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "", "")));
|
||||
assertFalse(DelimitedLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("", "a", "")));
|
||||
}
|
||||
}
|
|
@ -29,14 +29,14 @@ public class JsonLogStructureFinderFactoryTests extends LogStructureTestCase {
|
|||
assertFalse(factory.canCreateFromSample(explanation, TSV_SAMPLE));
|
||||
}
|
||||
|
||||
public void testCanCreateFromSampleGivenSemiColonSeparatedValues() {
|
||||
public void testCanCreateFromSampleGivenSemiColonDelimited() {
|
||||
|
||||
assertFalse(factory.canCreateFromSample(explanation, SEMI_COLON_SEPARATED_VALUES_SAMPLE));
|
||||
assertFalse(factory.canCreateFromSample(explanation, SEMI_COLON_DELIMITED_SAMPLE));
|
||||
}
|
||||
|
||||
public void testCanCreateFromSampleGivenPipeSeparatedValues() {
|
||||
public void testCanCreateFromSampleGivenPipeDelimited() {
|
||||
|
||||
assertFalse(factory.canCreateFromSample(explanation, PIPE_SEPARATED_VALUES_SAMPLE));
|
||||
assertFalse(factory.canCreateFromSample(explanation, PIPE_DELIMITED_SAMPLE));
|
||||
}
|
||||
|
||||
public void testCanCreateFromSampleGivenText() {
|
||||
|
|
|
@ -29,7 +29,7 @@ public class JsonLogStructureFinderTests extends LogStructureTestCase {
|
|||
}
|
||||
assertNull(structure.getExcludeLinesPattern());
|
||||
assertNull(structure.getMultilineStartPattern());
|
||||
assertNull(structure.getSeparator());
|
||||
assertNull(structure.getDelimiter());
|
||||
assertNull(structure.getHasHeaderRow());
|
||||
assertNull(structure.getShouldTrimFields());
|
||||
assertNull(structure.getGrokPattern());
|
||||
|
|
|
@ -61,7 +61,7 @@ public class LogStructureFinderManagerTests extends LogStructureTestCase {
|
|||
public void testMakeBestStructureGivenCsv() throws Exception {
|
||||
assertThat(structureFinderManager.makeBestStructureFinder(explanation, "time,message\n" +
|
||||
"2018-05-17T13:41:23,hello\n", StandardCharsets.UTF_8.name(), randomBoolean()),
|
||||
instanceOf(SeparatedValuesLogStructureFinder.class));
|
||||
instanceOf(DelimitedLogStructureFinder.class));
|
||||
}
|
||||
|
||||
public void testMakeBestStructureGivenText() throws Exception {
|
||||
|
|
|
@ -34,14 +34,14 @@ public abstract class LogStructureTestCase extends ESTestCase {
|
|||
"\"level\":\"INFO\",\"pid\":42,\"thread\":\"0x7fff7d2a8000\",\"message\":\"message 2\",\"class\":\"ml\"," +
|
||||
"\"method\":\"core::SomeNoiseMaker\",\"file\":\"Noisemaker.cc\",\"line\":333}\n";
|
||||
|
||||
protected static final String PIPE_SEPARATED_VALUES_SAMPLE = "2018-01-06 16:56:14.295748|INFO |VirtualServer |1 |" +
|
||||
protected static final String PIPE_DELIMITED_SAMPLE = "2018-01-06 16:56:14.295748|INFO |VirtualServer |1 |" +
|
||||
"listening on 0.0.0.0:9987, :::9987\n" +
|
||||
"2018-01-06 17:19:44.465252|INFO |VirtualServer |1 |client " +
|
||||
"'User1'(id:2) changed default admin channelgroup to 'Guest'(id:8)\n" +
|
||||
"2018-01-06 17:21:25.764368|INFO |VirtualServer |1 |client " +
|
||||
"'User1'(id:2) was added to channelgroup 'Channel Admin'(id:5) by client 'User1'(id:2) in channel 'Default Channel'(id:1)";
|
||||
|
||||
protected static final String SEMI_COLON_SEPARATED_VALUES_SAMPLE = "\"pos_id\";\"trip_id\";\"latitude\";\"longitude\";\"altitude\";" +
|
||||
protected static final String SEMI_COLON_DELIMITED_SAMPLE = "\"pos_id\";\"trip_id\";\"latitude\";\"longitude\";\"altitude\";" +
|
||||
"\"timestamp\"\n" +
|
||||
"\"1\";\"3\";\"4703.7815\";\"1527.4713\";\"359.9\";\"2017-01-19 16:19:04.742113\"\n" +
|
||||
"\"2\";\"3\";\"4703.7815\";\"1527.4714\";\"359.9\";\"2017-01-19 16:19:05.741890\"\n" +
|
||||
|
|
|
@ -43,14 +43,12 @@ public class LogStructureTests extends AbstractXContentTestCase<LogStructure> {
|
|||
builder.setExcludeLinesPattern(randomAlphaOfLength(100));
|
||||
}
|
||||
|
||||
if (format.isSeparatedValues() || (format.supportsNesting() && randomBoolean())) {
|
||||
if (format == LogStructure.Format.DELIMITED || (format.supportsNesting() && randomBoolean())) {
|
||||
builder.setInputFields(Arrays.asList(generateRandomStringArray(10, 10, false, false)));
|
||||
}
|
||||
if (format.isSeparatedValues()) {
|
||||
if (format == LogStructure.Format.DELIMITED) {
|
||||
builder.setHasHeaderRow(randomBoolean());
|
||||
if (rarely()) {
|
||||
builder.setSeparator(format.separator());
|
||||
}
|
||||
builder.setDelimiter(randomFrom(',', '\t', ';', '|'));
|
||||
}
|
||||
if (format.isSemiStructured()) {
|
||||
builder.setGrokPattern(randomAlphaOfLength(100));
|
||||
|
|
|
@ -1,23 +0,0 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License;
|
||||
* you may not use this file except in compliance with the Elastic License.
|
||||
*/
|
||||
package org.elasticsearch.xpack.ml.logstructurefinder;
|
||||
|
||||
public class PipeSeparatedValuesLogStructureFinderFactoryTests extends LogStructureTestCase {
|
||||
|
||||
private LogStructureFinderFactory factory = new PipeSeparatedValuesLogStructureFinderFactory();
|
||||
|
||||
// No need to check JSON, XML, CSV, TSV or semi-colon separated values because they come earlier in the order we check formats
|
||||
|
||||
public void testCanCreateFromSampleGivenPipeSeparatedValues() {
|
||||
|
||||
assertTrue(factory.canCreateFromSample(explanation, PIPE_SEPARATED_VALUES_SAMPLE));
|
||||
}
|
||||
|
||||
public void testCanCreateFromSampleGivenText() {
|
||||
|
||||
assertFalse(factory.canCreateFromSample(explanation, TEXT_SAMPLE));
|
||||
}
|
||||
}
|
|
@ -1,28 +0,0 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License;
|
||||
* you may not use this file except in compliance with the Elastic License.
|
||||
*/
|
||||
package org.elasticsearch.xpack.ml.logstructurefinder;
|
||||
|
||||
public class SemiColonSeparatedValuesLogStructureFinderFactoryTests extends LogStructureTestCase {
|
||||
|
||||
private LogStructureFinderFactory factory = new SemiColonSeparatedValuesLogStructureFinderFactory();
|
||||
|
||||
// No need to check JSON, XML, CSV or TSV because they come earlier in the order we check formats
|
||||
|
||||
public void testCanCreateFromSampleGivenSemiColonSeparatedValues() {
|
||||
|
||||
assertTrue(factory.canCreateFromSample(explanation, SEMI_COLON_SEPARATED_VALUES_SAMPLE));
|
||||
}
|
||||
|
||||
public void testCanCreateFromSampleGivenPipeSeparatedValues() {
|
||||
|
||||
assertFalse(factory.canCreateFromSample(explanation, PIPE_SEPARATED_VALUES_SAMPLE));
|
||||
}
|
||||
|
||||
public void testCanCreateFromSampleGivenText() {
|
||||
|
||||
assertFalse(factory.canCreateFromSample(explanation, TEXT_SAMPLE));
|
||||
}
|
||||
}
|
|
@ -9,8 +9,8 @@ public class TextLogStructureFinderFactoryTests extends LogStructureTestCase {
|
|||
|
||||
private LogStructureFinderFactory factory = new TextLogStructureFinderFactory();
|
||||
|
||||
// No need to check JSON, XML, CSV, TSV, semi-colon separated values or pipe
|
||||
// separated values because they come earlier in the order we check formats
|
||||
// No need to check JSON, XML, CSV, TSV, semi-colon delimited values or pipe
|
||||
// delimited values because they come earlier in the order we check formats
|
||||
|
||||
public void testCanCreateFromSampleGivenText() {
|
||||
|
||||
|
|
|
@ -34,7 +34,7 @@ public class TextLogStructureFinderTests extends LogStructureTestCase {
|
|||
}
|
||||
assertNull(structure.getExcludeLinesPattern());
|
||||
assertEquals("^\\[\\b\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}", structure.getMultilineStartPattern());
|
||||
assertNull(structure.getSeparator());
|
||||
assertNull(structure.getDelimiter());
|
||||
assertNull(structure.getHasHeaderRow());
|
||||
assertNull(structure.getShouldTrimFields());
|
||||
assertEquals("\\[%{TIMESTAMP_ISO8601:timestamp}\\]\\[%{LOGLEVEL:loglevel} \\]\\[.*", structure.getGrokPattern());
|
||||
|
|
|
@ -1,33 +0,0 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License;
|
||||
* you may not use this file except in compliance with the Elastic License.
|
||||
*/
|
||||
package org.elasticsearch.xpack.ml.logstructurefinder;
|
||||
|
||||
public class TsvLogStructureFinderFactoryTests extends LogStructureTestCase {
|
||||
|
||||
private LogStructureFinderFactory factory = new TsvLogStructureFinderFactory();
|
||||
|
||||
// No need to check JSON, XML or CSV because they come earlier in the order we check formats
|
||||
|
||||
public void testCanCreateFromSampleGivenTsv() {
|
||||
|
||||
assertTrue(factory.canCreateFromSample(explanation, TSV_SAMPLE));
|
||||
}
|
||||
|
||||
public void testCanCreateFromSampleGivenSemiColonSeparatedValues() {
|
||||
|
||||
assertFalse(factory.canCreateFromSample(explanation, SEMI_COLON_SEPARATED_VALUES_SAMPLE));
|
||||
}
|
||||
|
||||
public void testCanCreateFromSampleGivenPipeSeparatedValues() {
|
||||
|
||||
assertFalse(factory.canCreateFromSample(explanation, PIPE_SEPARATED_VALUES_SAMPLE));
|
||||
}
|
||||
|
||||
public void testCanCreateFromSampleGivenText() {
|
||||
|
||||
assertFalse(factory.canCreateFromSample(explanation, TEXT_SAMPLE));
|
||||
}
|
||||
}
|
|
@ -26,14 +26,14 @@ public class XmlLogStructureFinderFactoryTests extends LogStructureTestCase {
|
|||
assertFalse(factory.canCreateFromSample(explanation, TSV_SAMPLE));
|
||||
}
|
||||
|
||||
public void testCanCreateFromSampleGivenSemiColonSeparatedValues() {
|
||||
public void testCanCreateFromSampleGivenSemiColonDelimited() {
|
||||
|
||||
assertFalse(factory.canCreateFromSample(explanation, SEMI_COLON_SEPARATED_VALUES_SAMPLE));
|
||||
assertFalse(factory.canCreateFromSample(explanation, SEMI_COLON_DELIMITED_SAMPLE));
|
||||
}
|
||||
|
||||
public void testCanCreateFromSampleGivenPipeSeparatedValues() {
|
||||
public void testCanCreateFromSampleGivenPipeDelimited() {
|
||||
|
||||
assertFalse(factory.canCreateFromSample(explanation, PIPE_SEPARATED_VALUES_SAMPLE));
|
||||
assertFalse(factory.canCreateFromSample(explanation, PIPE_DELIMITED_SAMPLE));
|
||||
}
|
||||
|
||||
public void testCanCreateFromSampleGivenText() {
|
||||
|
|
|
@ -29,7 +29,7 @@ public class XmlLogStructureFinderTests extends LogStructureTestCase {
|
|||
}
|
||||
assertNull(structure.getExcludeLinesPattern());
|
||||
assertEquals("^\\s*<log4j:event", structure.getMultilineStartPattern());
|
||||
assertNull(structure.getSeparator());
|
||||
assertNull(structure.getDelimiter());
|
||||
assertNull(structure.getHasHeaderRow());
|
||||
assertNull(structure.getShouldTrimFields());
|
||||
assertNull(structure.getGrokPattern());
|
||||
|
|
Loading…
Reference in New Issue