diff --git a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/CsvLogStructureFinderFactory.java b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/CsvLogStructureFinderFactory.java deleted file mode 100644 index cb9e6537252..00000000000 --- a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/CsvLogStructureFinderFactory.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License; - * you may not use this file except in compliance with the Elastic License. - */ -package org.elasticsearch.xpack.ml.logstructurefinder; - -import org.supercsv.prefs.CsvPreference; - -import java.io.IOException; -import java.util.List; - -public class CsvLogStructureFinderFactory implements LogStructureFinderFactory { - - /** - * Rules are: - * - The file must be valid CSV - * - It must contain at least two complete records - * - There must be at least two fields per record (otherwise files with no commas could be treated as CSV!) - * - Every CSV record except the last must have the same number of fields - * The reason the last record is allowed to have fewer fields than the others is that - * it could have been truncated when the file was sampled. 
- */ - @Override - public boolean canCreateFromSample(List explanation, String sample) { - return SeparatedValuesLogStructureFinder.canCreateFromSample(explanation, sample, 2, CsvPreference.EXCEL_PREFERENCE, "CSV"); - } - - @Override - public LogStructureFinder createFromSample(List explanation, String sample, String charsetName, Boolean hasByteOrderMarker) - throws IOException { - return SeparatedValuesLogStructureFinder.makeSeparatedValuesLogStructureFinder(explanation, sample, charsetName, hasByteOrderMarker, - CsvPreference.EXCEL_PREFERENCE, false); - } -} diff --git a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/SeparatedValuesLogStructureFinder.java b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/DelimitedLogStructureFinder.java similarity index 97% rename from x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/SeparatedValuesLogStructureFinder.java rename to x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/DelimitedLogStructureFinder.java index fd9d34096b2..2f7bb41d0ba 100644 --- a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/SeparatedValuesLogStructureFinder.java +++ b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/DelimitedLogStructureFinder.java @@ -29,17 +29,16 @@ import java.util.regex.Pattern; import java.util.stream.Collectors; import java.util.stream.IntStream; -public class SeparatedValuesLogStructureFinder implements LogStructureFinder { +public class DelimitedLogStructureFinder implements LogStructureFinder { private static final int MAX_LEVENSHTEIN_COMPARISONS = 100; private final List sampleMessages; private final LogStructure structure; - static SeparatedValuesLogStructureFinder makeSeparatedValuesLogStructureFinder(List explanation, String 
sample, - String charsetName, Boolean hasByteOrderMarker, - CsvPreference csvPreference, boolean trimFields) - throws IOException { + static DelimitedLogStructureFinder makeDelimitedLogStructureFinder(List explanation, String sample, String charsetName, + Boolean hasByteOrderMarker, CsvPreference csvPreference, + boolean trimFields) throws IOException { Tuple>, List> parsed = readRows(sample, csvPreference); List> rows = parsed.v1(); @@ -73,13 +72,14 @@ public class SeparatedValuesLogStructureFinder implements LogStructureFinder { String preamble = Pattern.compile("\n").splitAsStream(sample).limit(lineNumbers.get(1)).collect(Collectors.joining("\n", "", "\n")); char delimiter = (char) csvPreference.getDelimiterChar(); - LogStructure.Builder structureBuilder = new LogStructure.Builder(LogStructure.Format.fromSeparator(delimiter)) + LogStructure.Builder structureBuilder = new LogStructure.Builder(LogStructure.Format.DELIMITED) .setCharset(charsetName) .setHasByteOrderMarker(hasByteOrderMarker) .setSampleStart(preamble) .setNumLinesAnalyzed(lineNumbers.get(lineNumbers.size() - 1)) .setNumMessagesAnalyzed(sampleRecords.size()) .setHasHeaderRow(isHeaderInFile) + .setDelimiter(delimiter) .setInputFields(Arrays.stream(headerWithNamedBlanks).collect(Collectors.toList())); if (trimFields) { @@ -131,10 +131,10 @@ public class SeparatedValuesLogStructureFinder implements LogStructureFinder { .setExplanation(explanation) .build(); - return new SeparatedValuesLogStructureFinder(sampleMessages, structure); + return new DelimitedLogStructureFinder(sampleMessages, structure); } - private SeparatedValuesLogStructureFinder(List sampleMessages, LogStructure structure) { + private DelimitedLogStructureFinder(List sampleMessages, LogStructure structure) { this.sampleMessages = Collections.unmodifiableList(sampleMessages); this.structure = structure; } diff --git 
a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/DelimitedLogStructureFinderFactory.java b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/DelimitedLogStructureFinderFactory.java new file mode 100644 index 00000000000..3e4c3ea225c --- /dev/null +++ b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/DelimitedLogStructureFinderFactory.java @@ -0,0 +1,57 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.logstructurefinder; + +import org.supercsv.prefs.CsvPreference; + +import java.io.IOException; +import java.util.List; +import java.util.Locale; + +public class DelimitedLogStructureFinderFactory implements LogStructureFinderFactory { + + private final CsvPreference csvPreference; + private final int minFieldsPerRow; + private final boolean trimFields; + + DelimitedLogStructureFinderFactory(char delimiter, int minFieldsPerRow, boolean trimFields) { + csvPreference = new CsvPreference.Builder('"', delimiter, "\n").build(); + this.minFieldsPerRow = minFieldsPerRow; + this.trimFields = trimFields; + } + + /** + * Rules are: + * - It must contain at least two complete records + * - There must be a minimum number of fields per record (otherwise files with no delimiter characters could be treated as delimited!) + * - Every record except the last must have the same number of fields + * The reason the last record is allowed to have fewer fields than the others is that + * it could have been truncated when the file was sampled. 
+ */ + @Override + public boolean canCreateFromSample(List explanation, String sample) { + String formatName; + switch ((char) csvPreference.getDelimiterChar()) { + case ',': + formatName = "CSV"; + break; + case '\t': + formatName = "TSV"; + break; + default: + formatName = Character.getName(csvPreference.getDelimiterChar()).toLowerCase(Locale.ROOT) + " delimited values"; + break; + } + return DelimitedLogStructureFinder.canCreateFromSample(explanation, sample, minFieldsPerRow, csvPreference, formatName); + } + + @Override + public LogStructureFinder createFromSample(List explanation, String sample, String charsetName, Boolean hasByteOrderMarker) + throws IOException { + return DelimitedLogStructureFinder.makeDelimitedLogStructureFinder(explanation, sample, charsetName, hasByteOrderMarker, + csvPreference, trimFields); + } +} diff --git a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructure.java b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructure.java index 64a00d20899..ea8fe37e62f 100644 --- a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructure.java +++ b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructure.java @@ -27,37 +27,14 @@ public class LogStructure implements ToXContentObject { public enum Format { - JSON, XML, CSV, TSV, SEMI_COLON_SEPARATED_VALUES, PIPE_SEPARATED_VALUES, SEMI_STRUCTURED_TEXT; - - public Character separator() { - switch (this) { - case JSON: - case XML: - return null; - case CSV: - return ','; - case TSV: - return '\t'; - case SEMI_COLON_SEPARATED_VALUES: - return ';'; - case PIPE_SEPARATED_VALUES: - return '|'; - case SEMI_STRUCTURED_TEXT: - return null; - default: - throw new IllegalStateException("enum value [" + this + "] missing from switch."); - } - } + JSON, XML, DELIMITED, SEMI_STRUCTURED_TEXT; 
public boolean supportsNesting() { switch (this) { case JSON: case XML: return true; - case CSV: - case TSV: - case SEMI_COLON_SEPARATED_VALUES: - case PIPE_SEPARATED_VALUES: + case DELIMITED: case SEMI_STRUCTURED_TEXT: return false; default: @@ -69,10 +46,7 @@ public class LogStructure implements ToXContentObject { switch (this) { case JSON: case XML: - case CSV: - case TSV: - case SEMI_COLON_SEPARATED_VALUES: - case PIPE_SEPARATED_VALUES: + case DELIMITED: return true; case SEMI_STRUCTURED_TEXT: return false; @@ -85,10 +59,7 @@ public class LogStructure implements ToXContentObject { switch (this) { case JSON: case XML: - case CSV: - case TSV: - case SEMI_COLON_SEPARATED_VALUES: - case PIPE_SEPARATED_VALUES: + case DELIMITED: return false; case SEMI_STRUCTURED_TEXT: return true; @@ -97,38 +68,6 @@ public class LogStructure implements ToXContentObject { } } - public boolean isSeparatedValues() { - switch (this) { - case JSON: - case XML: - return false; - case CSV: - case TSV: - case SEMI_COLON_SEPARATED_VALUES: - case PIPE_SEPARATED_VALUES: - return true; - case SEMI_STRUCTURED_TEXT: - return false; - default: - throw new IllegalStateException("enum value [" + this + "] missing from switch."); - } - } - - public static Format fromSeparator(char separator) { - switch (separator) { - case ',': - return CSV; - case '\t': - return TSV; - case ';': - return SEMI_COLON_SEPARATED_VALUES; - case '|': - return PIPE_SEPARATED_VALUES; - default: - throw new IllegalArgumentException("No known format has separator [" + separator + "]"); - } - } - public static Format fromString(String name) { return valueOf(name.trim().toUpperCase(Locale.ROOT)); } @@ -149,7 +88,7 @@ public class LogStructure implements ToXContentObject { static final ParseField EXCLUDE_LINES_PATTERN = new ParseField("exclude_lines_pattern"); static final ParseField INPUT_FIELDS = new ParseField("input_fields"); static final ParseField HAS_HEADER_ROW = new ParseField("has_header_row"); - static final ParseField 
SEPARATOR = new ParseField("separator"); + static final ParseField DELIMITER = new ParseField("delimiter"); static final ParseField SHOULD_TRIM_FIELDS = new ParseField("should_trim_fields"); static final ParseField GROK_PATTERN = new ParseField("grok_pattern"); static final ParseField TIMESTAMP_FIELD = new ParseField("timestamp_field"); @@ -171,7 +110,7 @@ public class LogStructure implements ToXContentObject { PARSER.declareString(Builder::setExcludeLinesPattern, EXCLUDE_LINES_PATTERN); PARSER.declareStringArray(Builder::setInputFields, INPUT_FIELDS); PARSER.declareBoolean(Builder::setHasHeaderRow, HAS_HEADER_ROW); - PARSER.declareString((p, c) -> p.setSeparator(c.charAt(0)), SEPARATOR); + PARSER.declareString((p, c) -> p.setDelimiter(c.charAt(0)), DELIMITER); PARSER.declareBoolean(Builder::setShouldTrimFields, SHOULD_TRIM_FIELDS); PARSER.declareString(Builder::setGrokPattern, GROK_PATTERN); PARSER.declareString(Builder::setTimestampField, TIMESTAMP_FIELD); @@ -191,7 +130,7 @@ public class LogStructure implements ToXContentObject { private final String excludeLinesPattern; private final List inputFields; private final Boolean hasHeaderRow; - private final Character separator; + private final Character delimiter; private final Boolean shouldTrimFields; private final String grokPattern; private final List timestampFormats; @@ -202,7 +141,7 @@ public class LogStructure implements ToXContentObject { public LogStructure(int numLinesAnalyzed, int numMessagesAnalyzed, String sampleStart, String charset, Boolean hasByteOrderMarker, Format format, String multilineStartPattern, String excludeLinesPattern, List inputFields, - Boolean hasHeaderRow, Character separator, Boolean shouldTrimFields, String grokPattern, String timestampField, + Boolean hasHeaderRow, Character delimiter, Boolean shouldTrimFields, String grokPattern, String timestampField, List timestampFormats, boolean needClientTimezone, Map mappings, List explanation) { @@ -216,7 +155,7 @@ public class 
LogStructure implements ToXContentObject { this.excludeLinesPattern = excludeLinesPattern; this.inputFields = (inputFields == null) ? null : Collections.unmodifiableList(new ArrayList<>(inputFields)); this.hasHeaderRow = hasHeaderRow; - this.separator = separator; + this.delimiter = delimiter; this.shouldTrimFields = shouldTrimFields; this.grokPattern = grokPattern; this.timestampField = timestampField; @@ -266,8 +205,8 @@ public class LogStructure implements ToXContentObject { return hasHeaderRow; } - public Character getSeparator() { - return separator; + public Character getDelimiter() { + return delimiter; } public Boolean getShouldTrimFields() { @@ -322,8 +261,8 @@ public class LogStructure implements ToXContentObject { if (hasHeaderRow != null) { builder.field(HAS_HEADER_ROW.getPreferredName(), hasHeaderRow.booleanValue()); } - if (separator != null) { - builder.field(SEPARATOR.getPreferredName(), String.valueOf(separator)); + if (delimiter != null) { + builder.field(DELIMITER.getPreferredName(), String.valueOf(delimiter)); } if (shouldTrimFields != null) { builder.field(SHOULD_TRIM_FIELDS.getPreferredName(), shouldTrimFields.booleanValue()); @@ -349,7 +288,7 @@ public class LogStructure implements ToXContentObject { public int hashCode() { return Objects.hash(numLinesAnalyzed, numMessagesAnalyzed, sampleStart, charset, hasByteOrderMarker, format, - multilineStartPattern, excludeLinesPattern, inputFields, hasHeaderRow, separator, shouldTrimFields, grokPattern, timestampField, + multilineStartPattern, excludeLinesPattern, inputFields, hasHeaderRow, delimiter, shouldTrimFields, grokPattern, timestampField, timestampFormats, needClientTimezone, mappings, explanation); } @@ -376,7 +315,7 @@ public class LogStructure implements ToXContentObject { Objects.equals(this.excludeLinesPattern, that.excludeLinesPattern) && Objects.equals(this.inputFields, that.inputFields) && Objects.equals(this.hasHeaderRow, that.hasHeaderRow) && - Objects.equals(this.separator, 
that.separator) && + Objects.equals(this.delimiter, that.delimiter) && Objects.equals(this.shouldTrimFields, that.shouldTrimFields) && Objects.equals(this.grokPattern, that.grokPattern) && Objects.equals(this.timestampField, that.timestampField) && @@ -397,7 +336,7 @@ public class LogStructure implements ToXContentObject { private String excludeLinesPattern; private List inputFields; private Boolean hasHeaderRow; - private Character separator; + private Character delimiter; private Boolean shouldTrimFields; private String grokPattern; private String timestampField; @@ -441,7 +380,6 @@ public class LogStructure implements ToXContentObject { public Builder setFormat(Format format) { this.format = Objects.requireNonNull(format); - this.separator = format.separator(); return this; } @@ -465,13 +403,13 @@ public class LogStructure implements ToXContentObject { return this; } - public Builder setShouldTrimFields(Boolean shouldTrimFields) { - this.shouldTrimFields = shouldTrimFields; + public Builder setDelimiter(Character delimiter) { + this.delimiter = delimiter; return this; } - public Builder setSeparator(Character separator) { - this.separator = separator; + public Builder setShouldTrimFields(Boolean shouldTrimFields) { + this.shouldTrimFields = shouldTrimFields; return this; } @@ -542,28 +480,22 @@ public class LogStructure implements ToXContentObject { if (hasHeaderRow != null) { throw new IllegalArgumentException("Has header row may not be specified for [" + format + "] structures."); } - if (separator != null) { - throw new IllegalArgumentException("Separator may not be specified for [" + format + "] structures."); + if (delimiter != null) { + throw new IllegalArgumentException("Delimiter may not be specified for [" + format + "] structures."); } if (grokPattern != null) { throw new IllegalArgumentException("Grok pattern may not be specified for [" + format + "] structures."); } break; - case CSV: - case TSV: - case SEMI_COLON_SEPARATED_VALUES: - case 
PIPE_SEPARATED_VALUES: + case DELIMITED: if (inputFields == null || inputFields.isEmpty()) { throw new IllegalArgumentException("Input fields must be specified for [" + format + "] structures."); } if (hasHeaderRow == null) { throw new IllegalArgumentException("Has header row must be specified for [" + format + "] structures."); } - Character expectedSeparator = format.separator(); - assert expectedSeparator != null; - if (expectedSeparator.equals(separator) == false) { - throw new IllegalArgumentException("Separator must be [" + expectedSeparator + "] for [" + format + - "] structures."); + if (delimiter == null) { + throw new IllegalArgumentException("Delimiter must be specified for [" + format + "] structures."); } if (grokPattern != null) { throw new IllegalArgumentException("Grok pattern may not be specified for [" + format + "] structures."); @@ -576,8 +508,8 @@ public class LogStructure implements ToXContentObject { if (hasHeaderRow != null) { throw new IllegalArgumentException("Has header row may not be specified for [" + format + "] structures."); } - if (separator != null) { - throw new IllegalArgumentException("Separator may not be specified for [" + format + "] structures."); + if (delimiter != null) { + throw new IllegalArgumentException("Delimiter may not be specified for [" + format + "] structures."); } if (shouldTrimFields != null) { throw new IllegalArgumentException("Should trim fields may not be specified for [" + format + "] structures."); @@ -607,7 +539,7 @@ public class LogStructure implements ToXContentObject { } return new LogStructure(numLinesAnalyzed, numMessagesAnalyzed, sampleStart, charset, hasByteOrderMarker, format, - multilineStartPattern, excludeLinesPattern, inputFields, hasHeaderRow, separator, shouldTrimFields, grokPattern, + multilineStartPattern, excludeLinesPattern, inputFields, hasHeaderRow, delimiter, shouldTrimFields, grokPattern, timestampField, timestampFormats, needClientTimezone, mappings, explanation); } } diff --git 
a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureFinderManager.java b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureFinderManager.java index a8fd9d7eb89..e747a588dfd 100644 --- a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureFinderManager.java +++ b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureFinderManager.java @@ -69,10 +69,10 @@ public final class LogStructureFinderManager { new JsonLogStructureFinderFactory(), new XmlLogStructureFinderFactory(), // ND-JSON will often also be valid (although utterly weird) CSV, so JSON must come before CSV - new CsvLogStructureFinderFactory(), - new TsvLogStructureFinderFactory(), - new SemiColonSeparatedValuesLogStructureFinderFactory(), - new PipeSeparatedValuesLogStructureFinderFactory(), + new DelimitedLogStructureFinderFactory(',', 2, false), + new DelimitedLogStructureFinderFactory('\t', 2, false), + new DelimitedLogStructureFinderFactory(';', 4, false), + new DelimitedLogStructureFinderFactory('|', 5, true), new TextLogStructureFinderFactory() )); diff --git a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureUtils.java b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureUtils.java index b1dfee22ee6..71a68c39991 100644 --- a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureUtils.java +++ b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureUtils.java @@ -21,12 +21,12 @@ import java.util.TreeMap; import java.util.stream.Collectors; import java.util.stream.Stream; -final class LogStructureUtils { +public final class LogStructureUtils { - 
static final String DEFAULT_TIMESTAMP_FIELD = "@timestamp"; - static final String MAPPING_TYPE_SETTING = "type"; - static final String MAPPING_FORMAT_SETTING = "format"; - static final String MAPPING_PROPERTIES_SETTING = "properties"; + public static final String DEFAULT_TIMESTAMP_FIELD = "@timestamp"; + public static final String MAPPING_TYPE_SETTING = "type"; + public static final String MAPPING_FORMAT_SETTING = "format"; + public static final String MAPPING_PROPERTIES_SETTING = "properties"; // NUMBER Grok pattern doesn't support scientific notation, so we extend it private static final Grok NUMBER_GROK = new Grok(Grok.getBuiltinPatterns(), "^%{NUMBER}(?:[eE][+-]?[0-3]?[0-9]{1,2})?$"); diff --git a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/PipeSeparatedValuesLogStructureFinderFactory.java b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/PipeSeparatedValuesLogStructureFinderFactory.java deleted file mode 100644 index 085599de847..00000000000 --- a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/PipeSeparatedValuesLogStructureFinderFactory.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License; - * you may not use this file except in compliance with the Elastic License. 
- */ -package org.elasticsearch.xpack.ml.logstructurefinder; - -import org.supercsv.prefs.CsvPreference; - -import java.io.IOException; -import java.util.List; - -public class PipeSeparatedValuesLogStructureFinderFactory implements LogStructureFinderFactory { - - private static final CsvPreference PIPE_PREFERENCE = new CsvPreference.Builder('"', '|', "\n").build(); - - /** - * Rules are: - * - The file must be valid pipe (|) separated values - * - It must contain at least two complete records - * - There must be at least five fields per record (otherwise files with coincidental - * or no pipe characters could be treated as pipe separated) - * - Every pipe separated value record except the last must have the same number of fields - * The reason the last record is allowed to have fewer fields than the others is that - * it could have been truncated when the file was sampled. - */ - @Override - public boolean canCreateFromSample(List explanation, String sample) { - return SeparatedValuesLogStructureFinder.canCreateFromSample(explanation, sample, 5, PIPE_PREFERENCE, "pipe separated values"); - } - - @Override - public LogStructureFinder createFromSample(List explanation, String sample, String charsetName, Boolean hasByteOrderMarker) - throws IOException { - return SeparatedValuesLogStructureFinder.makeSeparatedValuesLogStructureFinder(explanation, sample, charsetName, hasByteOrderMarker, - PIPE_PREFERENCE, true); - } -} diff --git a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/SemiColonSeparatedValuesLogStructureFinderFactory.java b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/SemiColonSeparatedValuesLogStructureFinderFactory.java deleted file mode 100644 index e0e80fa7465..00000000000 --- a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/SemiColonSeparatedValuesLogStructureFinderFactory.java +++ /dev/null @@ -1,37 
+0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License; - * you may not use this file except in compliance with the Elastic License. - */ -package org.elasticsearch.xpack.ml.logstructurefinder; - -import org.supercsv.prefs.CsvPreference; - -import java.io.IOException; -import java.util.List; - -public class SemiColonSeparatedValuesLogStructureFinderFactory implements LogStructureFinderFactory { - - /** - * Rules are: - * - The file must be valid semi-colon separated values - * - It must contain at least two complete records - * - There must be at least four fields per record (otherwise files with coincidental - * or no semi-colons could be treated as semi-colon separated) - * - Every semi-colon separated value record except the last must have the same number of fields - * The reason the last record is allowed to have fewer fields than the others is that - * it could have been truncated when the file was sampled. 
- */ - @Override - public boolean canCreateFromSample(List explanation, String sample) { - return SeparatedValuesLogStructureFinder.canCreateFromSample(explanation, sample, 4, - CsvPreference.EXCEL_NORTH_EUROPE_PREFERENCE, "semi-colon separated values"); - } - - @Override - public LogStructureFinder createFromSample(List explanation, String sample, String charsetName, Boolean hasByteOrderMarker) - throws IOException { - return SeparatedValuesLogStructureFinder.makeSeparatedValuesLogStructureFinder(explanation, sample, charsetName, hasByteOrderMarker, - CsvPreference.EXCEL_NORTH_EUROPE_PREFERENCE, false); - } -} diff --git a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/TsvLogStructureFinderFactory.java b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/TsvLogStructureFinderFactory.java index 733b32346fb..1b53a33f31e 100644 --- a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/TsvLogStructureFinderFactory.java +++ b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/TsvLogStructureFinderFactory.java @@ -23,13 +23,13 @@ public class TsvLogStructureFinderFactory implements LogStructureFinderFactory { */ @Override public boolean canCreateFromSample(List explanation, String sample) { - return SeparatedValuesLogStructureFinder.canCreateFromSample(explanation, sample, 2, CsvPreference.TAB_PREFERENCE, "TSV"); + return DelimitedLogStructureFinder.canCreateFromSample(explanation, sample, 2, CsvPreference.TAB_PREFERENCE, "TSV"); } @Override public LogStructureFinder createFromSample(List explanation, String sample, String charsetName, Boolean hasByteOrderMarker) throws IOException { - return SeparatedValuesLogStructureFinder.makeSeparatedValuesLogStructureFinder(explanation, sample, charsetName, hasByteOrderMarker, + return 
DelimitedLogStructureFinder.makeDelimitedLogStructureFinder(explanation, sample, charsetName, hasByteOrderMarker, CsvPreference.TAB_PREFERENCE, false); } } diff --git a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/CsvLogStructureFinderFactoryTests.java b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/CsvLogStructureFinderFactoryTests.java deleted file mode 100644 index f53ee008d69..00000000000 --- a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/CsvLogStructureFinderFactoryTests.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License; - * you may not use this file except in compliance with the Elastic License. - */ -package org.elasticsearch.xpack.ml.logstructurefinder; - -public class CsvLogStructureFinderFactoryTests extends LogStructureTestCase { - - private LogStructureFinderFactory factory = new CsvLogStructureFinderFactory(); - - // No need to check JSON or XML because they come earlier in the order we check formats - - public void testCanCreateFromSampleGivenCsv() { - - assertTrue(factory.canCreateFromSample(explanation, CSV_SAMPLE)); - } - - public void testCanCreateFromSampleGivenTsv() { - - assertFalse(factory.canCreateFromSample(explanation, TSV_SAMPLE)); - } - - public void testCanCreateFromSampleGivenSemiColonSeparatedValues() { - - assertFalse(factory.canCreateFromSample(explanation, SEMI_COLON_SEPARATED_VALUES_SAMPLE)); - } - - public void testCanCreateFromSampleGivenPipeSeparatedValues() { - - assertFalse(factory.canCreateFromSample(explanation, PIPE_SEPARATED_VALUES_SAMPLE)); - } - - public void testCanCreateFromSampleGivenText() { - - assertFalse(factory.canCreateFromSample(explanation, TEXT_SAMPLE)); - } -} diff --git 
a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/DelimitedLogStructureFinderFactoryTests.java b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/DelimitedLogStructureFinderFactoryTests.java new file mode 100644 index 00000000000..d9eadbc8f0f --- /dev/null +++ b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/DelimitedLogStructureFinderFactoryTests.java @@ -0,0 +1,93 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.logstructurefinder; + +public class DelimitedLogStructureFinderFactoryTests extends LogStructureTestCase { + + private LogStructureFinderFactory csvFactory = new DelimitedLogStructureFinderFactory(',', 2, false); + private LogStructureFinderFactory tsvFactory = new DelimitedLogStructureFinderFactory('\t', 2, false); + private LogStructureFinderFactory semiColonDelimitedfactory = new DelimitedLogStructureFinderFactory(';', 4, false); + private LogStructureFinderFactory pipeDelimitedFactory = new DelimitedLogStructureFinderFactory('|', 5, true); + + // CSV - no need to check JSON or XML because they come earlier in the order we check formats + + public void testCanCreateCsvFromSampleGivenCsv() { + + assertTrue(csvFactory.canCreateFromSample(explanation, CSV_SAMPLE)); + } + + public void testCanCreateCsvFromSampleGivenTsv() { + + assertFalse(csvFactory.canCreateFromSample(explanation, TSV_SAMPLE)); + } + + public void testCanCreateCsvFromSampleGivenSemiColonDelimited() { + + assertFalse(csvFactory.canCreateFromSample(explanation, SEMI_COLON_DELIMITED_SAMPLE)); + } + + public void testCanCreateCsvFromSampleGivenPipeDelimited() { + + 
assertFalse(csvFactory.canCreateFromSample(explanation, PIPE_DELIMITED_SAMPLE)); + } + + public void testCanCreateCsvFromSampleGivenText() { + + assertFalse(csvFactory.canCreateFromSample(explanation, TEXT_SAMPLE)); + } + + // TSV - no need to check JSON, XML or CSV because they come earlier in the order we check formats + + public void testCanCreateTsvFromSampleGivenTsv() { + + assertTrue(tsvFactory.canCreateFromSample(explanation, TSV_SAMPLE)); + } + + public void testCanCreateTsvFromSampleGivenSemiColonDelimited() { + + assertFalse(tsvFactory.canCreateFromSample(explanation, SEMI_COLON_DELIMITED_SAMPLE)); + } + + public void testCanCreateTsvFromSampleGivenPipeDelimited() { + + assertFalse(tsvFactory.canCreateFromSample(explanation, PIPE_DELIMITED_SAMPLE)); + } + + public void testCanCreateTsvFromSampleGivenText() { + + assertFalse(tsvFactory.canCreateFromSample(explanation, TEXT_SAMPLE)); + } + + // Semi-colon delimited - no need to check JSON, XML, CSV or TSV because they come earlier in the order we check formats + + public void testCanCreateSemiColonDelimitedFromSampleGivenSemiColonDelimited() { + + assertTrue(semiColonDelimitedfactory.canCreateFromSample(explanation, SEMI_COLON_DELIMITED_SAMPLE)); + } + + public void testCanCreateSemiColonDelimitedFromSampleGivenPipeDelimited() { + + assertFalse(semiColonDelimitedfactory.canCreateFromSample(explanation, PIPE_DELIMITED_SAMPLE)); + } + + public void testCanCreateSemiColonDelimitedFromSampleGivenText() { + + assertFalse(semiColonDelimitedfactory.canCreateFromSample(explanation, TEXT_SAMPLE)); + } + + // Pipe delimited - no need to check JSON, XML, CSV, TSV or semi-colon delimited + // values because they come earlier in the order we check formats + + public void testCanCreatePipeDelimitedFromSampleGivenPipeDelimited() { + + assertTrue(pipeDelimitedFactory.canCreateFromSample(explanation, PIPE_DELIMITED_SAMPLE)); + } + + public void testCanCreatePipeDelimitedFromSampleGivenText() { + + 
assertFalse(pipeDelimitedFactory.canCreateFromSample(explanation, TEXT_SAMPLE)); + } +} diff --git a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/SeparatedValuesLogStructureFinderTests.java b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/DelimitedLogStructureFinderTests.java similarity index 65% rename from x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/SeparatedValuesLogStructureFinderTests.java rename to x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/DelimitedLogStructureFinderTests.java index b62832a0a19..57c297cf8d5 100644 --- a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/SeparatedValuesLogStructureFinderTests.java +++ b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/DelimitedLogStructureFinderTests.java @@ -12,27 +12,27 @@ import java.io.IOException; import java.util.Arrays; import java.util.Collections; -import static org.elasticsearch.xpack.ml.logstructurefinder.SeparatedValuesLogStructureFinder.levenshteinFieldwiseCompareRows; -import static org.elasticsearch.xpack.ml.logstructurefinder.SeparatedValuesLogStructureFinder.levenshteinDistance; +import static org.elasticsearch.xpack.ml.logstructurefinder.DelimitedLogStructureFinder.levenshteinFieldwiseCompareRows; +import static org.elasticsearch.xpack.ml.logstructurefinder.DelimitedLogStructureFinder.levenshteinDistance; import static org.hamcrest.Matchers.arrayContaining; -public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase { +public class DelimitedLogStructureFinderTests extends LogStructureTestCase { - private LogStructureFinderFactory factory = new CsvLogStructureFinderFactory(); + private LogStructureFinderFactory csvFactory = new DelimitedLogStructureFinderFactory(',', 
2, false); public void testCreateConfigsGivenCompleteCsv() throws Exception { String sample = "time,message\n" + "2018-05-17T13:41:23,hello\n" + "2018-05-17T13:41:32,hello again\n"; - assertTrue(factory.canCreateFromSample(explanation, sample)); + assertTrue(csvFactory.canCreateFromSample(explanation, sample)); String charset = randomFrom(POSSIBLE_CHARSETS); Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset); - LogStructureFinder structureFinder = factory.createFromSample(explanation, sample, charset, hasByteOrderMarker); + LogStructureFinder structureFinder = csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker); LogStructure structure = structureFinder.getStructure(); - assertEquals(LogStructure.Format.CSV, structure.getFormat()); + assertEquals(LogStructure.Format.DELIMITED, structure.getFormat()); assertEquals(charset, structure.getCharset()); if (hasByteOrderMarker == null) { assertNull(structure.getHasByteOrderMarker()); @@ -41,7 +41,7 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase } assertEquals("^\"?time\"?,\"?message\"?", structure.getExcludeLinesPattern()); assertEquals("^\"?\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}", structure.getMultilineStartPattern()); - assertEquals(Character.valueOf(','), structure.getSeparator()); + assertEquals(Character.valueOf(','), structure.getDelimiter()); assertTrue(structure.getHasHeaderRow()); assertNull(structure.getShouldTrimFields()); assertEquals(Arrays.asList("time", "message"), structure.getInputFields()); @@ -55,15 +55,15 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase "\"hello\n" + "world\",2018-05-17T13:41:23,1\n" + "\"hello again\n"; // note that this last record is truncated - assertTrue(factory.canCreateFromSample(explanation, sample)); + assertTrue(csvFactory.canCreateFromSample(explanation, sample)); String charset = randomFrom(POSSIBLE_CHARSETS); Boolean hasByteOrderMarker = 
randomHasByteOrderMarker(charset); - LogStructureFinder structureFinder = factory.createFromSample(explanation, sample, charset, hasByteOrderMarker); + LogStructureFinder structureFinder = csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker); LogStructure structure = structureFinder.getStructure(); - assertEquals(LogStructure.Format.CSV, structure.getFormat()); + assertEquals(LogStructure.Format.DELIMITED, structure.getFormat()); assertEquals(charset, structure.getCharset()); if (hasByteOrderMarker == null) { assertNull(structure.getHasByteOrderMarker()); @@ -72,7 +72,7 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase } assertEquals("^\"?message\"?,\"?time\"?,\"?count\"?", structure.getExcludeLinesPattern()); assertEquals("^.*?,\"?\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}", structure.getMultilineStartPattern()); - assertEquals(Character.valueOf(','), structure.getSeparator()); + assertEquals(Character.valueOf(','), structure.getDelimiter()); assertTrue(structure.getHasHeaderRow()); assertNull(structure.getShouldTrimFields()); assertEquals(Arrays.asList("message", "time", "count"), structure.getInputFields()); @@ -88,15 +88,15 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase "2,2016-12-31 15:15:01,2016-12-31 15:15:09,1,.00,1,N,264,264,2,1,0,0.5,0,0,0.3,1.8,,\n" + "1,2016-12-01 00:00:01,2016-12-01 00:10:22,1,1.60,1,N,163,143,2,9,0.5,0.5,0,0,0.3,10.3,,\n" + "1,2016-12-01 00:00:01,2016-12-01 00:11:01,1,1.40,1,N,164,229,1,9,0.5,0.5,2.05,0,0.3,12.35,,\n"; - assertTrue(factory.canCreateFromSample(explanation, sample)); + assertTrue(csvFactory.canCreateFromSample(explanation, sample)); String charset = randomFrom(POSSIBLE_CHARSETS); Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset); - LogStructureFinder structureFinder = factory.createFromSample(explanation, sample, charset, hasByteOrderMarker); + LogStructureFinder structureFinder = 
csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker); LogStructure structure = structureFinder.getStructure(); - assertEquals(LogStructure.Format.CSV, structure.getFormat()); + assertEquals(LogStructure.Format.DELIMITED, structure.getFormat()); assertEquals(charset, structure.getCharset()); if (hasByteOrderMarker == null) { assertNull(structure.getHasByteOrderMarker()); @@ -108,7 +108,7 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase "\"?extra\"?,\"?mta_tax\"?,\"?tip_amount\"?,\"?tolls_amount\"?,\"?improvement_surcharge\"?,\"?total_amount\"?,\"?\"?,\"?\"?", structure.getExcludeLinesPattern()); assertEquals("^.*?,\"?\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}", structure.getMultilineStartPattern()); - assertEquals(Character.valueOf(','), structure.getSeparator()); + assertEquals(Character.valueOf(','), structure.getDelimiter()); assertTrue(structure.getHasHeaderRow()); assertNull(structure.getShouldTrimFields()); assertEquals(Arrays.asList("VendorID", "tpep_pickup_datetime", "tpep_dropoff_datetime", "passenger_count", "trip_distance", @@ -126,15 +126,15 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase "2,2016-12-31 15:15:01,2016-12-31 15:15:09,1,.00,1,N,264,264,2,1,0,0.5,0,0,0.3,1.8,,\n" + "1,2016-12-01 00:00:01,2016-12-01 00:10:22,1,1.60,1,N,163,143,2,9,0.5,0.5,0,0,0.3,10.3,,\n" + "1,2016-12-01 00:00:01,2016-12-01 00:11:01,1,1.40,1,N,164,229,1,9,0.5,0.5,2.05,0,0.3,12.35,,\n"; - assertTrue(factory.canCreateFromSample(explanation, sample)); + assertTrue(csvFactory.canCreateFromSample(explanation, sample)); String charset = randomFrom(POSSIBLE_CHARSETS); Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset); - LogStructureFinder structureFinder = factory.createFromSample(explanation, sample, charset, hasByteOrderMarker); + LogStructureFinder structureFinder = csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker); LogStructure structure = 
structureFinder.getStructure(); - assertEquals(LogStructure.Format.CSV, structure.getFormat()); + assertEquals(LogStructure.Format.DELIMITED, structure.getFormat()); assertEquals(charset, structure.getCharset()); if (hasByteOrderMarker == null) { assertNull(structure.getHasByteOrderMarker()); @@ -146,7 +146,7 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase "\"?extra\"?,\"?mta_tax\"?,\"?tip_amount\"?,\"?tolls_amount\"?,\"?improvement_surcharge\"?,\"?total_amount\"?", structure.getExcludeLinesPattern()); assertEquals("^.*?,\"?\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}", structure.getMultilineStartPattern()); - assertEquals(Character.valueOf(','), structure.getSeparator()); + assertEquals(Character.valueOf(','), structure.getDelimiter()); assertTrue(structure.getHasHeaderRow()); assertNull(structure.getShouldTrimFields()); assertEquals(Arrays.asList("VendorID", "tpep_pickup_datetime", "tpep_dropoff_datetime", "passenger_count", "trip_distance", @@ -161,15 +161,15 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase String sample = "\"pos_id\",\"trip_id\",\"latitude\",\"longitude\",\"altitude\",\"timestamp\"\n" + "\"1\",\"3\",\"4703.7815\",\"1527.4713\",\"359.9\",\"2017-01-19 16:19:04.742113\"\n" + "\"2\",\"3\",\"4703.7815\",\"1527.4714\",\"359.9\",\"2017-01-19 16:19:05.741890\"\n"; - assertTrue(factory.canCreateFromSample(explanation, sample)); + assertTrue(csvFactory.canCreateFromSample(explanation, sample)); String charset = randomFrom(POSSIBLE_CHARSETS); Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset); - LogStructureFinder structureFinder = factory.createFromSample(explanation, sample, charset, hasByteOrderMarker); + LogStructureFinder structureFinder = csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker); LogStructure structure = structureFinder.getStructure(); - assertEquals(LogStructure.Format.CSV, structure.getFormat()); + 
assertEquals(LogStructure.Format.DELIMITED, structure.getFormat()); assertEquals(charset, structure.getCharset()); if (hasByteOrderMarker == null) { assertNull(structure.getHasByteOrderMarker()); @@ -179,7 +179,7 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase assertEquals("^\"?pos_id\"?,\"?trip_id\"?,\"?latitude\"?,\"?longitude\"?,\"?altitude\"?,\"?timestamp\"?", structure.getExcludeLinesPattern()); assertNull(structure.getMultilineStartPattern()); - assertEquals(Character.valueOf(','), structure.getSeparator()); + assertEquals(Character.valueOf(','), structure.getDelimiter()); assertTrue(structure.getHasHeaderRow()); assertNull(structure.getShouldTrimFields()); assertEquals(Arrays.asList("pos_id", "trip_id", "latitude", "longitude", "altitude", "timestamp"), structure.getInputFields()); @@ -195,8 +195,8 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase "2014-06-23 00:00:01Z,JBU,877.5927,farequote\n" + "2014-06-23 00:00:01Z,KLM,1355.4812,farequote\n"; - Tuple header = SeparatedValuesLogStructureFinder.findHeaderFromSample(explanation, - SeparatedValuesLogStructureFinder.readRows(withHeader, CsvPreference.EXCEL_PREFERENCE).v1()); + Tuple header = DelimitedLogStructureFinder.findHeaderFromSample(explanation, + DelimitedLogStructureFinder.readRows(withHeader, CsvPreference.EXCEL_PREFERENCE).v1()); assertTrue(header.v1()); assertThat(header.v2(), arrayContaining("time", "airline", "responsetime", "sourcetype")); @@ -208,8 +208,8 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase "2014-06-23 00:00:01Z,JBU,877.5927,farequote\n" + "2014-06-23 00:00:01Z,KLM,1355.4812,farequote\n"; - Tuple header = SeparatedValuesLogStructureFinder.findHeaderFromSample(explanation, - SeparatedValuesLogStructureFinder.readRows(withoutHeader, CsvPreference.EXCEL_PREFERENCE).v1()); + Tuple header = DelimitedLogStructureFinder.findHeaderFromSample(explanation, + 
DelimitedLogStructureFinder.readRows(withoutHeader, CsvPreference.EXCEL_PREFERENCE).v1()); assertFalse(header.v1()); assertThat(header.v2(), arrayContaining("column1", "column2", "column3", "column4")); @@ -251,43 +251,43 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase public void testLineHasUnescapedQuote() { - assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a,b,c", CsvPreference.EXCEL_PREFERENCE)); - assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\",b,c", CsvPreference.EXCEL_PREFERENCE)); - assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a,b\",c", CsvPreference.EXCEL_PREFERENCE)); - assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a,b,c\"", CsvPreference.EXCEL_PREFERENCE)); - assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a,\"b\",c", CsvPreference.EXCEL_PREFERENCE)); - assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a,b,\"c\"", CsvPreference.EXCEL_PREFERENCE)); - assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a,\"b\"\"\",c", CsvPreference.EXCEL_PREFERENCE)); - assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a,b,\"c\"\"\"", CsvPreference.EXCEL_PREFERENCE)); - assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"\"\"a\",b,c", CsvPreference.EXCEL_PREFERENCE)); - assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\"\"\",b,c", CsvPreference.EXCEL_PREFERENCE)); - assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a,\"\"b\",c", CsvPreference.EXCEL_PREFERENCE)); - assertTrue(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("between\"words,b,c", CsvPreference.EXCEL_PREFERENCE)); - assertTrue(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("x and \"y\",b,c", CsvPreference.EXCEL_PREFERENCE)); + assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a,b,c", 
CsvPreference.EXCEL_PREFERENCE)); + assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a\",b,c", CsvPreference.EXCEL_PREFERENCE)); + assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a,b\",c", CsvPreference.EXCEL_PREFERENCE)); + assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a,b,c\"", CsvPreference.EXCEL_PREFERENCE)); + assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a,\"b\",c", CsvPreference.EXCEL_PREFERENCE)); + assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a,b,\"c\"", CsvPreference.EXCEL_PREFERENCE)); + assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a,\"b\"\"\",c", CsvPreference.EXCEL_PREFERENCE)); + assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a,b,\"c\"\"\"", CsvPreference.EXCEL_PREFERENCE)); + assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"\"\"a\",b,c", CsvPreference.EXCEL_PREFERENCE)); + assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a\"\"\",b,c", CsvPreference.EXCEL_PREFERENCE)); + assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a,\"\"b\",c", CsvPreference.EXCEL_PREFERENCE)); + assertTrue(DelimitedLogStructureFinder.lineHasUnescapedQuote("between\"words,b,c", CsvPreference.EXCEL_PREFERENCE)); + assertTrue(DelimitedLogStructureFinder.lineHasUnescapedQuote("x and \"y\",b,c", CsvPreference.EXCEL_PREFERENCE)); - assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a\tb\tc", CsvPreference.TAB_PREFERENCE)); - assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\"\tb\tc", CsvPreference.TAB_PREFERENCE)); - assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\tb\"\tc", CsvPreference.TAB_PREFERENCE)); - assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\tb\tc\"", CsvPreference.TAB_PREFERENCE)); - assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a\t\"b\"\tc", CsvPreference.TAB_PREFERENCE)); - 
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a\tb\t\"c\"", CsvPreference.TAB_PREFERENCE)); - assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a\t\"b\"\"\"\tc", CsvPreference.TAB_PREFERENCE)); - assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a\tb\t\"c\"\"\"", CsvPreference.TAB_PREFERENCE)); - assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"\"\"a\"\tb\tc", CsvPreference.TAB_PREFERENCE)); - assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\"\"\"\tb\tc", CsvPreference.TAB_PREFERENCE)); - assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\t\"\"b\"\tc", CsvPreference.TAB_PREFERENCE)); - assertTrue(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("between\"words\tb\tc", CsvPreference.TAB_PREFERENCE)); - assertTrue(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("x and \"y\"\tb\tc", CsvPreference.TAB_PREFERENCE)); + assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a\tb\tc", CsvPreference.TAB_PREFERENCE)); + assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a\"\tb\tc", CsvPreference.TAB_PREFERENCE)); + assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a\tb\"\tc", CsvPreference.TAB_PREFERENCE)); + assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a\tb\tc\"", CsvPreference.TAB_PREFERENCE)); + assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a\t\"b\"\tc", CsvPreference.TAB_PREFERENCE)); + assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a\tb\t\"c\"", CsvPreference.TAB_PREFERENCE)); + assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a\t\"b\"\"\"\tc", CsvPreference.TAB_PREFERENCE)); + assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a\tb\t\"c\"\"\"", CsvPreference.TAB_PREFERENCE)); + assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"\"\"a\"\tb\tc", CsvPreference.TAB_PREFERENCE)); + 
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a\"\"\"\tb\tc", CsvPreference.TAB_PREFERENCE)); + assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a\t\"\"b\"\tc", CsvPreference.TAB_PREFERENCE)); + assertTrue(DelimitedLogStructureFinder.lineHasUnescapedQuote("between\"words\tb\tc", CsvPreference.TAB_PREFERENCE)); + assertTrue(DelimitedLogStructureFinder.lineHasUnescapedQuote("x and \"y\"\tb\tc", CsvPreference.TAB_PREFERENCE)); } public void testRowContainsDuplicateNonEmptyValues() { - assertFalse(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Collections.singletonList("a"))); - assertFalse(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Collections.singletonList(""))); - assertFalse(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "b", "c"))); - assertTrue(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "b", "a"))); - assertTrue(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "b", "b"))); - assertFalse(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "", ""))); - assertFalse(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("", "a", ""))); + assertFalse(DelimitedLogStructureFinder.rowContainsDuplicateNonEmptyValues(Collections.singletonList("a"))); + assertFalse(DelimitedLogStructureFinder.rowContainsDuplicateNonEmptyValues(Collections.singletonList(""))); + assertFalse(DelimitedLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "b", "c"))); + assertTrue(DelimitedLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "b", "a"))); + assertTrue(DelimitedLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "b", "b"))); + assertFalse(DelimitedLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "", ""))); + 
assertFalse(DelimitedLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("", "a", ""))); } } diff --git a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/JsonLogStructureFinderFactoryTests.java b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/JsonLogStructureFinderFactoryTests.java index 39ef3b9eedb..cdbffa8259e 100644 --- a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/JsonLogStructureFinderFactoryTests.java +++ b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/JsonLogStructureFinderFactoryTests.java @@ -29,14 +29,14 @@ public class JsonLogStructureFinderFactoryTests extends LogStructureTestCase { assertFalse(factory.canCreateFromSample(explanation, TSV_SAMPLE)); } - public void testCanCreateFromSampleGivenSemiColonSeparatedValues() { + public void testCanCreateFromSampleGivenSemiColonDelimited() { - assertFalse(factory.canCreateFromSample(explanation, SEMI_COLON_SEPARATED_VALUES_SAMPLE)); + assertFalse(factory.canCreateFromSample(explanation, SEMI_COLON_DELIMITED_SAMPLE)); } - public void testCanCreateFromSampleGivenPipeSeparatedValues() { + public void testCanCreateFromSampleGivenPipeDelimited() { - assertFalse(factory.canCreateFromSample(explanation, PIPE_SEPARATED_VALUES_SAMPLE)); + assertFalse(factory.canCreateFromSample(explanation, PIPE_DELIMITED_SAMPLE)); } public void testCanCreateFromSampleGivenText() { diff --git a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/JsonLogStructureFinderTests.java b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/JsonLogStructureFinderTests.java index 2f727747bbf..917054919dd 100644 --- 
a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/JsonLogStructureFinderTests.java +++ b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/JsonLogStructureFinderTests.java @@ -29,7 +29,7 @@ public class JsonLogStructureFinderTests extends LogStructureTestCase { } assertNull(structure.getExcludeLinesPattern()); assertNull(structure.getMultilineStartPattern()); - assertNull(structure.getSeparator()); + assertNull(structure.getDelimiter()); assertNull(structure.getHasHeaderRow()); assertNull(structure.getShouldTrimFields()); assertNull(structure.getGrokPattern()); diff --git a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureFinderManagerTests.java b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureFinderManagerTests.java index 1f8691de8cf..520a55510c7 100644 --- a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureFinderManagerTests.java +++ b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureFinderManagerTests.java @@ -61,7 +61,7 @@ public class LogStructureFinderManagerTests extends LogStructureTestCase { public void testMakeBestStructureGivenCsv() throws Exception { assertThat(structureFinderManager.makeBestStructureFinder(explanation, "time,message\n" + "2018-05-17T13:41:23,hello\n", StandardCharsets.UTF_8.name(), randomBoolean()), - instanceOf(SeparatedValuesLogStructureFinder.class)); + instanceOf(DelimitedLogStructureFinder.class)); } public void testMakeBestStructureGivenText() throws Exception { diff --git a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureTestCase.java 
b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureTestCase.java index 5f9a87ef2a7..6b718fef6c7 100644 --- a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureTestCase.java +++ b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureTestCase.java @@ -34,14 +34,14 @@ public abstract class LogStructureTestCase extends ESTestCase { "\"level\":\"INFO\",\"pid\":42,\"thread\":\"0x7fff7d2a8000\",\"message\":\"message 2\",\"class\":\"ml\"," + "\"method\":\"core::SomeNoiseMaker\",\"file\":\"Noisemaker.cc\",\"line\":333}\n"; - protected static final String PIPE_SEPARATED_VALUES_SAMPLE = "2018-01-06 16:56:14.295748|INFO |VirtualServer |1 |" + + protected static final String PIPE_DELIMITED_SAMPLE = "2018-01-06 16:56:14.295748|INFO |VirtualServer |1 |" + "listening on 0.0.0.0:9987, :::9987\n" + "2018-01-06 17:19:44.465252|INFO |VirtualServer |1 |client " + "'User1'(id:2) changed default admin channelgroup to 'Guest'(id:8)\n" + "2018-01-06 17:21:25.764368|INFO |VirtualServer |1 |client " + "'User1'(id:2) was added to channelgroup 'Channel Admin'(id:5) by client 'User1'(id:2) in channel 'Default Channel'(id:1)"; - protected static final String SEMI_COLON_SEPARATED_VALUES_SAMPLE = "\"pos_id\";\"trip_id\";\"latitude\";\"longitude\";\"altitude\";" + + protected static final String SEMI_COLON_DELIMITED_SAMPLE = "\"pos_id\";\"trip_id\";\"latitude\";\"longitude\";\"altitude\";" + "\"timestamp\"\n" + "\"1\";\"3\";\"4703.7815\";\"1527.4713\";\"359.9\";\"2017-01-19 16:19:04.742113\"\n" + "\"2\";\"3\";\"4703.7815\";\"1527.4714\";\"359.9\";\"2017-01-19 16:19:05.741890\"\n" + diff --git a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureTests.java 
b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureTests.java index 738928ed28a..302946dcaa8 100644 --- a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureTests.java +++ b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureTests.java @@ -43,14 +43,12 @@ public class LogStructureTests extends AbstractXContentTestCase { builder.setExcludeLinesPattern(randomAlphaOfLength(100)); } - if (format.isSeparatedValues() || (format.supportsNesting() && randomBoolean())) { + if (format == LogStructure.Format.DELIMITED || (format.supportsNesting() && randomBoolean())) { builder.setInputFields(Arrays.asList(generateRandomStringArray(10, 10, false, false))); } - if (format.isSeparatedValues()) { + if (format == LogStructure.Format.DELIMITED) { builder.setHasHeaderRow(randomBoolean()); - if (rarely()) { - builder.setSeparator(format.separator()); - } + builder.setDelimiter(randomFrom(',', '\t', ';', '|')); } if (format.isSemiStructured()) { builder.setGrokPattern(randomAlphaOfLength(100)); diff --git a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/PipeSeparatedValuesLogStructureFinderFactoryTests.java b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/PipeSeparatedValuesLogStructureFinderFactoryTests.java deleted file mode 100644 index 3fd2fb7840a..00000000000 --- a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/PipeSeparatedValuesLogStructureFinderFactoryTests.java +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License; - * you may not use this file except in compliance with the Elastic License. 
- */ -package org.elasticsearch.xpack.ml.logstructurefinder; - -public class PipeSeparatedValuesLogStructureFinderFactoryTests extends LogStructureTestCase { - - private LogStructureFinderFactory factory = new PipeSeparatedValuesLogStructureFinderFactory(); - - // No need to check JSON, XML, CSV, TSV or semi-colon separated values because they come earlier in the order we check formats - - public void testCanCreateFromSampleGivenPipeSeparatedValues() { - - assertTrue(factory.canCreateFromSample(explanation, PIPE_SEPARATED_VALUES_SAMPLE)); - } - - public void testCanCreateFromSampleGivenText() { - - assertFalse(factory.canCreateFromSample(explanation, TEXT_SAMPLE)); - } -} diff --git a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/SemiColonSeparatedValuesLogStructureFinderFactoryTests.java b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/SemiColonSeparatedValuesLogStructureFinderFactoryTests.java deleted file mode 100644 index 64dad7e078c..00000000000 --- a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/SemiColonSeparatedValuesLogStructureFinderFactoryTests.java +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License; - * you may not use this file except in compliance with the Elastic License. 
- */ -package org.elasticsearch.xpack.ml.logstructurefinder; - -public class SemiColonSeparatedValuesLogStructureFinderFactoryTests extends LogStructureTestCase { - - private LogStructureFinderFactory factory = new SemiColonSeparatedValuesLogStructureFinderFactory(); - - // No need to check JSON, XML, CSV or TSV because they come earlier in the order we check formats - - public void testCanCreateFromSampleGivenSemiColonSeparatedValues() { - - assertTrue(factory.canCreateFromSample(explanation, SEMI_COLON_SEPARATED_VALUES_SAMPLE)); - } - - public void testCanCreateFromSampleGivenPipeSeparatedValues() { - - assertFalse(factory.canCreateFromSample(explanation, PIPE_SEPARATED_VALUES_SAMPLE)); - } - - public void testCanCreateFromSampleGivenText() { - - assertFalse(factory.canCreateFromSample(explanation, TEXT_SAMPLE)); - } -} diff --git a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/TextLogStructureFinderFactoryTests.java b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/TextLogStructureFinderFactoryTests.java index 267ce375d6e..c1b30cc7496 100644 --- a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/TextLogStructureFinderFactoryTests.java +++ b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/TextLogStructureFinderFactoryTests.java @@ -9,8 +9,8 @@ public class TextLogStructureFinderFactoryTests extends LogStructureTestCase { private LogStructureFinderFactory factory = new TextLogStructureFinderFactory(); - // No need to check JSON, XML, CSV, TSV, semi-colon separated values or pipe - // separated values because they come earlier in the order we check formats + // No need to check JSON, XML, CSV, TSV, semi-colon delimited values or pipe + // delimited values because they come earlier in the order we check formats public void testCanCreateFromSampleGivenText() { diff 
--git a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/TextLogStructureFinderTests.java b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/TextLogStructureFinderTests.java index 7c6a58bb683..c9e153a82c4 100644 --- a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/TextLogStructureFinderTests.java +++ b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/TextLogStructureFinderTests.java @@ -34,7 +34,7 @@ public class TextLogStructureFinderTests extends LogStructureTestCase { } assertNull(structure.getExcludeLinesPattern()); assertEquals("^\\[\\b\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}", structure.getMultilineStartPattern()); - assertNull(structure.getSeparator()); + assertNull(structure.getDelimiter()); assertNull(structure.getHasHeaderRow()); assertNull(structure.getShouldTrimFields()); assertEquals("\\[%{TIMESTAMP_ISO8601:timestamp}\\]\\[%{LOGLEVEL:loglevel} \\]\\[.*", structure.getGrokPattern()); diff --git a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/TsvLogStructureFinderFactoryTests.java b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/TsvLogStructureFinderFactoryTests.java deleted file mode 100644 index 1c8acc14d32..00000000000 --- a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/TsvLogStructureFinderFactoryTests.java +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License; - * you may not use this file except in compliance with the Elastic License. 
- */ -package org.elasticsearch.xpack.ml.logstructurefinder; - -public class TsvLogStructureFinderFactoryTests extends LogStructureTestCase { - - private LogStructureFinderFactory factory = new TsvLogStructureFinderFactory(); - - // No need to check JSON, XML or CSV because they come earlier in the order we check formats - - public void testCanCreateFromSampleGivenTsv() { - - assertTrue(factory.canCreateFromSample(explanation, TSV_SAMPLE)); - } - - public void testCanCreateFromSampleGivenSemiColonSeparatedValues() { - - assertFalse(factory.canCreateFromSample(explanation, SEMI_COLON_SEPARATED_VALUES_SAMPLE)); - } - - public void testCanCreateFromSampleGivenPipeSeparatedValues() { - - assertFalse(factory.canCreateFromSample(explanation, PIPE_SEPARATED_VALUES_SAMPLE)); - } - - public void testCanCreateFromSampleGivenText() { - - assertFalse(factory.canCreateFromSample(explanation, TEXT_SAMPLE)); - } -} diff --git a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/XmlLogStructureFinderFactoryTests.java b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/XmlLogStructureFinderFactoryTests.java index 27eb4ede040..b6dc3a56f1d 100644 --- a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/XmlLogStructureFinderFactoryTests.java +++ b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/XmlLogStructureFinderFactoryTests.java @@ -26,14 +26,14 @@ public class XmlLogStructureFinderFactoryTests extends LogStructureTestCase { assertFalse(factory.canCreateFromSample(explanation, TSV_SAMPLE)); } - public void testCanCreateFromSampleGivenSemiColonSeparatedValues() { + public void testCanCreateFromSampleGivenSemiColonDelimited() { - assertFalse(factory.canCreateFromSample(explanation, SEMI_COLON_SEPARATED_VALUES_SAMPLE)); + assertFalse(factory.canCreateFromSample(explanation, 
SEMI_COLON_DELIMITED_SAMPLE)); } - public void testCanCreateFromSampleGivenPipeSeparatedValues() { + public void testCanCreateFromSampleGivenPipeDelimited() { - assertFalse(factory.canCreateFromSample(explanation, PIPE_SEPARATED_VALUES_SAMPLE)); + assertFalse(factory.canCreateFromSample(explanation, PIPE_DELIMITED_SAMPLE)); } public void testCanCreateFromSampleGivenText() { diff --git a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/XmlLogStructureFinderTests.java b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/XmlLogStructureFinderTests.java index 0d04df152ef..de653d7bcd0 100644 --- a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/XmlLogStructureFinderTests.java +++ b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/XmlLogStructureFinderTests.java @@ -29,7 +29,7 @@ public class XmlLogStructureFinderTests extends LogStructureTestCase { } assertNull(structure.getExcludeLinesPattern()); assertEquals("^\\s*