[ML] Add earliest and latest timestamps to field stats (#42890)

This change adds the earliest and latest timestamps to
the field stats for fields of type "date" in the output of
the ML find_file_structure endpoint. This allows the cards
for date fields in the UI's file data visualizer to look
more like the cards for date fields in the index data
visualizer.
David Roberts 2019-06-06 08:56:57 +01:00
parent 280a2c9401
commit b202a59f88
16 changed files with 365 additions and 100 deletions
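As a quick illustration of the new server-side API (a sketch, not part of the diff below; the sample values are borrowed from the "release_date" example in the updated docs, and the class, constructor and getters are the ones added in this commit):

import java.util.Collections;

import org.elasticsearch.xpack.core.ml.filestructurefinder.FieldStats;

public class FieldStatsTimestampSketch {
    public static void main(String[] args) {
        // Date fields carry no numeric stats, only earliest/latest plus top hits;
        // these values mirror the "release_date" example in the updated docs.
        FieldStats releaseDate = new FieldStats(24, 20, "1932-06-01", "2011-06-02", Collections.emptyList());

        // toXContent serializes these as "earliest" and "latest" in the find_file_structure response
        System.out.println(releaseDate.getEarliestTimestamp()); // 1932-06-01
        System.out.println(releaseDate.getLatestTimestamp());   // 2011-06-02
    }
}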

View File

@ -38,12 +38,14 @@ public class FieldStats implements ToXContentObject {
public static final ParseField MAX_VALUE = new ParseField("max_value");
public static final ParseField MEAN_VALUE = new ParseField("mean_value");
public static final ParseField MEDIAN_VALUE = new ParseField("median_value");
public static final ParseField EARLIEST = new ParseField("earliest");
public static final ParseField LATEST = new ParseField("latest");
public static final ParseField TOP_HITS = new ParseField("top_hits");
@SuppressWarnings("unchecked")
public static final ConstructingObjectParser<FieldStats, Void> PARSER = new ConstructingObjectParser<>("field_stats", true,
a -> new FieldStats((long) a[0], (int) a[1], (Double) a[2], (Double) a[3], (Double) a[4], (Double) a[5],
(List<Map<String, Object>>) a[6]));
(String) a[6], (String) a[7], (List<Map<String, Object>>) a[8]));
static {
PARSER.declareLong(ConstructingObjectParser.constructorArg(), COUNT);
@ -52,6 +54,8 @@ public class FieldStats implements ToXContentObject {
PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), MAX_VALUE);
PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), MEAN_VALUE);
PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), MEDIAN_VALUE);
PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), EARLIEST);
PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), LATEST);
PARSER.declareObjectArray(ConstructingObjectParser.optionalConstructorArg(), (p, c) -> p.mapOrdered(), TOP_HITS);
}
@ -61,16 +65,20 @@ public class FieldStats implements ToXContentObject {
private final Double maxValue;
private final Double meanValue;
private final Double medianValue;
private final String earliestTimestamp;
private final String latestTimestamp;
private final List<Map<String, Object>> topHits;
FieldStats(long count, int cardinality, Double minValue, Double maxValue, Double meanValue, Double medianValue,
List<Map<String, Object>> topHits) {
String earliestTimestamp, String latestTimestamp, List<Map<String, Object>> topHits) {
this.count = count;
this.cardinality = cardinality;
this.minValue = minValue;
this.maxValue = maxValue;
this.meanValue = meanValue;
this.medianValue = medianValue;
this.earliestTimestamp = earliestTimestamp;
this.latestTimestamp = latestTimestamp;
this.topHits = (topHits == null) ? Collections.emptyList() : Collections.unmodifiableList(topHits);
}
@ -98,6 +106,14 @@ public class FieldStats implements ToXContentObject {
return medianValue;
}
public String getEarliestTimestamp() {
return earliestTimestamp;
}
public String getLatestTimestamp() {
return latestTimestamp;
}
public List<Map<String, Object>> getTopHits() {
return topHits;
}
@ -120,6 +136,12 @@ public class FieldStats implements ToXContentObject {
if (medianValue != null) {
builder.field(MEDIAN_VALUE.getPreferredName(), toIntegerIfInteger(medianValue));
}
if (earliestTimestamp != null) {
builder.field(EARLIEST.getPreferredName(), earliestTimestamp);
}
if (latestTimestamp != null) {
builder.field(LATEST.getPreferredName(), latestTimestamp);
}
if (topHits.isEmpty() == false) {
builder.field(TOP_HITS.getPreferredName(), topHits);
}
@ -140,7 +162,7 @@ public class FieldStats implements ToXContentObject {
@Override
public int hashCode() {
return Objects.hash(count, cardinality, minValue, maxValue, meanValue, medianValue, topHits);
return Objects.hash(count, cardinality, minValue, maxValue, meanValue, medianValue, earliestTimestamp, latestTimestamp, topHits);
}
@Override
@ -161,6 +183,8 @@ public class FieldStats implements ToXContentObject {
Objects.equals(this.maxValue, that.maxValue) &&
Objects.equals(this.meanValue, that.meanValue) &&
Objects.equals(this.medianValue, that.medianValue) &&
Objects.equals(this.earliestTimestamp, that.earliestTimestamp) &&
Objects.equals(this.latestTimestamp, that.latestTimestamp) &&
Objects.equals(this.topHits, that.topHits);
}
}

View File

@ -43,6 +43,8 @@ public class FieldStatsTests extends AbstractXContentTestCase<FieldStats> {
Double maxValue = null;
Double meanValue = null;
Double medianValue = null;
String earliestTimestamp = null;
String latestTimestamp = null;
boolean isMetric = randomBoolean();
if (isMetric) {
if (randomBoolean()) {
@ -54,6 +56,12 @@ public class FieldStatsTests extends AbstractXContentTestCase<FieldStats> {
}
meanValue = randomDouble();
medianValue = randomDouble();
} else {
boolean isDate = randomBoolean();
if (isDate) {
earliestTimestamp = randomAlphaOfLength(20);
latestTimestamp = randomAlphaOfLength(20);
}
}
List<Map<String, Object>> topHits = new ArrayList<>();
@ -68,7 +76,7 @@ public class FieldStatsTests extends AbstractXContentTestCase<FieldStats> {
topHits.add(topHit);
}
return new FieldStats(count, cardinality, minValue, maxValue, meanValue, medianValue, topHits);
return new FieldStats(count, cardinality, minValue, maxValue, meanValue, medianValue, earliestTimestamp, latestTimestamp, topHits);
}
@Override

View File

@ -445,6 +445,8 @@ If the request does not encounter errors, you receive the following result:
"release_date" : {
"count" : 24,
"cardinality" : 20,
"earliest" : "1932-06-01",
"latest" : "2011-06-02",
"top_hits" : [
{
"value" : "1985-06-01",
@ -1152,6 +1154,8 @@ If the request does not encounter errors, you receive the following result:
"tpep_dropoff_datetime" : {
"count" : 19998,
"cardinality" : 9066,
"earliest" : "2018-05-31 06:18:15",
"latest" : "2018-06-02 02:25:44",
"top_hits" : [
{
"value" : "2018-06-01 01:12:12",
@ -1198,6 +1202,8 @@ If the request does not encounter errors, you receive the following result:
"tpep_pickup_datetime" : {
"count" : 19998,
"cardinality" : 8760,
"earliest" : "2018-05-31 06:08:31",
"latest" : "2018-06-02 01:21:21",
"top_hits" : [
{
"value" : "2018-06-01 00:01:23",
@ -1457,6 +1463,8 @@ this:
"timestamp" : {
"count" : 53,
"cardinality" : 28,
"earliest" : "2018-09-27T14:39:28,518",
"latest" : "2018-09-27T14:39:37,012",
"top_hits" : [
{
"value" : "2018-09-27T14:39:29,859",
@ -1719,6 +1727,8 @@ this:
"timestamp" : {
"count" : 53,
"cardinality" : 28,
"earliest" : "2018-09-27T14:39:28,518",
"latest" : "2018-09-27T14:39:37,012",
"top_hits" : [
{
"value" : "2018-09-27T14:39:29,859",

View File

@ -5,7 +5,9 @@
*/
package org.elasticsearch.xpack.core.ml.filestructurefinder;
import org.elasticsearch.Version;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
@ -27,12 +29,14 @@ public class FieldStats implements ToXContentObject, Writeable {
static final ParseField MAX_VALUE = new ParseField("max_value");
static final ParseField MEAN_VALUE = new ParseField("mean_value");
static final ParseField MEDIAN_VALUE = new ParseField("median_value");
static final ParseField EARLIEST = new ParseField("earliest");
static final ParseField LATEST = new ParseField("latest");
static final ParseField TOP_HITS = new ParseField("top_hits");
@SuppressWarnings("unchecked")
public static final ConstructingObjectParser<FieldStats, Void> PARSER = new ConstructingObjectParser<>("field_stats", false,
a -> new FieldStats((long) a[0], (int) a[1], (Double) a[2], (Double) a[3], (Double) a[4], (Double) a[5],
(List<Map<String, Object>>) a[6]));
(String) a[6], (String) a[7], (List<Map<String, Object>>) a[8]));
static {
PARSER.declareLong(ConstructingObjectParser.constructorArg(), COUNT);
@ -41,6 +45,8 @@ public class FieldStats implements ToXContentObject, Writeable {
PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), MAX_VALUE);
PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), MEAN_VALUE);
PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), MEDIAN_VALUE);
PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), EARLIEST);
PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), LATEST);
PARSER.declareObjectArray(ConstructingObjectParser.optionalConstructorArg(), (p, c) -> p.mapOrdered(), TOP_HITS);
}
@ -50,20 +56,33 @@ public class FieldStats implements ToXContentObject, Writeable {
private final Double maxValue;
private final Double meanValue;
private final Double medianValue;
private final String earliestTimestamp;
private final String latestTimestamp;
private final List<Map<String, Object>> topHits;
public FieldStats(long count, int cardinality, List<Map<String, Object>> topHits) {
this(count, cardinality, null, null, null, null, topHits);
this(count, cardinality, null, null, null, null, null, null, topHits);
}
public FieldStats(long count, int cardinality, String earliestTimestamp, String latestTimestamp, List<Map<String, Object>> topHits) {
this(count, cardinality, null, null, null, null, earliestTimestamp, latestTimestamp, topHits);
}
public FieldStats(long count, int cardinality, Double minValue, Double maxValue, Double meanValue, Double medianValue,
List<Map<String, Object>> topHits) {
this(count, cardinality, minValue, maxValue, meanValue, medianValue, null, null, topHits);
}
FieldStats(long count, int cardinality, Double minValue, Double maxValue, Double meanValue, Double medianValue,
String earliestTimestamp, String latestTimestamp, List<Map<String, Object>> topHits) {
this.count = count;
this.cardinality = cardinality;
this.minValue = minValue;
this.maxValue = maxValue;
this.meanValue = meanValue;
this.medianValue = medianValue;
this.earliestTimestamp = earliestTimestamp;
this.latestTimestamp = latestTimestamp;
this.topHits = (topHits == null) ? Collections.emptyList() : Collections.unmodifiableList(topHits);
}
@ -74,6 +93,13 @@ public class FieldStats implements ToXContentObject, Writeable {
maxValue = in.readOptionalDouble();
meanValue = in.readOptionalDouble();
medianValue = in.readOptionalDouble();
if (in.getVersion().onOrAfter(Version.V_7_3_0)) {
earliestTimestamp = in.readOptionalString();
latestTimestamp = in.readOptionalString();
} else {
earliestTimestamp = null;
latestTimestamp = null;
}
topHits = in.readList(StreamInput::readMap);
}
@ -85,6 +111,10 @@ public class FieldStats implements ToXContentObject, Writeable {
out.writeOptionalDouble(maxValue);
out.writeOptionalDouble(meanValue);
out.writeOptionalDouble(medianValue);
if (out.getVersion().onOrAfter(Version.V_7_3_0)) {
out.writeOptionalString(earliestTimestamp);
out.writeOptionalString(latestTimestamp);
}
out.writeCollection(topHits, StreamOutput::writeMap);
}
@ -112,6 +142,14 @@ public class FieldStats implements ToXContentObject, Writeable {
return medianValue;
}
public String getEarliestTimestamp() {
return earliestTimestamp;
}
public String getLatestTimestamp() {
return latestTimestamp;
}
public List<Map<String, Object>> getTopHits() {
return topHits;
}
@ -134,6 +172,12 @@ public class FieldStats implements ToXContentObject, Writeable {
if (medianValue != null) {
builder.field(MEDIAN_VALUE.getPreferredName(), toIntegerIfInteger(medianValue));
}
if (earliestTimestamp != null) {
builder.field(EARLIEST.getPreferredName(), earliestTimestamp);
}
if (latestTimestamp != null) {
builder.field(LATEST.getPreferredName(), latestTimestamp);
}
if (topHits.isEmpty() == false) {
builder.field(TOP_HITS.getPreferredName(), topHits);
}
@ -154,7 +198,7 @@ public class FieldStats implements ToXContentObject, Writeable {
@Override
public int hashCode() {
return Objects.hash(count, cardinality, minValue, maxValue, meanValue, medianValue, topHits);
return Objects.hash(count, cardinality, minValue, maxValue, meanValue, medianValue, earliestTimestamp, latestTimestamp, topHits);
}
@Override
@ -175,6 +219,13 @@ public class FieldStats implements ToXContentObject, Writeable {
Objects.equals(this.maxValue, that.maxValue) &&
Objects.equals(this.meanValue, that.meanValue) &&
Objects.equals(this.medianValue, that.medianValue) &&
Objects.equals(this.earliestTimestamp, that.earliestTimestamp) &&
Objects.equals(this.latestTimestamp, that.latestTimestamp) &&
Objects.equals(this.topHits, that.topHits);
}
@Override
public String toString() {
return Strings.toString(this);
}
}

View File

@ -30,6 +30,8 @@ public class FieldStatsTests extends AbstractSerializingTestCase<FieldStats> {
Double maxValue = null;
Double meanValue = null;
Double medianValue = null;
String earliestTimestamp = null;
String latestTimestamp = null;
boolean isMetric = randomBoolean();
if (isMetric) {
if (randomBoolean()) {
@ -41,6 +43,12 @@ public class FieldStatsTests extends AbstractSerializingTestCase<FieldStats> {
}
meanValue = randomDouble();
medianValue = randomDouble();
} else {
boolean isDate = randomBoolean();
if (isDate) {
earliestTimestamp = randomAlphaOfLength(20);
latestTimestamp = randomAlphaOfLength(20);
}
}
List<Map<String, Object>> topHits = new ArrayList<>();
@ -55,7 +63,7 @@ public class FieldStatsTests extends AbstractSerializingTestCase<FieldStats> {
topHits.add(topHit);
}
return new FieldStats(count, cardinality, minValue, maxValue, meanValue, medianValue, topHits);
return new FieldStats(count, cardinality, minValue, maxValue, meanValue, medianValue, earliestTimestamp, latestTimestamp, topHits);
}
@Override

View File

@ -159,8 +159,7 @@ public class DelimitedFileStructureFinder implements FileStructureFinder {
SortedMap<String, Object> mappings = mappingsAndFieldStats.v1();
if (timeField != null) {
mappings.put(FileStructureUtils.DEFAULT_TIMESTAMP_FIELD,
Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, "date"));
mappings.put(FileStructureUtils.DEFAULT_TIMESTAMP_FIELD, FileStructureUtils.DATE_MAPPING_WITHOUT_FORMAT);
}
if (mappingsAndFieldStats.v2() != null) {

View File

@ -5,10 +5,15 @@
*/
package org.elasticsearch.xpack.ml.filestructurefinder;
import org.elasticsearch.common.time.DateFormatter;
import org.elasticsearch.common.time.DateFormatters;
import org.elasticsearch.index.mapper.DateFieldMapper;
import org.elasticsearch.xpack.core.ml.filestructurefinder.FieldStats;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
@ -26,8 +31,52 @@ import java.util.stream.Collectors;
public class FieldStatsCalculator {
private long count;
private SortedMap<String, Integer> countsByStringValue = new TreeMap<>();
private SortedMap<Double, Integer> countsByNumericValue = new TreeMap<>();
private SortedMap<String, Integer> countsByStringValue;
private SortedMap<Double, Integer> countsByNumericValue;
private DateFormatter dateFormatter;
/**
* Parsed earliest and latest times. Some date formats may cause these to be
* wrong due to lack of information. For example, if the date format does not
* contain a year then these will be in 1970, and if there's no timezone in
* the format then these will be on the assumption the time was in UTC. However,
* since all the timestamps will be inaccurate in the same way the determination
* of the earliest and latest will still be correct. The trick then is to never
* print them out...
*/
private Instant earliestTimestamp;
private Instant latestTimestamp;
/**
* Earliest and latest times in the exact form they were present in the input,
* making the output immune to issues like not knowing the correct timezone
* or year when parsing.
*/
private String earliestTimeString;
private String latestTimeString;
public FieldStatsCalculator(Map<String, String> mapping) {
switch (mapping.get(FileStructureUtils.MAPPING_TYPE_SETTING)) {
case "byte":
case "short":
case "integer":
case "long":
case "half_float":
case "float":
case "double":
countsByNumericValue = new TreeMap<>();
break;
case "date":
case "date_nanos":
String format = mapping.get(FileStructureUtils.MAPPING_FORMAT_SETTING);
dateFormatter = (format == null) ? DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER : DateFormatter.forPattern(format);
// Dates are treated like strings for top hits
countsByStringValue = new TreeMap<>();
break;
default:
countsByStringValue = new TreeMap<>();
break;
}
}
/**
* Add a collection of values to the calculator.
@ -41,14 +90,27 @@ public class FieldStatsCalculator {
for (String fieldValue : fieldValues) {
countsByStringValue.compute(fieldValue, (k, v) -> (v == null) ? 1 : (1 + v));
if (countsByNumericValue != null) {
try {
countsByNumericValue.compute(Double.valueOf(fieldValue), (k, v) -> (v == null) ? 1 : (1 + v));
} catch (NumberFormatException e) {
countsByNumericValue = null;
// This should not happen in the usual context this class is used in within the file structure finder,
// as "double" should be big enough to hold any value that the file structure finder considers numeric
throw new IllegalArgumentException("Field with numeric mapping [" + fieldValue + "] could not be parsed as type double",
e);
}
} else {
countsByStringValue.compute(fieldValue, (k, v) -> (v == null) ? 1 : (1 + v));
if (dateFormatter != null) {
Instant parsedTimestamp = DateFormatters.from(dateFormatter.parse(fieldValue)).toInstant();
if (earliestTimestamp == null || earliestTimestamp.isAfter(parsedTimestamp)) {
earliestTimestamp = parsedTimestamp;
earliestTimeString = fieldValue;
}
if (latestTimestamp == null || latestTimestamp.isBefore(parsedTimestamp)) {
latestTimestamp = parsedTimestamp;
latestTimeString = fieldValue;
}
}
}
}
@ -61,11 +123,17 @@ public class FieldStatsCalculator {
*/
public FieldStats calculate(int numTopHits) {
if (countsByNumericValue != null && countsByNumericValue.isEmpty() == false) {
return new FieldStats(count, countsByNumericValue.size(), countsByNumericValue.firstKey(), countsByNumericValue.lastKey(),
calculateMean(), calculateMedian(), findNumericTopHits(numTopHits));
if (countsByNumericValue != null) {
if (countsByNumericValue.isEmpty()) {
assert count == 0;
return new FieldStats(count, 0, Collections.emptyList());
} else {
assert count > 0;
return new FieldStats(count, countsByNumericValue.size(), countsByNumericValue.firstKey(), countsByNumericValue.lastKey(),
calculateMean(), calculateMedian(), findNumericTopHits(numTopHits));
}
} else {
return new FieldStats(count, countsByStringValue.size(), findStringTopHits(numTopHits));
return new FieldStats(count, countsByStringValue.size(), earliestTimeString, latestTimeString, findStringTopHits(numTopHits));
}
}
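The split above between the parsed Instant fields and the raw earliestTimeString/latestTimeString is what keeps the reported values exact even when the format loses information during parsing. A minimal sketch of that behaviour, placed in the same package for visibility, using a hypothetical year-less syslog-style format and made-up sample values (per the comment above, every value then parses into 1970, yet the ordering and the reported strings stay correct):

package org.elasticsearch.xpack.ml.filestructurefinder;

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

import org.elasticsearch.xpack.core.ml.filestructurefinder.FieldStats;

public class EarliestLatestSketch {
    public static void main(String[] args) {
        // Hypothetical "date" mapping whose format has no year component
        Map<String, String> mapping = new HashMap<>();
        mapping.put(FileStructureUtils.MAPPING_TYPE_SETTING, "date");
        mapping.put(FileStructureUtils.MAPPING_FORMAT_SETTING, "MMM dd HH:mm:ss");

        FieldStatsCalculator calculator = new FieldStatsCalculator(mapping);
        calculator.accept(Arrays.asList("Jun 02 01:21:21", "May 31 06:08:31", "Jun 01 00:01:23"));

        FieldStats stats = calculator.calculate(3);
        // The original strings are reported, not the 1970-based parsed instants
        System.out.println(stats.getEarliestTimestamp()); // May 31 06:08:31
        System.out.println(stats.getLatestTimestamp());   // Jun 02 01:21:21
    }
}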

View File

@ -29,6 +29,8 @@ public final class FileStructureUtils {
public static final String MAPPING_TYPE_SETTING = "type";
public static final String MAPPING_FORMAT_SETTING = "format";
public static final String MAPPING_PROPERTIES_SETTING = "properties";
public static final Map<String, String> DATE_MAPPING_WITHOUT_FORMAT =
Collections.singletonMap(MAPPING_TYPE_SETTING, "date");
private static final int NUM_TOP_HITS = 10;
// NUMBER Grok pattern doesn't support scientific notation, so we extend it
@ -231,7 +233,7 @@ public final class FileStructureUtils {
Collection<String> fieldValuesAsStrings = fieldValues.stream().map(Object::toString).collect(Collectors.toList());
Map<String, String> mapping = guessScalarMapping(explanation, fieldName, fieldValuesAsStrings, timeoutChecker);
timeoutChecker.check("mapping determination");
return new Tuple<>(mapping, calculateFieldStats(fieldValuesAsStrings, timeoutChecker));
return new Tuple<>(mapping, calculateFieldStats(mapping, fieldValuesAsStrings, timeoutChecker));
}
private static Stream<Object> flatten(Object value) {
@ -323,13 +325,14 @@ public final class FileStructureUtils {
/**
* Calculate stats for a set of field values.
* @param mapping The mapping for the field.
* @param fieldValues Values of the field for which field stats are to be calculated.
* @param timeoutChecker Will abort the operation if its timeout is exceeded.
* @return The stats calculated from the field values.
*/
static FieldStats calculateFieldStats(Collection<String> fieldValues, TimeoutChecker timeoutChecker) {
static FieldStats calculateFieldStats(Map<String, String> mapping, Collection<String> fieldValues, TimeoutChecker timeoutChecker) {
FieldStatsCalculator calculator = new FieldStatsCalculator();
FieldStatsCalculator calculator = new FieldStatsCalculator(mapping);
calculator.accept(fieldValues);
timeoutChecker.check("field stats calculation");
return calculator.calculate(NUM_TOP_HITS);

View File

@ -195,14 +195,16 @@ public final class GrokPatternCreator {
/**
* Build a Grok pattern that will match all of the sample messages in their entirety.
* @param seedPatternName A pattern that has already been determined to match some portion of every sample message.
* @param seedFieldName The field name to be used for the portion of every sample message that the seed pattern matches.
* @param seedMapping The mapping for the seed field.
* @param seedFieldName The field name to be used for the portion of every sample message that the seed pattern matches.
* @return The built Grok pattern.
*/
public String createGrokPatternFromExamples(String seedPatternName, String seedFieldName) {
public String createGrokPatternFromExamples(String seedPatternName, Map<String, String> seedMapping, String seedFieldName) {
overallGrokPatternBuilder.setLength(0);
GrokPatternCandidate seedCandidate = new NoMappingGrokPatternCandidate(seedPatternName, seedFieldName, grokPatternDefinitions);
GrokPatternCandidate seedCandidate = new PrecalculatedMappingGrokPatternCandidate(seedPatternName, seedMapping, seedFieldName,
grokPatternDefinitions);
processCandidateAndSplit(seedCandidate, true, sampleMessages, false, 0, false, 0);
@ -433,7 +435,7 @@ public final class GrokPatternCreator {
static class ValueOnlyGrokPatternCandidate implements GrokPatternCandidate {
private final String grokPatternName;
private final String mappingType;
private final Map<String, String> mapping;
private final String fieldName;
private final Grok grok;
@ -451,7 +453,8 @@ public final class GrokPatternCreator {
* @param fieldName Name of the field to extract from the match.
*/
ValueOnlyGrokPatternCandidate(String grokPatternName, String mappingType, String fieldName) {
this(grokPatternName, mappingType, fieldName, "\\b", "\\b", Grok.getBuiltinPatterns());
this(grokPatternName, Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, mappingType), fieldName,
"\\b", "\\b", Grok.getBuiltinPatterns());
}
/**
@ -462,7 +465,8 @@ public final class GrokPatternCreator {
*/
ValueOnlyGrokPatternCandidate(String grokPatternName, String mappingType, String fieldName,
Map<String, String> grokPatternDefinitions) {
this(grokPatternName, mappingType, fieldName, "\\b", "\\b", grokPatternDefinitions);
this(grokPatternName, Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, mappingType), fieldName,
"\\b", "\\b", grokPatternDefinitions);
}
/**
@ -473,25 +477,28 @@ public final class GrokPatternCreator {
* @param postBreak Only consider the match if it's broken from the following text by this.
*/
ValueOnlyGrokPatternCandidate(String grokPatternName, String mappingType, String fieldName, String preBreak, String postBreak) {
this(grokPatternName, mappingType, fieldName, preBreak, postBreak, Grok.getBuiltinPatterns());
this(grokPatternName, Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, mappingType), fieldName,
preBreak, postBreak, Grok.getBuiltinPatterns());
}
/**
* @param grokPatternName Name of the Grok pattern to try to match - must match one defined in Logstash.
* @param mappingType Data type for field in Elasticsearch mappings.
* @param mapping Elasticsearch mapping for the field.
* @param fieldName Name of the field to extract from the match.
* @param preBreak Only consider the match if it's broken from the previous text by this.
* @param postBreak Only consider the match if it's broken from the following text by this.
* @param grokPatternDefinitions Definitions of Grok patterns to be used.
*/
ValueOnlyGrokPatternCandidate(String grokPatternName, String mappingType, String fieldName, String preBreak, String postBreak,
Map<String, String> grokPatternDefinitions) {
this.grokPatternName = grokPatternName;
this.mappingType = mappingType;
this.fieldName = fieldName;
ValueOnlyGrokPatternCandidate(String grokPatternName, Map<String, String> mapping, String fieldName, String preBreak,
String postBreak, Map<String, String> grokPatternDefinitions) {
this.grokPatternName = Objects.requireNonNull(grokPatternName);
this.mapping = Collections.unmodifiableMap(mapping);
this.fieldName = Objects.requireNonNull(fieldName);
// The (?m) here has the Ruby meaning, which is equivalent to (?s) in Java
grok = new Grok(grokPatternDefinitions, "(?m)%{DATA:" + PREFACE + "}" + preBreak +
"%{" + grokPatternName + ":" + VALUE + "}" + postBreak + "%{GREEDYDATA:" + EPILOGUE + "}", TimeoutChecker.watchdog);
grok = new Grok(grokPatternDefinitions,
"(?m)%{DATA:" + PREFACE + "}" + Objects.requireNonNull(preBreak) +
"%{" + grokPatternName + ":" + VALUE + "}" + Objects.requireNonNull(postBreak) + "%{GREEDYDATA:" + EPILOGUE + "}",
TimeoutChecker.watchdog);
}
@Override
@ -520,23 +527,24 @@ public final class GrokPatternCreator {
epilogues.add(captures.getOrDefault(EPILOGUE, "").toString());
}
String adjustedFieldName = buildFieldName(fieldNameCountStore, fieldName);
if (mappings != null) {
Map<String, String> fullMappingType = Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, mappingType);
if ("date".equals(mappingType)) {
try {
fullMappingType = FileStructureUtils.findTimestampMapping(explanation, values, timeoutChecker);
} catch (IllegalArgumentException e) {
// This feels like it shouldn't happen, but there may be some obscure edge case
// where it does, and in production it will cause less frustration to just return
// a mapping type of "date" with no format than to fail the whole analysis
assert e == null : e.getMessage();
}
timeoutChecker.check("mapping determination");
Map<String, String> adjustedMapping = mapping;
// If the mapping is type "date" with no format, try to adjust it to include the format
if (FileStructureUtils.DATE_MAPPING_WITHOUT_FORMAT.equals(adjustedMapping)) {
try {
adjustedMapping = FileStructureUtils.findTimestampMapping(explanation, values, timeoutChecker);
} catch (IllegalArgumentException e) {
// This feels like it shouldn't happen, but there may be some obscure edge case
// where it does, and in production it will cause less frustration to just return
// a mapping type of "date" with no format than to fail the whole analysis
assert e == null : e.getMessage();
}
mappings.put(adjustedFieldName, fullMappingType);
timeoutChecker.check("mapping determination");
}
if (mappings != null) {
mappings.put(adjustedFieldName, adjustedMapping);
}
if (fieldStats != null) {
fieldStats.put(adjustedFieldName, FileStructureUtils.calculateFieldStats(values, timeoutChecker));
fieldStats.put(adjustedFieldName, FileStructureUtils.calculateFieldStats(adjustedMapping, values, timeoutChecker));
}
return "%{" + grokPatternName + ":" + adjustedFieldName + "}";
}
@ -598,13 +606,13 @@ public final class GrokPatternCreator {
timeoutChecker.check("full message Grok pattern field extraction");
}
String adjustedFieldName = buildFieldName(fieldNameCountStore, fieldName);
Map<String, String> mapping = FileStructureUtils.guessScalarMapping(explanation, adjustedFieldName, values, timeoutChecker);
timeoutChecker.check("mapping determination");
if (mappings != null) {
mappings.put(adjustedFieldName,
FileStructureUtils.guessScalarMapping(explanation, adjustedFieldName, values, timeoutChecker));
timeoutChecker.check("mapping determination");
mappings.put(adjustedFieldName, mapping);
}
if (fieldStats != null) {
fieldStats.put(adjustedFieldName, FileStructureUtils.calculateFieldStats(values, timeoutChecker));
fieldStats.put(adjustedFieldName, FileStructureUtils.calculateFieldStats(mapping, values, timeoutChecker));
}
return "\\b" + fieldName + "=%{USER:" + adjustedFieldName + "}";
}
@ -613,10 +621,11 @@ public final class GrokPatternCreator {
/**
* A Grok pattern candidate that matches a single named Grok pattern but will not update mappings.
*/
static class NoMappingGrokPatternCandidate extends ValueOnlyGrokPatternCandidate {
static class PrecalculatedMappingGrokPatternCandidate extends ValueOnlyGrokPatternCandidate {
NoMappingGrokPatternCandidate(String grokPatternName, String fieldName, Map<String, String> grokPatternDefinitions) {
super(grokPatternName, null, fieldName, grokPatternDefinitions);
PrecalculatedMappingGrokPatternCandidate(String grokPatternName, Map<String, String> mapping, String fieldName,
Map<String, String> grokPatternDefinitions) {
super(grokPatternName, mapping, fieldName, "\\b", "\\b", grokPatternDefinitions);
}
@Override
@ -710,16 +719,16 @@ public final class GrokPatternCreator {
for (Map.Entry<String, Collection<String>> valuesForField : valuesPerField.entrySet()) {
String fieldName = valuesForField.getKey();
if (mappings != null) {
// Exclude the time field because that will be dropped and replaced with @timestamp
if (fieldName.equals(timeField) == false) {
mappings.put(fieldName,
FileStructureUtils.guessScalarMapping(explanation, fieldName, valuesForField.getValue(), timeoutChecker));
timeoutChecker.check("mapping determination");
}
Map<String, String> mapping =
FileStructureUtils.guessScalarMapping(explanation, fieldName, valuesForField.getValue(), timeoutChecker);
timeoutChecker.check("mapping determination");
// Exclude the time field because that will be dropped and replaced with @timestamp
if (mappings != null && fieldName.equals(timeField) == false) {
mappings.put(fieldName, mapping);
}
if (fieldStats != null) {
fieldStats.put(fieldName, FileStructureUtils.calculateFieldStats(valuesForField.getValue(), timeoutChecker));
fieldStats.put(fieldName,
FileStructureUtils.calculateFieldStats(mapping, valuesForField.getValue(), timeoutChecker));
}
}
}

View File

@ -70,8 +70,7 @@ public class NdJsonFileStructureFinder implements FileStructureFinder {
SortedMap<String, Object> mappings = mappingsAndFieldStats.v1();
if (timeField != null) {
mappings.put(FileStructureUtils.DEFAULT_TIMESTAMP_FIELD,
Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, "date"));
mappings.put(FileStructureUtils.DEFAULT_TIMESTAMP_FIELD, FileStructureUtils.DATE_MAPPING_WITHOUT_FORMAT);
}
if (mappingsAndFieldStats.v2() != null) {

View File

@ -108,12 +108,13 @@ public class TextLogFileStructureFinder implements FileStructureFinder {
.setNumMessagesAnalyzed(sampleMessages.size())
.setMultilineStartPattern(multiLineRegex);
Map<String, String> messageMapping = Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, "text");
SortedMap<String, Object> mappings = new TreeMap<>();
mappings.put("message", Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, "text"));
mappings.put(FileStructureUtils.DEFAULT_TIMESTAMP_FIELD, Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, "date"));
mappings.put("message", messageMapping);
mappings.put(FileStructureUtils.DEFAULT_TIMESTAMP_FIELD, FileStructureUtils.DATE_MAPPING_WITHOUT_FORMAT);
SortedMap<String, FieldStats> fieldStats = new TreeMap<>();
fieldStats.put("message", FileStructureUtils.calculateFieldStats(sampleMessages, timeoutChecker));
fieldStats.put("message", FileStructureUtils.calculateFieldStats(messageMapping, sampleMessages, timeoutChecker));
Map<String, String> customGrokPatternDefinitions = timestampFormatFinder.getCustomGrokPatternDefinitions();
GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, sampleMessages, mappings, fieldStats,
@ -136,8 +137,8 @@ public class TextLogFileStructureFinder implements FileStructureFinder {
if (interimTimestampField == null) {
interimTimestampField = "timestamp";
}
grokPattern =
grokPatternCreator.createGrokPatternFromExamples(timestampFormatFinder.getGrokPatternName(), interimTimestampField);
grokPattern = grokPatternCreator.createGrokPatternFromExamples(timestampFormatFinder.getGrokPatternName(),
timestampFormatFinder.getEsDateMappingTypeWithFormat(), interimTimestampField);
}
}

View File

@ -120,8 +120,7 @@ public class XmlFileStructureFinder implements FileStructureFinder {
SortedMap<String, Object> outerMappings = new TreeMap<>();
outerMappings.put(topLevelTag, secondLevelProperties);
if (timeField != null) {
outerMappings.put(FileStructureUtils.DEFAULT_TIMESTAMP_FIELD,
Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, "date"));
outerMappings.put(FileStructureUtils.DEFAULT_TIMESTAMP_FIELD, FileStructureUtils.DATE_MAPPING_WITHOUT_FORMAT);
}
FileStructure structure = structureBuilder

View File

@ -10,14 +10,19 @@ import org.elasticsearch.xpack.core.ml.filestructurefinder.FieldStats;
import java.util.Arrays;
import java.util.Collections;
import java.util.DoubleSummaryStatistics;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class FieldStatsCalculatorTests extends FileStructureTestCase {
private static final Map<String, String> LONG = Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, "long");
private static final Map<String, String> DOUBLE = Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, "double");
private static final Map<String, String> KEYWORD = Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, "keyword");
public void testMean() {
FieldStatsCalculator calculator = new FieldStatsCalculator();
FieldStatsCalculator calculator = new FieldStatsCalculator(DOUBLE);
calculator.accept(Arrays.asList("1", "3.5", "2.5", "9"));
@ -26,7 +31,7 @@ public class FieldStatsCalculatorTests extends FileStructureTestCase {
public void testMedianGivenOddCount() {
FieldStatsCalculator calculator = new FieldStatsCalculator();
FieldStatsCalculator calculator = new FieldStatsCalculator(LONG);
calculator.accept(Arrays.asList("3", "23", "-1", "5", "1000"));
@ -35,7 +40,7 @@ public class FieldStatsCalculatorTests extends FileStructureTestCase {
public void testMedianGivenOddCountMinimal() {
FieldStatsCalculator calculator = new FieldStatsCalculator();
FieldStatsCalculator calculator = new FieldStatsCalculator(LONG);
calculator.accept(Collections.singletonList("3"));
@ -44,7 +49,7 @@ public class FieldStatsCalculatorTests extends FileStructureTestCase {
public void testMedianGivenEvenCountMiddleValuesDifferent() {
FieldStatsCalculator calculator = new FieldStatsCalculator();
FieldStatsCalculator calculator = new FieldStatsCalculator(LONG);
calculator.accept(Arrays.asList("3", "23", "-1", "5", "1000", "6"));
@ -53,7 +58,7 @@ public class FieldStatsCalculatorTests extends FileStructureTestCase {
public void testMedianGivenEvenCountMiddleValuesSame() {
FieldStatsCalculator calculator = new FieldStatsCalculator();
FieldStatsCalculator calculator = new FieldStatsCalculator(LONG);
calculator.accept(Arrays.asList("3", "23", "-1", "5", "1000", "5"));
@ -62,7 +67,7 @@ public class FieldStatsCalculatorTests extends FileStructureTestCase {
public void testMedianGivenEvenCountMinimal() {
FieldStatsCalculator calculator = new FieldStatsCalculator();
FieldStatsCalculator calculator = new FieldStatsCalculator(LONG);
calculator.accept(Arrays.asList("4", "4"));
@ -71,7 +76,7 @@ public class FieldStatsCalculatorTests extends FileStructureTestCase {
public void testTopHitsNumeric() {
FieldStatsCalculator calculator = new FieldStatsCalculator();
FieldStatsCalculator calculator = new FieldStatsCalculator(DOUBLE);
calculator.accept(Arrays.asList("4", "4", "7", "4", "6", "5.2", "6", "5.2", "16", "4", "5.2"));
@ -88,7 +93,7 @@ public class FieldStatsCalculatorTests extends FileStructureTestCase {
public void testTopHitsString() {
FieldStatsCalculator calculator = new FieldStatsCalculator();
FieldStatsCalculator calculator = new FieldStatsCalculator(KEYWORD);
calculator.accept(Arrays.asList("s", "s", "d", "s", "f", "x", "f", "x", "n", "s", "x"));
@ -105,7 +110,8 @@ public class FieldStatsCalculatorTests extends FileStructureTestCase {
public void testCalculateGivenEmpty() {
FieldStatsCalculator calculator = new FieldStatsCalculator();
FieldStatsCalculator calculator =
new FieldStatsCalculator(randomFrom(Arrays.asList(LONG, DOUBLE, KEYWORD, FileStructureUtils.DATE_MAPPING_WITHOUT_FORMAT)));
calculator.accept(Collections.emptyList());
@ -117,12 +123,15 @@ public class FieldStatsCalculatorTests extends FileStructureTestCase {
assertNull(stats.getMaxValue());
assertNull(stats.getMeanValue());
assertNull(stats.getMedianValue());
assertNull(stats.getEarliestTimestamp());
assertNull(stats.getLatestTimestamp());
assertEquals(0, stats.getTopHits().size());
}
public void testCalculateGivenNumericField() {
FieldStatsCalculator calculator = new FieldStatsCalculator();
FieldStatsCalculator calculator = new FieldStatsCalculator(DOUBLE);
calculator.accept(Arrays.asList("4.5", "4.5", "7", "4.5", "6", "5", "6", "5", "25", "4.5", "5"));
@ -134,6 +143,8 @@ public class FieldStatsCalculatorTests extends FileStructureTestCase {
assertEquals(25.0, stats.getMaxValue(), 1e-10);
assertEquals(7.0, stats.getMeanValue(), 1e-10);
assertEquals(5.0, stats.getMedianValue(), 1e-10);
assertNull(stats.getEarliestTimestamp());
assertNull(stats.getLatestTimestamp());
List<Map<String, Object>> topHits = stats.getTopHits();
@ -148,7 +159,7 @@ public class FieldStatsCalculatorTests extends FileStructureTestCase {
public void testCalculateGivenStringField() {
FieldStatsCalculator calculator = new FieldStatsCalculator();
FieldStatsCalculator calculator = new FieldStatsCalculator(KEYWORD);
calculator.accept(Arrays.asList("s", "s", "d", "s", "f", "x", "f", "x", "n", "s", "x"));
@ -160,6 +171,8 @@ public class FieldStatsCalculatorTests extends FileStructureTestCase {
assertNull(stats.getMaxValue());
assertNull(stats.getMeanValue());
assertNull(stats.getMedianValue());
assertNull(stats.getEarliestTimestamp());
assertNull(stats.getLatestTimestamp());
List<Map<String, Object>> topHits = stats.getTopHits();
@ -174,7 +187,7 @@ public class FieldStatsCalculatorTests extends FileStructureTestCase {
public void testCalculateGivenMixedField() {
FieldStatsCalculator calculator = new FieldStatsCalculator();
FieldStatsCalculator calculator = new FieldStatsCalculator(KEYWORD);
calculator.accept(Arrays.asList("4", "4", "d", "4", "f", "x", "f", "x", "16", "4", "x"));
@ -186,6 +199,8 @@ public class FieldStatsCalculatorTests extends FileStructureTestCase {
assertNull(stats.getMaxValue());
assertNull(stats.getMeanValue());
assertNull(stats.getMedianValue());
assertNull(stats.getEarliestTimestamp());
assertNull(stats.getLatestTimestamp());
List<Map<String, Object>> topHits = stats.getTopHits();
@ -198,10 +213,71 @@ public class FieldStatsCalculatorTests extends FileStructureTestCase {
assertEquals(2, topHits.get(2).get("count"));
}
public void testGivenDateFieldWithoutFormat() {
FieldStatsCalculator calculator = new FieldStatsCalculator(FileStructureUtils.DATE_MAPPING_WITHOUT_FORMAT);
calculator.accept(Arrays.asList("2018-10-08T10:49:16.642", "2018-10-08T10:49:16.642", "2018-10-08T10:49:16.642",
"2018-09-08T11:12:13.789", "2019-01-28T01:02:03.456", "2018-09-08T11:12:13.789"));
FieldStats stats = calculator.calculate(3);
assertEquals(6L, stats.getCount());
assertEquals(3, stats.getCardinality());
assertNull(stats.getMinValue());
assertNull(stats.getMaxValue());
assertNull(stats.getMeanValue());
assertNull(stats.getMedianValue());
assertEquals("2018-09-08T11:12:13.789", stats.getEarliestTimestamp());
assertEquals("2019-01-28T01:02:03.456", stats.getLatestTimestamp());
List<Map<String, Object>> topHits = stats.getTopHits();
assertEquals(3, topHits.size());
assertEquals("2018-10-08T10:49:16.642", topHits.get(0).get("value"));
assertEquals(3, topHits.get(0).get("count"));
assertEquals("2018-09-08T11:12:13.789", topHits.get(1).get("value"));
assertEquals(2, topHits.get(1).get("count"));
assertEquals("2019-01-28T01:02:03.456", topHits.get(2).get("value"));
assertEquals(1, topHits.get(2).get("count"));
}
public void testGivenDateFieldWithFormat() {
Map<String, String> dateMapping = new HashMap<>();
dateMapping.put(FileStructureUtils.MAPPING_TYPE_SETTING, "date");
dateMapping.put(FileStructureUtils.MAPPING_FORMAT_SETTING, "M/dd/yyyy h:mma");
FieldStatsCalculator calculator = new FieldStatsCalculator(dateMapping);
calculator.accept(Arrays.asList("10/08/2018 10:49AM", "10/08/2018 10:49AM", "10/08/2018 10:49AM",
"9/08/2018 11:12AM", "1/28/2019 1:02AM", "9/08/2018 11:12AM"));
FieldStats stats = calculator.calculate(3);
assertEquals(6L, stats.getCount());
assertEquals(3, stats.getCardinality());
assertNull(stats.getMinValue());
assertNull(stats.getMaxValue());
assertNull(stats.getMeanValue());
assertNull(stats.getMedianValue());
assertEquals("9/08/2018 11:12AM", stats.getEarliestTimestamp());
assertEquals("1/28/2019 1:02AM", stats.getLatestTimestamp());
List<Map<String, Object>> topHits = stats.getTopHits();
assertEquals(3, topHits.size());
assertEquals("10/08/2018 10:49AM", topHits.get(0).get("value"));
assertEquals(3, topHits.get(0).get("count"));
assertEquals("9/08/2018 11:12AM", topHits.get(1).get("value"));
assertEquals(2, topHits.get(1).get("count"));
assertEquals("1/28/2019 1:02AM", topHits.get(2).get("value"));
assertEquals(1, topHits.get(2).get("count"));
}
public void testJavaStatsEquivalence() {
DoubleSummaryStatistics summaryStatistics = new DoubleSummaryStatistics();
FieldStatsCalculator calculator = new FieldStatsCalculator();
FieldStatsCalculator calculator = new FieldStatsCalculator(DOUBLE);
for (int numValues = randomIntBetween(1000, 10000); numValues > 0; --numValues) {

View File

@ -323,8 +323,7 @@ public class FileStructureUtilsTests extends FileStructureTestCase {
sample2.put("nothing", null);
Tuple<SortedMap<String, Object>, SortedMap<String, FieldStats>> mappingsAndFieldStats =
FileStructureUtils.guessMappingsAndCalculateFieldStats(explanation, Arrays.asList(sample1, sample2),
NOOP_TIMEOUT_CHECKER);
FileStructureUtils.guessMappingsAndCalculateFieldStats(explanation, Arrays.asList(sample1, sample2), NOOP_TIMEOUT_CHECKER);
assertNotNull(mappingsAndFieldStats);
Map<String, Object> mappings = mappingsAndFieldStats.v1();
@ -341,7 +340,8 @@ public class FileStructureUtilsTests extends FileStructureTestCase {
assertNotNull(fieldStats);
assertEquals(3, fieldStats.size());
assertEquals(new FieldStats(2, 2, makeTopHits("not a time", 1, "whatever", 1)), fieldStats.get("foo"));
assertEquals(new FieldStats(2, 2, makeTopHits("2018-05-24 17:28:31,735", 1, "2018-05-29 11:53:02,837", 1)), fieldStats.get("time"));
assertEquals(new FieldStats(2, 2, "2018-05-24 17:28:31,735", "2018-05-29 11:53:02,837",
makeTopHits("2018-05-24 17:28:31,735", 1, "2018-05-29 11:53:02,837", 1)), fieldStats.get("time"));
assertEquals(new FieldStats(2, 2, 17.0, 42.0, 29.5, 29.5, makeTopHits(17, 1, 42, 1)), fieldStats.get("bar"));
assertNull(fieldStats.get("nothing"));
}

View File

@ -203,7 +203,8 @@ public class GrokPatternCreatorTests extends FileStructureTestCase {
assertEquals("%{SYSLOGTIMESTAMP:timestamp} .*? .*?\\[%{INT:field}\\]: %{LOGLEVEL:loglevel} \\(.*? .*? .*?\\) .*? " +
"%{QUOTEDSTRING:field2}: %{IP:ipaddress}#%{INT:field3}",
grokPatternCreator.createGrokPatternFromExamples("SYSLOGTIMESTAMP", "timestamp"));
grokPatternCreator.createGrokPatternFromExamples("SYSLOGTIMESTAMP", FileStructureUtils.DATE_MAPPING_WITHOUT_FORMAT,
"timestamp"));
assertEquals(5, mappings.size());
assertEquals(Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, "long"), mappings.get("field"));
assertEquals(Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, "keyword"), mappings.get("loglevel"));
@ -229,7 +230,8 @@ public class GrokPatternCreatorTests extends FileStructureTestCase {
NOOP_TIMEOUT_CHECKER);
assertEquals("%{CATALINA_DATESTAMP:timestamp} .*? .*?\\n%{LOGLEVEL:loglevel}: .*",
grokPatternCreator.createGrokPatternFromExamples("CATALINA_DATESTAMP", "timestamp"));
grokPatternCreator.createGrokPatternFromExamples("CATALINA_DATESTAMP", FileStructureUtils.DATE_MAPPING_WITHOUT_FORMAT,
"timestamp"));
assertEquals(1, mappings.size());
assertEquals(Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, "keyword"), mappings.get("loglevel"));
}
@ -253,7 +255,8 @@ public class GrokPatternCreatorTests extends FileStructureTestCase {
assertEquals("%{INT:field}\\t%{TIMESTAMP_ISO8601:timestamp}\\t%{TIMESTAMP_ISO8601:extra_timestamp}\\t%{INT:field2}\\t.*?\\t" +
"%{IP:ipaddress}\\t.*?\\t%{LOGLEVEL:loglevel}\\t.*",
grokPatternCreator.createGrokPatternFromExamples("TIMESTAMP_ISO8601", "timestamp"));
grokPatternCreator.createGrokPatternFromExamples("TIMESTAMP_ISO8601", FileStructureUtils.DATE_MAPPING_WITHOUT_FORMAT,
"timestamp"));
assertEquals(5, mappings.size());
assertEquals(Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, "long"), mappings.get("field"));
Map<String, String> expectedDateMapping = new HashMap<>();
@ -284,7 +287,8 @@ public class GrokPatternCreatorTests extends FileStructureTestCase {
assertEquals("%{INT:field}\\t%{TIMESTAMP_ISO8601:timestamp}\\t%{DATESTAMP:extra_timestamp}\\t%{INT:field2}\\t.*?\\t" +
"%{IP:ipaddress}\\t.*?\\t%{LOGLEVEL:loglevel}\\t.*",
grokPatternCreator.createGrokPatternFromExamples("TIMESTAMP_ISO8601", "timestamp"));
grokPatternCreator.createGrokPatternFromExamples("TIMESTAMP_ISO8601", FileStructureUtils.DATE_MAPPING_WITHOUT_FORMAT,
"timestamp"));
assertEquals(5, mappings.size());
assertEquals(Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, "long"), mappings.get("field"));
Map<String, String> expectedDateMapping = new HashMap<>();
@ -314,9 +318,12 @@ public class GrokPatternCreatorTests extends FileStructureTestCase {
Collections.singletonMap("CUSTOM_TIMESTAMP", "%{MONTHNUM}/%{MONTHDAY}/%{YEAR} %{HOUR}:%{MINUTE}(?:AM|PM)"),
NOOP_TIMEOUT_CHECKER);
Map<String, String> customMapping = new HashMap<>();
customMapping.put(FileStructureUtils.MAPPING_TYPE_SETTING, "date");
customMapping.put(FileStructureUtils.MAPPING_FORMAT_SETTING, "M/dd/yyyy h:mma");
assertEquals("%{INT:field}\\t%{CUSTOM_TIMESTAMP:timestamp}\\t%{TIMESTAMP_ISO8601:extra_timestamp}\\t%{INT:field2}\\t.*?\\t" +
"%{IP:ipaddress}\\t.*?\\t%{LOGLEVEL:loglevel}\\t.*",
grokPatternCreator.createGrokPatternFromExamples("CUSTOM_TIMESTAMP", "timestamp"));
grokPatternCreator.createGrokPatternFromExamples("CUSTOM_TIMESTAMP", customMapping, "timestamp"));
assertEquals(5, mappings.size());
assertEquals(Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, "long"), mappings.get("field"));
Map<String, String> expectedDateMapping = new HashMap<>();
@ -347,7 +354,8 @@ public class GrokPatternCreatorTests extends FileStructureTestCase {
assertEquals("%{INT:field}\\t%{TIMESTAMP_ISO8601:timestamp}\\t%{TIME:time}\\t%{INT:field2}\\t.*?\\t" +
"%{IP:ipaddress}\\t.*?\\t%{LOGLEVEL:loglevel}\\t.*",
grokPatternCreator.createGrokPatternFromExamples("TIMESTAMP_ISO8601", "timestamp"));
grokPatternCreator.createGrokPatternFromExamples("TIMESTAMP_ISO8601", FileStructureUtils.DATE_MAPPING_WITHOUT_FORMAT,
"timestamp"));
assertEquals(5, mappings.size());
assertEquals(Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, "long"), mappings.get("field"));
assertEquals(Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, "keyword"), mappings.get("time"));

View File

@ -56,7 +56,8 @@ setup:
- match: { field_stats.sourcetype.cardinality: 1 }
- match: { field_stats.time.count: 3 }
- match: { field_stats.time.cardinality: 3 }
- match: { field_stats.time.cardinality: 3 }
- match: { field_stats.time.earliest: "1403481600" }
- match: { field_stats.time.latest: "1403481800" }
- is_false: explanation
---
@ -116,5 +117,6 @@ setup:
- match: { field_stats.sourcetype.cardinality: 1 }
- match: { field_stats.time.count: 3 }
- match: { field_stats.time.cardinality: 3 }
- match: { field_stats.time.cardinality: 3 }
- match: { field_stats.time.earliest: "1403481600" }
- match: { field_stats.time.latest: "1403481800" }
- match: { explanation.0: "Using specified character encoding [UTF-8]" }