mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-02-17 02:14:54 +00:00
[ML] Adjust structure finder for Joda to Java time migration (#37306)
The ML file structure finder has always reported both Joda and Java time format strings. This change makes the Java time format strings the ones that are incorporated into mappings and ingest pipeline definitions. The backwards-compatibility (BWC) syntax of prepending "8" to these formats is used. This will need to be removed once Java time format strings become the default in Elasticsearch. This commit also removes direct imports of Joda classes in the structure finder unit tests. Instead, the core Joda BWC class is used.
This commit is contained in:
parent
afc60bb0e5
commit
f2c0c26d15
@ -164,37 +164,40 @@ format corresponds to the primary timestamp, but you do not want to specify the
|
||||
full `grok_pattern`.
|
||||
|
||||
If this parameter is not specified, the structure finder chooses the best format from
|
||||
the formats it knows, which are these Joda formats and their Java time equivalents:
|
||||
the formats it knows, which are these Java time formats and their Joda equivalents:
|
||||
|
||||
* `dd/MMM/YYYY:HH:mm:ss Z`
|
||||
* `EEE MMM dd HH:mm zzz YYYY`
|
||||
* `EEE MMM dd HH:mm:ss YYYY`
|
||||
* `EEE MMM dd HH:mm:ss zzz YYYY`
|
||||
* `EEE MMM dd YYYY HH:mm zzz`
|
||||
* `EEE MMM dd YYYY HH:mm:ss zzz`
|
||||
* `EEE, dd MMM YYYY HH:mm Z`
|
||||
* `EEE, dd MMM YYYY HH:mm ZZ`
|
||||
* `EEE, dd MMM YYYY HH:mm:ss Z`
|
||||
* `EEE, dd MMM YYYY HH:mm:ss ZZ`
|
||||
* `dd/MMM/yyyy:HH:mm:ss XX`
|
||||
* `EEE MMM dd HH:mm zzz yyyy`
|
||||
* `EEE MMM dd HH:mm:ss yyyy`
|
||||
* `EEE MMM dd HH:mm:ss zzz yyyy`
|
||||
* `EEE MMM dd yyyy HH:mm zzz`
|
||||
* `EEE MMM dd yyyy HH:mm:ss zzz`
|
||||
* `EEE, dd MMM yyyy HH:mm XX`
|
||||
* `EEE, dd MMM yyyy HH:mm XXX`
|
||||
* `EEE, dd MMM yyyy HH:mm:ss XX`
|
||||
* `EEE, dd MMM yyyy HH:mm:ss XXX`
|
||||
* `ISO8601`
|
||||
* `MMM d HH:mm:ss`
|
||||
* `MMM d HH:mm:ss,SSS`
|
||||
* `MMM d YYYY HH:mm:ss`
|
||||
* `MMM d yyyy HH:mm:ss`
|
||||
* `MMM dd HH:mm:ss`
|
||||
* `MMM dd HH:mm:ss,SSS`
|
||||
* `MMM dd YYYY HH:mm:ss`
|
||||
* `MMM dd, YYYY h:mm:ss a`
|
||||
* `MMM dd yyyy HH:mm:ss`
|
||||
* `MMM dd, yyyy h:mm:ss a`
|
||||
* `TAI64N`
|
||||
* `UNIX`
|
||||
* `UNIX_MS`
|
||||
* `YYYY-MM-dd HH:mm:ss`
|
||||
* `YYYY-MM-dd HH:mm:ss,SSS`
|
||||
* `YYYY-MM-dd HH:mm:ss,SSS Z`
|
||||
* `YYYY-MM-dd HH:mm:ss,SSSZ`
|
||||
* `YYYY-MM-dd HH:mm:ss,SSSZZ`
|
||||
* `YYYY-MM-dd HH:mm:ssZ`
|
||||
* `YYYY-MM-dd HH:mm:ssZZ`
|
||||
* `YYYYMMddHHmmss`
|
||||
* `yyyy-MM-dd HH:mm:ss`
|
||||
* `yyyy-MM-dd HH:mm:ss,SSS`
|
||||
* `yyyy-MM-dd HH:mm:ss,SSS XX`
|
||||
* `yyyy-MM-dd HH:mm:ss,SSSXX`
|
||||
* `yyyy-MM-dd HH:mm:ss,SSSXXX`
|
||||
* `yyyy-MM-dd HH:mm:ssXX`
|
||||
* `yyyy-MM-dd HH:mm:ssXXX`
|
||||
* `yyyy-MM-dd'T'HH:mm:ss,SSS`
|
||||
* `yyyy-MM-dd'T'HH:mm:ss,SSSXX`
|
||||
* `yyyy-MM-dd'T'HH:mm:ss,SSSXXX`
|
||||
* `yyyyMMddHHmmss`
|
||||
|
||||
--
|
||||
|
||||
@ -603,11 +606,11 @@ If the request does not encounter errors, you receive the following result:
|
||||
},
|
||||
"tpep_dropoff_datetime" : {
|
||||
"type" : "date",
|
||||
"format" : "YYYY-MM-dd HH:mm:ss"
|
||||
"format" : "8yyyy-MM-dd HH:mm:ss"
|
||||
},
|
||||
"tpep_pickup_datetime" : {
|
||||
"type" : "date",
|
||||
"format" : "YYYY-MM-dd HH:mm:ss"
|
||||
"format" : "8yyyy-MM-dd HH:mm:ss"
|
||||
},
|
||||
"trip_distance" : {
|
||||
"type" : "double"
|
||||
@ -621,7 +624,7 @@ If the request does not encounter errors, you receive the following result:
|
||||
"field" : "tpep_pickup_datetime",
|
||||
"timezone" : "{{ beat.timezone }}",
|
||||
"formats" : [
|
||||
"YYYY-MM-dd HH:mm:ss"
|
||||
"8yyyy-MM-dd HH:mm:ss"
|
||||
]
|
||||
}
|
||||
}
|
||||
@ -1287,10 +1290,9 @@ If the request does not encounter errors, you receive the following result:
|
||||
was chosen because it comes first in the column order. If you prefer
|
||||
`tpep_dropoff_datetime` then force it to be chosen using the
|
||||
`timestamp_field` query parameter.
|
||||
<8> `joda_timestamp_formats` are used to tell Logstash and Ingest pipeline how
|
||||
to parse timestamps.
|
||||
<8> `joda_timestamp_formats` are used to tell Logstash how to parse timestamps.
|
||||
<9> `java_timestamp_formats` are the Java time formats recognized in the time
|
||||
fields. In future Ingest pipeline will switch to use this format.
|
||||
fields. Elasticsearch mappings and ingest pipelines use these formats.
|
||||
<10> The timestamp format in this sample doesn't specify a timezone, so to
|
||||
accurately convert them to UTC timestamps to store in Elasticsearch it's
|
||||
necessary to supply the timezone they relate to. `need_client_timezone`
|
||||
@ -1396,7 +1398,7 @@ this:
|
||||
"field" : "timestamp",
|
||||
"timezone" : "{{ beat.timezone }}",
|
||||
"formats" : [
|
||||
"ISO8601"
|
||||
"8yyyy-MM-dd'T'HH:mm:ss,SSS"
|
||||
]
|
||||
}
|
||||
},
|
||||
@ -1556,7 +1558,7 @@ this:
|
||||
"field" : "timestamp",
|
||||
"timezone" : "{{ beat.timezone }}",
|
||||
"formats" : [
|
||||
"ISO8601"
|
||||
"8yyyy-MM-dd'T'HH:mm:ss,SSS"
|
||||
]
|
||||
}
|
||||
},
|
||||
|
@ -149,7 +149,7 @@ public class DelimitedFileStructureFinder implements FileStructureFinder {
|
||||
.setJavaTimestampFormats(timeField.v2().javaTimestampFormats)
|
||||
.setNeedClientTimezone(needClientTimeZone)
|
||||
.setIngestPipeline(FileStructureUtils.makeIngestPipelineDefinition(null, timeField.v1(),
|
||||
timeField.v2().jodaTimestampFormats, needClientTimeZone))
|
||||
timeField.v2().javaTimestampFormats, needClientTimeZone))
|
||||
.setMultilineStartPattern(timeLineRegex);
|
||||
}
|
||||
|
||||
|
@ -353,7 +353,7 @@ public final class FileStructureUtils {
|
||||
if (needClientTimezone) {
|
||||
dateProcessorSettings.put("timezone", "{{ " + BEAT_TIMEZONE_FIELD + " }}");
|
||||
}
|
||||
dateProcessorSettings.put("formats", timestampFormats);
|
||||
dateProcessorSettings.put("formats", jodaBwcJavaTimestampFormatsForIngestPipeline(timestampFormats));
|
||||
processors.add(Collections.singletonMap("date", dateProcessorSettings));
|
||||
}
|
||||
|
||||
@ -365,4 +365,19 @@ public final class FileStructureUtils {
|
||||
pipeline.put(Pipeline.PROCESSORS_KEY, processors);
|
||||
return pipeline;
|
||||
}
|
||||
|
||||
// TODO: remove this method when Java time formats are the default
|
||||
static List<String> jodaBwcJavaTimestampFormatsForIngestPipeline(List<String> javaTimestampFormats) {
|
||||
return javaTimestampFormats.stream().map(format -> {
|
||||
switch (format) {
|
||||
case "ISO8601":
|
||||
case "UNIX_MS":
|
||||
case "UNIX":
|
||||
case "TAI64N":
|
||||
return format;
|
||||
default:
|
||||
return "8" + format;
|
||||
}
|
||||
}).collect(Collectors.toList());
|
||||
}
|
||||
}
|
||||
|
@ -63,7 +63,7 @@ public class NdJsonFileStructureFinder implements FileStructureFinder {
|
||||
.setJavaTimestampFormats(timeField.v2().javaTimestampFormats)
|
||||
.setNeedClientTimezone(needClientTimeZone)
|
||||
.setIngestPipeline(FileStructureUtils.makeIngestPipelineDefinition(null, timeField.v1(),
|
||||
timeField.v2().jodaTimestampFormats, needClientTimeZone));
|
||||
timeField.v2().javaTimestampFormats, needClientTimeZone));
|
||||
}
|
||||
|
||||
Tuple<SortedMap<String, Object>, SortedMap<String, FieldStats>> mappingsAndFieldStats =
|
||||
|
@ -123,7 +123,7 @@ public class TextLogFileStructureFinder implements FileStructureFinder {
|
||||
.setNeedClientTimezone(needClientTimeZone)
|
||||
.setGrokPattern(grokPattern)
|
||||
.setIngestPipeline(FileStructureUtils.makeIngestPipelineDefinition(grokPattern, interimTimestampField,
|
||||
bestTimestamp.v1().jodaTimestampFormats, needClientTimeZone))
|
||||
bestTimestamp.v1().javaTimestampFormats, needClientTimeZone))
|
||||
.setMappings(mappings)
|
||||
.setFieldStats(fieldStats)
|
||||
.setExplanation(explanation)
|
||||
|
@ -457,13 +457,13 @@ public final class TimestampFormatFinder {
|
||||
* and possibly also a "format" setting.
|
||||
*/
|
||||
public Map<String, String> getEsDateMappingTypeWithFormat() {
|
||||
if (jodaTimestampFormats.contains("TAI64N")) {
|
||||
if (javaTimestampFormats.contains("TAI64N")) {
|
||||
// There's no format for TAI64N in the timestamp formats used in mappings
|
||||
return Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, "keyword");
|
||||
}
|
||||
Map<String, String> mapping = new LinkedHashMap<>();
|
||||
mapping.put(FileStructureUtils.MAPPING_TYPE_SETTING, "date");
|
||||
String formats = jodaTimestampFormats.stream().flatMap(format -> {
|
||||
String formats = javaTimestampFormats.stream().flatMap(format -> {
|
||||
switch (format) {
|
||||
case "ISO8601":
|
||||
return Stream.empty();
|
||||
@ -472,7 +472,8 @@ public final class TimestampFormatFinder {
|
||||
case "UNIX":
|
||||
return Stream.of("epoch_second");
|
||||
default:
|
||||
return Stream.of(format);
|
||||
// TODO: remove the "8" prefix when Java time formats are the default
|
||||
return Stream.of("8" + format);
|
||||
}
|
||||
}).collect(Collectors.joining("||"));
|
||||
if (formats.isEmpty() == false) {
|
||||
|
@ -101,7 +101,7 @@ public class XmlFileStructureFinder implements FileStructureFinder {
|
||||
.setJavaTimestampFormats(timeField.v2().javaTimestampFormats)
|
||||
.setNeedClientTimezone(needClientTimeZone)
|
||||
.setIngestPipeline(FileStructureUtils.makeIngestPipelineDefinition(null, topLevelTag + "." + timeField.v1(),
|
||||
timeField.v2().jodaTimestampFormats, needClientTimeZone));
|
||||
timeField.v2().javaTimestampFormats, needClientTimeZone));
|
||||
}
|
||||
|
||||
Tuple<SortedMap<String, Object>, SortedMap<String, FieldStats>> mappingsAndFieldStats =
|
||||
|
@ -39,7 +39,7 @@ public class FileStructureUtilsTests extends FileStructureTestCase {
|
||||
EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER);
|
||||
assertNotNull(match);
|
||||
assertEquals("field1", match.v1());
|
||||
assertThat(match.v2().jodaTimestampFormats, contains("ISO8601"));
|
||||
assertThat(match.v2().javaTimestampFormats, contains("yyyy-MM-dd'T'HH:mm:ss,SSS"));
|
||||
assertEquals("TIMESTAMP_ISO8601", match.v2().grokPatternName);
|
||||
}
|
||||
|
||||
@ -52,7 +52,7 @@ public class FileStructureUtilsTests extends FileStructureTestCase {
|
||||
overrides, NOOP_TIMEOUT_CHECKER);
|
||||
assertNotNull(match);
|
||||
assertEquals("field1", match.v1());
|
||||
assertThat(match.v2().jodaTimestampFormats, contains("ISO8601"));
|
||||
assertThat(match.v2().javaTimestampFormats, contains("yyyy-MM-dd'T'HH:mm:ss,SSS"));
|
||||
assertEquals("TIMESTAMP_ISO8601", match.v2().grokPatternName);
|
||||
}
|
||||
|
||||
@ -77,20 +77,20 @@ public class FileStructureUtilsTests extends FileStructureTestCase {
|
||||
overrides, NOOP_TIMEOUT_CHECKER);
|
||||
assertNotNull(match);
|
||||
assertEquals("field1", match.v1());
|
||||
assertThat(match.v2().jodaTimestampFormats, contains("ISO8601"));
|
||||
assertThat(match.v2().javaTimestampFormats, contains("yyyy-MM-dd'T'HH:mm:ss,SSS"));
|
||||
assertEquals("TIMESTAMP_ISO8601", match.v2().grokPatternName);
|
||||
}
|
||||
|
||||
public void testGuessTimestampGivenSingleSampleSingleFieldAndImpossibleTimeFormatOverride() {
|
||||
|
||||
FileStructureOverrides overrides = FileStructureOverrides.builder().setTimestampFormat("EEE MMM dd HH:mm:ss YYYY").build();
|
||||
FileStructureOverrides overrides = FileStructureOverrides.builder().setTimestampFormat("EEE MMM dd HH:mm:ss yyyy").build();
|
||||
|
||||
Map<String, String> sample = Collections.singletonMap("field1", "2018-05-24T17:28:31,735");
|
||||
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
|
||||
() -> FileStructureUtils.guessTimestampField(explanation, Collections.singletonList(sample), overrides,
|
||||
NOOP_TIMEOUT_CHECKER));
|
||||
|
||||
assertEquals("Specified timestamp format [EEE MMM dd HH:mm:ss YYYY] does not match for record [{field1=2018-05-24T17:28:31,735}]",
|
||||
assertEquals("Specified timestamp format [EEE MMM dd HH:mm:ss yyyy] does not match for record [{field1=2018-05-24T17:28:31,735}]",
|
||||
e.getMessage());
|
||||
}
|
||||
|
||||
@ -101,7 +101,7 @@ public class FileStructureUtilsTests extends FileStructureTestCase {
|
||||
EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER);
|
||||
assertNotNull(match);
|
||||
assertEquals("field1", match.v1());
|
||||
assertThat(match.v2().jodaTimestampFormats, contains("ISO8601"));
|
||||
assertThat(match.v2().javaTimestampFormats, contains("yyyy-MM-dd'T'HH:mm:ss,SSS"));
|
||||
assertEquals("TIMESTAMP_ISO8601", match.v2().grokPatternName);
|
||||
}
|
||||
|
||||
@ -130,7 +130,7 @@ public class FileStructureUtilsTests extends FileStructureTestCase {
|
||||
EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER);
|
||||
assertNotNull(match);
|
||||
assertEquals("time", match.v1());
|
||||
assertThat(match.v2().jodaTimestampFormats, contains("YYYY-MM-dd HH:mm:ss,SSS"));
|
||||
assertThat(match.v2().javaTimestampFormats, contains("yyyy-MM-dd HH:mm:ss,SSS"));
|
||||
assertEquals("TIMESTAMP_ISO8601", match.v2().grokPatternName);
|
||||
}
|
||||
|
||||
@ -147,7 +147,7 @@ public class FileStructureUtilsTests extends FileStructureTestCase {
|
||||
EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER);
|
||||
assertNotNull(match);
|
||||
assertEquals("time", match.v1());
|
||||
assertThat(match.v2().jodaTimestampFormats, contains("YYYY-MM-dd HH:mm:ss,SSS"));
|
||||
assertThat(match.v2().javaTimestampFormats, contains("yyyy-MM-dd HH:mm:ss,SSS"));
|
||||
assertEquals("TIMESTAMP_ISO8601", match.v2().grokPatternName);
|
||||
}
|
||||
|
||||
@ -178,7 +178,7 @@ public class FileStructureUtilsTests extends FileStructureTestCase {
|
||||
EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER);
|
||||
assertNotNull(match);
|
||||
assertEquals("time", match.v1());
|
||||
assertThat(match.v2().jodaTimestampFormats, contains("YYYY-MM-dd HH:mm:ss,SSS"));
|
||||
assertThat(match.v2().javaTimestampFormats, contains("yyyy-MM-dd HH:mm:ss,SSS"));
|
||||
assertEquals("TIMESTAMP_ISO8601", match.v2().grokPatternName);
|
||||
}
|
||||
|
||||
@ -195,7 +195,7 @@ public class FileStructureUtilsTests extends FileStructureTestCase {
|
||||
EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER);
|
||||
assertNotNull(match);
|
||||
assertEquals("time", match.v1());
|
||||
assertThat(match.v2().jodaTimestampFormats, contains("MMM dd YYYY HH:mm:ss", "MMM d YYYY HH:mm:ss"));
|
||||
assertThat(match.v2().javaTimestampFormats, contains("MMM dd yyyy HH:mm:ss", "MMM d yyyy HH:mm:ss"));
|
||||
assertEquals("CISCOTIMESTAMP", match.v2().grokPatternName);
|
||||
}
|
||||
|
||||
@ -228,7 +228,7 @@ public class FileStructureUtilsTests extends FileStructureTestCase {
|
||||
EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER);
|
||||
assertNotNull(match);
|
||||
assertEquals("time2", match.v1());
|
||||
assertThat(match.v2().jodaTimestampFormats, contains("MMM dd YYYY HH:mm:ss", "MMM d YYYY HH:mm:ss"));
|
||||
assertThat(match.v2().javaTimestampFormats, contains("MMM dd yyyy HH:mm:ss", "MMM d yyyy HH:mm:ss"));
|
||||
assertEquals("CISCOTIMESTAMP", match.v2().grokPatternName);
|
||||
}
|
||||
|
||||
@ -331,7 +331,8 @@ public class FileStructureUtilsTests extends FileStructureTestCase {
|
||||
assertEquals(Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, "keyword"), mappings.get("foo"));
|
||||
Map<String, String> expectedTimeMapping = new HashMap<>();
|
||||
expectedTimeMapping.put(FileStructureUtils.MAPPING_TYPE_SETTING, "date");
|
||||
expectedTimeMapping.put(FileStructureUtils.MAPPING_FORMAT_SETTING, "YYYY-MM-dd HH:mm:ss,SSS");
|
||||
// TODO: remove the "8" prefix when Java time formats are the default
|
||||
expectedTimeMapping.put(FileStructureUtils.MAPPING_FORMAT_SETTING, "8" + "yyyy-MM-dd HH:mm:ss,SSS");
|
||||
assertEquals(expectedTimeMapping, mappings.get("time"));
|
||||
assertEquals(Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, "long"), mappings.get("bar"));
|
||||
assertNull(mappings.get("nothing"));
|
||||
@ -354,7 +355,7 @@ public class FileStructureUtilsTests extends FileStructureTestCase {
|
||||
public void testMakeIngestPipelineDefinitionGivenStructuredWithTimestamp() {
|
||||
|
||||
String timestampField = randomAlphaOfLength(10);
|
||||
List<String> timestampFormats = randomFrom(TimestampFormatFinder.ORDERED_CANDIDATE_FORMATS).jodaTimestampFormats;
|
||||
List<String> timestampFormats = randomFrom(TimestampFormatFinder.ORDERED_CANDIDATE_FORMATS).javaTimestampFormats;
|
||||
boolean needClientTimezone = randomBoolean();
|
||||
|
||||
Map<String, Object> pipeline =
|
||||
@ -371,7 +372,8 @@ public class FileStructureUtilsTests extends FileStructureTestCase {
|
||||
assertNotNull(dateProcessor);
|
||||
assertEquals(timestampField, dateProcessor.get("field"));
|
||||
assertEquals(needClientTimezone, dateProcessor.containsKey("timezone"));
|
||||
assertEquals(timestampFormats, dateProcessor.get("formats"));
|
||||
// TODO: remove the call to jodaBwcJavaTimestampFormatsForIngestPipeline() when Java time formats are the default
|
||||
assertEquals(FileStructureUtils.jodaBwcJavaTimestampFormatsForIngestPipeline(timestampFormats), dateProcessor.get("formats"));
|
||||
|
||||
// After removing the two expected fields there should be nothing left in the pipeline
|
||||
assertEquals(Collections.emptyMap(), pipeline);
|
||||
@ -382,7 +384,7 @@ public class FileStructureUtilsTests extends FileStructureTestCase {
|
||||
|
||||
String grokPattern = randomAlphaOfLength(100);
|
||||
String timestampField = randomAlphaOfLength(10);
|
||||
List<String> timestampFormats = randomFrom(TimestampFormatFinder.ORDERED_CANDIDATE_FORMATS).jodaTimestampFormats;
|
||||
List<String> timestampFormats = randomFrom(TimestampFormatFinder.ORDERED_CANDIDATE_FORMATS).javaTimestampFormats;
|
||||
boolean needClientTimezone = randomBoolean();
|
||||
|
||||
Map<String, Object> pipeline =
|
||||
@ -404,7 +406,8 @@ public class FileStructureUtilsTests extends FileStructureTestCase {
|
||||
assertNotNull(dateProcessor);
|
||||
assertEquals(timestampField, dateProcessor.get("field"));
|
||||
assertEquals(needClientTimezone, dateProcessor.containsKey("timezone"));
|
||||
assertEquals(timestampFormats, dateProcessor.get("formats"));
|
||||
// TODO: remove the call to jodaBwcJavaTimestampFormatsForIngestPipeline() when Java time formats are the default
|
||||
assertEquals(FileStructureUtils.jodaBwcJavaTimestampFormatsForIngestPipeline(timestampFormats), dateProcessor.get("formats"));
|
||||
|
||||
Map<String, Object> removeProcessor = (Map<String, Object>) processors.get(2).get("remove");
|
||||
assertNotNull(removeProcessor);
|
||||
|
@ -357,7 +357,7 @@ public class TextLogFileStructureFinderTests extends FileStructureTestCase {
|
||||
|
||||
public void testMostLikelyTimestampGivenExceptionTraceAndTimestampFormatOverride() {
|
||||
|
||||
FileStructureOverrides overrides = FileStructureOverrides.builder().setTimestampFormat("YYYY-MM-dd HH:mm:ss").build();
|
||||
FileStructureOverrides overrides = FileStructureOverrides.builder().setTimestampFormat("yyyy-MM-dd HH:mm:ss").build();
|
||||
|
||||
Tuple<TimestampMatch, Set<String>> mostLikelyMatch =
|
||||
TextLogFileStructureFinder.mostLikelyTimestamp(EXCEPTION_TRACE_SAMPLE.split("\n"), overrides, NOOP_TIMEOUT_CHECKER);
|
||||
|
@ -6,9 +6,17 @@
|
||||
package org.elasticsearch.xpack.ml.filestructurefinder;
|
||||
|
||||
import org.elasticsearch.common.collect.Tuple;
|
||||
import org.elasticsearch.common.joda.Joda;
|
||||
import org.elasticsearch.common.time.DateFormatter;
|
||||
import org.elasticsearch.xpack.ml.filestructurefinder.TimestampFormatFinder.TimestampMatch;
|
||||
|
||||
import java.time.Instant;
|
||||
import java.time.ZoneId;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.time.format.DateTimeFormatterBuilder;
|
||||
import java.time.temporal.ChronoField;
|
||||
import java.time.temporal.TemporalAccessor;
|
||||
import java.time.temporal.TemporalQueries;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
@ -269,32 +277,39 @@ public class TimestampFormatFinderTests extends FileStructureTestCase {
|
||||
assertTrue(expected.simplePattern.matcher(text).find());
|
||||
}
|
||||
|
||||
// This is because parsing timestamps using Joda formats generates warnings.
|
||||
// Eventually we'll probably just remove the checks that the Joda formats
|
||||
// are valid, and at that point this method can be removed too.
|
||||
protected boolean enableWarningsCheck() {
|
||||
return false;
|
||||
}
|
||||
|
||||
// This method is using the Joda BWC layer. When that's removed, this method
|
||||
// can be deleted - we'll just validate the Java time formats after that.
|
||||
// Also remove enableWarningsCheck() above if this method is removed.
|
||||
private void validateJodaTimestampFormats(List<String> jodaTimestampFormats, String text, long expectedEpochMs) {
|
||||
|
||||
// All the test times are for Tue May 15 2018 16:14:56 UTC, which is 17:14:56 in London.
|
||||
// This is the timezone that will be used for any text representations that don't include it.
|
||||
org.joda.time.DateTimeZone defaultZone = org.joda.time.DateTimeZone.forID("Europe/London");
|
||||
org.joda.time.DateTime parsed;
|
||||
ZoneId defaultZone = ZoneId.of("Europe/London");
|
||||
long actualEpochMs;
|
||||
for (int i = 0; i < jodaTimestampFormats.size(); ++i) {
|
||||
try {
|
||||
String timestampFormat = jodaTimestampFormats.get(i);
|
||||
switch (timestampFormat) {
|
||||
case "ISO8601":
|
||||
parsed = org.joda.time.format.ISODateTimeFormat.dateTimeParser()
|
||||
.withZone(defaultZone).withDefaultYear(2018).parseDateTime(text);
|
||||
actualEpochMs = Joda.forPattern("date_optional_time").withZone(defaultZone).parseMillis(text);
|
||||
break;
|
||||
default:
|
||||
org.joda.time.format.DateTimeFormatter parser =
|
||||
org.joda.time.format.DateTimeFormat.forPattern(timestampFormat).withZone(defaultZone).withLocale(Locale.ROOT);
|
||||
parsed = parser.withDefaultYear(2018).parseDateTime(text);
|
||||
actualEpochMs = Joda.forPattern(timestampFormat).withYear(2018).withZone(defaultZone).parseMillis(text);
|
||||
break;
|
||||
}
|
||||
if (expectedEpochMs == parsed.getMillis()) {
|
||||
if (expectedEpochMs == actualEpochMs) {
|
||||
break;
|
||||
}
|
||||
// If the last one isn't right then propagate
|
||||
if (i == jodaTimestampFormats.size() - 1) {
|
||||
assertEquals(expectedEpochMs, parsed.getMillis());
|
||||
assertEquals(expectedEpochMs, actualEpochMs);
|
||||
}
|
||||
} catch (RuntimeException e) {
|
||||
// If the last one throws then propagate
|
||||
@ -309,8 +324,8 @@ public class TimestampFormatFinderTests extends FileStructureTestCase {
|
||||
|
||||
// All the test times are for Tue May 15 2018 16:14:56 UTC, which is 17:14:56 in London.
|
||||
// This is the timezone that will be used for any text representations that don't include it.
|
||||
java.time.ZoneId defaultZone = java.time.ZoneId.of("Europe/London");
|
||||
java.time.temporal.TemporalAccessor parsed;
|
||||
ZoneId defaultZone = ZoneId.of("Europe/London");
|
||||
TemporalAccessor parsed;
|
||||
for (int i = 0; i < javaTimestampFormats.size(); ++i) {
|
||||
try {
|
||||
String timestampFormat = javaTimestampFormats.get(i);
|
||||
@ -319,8 +334,8 @@ public class TimestampFormatFinderTests extends FileStructureTestCase {
|
||||
parsed = DateFormatter.forPattern("strict_date_optional_time_nanos").withZone(defaultZone).parse(text);
|
||||
break;
|
||||
default:
|
||||
java.time.format.DateTimeFormatter parser = new java.time.format.DateTimeFormatterBuilder()
|
||||
.appendPattern(timestampFormat).parseDefaulting(java.time.temporal.ChronoField.YEAR_OF_ERA, 2018)
|
||||
DateTimeFormatter parser = new DateTimeFormatterBuilder()
|
||||
.appendPattern(timestampFormat).parseDefaulting(ChronoField.YEAR_OF_ERA, 2018)
|
||||
.toFormatter(Locale.ROOT);
|
||||
// This next line parses the textual date without any default timezone, so if
|
||||
// the text doesn't contain the timezone then the resulting temporal accessor
|
||||
@ -332,14 +347,14 @@ public class TimestampFormatFinderTests extends FileStructureTestCase {
|
||||
// timezone and then again with a default timezone if the first parse didn't
|
||||
// find one in the text.
|
||||
parsed = parser.parse(text);
|
||||
if (parsed.query(java.time.temporal.TemporalQueries.zone()) == null) {
|
||||
if (parsed.query(TemporalQueries.zone()) == null) {
|
||||
// TODO: when Java 8 is no longer supported remove the two
|
||||
// lines and comment above and the closing brace below
|
||||
parsed = parser.withZone(defaultZone).parse(text);
|
||||
}
|
||||
break;
|
||||
}
|
||||
long actualEpochMs = java.time.Instant.from(parsed).toEpochMilli();
|
||||
long actualEpochMs = Instant.from(parsed).toEpochMilli();
|
||||
if (expectedEpochMs == actualEpochMs) {
|
||||
break;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user