[ML] Fix custom timestamp override with dot-separated fractional seconds (#44127)

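Custom timestamp overrides provided to the find_file_structure
endpoint produced an invalid Grok pattern if the fractional
seconds separator was a dot rather than a comma or colon.
A dot has to be escaped in the generated pattern, so the
separator occupies two characters, but only one character was
being removed before the fraction was folded into %{SECOND}.
This commit fixes that problem and adds tests for this sort
of timestamp override.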

Fixes #44110
This commit is contained in:
David Roberts 2019-07-10 10:26:30 +01:00
parent 913b6a64e8
commit 853ddb5a07
2 changed files with 22 additions and 10 deletions

View File

@ -279,9 +279,10 @@ public final class TimestampFormatFinder {
}
throw new IllegalArgumentException(msg);
}
// No need to append to the Grok pattern as %{SECOND} already allows for an optional
// fraction, but we need to remove the separator that's included in %{SECOND}
grokPatternBuilder.deleteCharAt(grokPatternBuilder.length() - 1);
// No need to append to the Grok pattern as %{SECOND} already allows for an optional fraction,
// but we need to remove the separator that's included in %{SECOND} (and that might be escaped)
int numCharsToDelete = (PUNCTUATION_THAT_NEEDS_ESCAPING_IN_REGEX.indexOf(prevChar) >= 0) ? 2 : 1;
grokPatternBuilder.delete(grokPatternBuilder.length() - numCharsToDelete, grokPatternBuilder.length());
regexBuilder.append("\\d{").append(endPos - startPos).append('}');
} else {
grokPatternBuilder.append(grokPatternAndRegexForGroup.v1());

View File

@ -737,16 +737,27 @@ public class TimestampFormatFinderTests extends FileStructureTestCase {
assertEquals(1, lenientTimestampFormatFinder.getNumMatchedFormats());
}
public void testCustomOverrideNotMatchingBuiltInFormat() {
public void testCustomOverridesNotMatchingBuiltInFormat() {
String overrideFormat = "MM/dd HH.mm.ss,SSSSSS 'in' yyyy";
String text = "05/15 17.14.56,374946 in 2018";
String expectedSimpleRegex = "\\b\\d{2}/\\d{2} \\d{2}\\.\\d{2}\\.\\d{2},\\d{6} in \\d{4}\\b";
String expectedGrokPatternName = "CUSTOM_TIMESTAMP";
Map<String, String> expectedCustomGrokPatternDefinitions =
validateCustomOverrideNotMatchingBuiltInFormat("MM/dd HH.mm.ss,SSSSSS 'in' yyyy", "05/15 17.14.56,374946 in 2018",
"\\b\\d{2}/\\d{2} \\d{2}\\.\\d{2}\\.\\d{2},\\d{6} in \\d{4}\\b", "CUSTOM_TIMESTAMP",
Collections.singletonMap(TimestampFormatFinder.CUSTOM_TIMESTAMP_GROK_NAME,
"%{MONTHNUM2}/%{MONTHDAY} %{HOUR}\\.%{MINUTE}\\.%{SECOND} in %{YEAR}");
"%{MONTHNUM2}/%{MONTHDAY} %{HOUR}\\.%{MINUTE}\\.%{SECOND} in %{YEAR}"));
validateCustomOverrideNotMatchingBuiltInFormat("'some_prefix 'dd.MM.yyyy HH:mm:ss.SSSSSS", "some_prefix 06.01.2018 16:56:14.295748",
"some_prefix \\d{2}\\.\\d{2}\\.\\d{4} \\d{2}:\\d{2}:\\d{2}\\.\\d{6}\\b", "CUSTOM_TIMESTAMP",
Collections.singletonMap(TimestampFormatFinder.CUSTOM_TIMESTAMP_GROK_NAME,
"some_prefix %{MONTHDAY}\\.%{MONTHNUM2}\\.%{YEAR} %{HOUR}:%{MINUTE}:%{SECOND}"));
validateCustomOverrideNotMatchingBuiltInFormat("dd.MM. yyyy HH:mm:ss.SSSSSS", "06.01. 2018 16:56:14.295748",
"\\b\\d{2}\\.\\d{2}\\. \\d{4} \\d{2}:\\d{2}:\\d{2}\\.\\d{6}\\b", "CUSTOM_TIMESTAMP",
Collections.singletonMap(TimestampFormatFinder.CUSTOM_TIMESTAMP_GROK_NAME,
"%{MONTHDAY}\\.%{MONTHNUM2}\\. %{YEAR} %{HOUR}:%{MINUTE}:%{SECOND}"));
}
private void validateCustomOverrideNotMatchingBuiltInFormat(String overrideFormat, String text, String expectedSimpleRegex,
String expectedGrokPatternName,
Map<String, String> expectedCustomGrokPatternDefinitions) {
TimestampFormatFinder strictTimestampFormatFinder = new TimestampFormatFinder(explanation, overrideFormat, true, true, true,
NOOP_TIMEOUT_CHECKER);
strictTimestampFormatFinder.addSample(text);