[ML] Fix custom timestamp override with dot-separated fractional seconds (#44127)

Custom timestamp overrides provided to the find_file_structure
endpoint produced an invalid Grok pattern if the fractional
seconds separator was a dot rather than a comma or colon.
This commit fixes that problem and adds tests for this sort
of timestamp override.

Fixes #44110
This commit is contained in:
David Roberts 2019-07-10 10:26:30 +01:00
parent 913b6a64e8
commit 853ddb5a07
2 changed files with 22 additions and 10 deletions

View File

@@ -279,9 +279,10 @@ public final class TimestampFormatFinder {
} }
throw new IllegalArgumentException(msg); throw new IllegalArgumentException(msg);
} }
// No need to append to the Grok pattern as %{SECOND} already allows for an optional // No need to append to the Grok pattern as %{SECOND} already allows for an optional fraction,
// fraction, but we need to remove the separator that's included in %{SECOND} // but we need to remove the separator that's included in %{SECOND} (and that might be escaped)
grokPatternBuilder.deleteCharAt(grokPatternBuilder.length() - 1); int numCharsToDelete = (PUNCTUATION_THAT_NEEDS_ESCAPING_IN_REGEX.indexOf(prevChar) >= 0) ? 2 : 1;
grokPatternBuilder.delete(grokPatternBuilder.length() - numCharsToDelete, grokPatternBuilder.length());
regexBuilder.append("\\d{").append(endPos - startPos).append('}'); regexBuilder.append("\\d{").append(endPos - startPos).append('}');
} else { } else {
grokPatternBuilder.append(grokPatternAndRegexForGroup.v1()); grokPatternBuilder.append(grokPatternAndRegexForGroup.v1());

View File

@@ -737,16 +737,27 @@ public class TimestampFormatFinderTests extends FileStructureTestCase {
assertEquals(1, lenientTimestampFormatFinder.getNumMatchedFormats()); assertEquals(1, lenientTimestampFormatFinder.getNumMatchedFormats());
} }
public void testCustomOverrideNotMatchingBuiltInFormat() { public void testCustomOverridesNotMatchingBuiltInFormat() {
String overrideFormat = "MM/dd HH.mm.ss,SSSSSS 'in' yyyy"; validateCustomOverrideNotMatchingBuiltInFormat("MM/dd HH.mm.ss,SSSSSS 'in' yyyy", "05/15 17.14.56,374946 in 2018",
String text = "05/15 17.14.56,374946 in 2018"; "\\b\\d{2}/\\d{2} \\d{2}\\.\\d{2}\\.\\d{2},\\d{6} in \\d{4}\\b", "CUSTOM_TIMESTAMP",
String expectedSimpleRegex = "\\b\\d{2}/\\d{2} \\d{2}\\.\\d{2}\\.\\d{2},\\d{6} in \\d{4}\\b";
String expectedGrokPatternName = "CUSTOM_TIMESTAMP";
Map<String, String> expectedCustomGrokPatternDefinitions =
Collections.singletonMap(TimestampFormatFinder.CUSTOM_TIMESTAMP_GROK_NAME, Collections.singletonMap(TimestampFormatFinder.CUSTOM_TIMESTAMP_GROK_NAME,
"%{MONTHNUM2}/%{MONTHDAY} %{HOUR}\\.%{MINUTE}\\.%{SECOND} in %{YEAR}"); "%{MONTHNUM2}/%{MONTHDAY} %{HOUR}\\.%{MINUTE}\\.%{SECOND} in %{YEAR}"));
validateCustomOverrideNotMatchingBuiltInFormat("'some_prefix 'dd.MM.yyyy HH:mm:ss.SSSSSS", "some_prefix 06.01.2018 16:56:14.295748",
"some_prefix \\d{2}\\.\\d{2}\\.\\d{4} \\d{2}:\\d{2}:\\d{2}\\.\\d{6}\\b", "CUSTOM_TIMESTAMP",
Collections.singletonMap(TimestampFormatFinder.CUSTOM_TIMESTAMP_GROK_NAME,
"some_prefix %{MONTHDAY}\\.%{MONTHNUM2}\\.%{YEAR} %{HOUR}:%{MINUTE}:%{SECOND}"));
validateCustomOverrideNotMatchingBuiltInFormat("dd.MM. yyyy HH:mm:ss.SSSSSS", "06.01. 2018 16:56:14.295748",
"\\b\\d{2}\\.\\d{2}\\. \\d{4} \\d{2}:\\d{2}:\\d{2}\\.\\d{6}\\b", "CUSTOM_TIMESTAMP",
Collections.singletonMap(TimestampFormatFinder.CUSTOM_TIMESTAMP_GROK_NAME,
"%{MONTHDAY}\\.%{MONTHNUM2}\\. %{YEAR} %{HOUR}:%{MINUTE}:%{SECOND}"));
}
private void validateCustomOverrideNotMatchingBuiltInFormat(String overrideFormat, String text, String expectedSimpleRegex,
String expectedGrokPatternName,
Map<String, String> expectedCustomGrokPatternDefinitions) {
TimestampFormatFinder strictTimestampFormatFinder = new TimestampFormatFinder(explanation, overrideFormat, true, true, true, TimestampFormatFinder strictTimestampFormatFinder = new TimestampFormatFinder(explanation, overrideFormat, true, true, true,
NOOP_TIMEOUT_CHECKER); NOOP_TIMEOUT_CHECKER);
strictTimestampFormatFinder.addSample(text); strictTimestampFormatFinder.addSample(text);