[ML] Fix custom timestamp override with dot-separated fractional seconds (#44127)
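Custom timestamp overrides provided to the find_file_structure endpoint produced an invalid Grok pattern if the fractional seconds separator was a dot rather than a comma or colon. A dot has to be escaped in the Grok pattern, so it occupies two characters (a backslash plus the dot), but the code that strips the separator already covered by %{SECOND} removed only one character and left part of the escape sequence behind. This commit fixes that problem by removing both characters when the separator is escaped, and adds tests for this sort of timestamp override. Fixes #44110.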
This commit is contained in:
parent
913b6a64e8
commit
853ddb5a07
|
@ -279,9 +279,10 @@ public final class TimestampFormatFinder {
|
||||||
}
|
}
|
||||||
throw new IllegalArgumentException(msg);
|
throw new IllegalArgumentException(msg);
|
||||||
}
|
}
|
||||||
// No need to append to the Grok pattern as %{SECOND} already allows for an optional
|
// No need to append to the Grok pattern as %{SECOND} already allows for an optional fraction,
|
||||||
// fraction, but we need to remove the separator that's included in %{SECOND}
|
// but we need to remove the separator that's included in %{SECOND} (and that might be escaped)
|
||||||
grokPatternBuilder.deleteCharAt(grokPatternBuilder.length() - 1);
|
int numCharsToDelete = (PUNCTUATION_THAT_NEEDS_ESCAPING_IN_REGEX.indexOf(prevChar) >= 0) ? 2 : 1;
|
||||||
|
grokPatternBuilder.delete(grokPatternBuilder.length() - numCharsToDelete, grokPatternBuilder.length());
|
||||||
regexBuilder.append("\\d{").append(endPos - startPos).append('}');
|
regexBuilder.append("\\d{").append(endPos - startPos).append('}');
|
||||||
} else {
|
} else {
|
||||||
grokPatternBuilder.append(grokPatternAndRegexForGroup.v1());
|
grokPatternBuilder.append(grokPatternAndRegexForGroup.v1());
|
||||||
|
|
|
@ -737,16 +737,27 @@ public class TimestampFormatFinderTests extends FileStructureTestCase {
|
||||||
assertEquals(1, lenientTimestampFormatFinder.getNumMatchedFormats());
|
assertEquals(1, lenientTimestampFormatFinder.getNumMatchedFormats());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testCustomOverrideNotMatchingBuiltInFormat() {
|
public void testCustomOverridesNotMatchingBuiltInFormat() {
|
||||||
|
|
||||||
String overrideFormat = "MM/dd HH.mm.ss,SSSSSS 'in' yyyy";
|
validateCustomOverrideNotMatchingBuiltInFormat("MM/dd HH.mm.ss,SSSSSS 'in' yyyy", "05/15 17.14.56,374946 in 2018",
|
||||||
String text = "05/15 17.14.56,374946 in 2018";
|
"\\b\\d{2}/\\d{2} \\d{2}\\.\\d{2}\\.\\d{2},\\d{6} in \\d{4}\\b", "CUSTOM_TIMESTAMP",
|
||||||
String expectedSimpleRegex = "\\b\\d{2}/\\d{2} \\d{2}\\.\\d{2}\\.\\d{2},\\d{6} in \\d{4}\\b";
|
|
||||||
String expectedGrokPatternName = "CUSTOM_TIMESTAMP";
|
|
||||||
Map<String, String> expectedCustomGrokPatternDefinitions =
|
|
||||||
Collections.singletonMap(TimestampFormatFinder.CUSTOM_TIMESTAMP_GROK_NAME,
|
Collections.singletonMap(TimestampFormatFinder.CUSTOM_TIMESTAMP_GROK_NAME,
|
||||||
"%{MONTHNUM2}/%{MONTHDAY} %{HOUR}\\.%{MINUTE}\\.%{SECOND} in %{YEAR}");
|
"%{MONTHNUM2}/%{MONTHDAY} %{HOUR}\\.%{MINUTE}\\.%{SECOND} in %{YEAR}"));
|
||||||
|
|
||||||
|
validateCustomOverrideNotMatchingBuiltInFormat("'some_prefix 'dd.MM.yyyy HH:mm:ss.SSSSSS", "some_prefix 06.01.2018 16:56:14.295748",
|
||||||
|
"some_prefix \\d{2}\\.\\d{2}\\.\\d{4} \\d{2}:\\d{2}:\\d{2}\\.\\d{6}\\b", "CUSTOM_TIMESTAMP",
|
||||||
|
Collections.singletonMap(TimestampFormatFinder.CUSTOM_TIMESTAMP_GROK_NAME,
|
||||||
|
"some_prefix %{MONTHDAY}\\.%{MONTHNUM2}\\.%{YEAR} %{HOUR}:%{MINUTE}:%{SECOND}"));
|
||||||
|
|
||||||
|
validateCustomOverrideNotMatchingBuiltInFormat("dd.MM. yyyy HH:mm:ss.SSSSSS", "06.01. 2018 16:56:14.295748",
|
||||||
|
"\\b\\d{2}\\.\\d{2}\\. \\d{4} \\d{2}:\\d{2}:\\d{2}\\.\\d{6}\\b", "CUSTOM_TIMESTAMP",
|
||||||
|
Collections.singletonMap(TimestampFormatFinder.CUSTOM_TIMESTAMP_GROK_NAME,
|
||||||
|
"%{MONTHDAY}\\.%{MONTHNUM2}\\. %{YEAR} %{HOUR}:%{MINUTE}:%{SECOND}"));
|
||||||
|
}
|
||||||
|
|
||||||
|
private void validateCustomOverrideNotMatchingBuiltInFormat(String overrideFormat, String text, String expectedSimpleRegex,
|
||||||
|
String expectedGrokPatternName,
|
||||||
|
Map<String, String> expectedCustomGrokPatternDefinitions) {
|
||||||
TimestampFormatFinder strictTimestampFormatFinder = new TimestampFormatFinder(explanation, overrideFormat, true, true, true,
|
TimestampFormatFinder strictTimestampFormatFinder = new TimestampFormatFinder(explanation, overrideFormat, true, true, true,
|
||||||
NOOP_TIMEOUT_CHECKER);
|
NOOP_TIMEOUT_CHECKER);
|
||||||
strictTimestampFormatFinder.addSample(text);
|
strictTimestampFormatFinder.addSample(text);
|
||||||
|
|
Loading…
Reference in New Issue