[ML] Improve message misformation error in file structure finder (#42175)

This change replaces the extremely unfriendly message
"Number of messages analyzed must be positive", shown in
the case where the sample lines were incorrectly grouped
into just one message, with an error that more helpfully
explains the likely root cause of the problem.
This commit is contained in:
David Roberts 2019-05-16 13:11:23 -04:00
parent 4063701f5e
commit 226df35d96
2 changed files with 27 additions and 0 deletions

View File

@ -76,6 +76,12 @@ public class TextLogFileStructureFinder implements FileStructureFinder {
}
// Don't add the last message, as it might be partial and mess up subsequent pattern finding
if (sampleMessages.isEmpty()) {
throw new IllegalArgumentException("Failed to create more than one message from the sample lines provided. (The "
+ "last is discarded in case the sample is incomplete.) If your sample does contain multiple messages the "
+ "problem is probably that the primary timestamp format has been incorrectly detected, so try overriding it.");
}
FileStructure.Builder structureBuilder = new FileStructure.Builder(FileStructure.Format.SEMI_STRUCTURED_TEXT)
.setCharset(charsetName)
.setHasByteOrderMarker(hasByteOrderMarker)

View File

@ -232,6 +232,27 @@ public class TextLogFileStructureFinderTests extends FileStructureTestCase {
"\\[%{JAVACLASS:class} *\\] %{JAVALOGMESSAGE:message}] does not match sample messages", e.getMessage());
}
/**
 * Verifies that creating a structure finder from a sample whose lines cannot be grouped into
 * more than one message fails with an {@link IllegalArgumentException} carrying the explanatory
 * message that suggests overriding the timestamp format.
 */
public void testErrorOnIncorrectMessageFormation() {
    // The unusual leading timestamp on every line goes undetected, whereas a secondary
    // timestamp format that appears in only one of the lines is picked up instead -
    // that mismatch is what makes this sample problematic.
    String logSample = "Day 21 Month 1 Year 2019 11:04 INFO [localhost] - starting\n" +
        "Day 21 Month 1 Year 2019 11:04 INFO [localhost] - startup date [Mon Jan 21 11:04:19 CET 2019]\n" +
        "Day 21 Month 1 Year 2019 11:04 DEBUG [localhost] - details\n" +
        "Day 21 Month 1 Year 2019 11:04 DEBUG [localhost] - more details\n" +
        "Day 21 Month 1 Year 2019 11:04 WARN [localhost] - something went wrong\n";

    // The text the thrown exception is required to carry
    String expectedMessage = "Failed to create more than one message from the sample lines provided. (The last is discarded in "
        + "case the sample is incomplete.) If your sample does contain multiple messages the problem is probably that "
        + "the primary timestamp format has been incorrectly detected, so try overriding it.";

    String chosenCharset = randomFrom(POSSIBLE_CHARSETS);
    Boolean byteOrderMarker = randomHasByteOrderMarker(chosenCharset);

    IllegalArgumentException thrown = expectThrows(
        IllegalArgumentException.class,
        () -> factory.createFromSample(explanation, logSample, chosenCharset, byteOrderMarker,
            FileStructureOverrides.EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER));

    assertEquals(expectedMessage, thrown.getMessage());
}
public void testCreateMultiLineMessageStartRegexGivenNoPrefaces() {
for (TimestampFormatFinder.CandidateTimestampFormat candidateTimestampFormat : TimestampFormatFinder.ORDERED_CANDIDATE_FORMATS) {
String simpleDateRegex = candidateTimestampFormat.simplePattern.pattern();