[ML] Improve message misformation error in file structure finder (#42175)
This change replaces the extremely unfriendly message "Number of messages analyzed must be positive", which was shown when the sample lines were incorrectly grouped into just one message, with an error that more helpfully explains the likely root cause of the problem.
This commit is contained in:
parent
4063701f5e
commit
226df35d96
|
@ -76,6 +76,12 @@ public class TextLogFileStructureFinder implements FileStructureFinder {
|
|||
}
|
||||
// Don't add the last message, as it might be partial and mess up subsequent pattern finding
|
||||
|
||||
if (sampleMessages.isEmpty()) {
|
||||
throw new IllegalArgumentException("Failed to create more than one message from the sample lines provided. (The "
|
||||
+ "last is discarded in case the sample is incomplete.) If your sample does contain multiple messages the "
|
||||
+ "problem is probably that the primary timestamp format has been incorrectly detected, so try overriding it.");
|
||||
}
|
||||
|
||||
FileStructure.Builder structureBuilder = new FileStructure.Builder(FileStructure.Format.SEMI_STRUCTURED_TEXT)
|
||||
.setCharset(charsetName)
|
||||
.setHasByteOrderMarker(hasByteOrderMarker)
|
||||
|
|
|
@ -232,6 +232,27 @@ public class TextLogFileStructureFinderTests extends FileStructureTestCase {
|
|||
"\\[%{JAVACLASS:class} *\\] %{JAVALOGMESSAGE:message}] does not match sample messages", e.getMessage());
|
||||
}
|
||||
|
||||
/**
 * Checks that creating a structure from a sample whose lines cannot be grouped into
 * more than one message fails with an {@link IllegalArgumentException} carrying the
 * explanatory "Failed to create more than one message..." text.
 *
 * Every line of the sample starts with "Day 21 Month 1 Year 2019 11:04", while only the
 * second line additionally contains "Mon Jan 21 11:04:19 CET 2019". The charset and
 * byte-order-marker flag are chosen randomly and no overrides are supplied.
 */
public void testErrorOnIncorrectMessageFormation() {
|
||||

||||
// This sample causes problems because the (very weird) primary timestamp format
|
||||
// is not detected but a secondary format that only occurs in one line is detected
|
||||
String sample = "Day 21 Month 1 Year 2019 11:04 INFO [localhost] - starting\n" +
|
||||
"Day 21 Month 1 Year 2019 11:04 INFO [localhost] - startup date [Mon Jan 21 11:04:19 CET 2019]\n" +
|
||||
"Day 21 Month 1 Year 2019 11:04 DEBUG [localhost] - details\n" +
|
||||
"Day 21 Month 1 Year 2019 11:04 DEBUG [localhost] - more details\n" +
|
||||
"Day 21 Month 1 Year 2019 11:04 WARN [localhost] - something went wrong\n";
|
||||

||||
String charset = randomFrom(POSSIBLE_CHARSETS);
|
||||
Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
|
||||
// Structure creation is expected to throw rather than return a structure
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
|
||||
() -> factory.createFromSample(explanation, sample, charset, hasByteOrderMarker, FileStructureOverrides.EMPTY_OVERRIDES,
|
||||
NOOP_TIMEOUT_CHECKER));
|
||||

||||
assertEquals("Failed to create more than one message from the sample lines provided. (The last is discarded in "
|
||||
+ "case the sample is incomplete.) If your sample does contain multiple messages the problem is probably that "
|
||||
+ "the primary timestamp format has been incorrectly detected, so try overriding it.", e.getMessage());
|
||||
}
|
||||
|
||||
public void testCreateMultiLineMessageStartRegexGivenNoPrefaces() {
|
||||
for (TimestampFormatFinder.CandidateTimestampFormat candidateTimestampFormat : TimestampFormatFinder.ORDERED_CANDIDATE_FORMATS) {
|
||||
String simpleDateRegex = candidateTimestampFormat.simplePattern.pattern();
|
||||
|
|
Loading…
Reference in New Issue