mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-02-17 02:14:54 +00:00
[ML] Interrupt Grok in file structure finder timeout (#36588)
The file structure finder has timeout functionality, but prior to this change it would not interrupt a single long-running Grok match attempt. This commit hooks into the ThreadWatchdog facility provided by the Grok library to interrupt individual Grok matches that may be running at the time the file structure finder timeout expires.
This commit is contained in:
parent
a4b32f1143
commit
690b10a4a1
@ -34,8 +34,9 @@ public final class FileStructureUtils {
|
|||||||
|
|
||||||
private static final int NUM_TOP_HITS = 10;
|
private static final int NUM_TOP_HITS = 10;
|
||||||
// NUMBER Grok pattern doesn't support scientific notation, so we extend it
|
// NUMBER Grok pattern doesn't support scientific notation, so we extend it
|
||||||
private static final Grok NUMBER_GROK = new Grok(Grok.getBuiltinPatterns(), "^%{NUMBER}(?:[eE][+-]?[0-3]?[0-9]{1,2})?$");
|
private static final Grok NUMBER_GROK = new Grok(Grok.getBuiltinPatterns(), "^%{NUMBER}(?:[eE][+-]?[0-3]?[0-9]{1,2})?$",
|
||||||
private static final Grok IP_GROK = new Grok(Grok.getBuiltinPatterns(), "^%{IP}$");
|
TimeoutChecker.watchdog);
|
||||||
|
private static final Grok IP_GROK = new Grok(Grok.getBuiltinPatterns(), "^%{IP}$", TimeoutChecker.watchdog);
|
||||||
private static final int KEYWORD_MAX_LEN = 256;
|
private static final int KEYWORD_MAX_LEN = 256;
|
||||||
private static final int KEYWORD_MAX_SPACES = 5;
|
private static final int KEYWORD_MAX_SPACES = 5;
|
||||||
|
|
||||||
@ -69,7 +70,7 @@ public final class FileStructureUtils {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Accept the first match from the first sample that is compatible with all the other samples
|
// Accept the first match from the first sample that is compatible with all the other samples
|
||||||
for (Tuple<String, TimestampMatch> candidate : findCandidates(explanation, sampleRecords, overrides)) {
|
for (Tuple<String, TimestampMatch> candidate : findCandidates(explanation, sampleRecords, overrides, timeoutChecker)) {
|
||||||
|
|
||||||
boolean allGood = true;
|
boolean allGood = true;
|
||||||
for (Map<String, ?> sampleRecord : sampleRecords.subList(1, sampleRecords.size())) {
|
for (Map<String, ?> sampleRecord : sampleRecords.subList(1, sampleRecords.size())) {
|
||||||
@ -87,7 +88,8 @@ public final class FileStructureUtils {
|
|||||||
|
|
||||||
timeoutChecker.check("timestamp field determination");
|
timeoutChecker.check("timestamp field determination");
|
||||||
|
|
||||||
TimestampMatch match = TimestampFormatFinder.findFirstFullMatch(fieldValue.toString(), overrides.getTimestampFormat());
|
TimestampMatch match = TimestampFormatFinder.findFirstFullMatch(fieldValue.toString(), overrides.getTimestampFormat(),
|
||||||
|
timeoutChecker);
|
||||||
if (match == null || match.candidateIndex != candidate.v2().candidateIndex) {
|
if (match == null || match.candidateIndex != candidate.v2().candidateIndex) {
|
||||||
if (overrides.getTimestampFormat() != null) {
|
if (overrides.getTimestampFormat() != null) {
|
||||||
throw new IllegalArgumentException("Specified timestamp format [" + overrides.getTimestampFormat() +
|
throw new IllegalArgumentException("Specified timestamp format [" + overrides.getTimestampFormat() +
|
||||||
@ -111,7 +113,7 @@ public final class FileStructureUtils {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private static List<Tuple<String, TimestampMatch>> findCandidates(List<String> explanation, List<Map<String, ?>> sampleRecords,
|
private static List<Tuple<String, TimestampMatch>> findCandidates(List<String> explanation, List<Map<String, ?>> sampleRecords,
|
||||||
FileStructureOverrides overrides) {
|
FileStructureOverrides overrides, TimeoutChecker timeoutChecker) {
|
||||||
|
|
||||||
assert sampleRecords.isEmpty() == false;
|
assert sampleRecords.isEmpty() == false;
|
||||||
Map<String, ?> firstRecord = sampleRecords.get(0);
|
Map<String, ?> firstRecord = sampleRecords.get(0);
|
||||||
@ -130,7 +132,8 @@ public final class FileStructureUtils {
|
|||||||
if (onlyConsiderField == null || onlyConsiderField.equals(fieldName)) {
|
if (onlyConsiderField == null || onlyConsiderField.equals(fieldName)) {
|
||||||
Object value = field.getValue();
|
Object value = field.getValue();
|
||||||
if (value != null) {
|
if (value != null) {
|
||||||
TimestampMatch match = TimestampFormatFinder.findFirstFullMatch(value.toString(), overrides.getTimestampFormat());
|
TimestampMatch match = TimestampFormatFinder.findFirstFullMatch(value.toString(), overrides.getTimestampFormat(),
|
||||||
|
timeoutChecker);
|
||||||
if (match != null) {
|
if (match != null) {
|
||||||
Tuple<String, TimestampMatch> candidate = new Tuple<>(fieldName, match);
|
Tuple<String, TimestampMatch> candidate = new Tuple<>(fieldName, match);
|
||||||
candidates.add(candidate);
|
candidates.add(candidate);
|
||||||
@ -211,7 +214,7 @@ public final class FileStructureUtils {
|
|||||||
}
|
}
|
||||||
|
|
||||||
Collection<String> fieldValuesAsStrings = fieldValues.stream().map(Object::toString).collect(Collectors.toList());
|
Collection<String> fieldValuesAsStrings = fieldValues.stream().map(Object::toString).collect(Collectors.toList());
|
||||||
Map<String, String> mapping = guessScalarMapping(explanation, fieldName, fieldValuesAsStrings);
|
Map<String, String> mapping = guessScalarMapping(explanation, fieldName, fieldValuesAsStrings, timeoutChecker);
|
||||||
timeoutChecker.check("mapping determination");
|
timeoutChecker.check("mapping determination");
|
||||||
return new Tuple<>(mapping, calculateFieldStats(fieldValuesAsStrings, timeoutChecker));
|
return new Tuple<>(mapping, calculateFieldStats(fieldValuesAsStrings, timeoutChecker));
|
||||||
}
|
}
|
||||||
@ -238,10 +241,12 @@ public final class FileStructureUtils {
|
|||||||
* @param fieldValues Values of the field for which mappings are to be guessed. The guessed
|
* @param fieldValues Values of the field for which mappings are to be guessed. The guessed
|
||||||
* mapping will be compatible with all the provided values. Must not be
|
* mapping will be compatible with all the provided values. Must not be
|
||||||
* empty.
|
* empty.
|
||||||
|
* @param timeoutChecker Will abort the operation if its timeout is exceeded.
|
||||||
* @return The sub-section of the index mappings most appropriate for the field,
|
* @return The sub-section of the index mappings most appropriate for the field,
|
||||||
* for example <code>{ "type" : "keyword" }</code>.
|
* for example <code>{ "type" : "keyword" }</code>.
|
||||||
*/
|
*/
|
||||||
static Map<String, String> guessScalarMapping(List<String> explanation, String fieldName, Collection<String> fieldValues) {
|
static Map<String, String> guessScalarMapping(List<String> explanation, String fieldName, Collection<String> fieldValues,
|
||||||
|
TimeoutChecker timeoutChecker) {
|
||||||
|
|
||||||
assert fieldValues.isEmpty() == false;
|
assert fieldValues.isEmpty() == false;
|
||||||
|
|
||||||
@ -251,11 +256,12 @@ public final class FileStructureUtils {
|
|||||||
|
|
||||||
// This checks if a date mapping would be appropriate, and, if so, finds the correct format
|
// This checks if a date mapping would be appropriate, and, if so, finds the correct format
|
||||||
Iterator<String> iter = fieldValues.iterator();
|
Iterator<String> iter = fieldValues.iterator();
|
||||||
TimestampMatch timestampMatch = TimestampFormatFinder.findFirstFullMatch(iter.next());
|
TimestampMatch timestampMatch = TimestampFormatFinder.findFirstFullMatch(iter.next(), timeoutChecker);
|
||||||
while (timestampMatch != null && iter.hasNext()) {
|
while (timestampMatch != null && iter.hasNext()) {
|
||||||
// To be mapped as type date all the values must match the same timestamp format - it is
|
// To be mapped as type date all the values must match the same timestamp format - it is
|
||||||
// not acceptable for all values to be dates, but with different formats
|
// not acceptable for all values to be dates, but with different formats
|
||||||
if (timestampMatch.equals(TimestampFormatFinder.findFirstFullMatch(iter.next(), timestampMatch.candidateIndex)) == false) {
|
if (timestampMatch.equals(TimestampFormatFinder.findFirstFullMatch(iter.next(), timestampMatch.candidateIndex,
|
||||||
|
timeoutChecker)) == false) {
|
||||||
timestampMatch = null;
|
timestampMatch = null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -453,7 +453,7 @@ public final class GrokPatternCreator {
|
|||||||
this.fieldName = fieldName;
|
this.fieldName = fieldName;
|
||||||
// The (?m) here has the Ruby meaning, which is equivalent to (?s) in Java
|
// The (?m) here has the Ruby meaning, which is equivalent to (?s) in Java
|
||||||
grok = new Grok(Grok.getBuiltinPatterns(), "(?m)%{DATA:" + PREFACE + "}" + preBreak +
|
grok = new Grok(Grok.getBuiltinPatterns(), "(?m)%{DATA:" + PREFACE + "}" + preBreak +
|
||||||
"%{" + grokPatternName + ":" + VALUE + "}" + postBreak + "%{GREEDYDATA:" + EPILOGUE + "}");
|
"%{" + grokPatternName + ":" + VALUE + "}" + postBreak + "%{GREEDYDATA:" + EPILOGUE + "}", TimeoutChecker.watchdog);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -472,7 +472,7 @@ public final class GrokPatternCreator {
|
|||||||
TimeoutChecker timeoutChecker) {
|
TimeoutChecker timeoutChecker) {
|
||||||
Collection<String> values = new ArrayList<>();
|
Collection<String> values = new ArrayList<>();
|
||||||
for (String snippet : snippets) {
|
for (String snippet : snippets) {
|
||||||
Map<String, Object> captures = grok.captures(snippet);
|
Map<String, Object> captures = timeoutChecker.grokCaptures(grok, snippet, "full message Grok pattern field extraction");
|
||||||
// If the pattern doesn't match then captures will be null
|
// If the pattern doesn't match then captures will be null
|
||||||
if (captures == null) {
|
if (captures == null) {
|
||||||
throw new IllegalStateException("[%{" + grokPatternName + "}] does not match snippet [" + snippet + "]");
|
throw new IllegalStateException("[%{" + grokPatternName + "}] does not match snippet [" + snippet + "]");
|
||||||
@ -480,14 +480,13 @@ public final class GrokPatternCreator {
|
|||||||
prefaces.add(captures.getOrDefault(PREFACE, "").toString());
|
prefaces.add(captures.getOrDefault(PREFACE, "").toString());
|
||||||
values.add(captures.getOrDefault(VALUE, "").toString());
|
values.add(captures.getOrDefault(VALUE, "").toString());
|
||||||
epilogues.add(captures.getOrDefault(EPILOGUE, "").toString());
|
epilogues.add(captures.getOrDefault(EPILOGUE, "").toString());
|
||||||
timeoutChecker.check("full message Grok pattern field extraction");
|
|
||||||
}
|
}
|
||||||
String adjustedFieldName = buildFieldName(fieldNameCountStore, fieldName);
|
String adjustedFieldName = buildFieldName(fieldNameCountStore, fieldName);
|
||||||
if (mappings != null) {
|
if (mappings != null) {
|
||||||
Map<String, String> fullMappingType = Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, mappingType);
|
Map<String, String> fullMappingType = Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, mappingType);
|
||||||
if ("date".equals(mappingType)) {
|
if ("date".equals(mappingType)) {
|
||||||
assert values.isEmpty() == false;
|
assert values.isEmpty() == false;
|
||||||
TimestampMatch timestampMatch = TimestampFormatFinder.findFirstFullMatch(values.iterator().next());
|
TimestampMatch timestampMatch = TimestampFormatFinder.findFirstFullMatch(values.iterator().next(), timeoutChecker);
|
||||||
if (timestampMatch != null) {
|
if (timestampMatch != null) {
|
||||||
fullMappingType = timestampMatch.getEsDateMappingTypeWithFormat();
|
fullMappingType = timestampMatch.getEsDateMappingTypeWithFormat();
|
||||||
}
|
}
|
||||||
@ -548,7 +547,7 @@ public final class GrokPatternCreator {
|
|||||||
throw new IllegalStateException("Cannot process KV matches until a field name has been determined");
|
throw new IllegalStateException("Cannot process KV matches until a field name has been determined");
|
||||||
}
|
}
|
||||||
Grok grok = new Grok(Grok.getBuiltinPatterns(), "(?m)%{DATA:" + PREFACE + "}\\b" +
|
Grok grok = new Grok(Grok.getBuiltinPatterns(), "(?m)%{DATA:" + PREFACE + "}\\b" +
|
||||||
fieldName + "=%{USER:" + VALUE + "}%{GREEDYDATA:" + EPILOGUE + "}");
|
fieldName + "=%{USER:" + VALUE + "}%{GREEDYDATA:" + EPILOGUE + "}", TimeoutChecker.watchdog);
|
||||||
Collection<String> values = new ArrayList<>();
|
Collection<String> values = new ArrayList<>();
|
||||||
for (String snippet : snippets) {
|
for (String snippet : snippets) {
|
||||||
Map<String, Object> captures = grok.captures(snippet);
|
Map<String, Object> captures = grok.captures(snippet);
|
||||||
@ -563,7 +562,8 @@ public final class GrokPatternCreator {
|
|||||||
}
|
}
|
||||||
String adjustedFieldName = buildFieldName(fieldNameCountStore, fieldName);
|
String adjustedFieldName = buildFieldName(fieldNameCountStore, fieldName);
|
||||||
if (mappings != null) {
|
if (mappings != null) {
|
||||||
mappings.put(adjustedFieldName, FileStructureUtils.guessScalarMapping(explanation, adjustedFieldName, values));
|
mappings.put(adjustedFieldName,
|
||||||
|
FileStructureUtils.guessScalarMapping(explanation, adjustedFieldName, values, timeoutChecker));
|
||||||
timeoutChecker.check("mapping determination");
|
timeoutChecker.check("mapping determination");
|
||||||
}
|
}
|
||||||
if (fieldStats != null) {
|
if (fieldStats != null) {
|
||||||
@ -610,7 +610,7 @@ public final class GrokPatternCreator {
|
|||||||
private FullMatchGrokPatternCandidate(String grokPattern, String timeField) {
|
private FullMatchGrokPatternCandidate(String grokPattern, String timeField) {
|
||||||
this.grokPattern = grokPattern;
|
this.grokPattern = grokPattern;
|
||||||
this.timeField = timeField;
|
this.timeField = timeField;
|
||||||
grok = new Grok(Grok.getBuiltinPatterns(), grokPattern);
|
grok = new Grok(Grok.getBuiltinPatterns(), grokPattern, TimeoutChecker.watchdog);
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getTimeField() {
|
public String getTimeField() {
|
||||||
@ -640,7 +640,8 @@ public final class GrokPatternCreator {
|
|||||||
Map<String, Collection<String>> valuesPerField = new HashMap<>();
|
Map<String, Collection<String>> valuesPerField = new HashMap<>();
|
||||||
|
|
||||||
for (String sampleMessage : sampleMessages) {
|
for (String sampleMessage : sampleMessages) {
|
||||||
Map<String, Object> captures = grok.captures(sampleMessage);
|
Map<String, Object> captures = timeoutChecker.grokCaptures(grok, sampleMessage,
|
||||||
|
"full message Grok pattern field extraction");
|
||||||
// If the pattern doesn't match then captures will be null
|
// If the pattern doesn't match then captures will be null
|
||||||
if (captures == null) {
|
if (captures == null) {
|
||||||
throw new IllegalStateException("[" + grokPattern + "] does not match snippet [" + sampleMessage + "]");
|
throw new IllegalStateException("[" + grokPattern + "] does not match snippet [" + sampleMessage + "]");
|
||||||
@ -658,7 +659,6 @@ public final class GrokPatternCreator {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
timeoutChecker.check("full message Grok pattern field extraction");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for (Map.Entry<String, Collection<String>> valuesForField : valuesPerField.entrySet()) {
|
for (Map.Entry<String, Collection<String>> valuesForField : valuesPerField.entrySet()) {
|
||||||
@ -667,7 +667,7 @@ public final class GrokPatternCreator {
|
|||||||
// Exclude the time field because that will be dropped and replaced with @timestamp
|
// Exclude the time field because that will be dropped and replaced with @timestamp
|
||||||
if (fieldName.equals(timeField) == false) {
|
if (fieldName.equals(timeField) == false) {
|
||||||
mappings.put(fieldName,
|
mappings.put(fieldName,
|
||||||
FileStructureUtils.guessScalarMapping(explanation, fieldName, valuesForField.getValue()));
|
FileStructureUtils.guessScalarMapping(explanation, fieldName, valuesForField.getValue(), timeoutChecker));
|
||||||
timeoutChecker.check("mapping determination");
|
timeoutChecker.check("mapping determination");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -155,7 +155,7 @@ public class TextLogFileStructureFinder implements FileStructureFinder {
|
|||||||
int remainingLines = sampleLines.length;
|
int remainingLines = sampleLines.length;
|
||||||
double differenceBetweenTwoHighestWeights = 0.0;
|
double differenceBetweenTwoHighestWeights = 0.0;
|
||||||
for (String sampleLine : sampleLines) {
|
for (String sampleLine : sampleLines) {
|
||||||
TimestampMatch match = TimestampFormatFinder.findFirstMatch(sampleLine, overrides.getTimestampFormat());
|
TimestampMatch match = TimestampFormatFinder.findFirstMatch(sampleLine, overrides.getTimestampFormat(), timeoutChecker);
|
||||||
if (match != null) {
|
if (match != null) {
|
||||||
TimestampMatch pureMatch = new TimestampMatch(match.candidateIndex, "", match.jodaTimestampFormats,
|
TimestampMatch pureMatch = new TimestampMatch(match.candidateIndex, "", match.jodaTimestampFormats,
|
||||||
match.javaTimestampFormats, match.simplePattern, match.grokPatternName, "");
|
match.javaTimestampFormats, match.simplePattern, match.grokPatternName, "");
|
||||||
|
@ -6,14 +6,19 @@
|
|||||||
package org.elasticsearch.xpack.ml.filestructurefinder;
|
package org.elasticsearch.xpack.ml.filestructurefinder;
|
||||||
|
|
||||||
import org.elasticsearch.ElasticsearchTimeoutException;
|
import org.elasticsearch.ElasticsearchTimeoutException;
|
||||||
|
import org.elasticsearch.common.collect.Tuple;
|
||||||
import org.elasticsearch.common.unit.TimeValue;
|
import org.elasticsearch.common.unit.TimeValue;
|
||||||
import org.elasticsearch.common.util.concurrent.FutureUtils;
|
import org.elasticsearch.common.util.concurrent.FutureUtils;
|
||||||
import org.elasticsearch.grok.Grok;
|
import org.elasticsearch.grok.Grok;
|
||||||
|
import org.elasticsearch.grok.ThreadWatchdog;
|
||||||
|
|
||||||
import java.io.Closeable;
|
import java.io.Closeable;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
import java.util.concurrent.ScheduledExecutorService;
|
import java.util.concurrent.ScheduledExecutorService;
|
||||||
import java.util.concurrent.ScheduledFuture;
|
import java.util.concurrent.ScheduledFuture;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This class can be used to keep track of when a long running operation started and
|
* This class can be used to keep track of when a long running operation started and
|
||||||
@ -32,9 +37,13 @@ import java.util.concurrent.TimeUnit;
|
|||||||
*/
|
*/
|
||||||
public class TimeoutChecker implements Closeable {
|
public class TimeoutChecker implements Closeable {
|
||||||
|
|
||||||
|
private static final TimeoutCheckerWatchdog timeoutCheckerWatchdog = new TimeoutCheckerWatchdog();
|
||||||
|
public static final ThreadWatchdog watchdog = timeoutCheckerWatchdog;
|
||||||
|
|
||||||
private final String operation;
|
private final String operation;
|
||||||
private final ScheduledFuture<?> future;
|
|
||||||
private final TimeValue timeout;
|
private final TimeValue timeout;
|
||||||
|
private final Thread checkedThread;
|
||||||
|
private final ScheduledFuture<?> future;
|
||||||
private volatile boolean timeoutExceeded;
|
private volatile boolean timeoutExceeded;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -48,6 +57,8 @@ public class TimeoutChecker implements Closeable {
|
|||||||
public TimeoutChecker(String operation, TimeValue timeout, ScheduledExecutorService scheduler) {
|
public TimeoutChecker(String operation, TimeValue timeout, ScheduledExecutorService scheduler) {
|
||||||
this.operation = operation;
|
this.operation = operation;
|
||||||
this.timeout = timeout;
|
this.timeout = timeout;
|
||||||
|
this.checkedThread = Thread.currentThread();
|
||||||
|
timeoutCheckerWatchdog.add(checkedThread, timeout);
|
||||||
this.future = (timeout != null) ? scheduler.schedule(this::setTimeoutExceeded, timeout.nanos(), TimeUnit.NANOSECONDS) : null;
|
this.future = (timeout != null) ? scheduler.schedule(this::setTimeoutExceeded, timeout.nanos(), TimeUnit.NANOSECONDS) : null;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -57,6 +68,7 @@ public class TimeoutChecker implements Closeable {
|
|||||||
@Override
|
@Override
|
||||||
public void close() {
|
public void close() {
|
||||||
FutureUtils.cancel(future);
|
FutureUtils.cancel(future);
|
||||||
|
timeoutCheckerWatchdog.remove(checkedThread);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -72,7 +84,80 @@ public class TimeoutChecker implements Closeable {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Wrapper around {@link Grok#captures} that translates any timeout exception
|
||||||
|
* to the style thrown by this class's {@link #check} method.
|
||||||
|
* @param grok The grok pattern from which captures are to be extracted.
|
||||||
|
* @param text The text to match and extract values from.
|
||||||
|
* @param where Which stage of the operation is currently in progress?
|
||||||
|
* @return A map containing field names and their respective coerced values that matched.
|
||||||
|
* @throws ElasticsearchTimeoutException If the operation is found to have taken longer than the permitted time.
|
||||||
|
*/
|
||||||
|
public Map<String, Object> grokCaptures(Grok grok, String text, String where) {
|
||||||
|
|
||||||
|
try {
|
||||||
|
return grok.captures(text);
|
||||||
|
} finally {
|
||||||
|
// If a timeout has occurred then this check will overwrite any timeout exception thrown by Grok.captures() and this
|
||||||
|
// is intentional - the exception from this class makes more sense in the context of the find file structure API
|
||||||
|
check(where);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private void setTimeoutExceeded() {
|
private void setTimeoutExceeded() {
|
||||||
timeoutExceeded = true;
|
timeoutExceeded = true;
|
||||||
|
timeoutCheckerWatchdog.interruptLongRunningThreadIfRegistered(checkedThread);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An implementation of the type of watchdog used by the {@link Grok} class to interrupt
|
||||||
|
* matching operations that take too long. Rather than have a timeout per match operation
|
||||||
|
* like the {@link ThreadWatchdog.Default} implementation, the interruption is governed by
|
||||||
|
* a {@link TimeoutChecker} associated with the thread doing the matching.
|
||||||
|
*/
|
||||||
|
static class TimeoutCheckerWatchdog implements ThreadWatchdog {
|
||||||
|
|
||||||
|
final ConcurrentHashMap<Thread, Tuple<AtomicBoolean, TimeValue>> registry = new ConcurrentHashMap<>();
|
||||||
|
|
||||||
|
void add(Thread thread, TimeValue timeout) {
|
||||||
|
Tuple<AtomicBoolean, TimeValue> previousValue = registry.put(thread, new Tuple<>(new AtomicBoolean(false), timeout));
|
||||||
|
assert previousValue == null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void register() {
|
||||||
|
Tuple<AtomicBoolean, TimeValue> value = registry.get(Thread.currentThread());
|
||||||
|
if (value != null) {
|
||||||
|
boolean wasFalse = value.v1().compareAndSet(false, true);
|
||||||
|
assert wasFalse;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long maxExecutionTimeInMillis() {
|
||||||
|
Tuple<AtomicBoolean, TimeValue> value = registry.get(Thread.currentThread());
|
||||||
|
return value != null ? value.v2().getMillis() : Long.MAX_VALUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void unregister() {
|
||||||
|
Tuple<AtomicBoolean, TimeValue> value = registry.get(Thread.currentThread());
|
||||||
|
if (value != null) {
|
||||||
|
boolean wasTrue = value.v1().compareAndSet(true, false);
|
||||||
|
assert wasTrue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void remove(Thread thread) {
|
||||||
|
Tuple<AtomicBoolean, TimeValue> previousValue = registry.remove(thread);
|
||||||
|
assert previousValue != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
void interruptLongRunningThreadIfRegistered(Thread thread) {
|
||||||
|
Tuple<AtomicBoolean, TimeValue> value = registry.get(thread);
|
||||||
|
if (value.v1().get()) {
|
||||||
|
thread.interrupt();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -173,20 +173,22 @@ public final class TimestampFormatFinder {
|
|||||||
/**
|
/**
|
||||||
* Find the first timestamp format that matches part of the supplied value.
|
* Find the first timestamp format that matches part of the supplied value.
|
||||||
* @param text The value that the returned timestamp format must exist within.
|
* @param text The value that the returned timestamp format must exist within.
|
||||||
|
* @param timeoutChecker Will abort the operation if its timeout is exceeded.
|
||||||
* @return The timestamp format, or <code>null</code> if none matches.
|
* @return The timestamp format, or <code>null</code> if none matches.
|
||||||
*/
|
*/
|
||||||
public static TimestampMatch findFirstMatch(String text) {
|
public static TimestampMatch findFirstMatch(String text, TimeoutChecker timeoutChecker) {
|
||||||
return findFirstMatch(text, 0);
|
return findFirstMatch(text, 0, timeoutChecker);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Find the first timestamp format that matches part of the supplied value.
|
* Find the first timestamp format that matches part of the supplied value.
|
||||||
* @param text The value that the returned timestamp format must exist within.
|
* @param text The value that the returned timestamp format must exist within.
|
||||||
* @param requiredFormat A timestamp format that any returned match must support.
|
* @param requiredFormat A timestamp format that any returned match must support.
|
||||||
|
* @param timeoutChecker Will abort the operation if its timeout is exceeded.
|
||||||
* @return The timestamp format, or <code>null</code> if none matches.
|
* @return The timestamp format, or <code>null</code> if none matches.
|
||||||
*/
|
*/
|
||||||
public static TimestampMatch findFirstMatch(String text, String requiredFormat) {
|
public static TimestampMatch findFirstMatch(String text, String requiredFormat, TimeoutChecker timeoutChecker) {
|
||||||
return findFirstMatch(text, 0, requiredFormat);
|
return findFirstMatch(text, 0, requiredFormat, timeoutChecker);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -194,10 +196,11 @@ public final class TimestampFormatFinder {
|
|||||||
* excluding a specified number of candidate formats.
|
* excluding a specified number of candidate formats.
|
||||||
* @param text The value that the returned timestamp format must exist within.
|
* @param text The value that the returned timestamp format must exist within.
|
||||||
* @param ignoreCandidates The number of candidate formats to exclude from the search.
|
* @param ignoreCandidates The number of candidate formats to exclude from the search.
|
||||||
|
* @param timeoutChecker Will abort the operation if its timeout is exceeded.
|
||||||
* @return The timestamp format, or <code>null</code> if none matches.
|
* @return The timestamp format, or <code>null</code> if none matches.
|
||||||
*/
|
*/
|
||||||
public static TimestampMatch findFirstMatch(String text, int ignoreCandidates) {
|
public static TimestampMatch findFirstMatch(String text, int ignoreCandidates, TimeoutChecker timeoutChecker) {
|
||||||
return findFirstMatch(text, ignoreCandidates, null);
|
return findFirstMatch(text, ignoreCandidates, null, timeoutChecker);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -206,9 +209,10 @@ public final class TimestampFormatFinder {
|
|||||||
* @param text The value that the returned timestamp format must exist within.
|
* @param text The value that the returned timestamp format must exist within.
|
||||||
* @param ignoreCandidates The number of candidate formats to exclude from the search.
|
* @param ignoreCandidates The number of candidate formats to exclude from the search.
|
||||||
* @param requiredFormat A timestamp format that any returned match must support.
|
* @param requiredFormat A timestamp format that any returned match must support.
|
||||||
|
* @param timeoutChecker Will abort the operation if its timeout is exceeded.
|
||||||
* @return The timestamp format, or <code>null</code> if none matches.
|
* @return The timestamp format, or <code>null</code> if none matches.
|
||||||
*/
|
*/
|
||||||
public static TimestampMatch findFirstMatch(String text, int ignoreCandidates, String requiredFormat) {
|
public static TimestampMatch findFirstMatch(String text, int ignoreCandidates, String requiredFormat, TimeoutChecker timeoutChecker) {
|
||||||
if (ignoreCandidates >= ORDERED_CANDIDATE_FORMATS.size()) {
|
if (ignoreCandidates >= ORDERED_CANDIDATE_FORMATS.size()) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
@ -229,7 +233,8 @@ public final class TimestampFormatFinder {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (quicklyRuledOut == false) {
|
if (quicklyRuledOut == false) {
|
||||||
Map<String, Object> captures = candidate.strictSearchGrok.captures(text);
|
Map<String, Object> captures = timeoutChecker.grokCaptures(candidate.strictSearchGrok, text,
|
||||||
|
"timestamp format determination");
|
||||||
if (captures != null) {
|
if (captures != null) {
|
||||||
String preface = captures.getOrDefault(PREFACE, "").toString();
|
String preface = captures.getOrDefault(PREFACE, "").toString();
|
||||||
String epilogue = captures.getOrDefault(EPILOGUE, "").toString();
|
String epilogue = captures.getOrDefault(EPILOGUE, "").toString();
|
||||||
@ -246,20 +251,22 @@ public final class TimestampFormatFinder {
|
|||||||
/**
|
/**
|
||||||
* Find the best timestamp format for matching an entire field value.
|
* Find the best timestamp format for matching an entire field value.
|
||||||
* @param text The value that the returned timestamp format must match in its entirety.
|
* @param text The value that the returned timestamp format must match in its entirety.
|
||||||
|
* @param timeoutChecker Will abort the operation if its timeout is exceeded.
|
||||||
* @return The timestamp format, or <code>null</code> if none matches.
|
* @return The timestamp format, or <code>null</code> if none matches.
|
||||||
*/
|
*/
|
||||||
public static TimestampMatch findFirstFullMatch(String text) {
|
public static TimestampMatch findFirstFullMatch(String text, TimeoutChecker timeoutChecker) {
|
||||||
return findFirstFullMatch(text, 0);
|
return findFirstFullMatch(text, 0, timeoutChecker);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Find the best timestamp format for matching an entire field value.
|
* Find the best timestamp format for matching an entire field value.
|
||||||
* @param text The value that the returned timestamp format must match in its entirety.
|
* @param text The value that the returned timestamp format must match in its entirety.
|
||||||
* @param requiredFormat A timestamp format that any returned match must support.
|
* @param requiredFormat A timestamp format that any returned match must support.
|
||||||
|
* @param timeoutChecker Will abort the operation if its timeout is exceeded.
|
||||||
* @return The timestamp format, or <code>null</code> if none matches.
|
* @return The timestamp format, or <code>null</code> if none matches.
|
||||||
*/
|
*/
|
||||||
public static TimestampMatch findFirstFullMatch(String text, String requiredFormat) {
|
public static TimestampMatch findFirstFullMatch(String text, String requiredFormat, TimeoutChecker timeoutChecker) {
|
||||||
return findFirstFullMatch(text, 0, requiredFormat);
|
return findFirstFullMatch(text, 0, requiredFormat, timeoutChecker);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -267,10 +274,11 @@ public final class TimestampFormatFinder {
|
|||||||
* excluding a specified number of candidate formats.
|
* excluding a specified number of candidate formats.
|
||||||
* @param text The value that the returned timestamp format must match in its entirety.
|
* @param text The value that the returned timestamp format must match in its entirety.
|
||||||
* @param ignoreCandidates The number of candidate formats to exclude from the search.
|
* @param ignoreCandidates The number of candidate formats to exclude from the search.
|
||||||
|
* @param timeoutChecker Will abort the operation if its timeout is exceeded.
|
||||||
* @return The timestamp format, or <code>null</code> if none matches.
|
* @return The timestamp format, or <code>null</code> if none matches.
|
||||||
*/
|
*/
|
||||||
public static TimestampMatch findFirstFullMatch(String text, int ignoreCandidates) {
|
public static TimestampMatch findFirstFullMatch(String text, int ignoreCandidates, TimeoutChecker timeoutChecker) {
|
||||||
return findFirstFullMatch(text, ignoreCandidates, null);
|
return findFirstFullMatch(text, ignoreCandidates, null, timeoutChecker);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -279,9 +287,11 @@ public final class TimestampFormatFinder {
|
|||||||
* @param text The value that the returned timestamp format must match in its entirety.
|
* @param text The value that the returned timestamp format must match in its entirety.
|
||||||
* @param ignoreCandidates The number of candidate formats to exclude from the search.
|
* @param ignoreCandidates The number of candidate formats to exclude from the search.
|
||||||
* @param requiredFormat A timestamp format that any returned match must support.
|
* @param requiredFormat A timestamp format that any returned match must support.
|
||||||
|
* @param timeoutChecker Will abort the operation if its timeout is exceeded.
|
||||||
* @return The timestamp format, or <code>null</code> if none matches.
|
* @return The timestamp format, or <code>null</code> if none matches.
|
||||||
*/
|
*/
|
||||||
public static TimestampMatch findFirstFullMatch(String text, int ignoreCandidates, String requiredFormat) {
|
public static TimestampMatch findFirstFullMatch(String text, int ignoreCandidates, String requiredFormat,
|
||||||
|
TimeoutChecker timeoutChecker) {
|
||||||
if (ignoreCandidates >= ORDERED_CANDIDATE_FORMATS.size()) {
|
if (ignoreCandidates >= ORDERED_CANDIDATE_FORMATS.size()) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
@ -290,7 +300,8 @@ public final class TimestampFormatFinder {
|
|||||||
for (CandidateTimestampFormat candidate : ORDERED_CANDIDATE_FORMATS.subList(ignoreCandidates, ORDERED_CANDIDATE_FORMATS.size())) {
|
for (CandidateTimestampFormat candidate : ORDERED_CANDIDATE_FORMATS.subList(ignoreCandidates, ORDERED_CANDIDATE_FORMATS.size())) {
|
||||||
if (adjustedRequiredFormat == null || candidate.jodaTimestampFormats.contains(adjustedRequiredFormat) ||
|
if (adjustedRequiredFormat == null || candidate.jodaTimestampFormats.contains(adjustedRequiredFormat) ||
|
||||||
candidate.javaTimestampFormats.contains(adjustedRequiredFormat)) {
|
candidate.javaTimestampFormats.contains(adjustedRequiredFormat)) {
|
||||||
Map<String, Object> captures = candidate.strictFullMatchGrok.captures(text);
|
Map<String, Object> captures = timeoutChecker.grokCaptures(candidate.strictFullMatchGrok, text,
|
||||||
|
"timestamp format determination");
|
||||||
if (captures != null) {
|
if (captures != null) {
|
||||||
return makeTimestampMatch(candidate, index, "", text, "");
|
return makeTimestampMatch(candidate, index, "", text, "");
|
||||||
}
|
}
|
||||||
@ -540,8 +551,8 @@ public final class TimestampFormatFinder {
|
|||||||
this.simplePattern = Pattern.compile(simpleRegex, Pattern.MULTILINE);
|
this.simplePattern = Pattern.compile(simpleRegex, Pattern.MULTILINE);
|
||||||
// The (?m) here has the Ruby meaning, which is equivalent to (?s) in Java
|
// The (?m) here has the Ruby meaning, which is equivalent to (?s) in Java
|
||||||
this.strictSearchGrok = new Grok(Grok.getBuiltinPatterns(), "(?m)%{DATA:" + PREFACE + "}" + strictGrokPattern +
|
this.strictSearchGrok = new Grok(Grok.getBuiltinPatterns(), "(?m)%{DATA:" + PREFACE + "}" + strictGrokPattern +
|
||||||
"%{GREEDYDATA:" + EPILOGUE + "}");
|
"%{GREEDYDATA:" + EPILOGUE + "}", TimeoutChecker.watchdog);
|
||||||
this.strictFullMatchGrok = new Grok(Grok.getBuiltinPatterns(), "^" + strictGrokPattern + "$");
|
this.strictFullMatchGrok = new Grok(Grok.getBuiltinPatterns(), "^" + strictGrokPattern + "$", TimeoutChecker.watchdog);
|
||||||
this.standardGrokPatternName = standardGrokPatternName;
|
this.standardGrokPatternName = standardGrokPatternName;
|
||||||
assert quickRuleOutIndices.stream()
|
assert quickRuleOutIndices.stream()
|
||||||
.noneMatch(quickRuleOutIndex -> quickRuleOutIndex < 0 || quickRuleOutIndex >= QUICK_RULE_OUT_PATTERNS.size());
|
.noneMatch(quickRuleOutIndex -> quickRuleOutIndex < 0 || quickRuleOutIndex >= QUICK_RULE_OUT_PATTERNS.size());
|
||||||
|
@ -7,6 +7,7 @@ package org.elasticsearch.xpack.ml.filestructurefinder;
|
|||||||
|
|
||||||
import org.elasticsearch.ElasticsearchTimeoutException;
|
import org.elasticsearch.ElasticsearchTimeoutException;
|
||||||
import org.elasticsearch.common.unit.TimeValue;
|
import org.elasticsearch.common.unit.TimeValue;
|
||||||
|
import org.elasticsearch.grok.Grok;
|
||||||
import org.junit.After;
|
import org.junit.After;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
|
|
||||||
@ -57,4 +58,35 @@ public class TimeoutCheckerTests extends FileStructureTestCase {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testWatchdog() {
|
||||||
|
|
||||||
|
assertFalse(Thread.interrupted());
|
||||||
|
|
||||||
|
TimeValue timeout = TimeValue.timeValueMillis(1);
|
||||||
|
try (TimeoutChecker timeoutChecker = new TimeoutChecker("watchdog test", timeout, scheduler)) {
|
||||||
|
|
||||||
|
TimeoutChecker.watchdog.register();
|
||||||
|
try {
|
||||||
|
expectThrows(InterruptedException.class, () -> Thread.sleep(10000));
|
||||||
|
} finally {
|
||||||
|
TimeoutChecker.watchdog.unregister();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testGrokCaptures() {
|
||||||
|
|
||||||
|
assertFalse(Thread.interrupted());
|
||||||
|
Grok grok = new Grok(Grok.getBuiltinPatterns(), "{%DATA:data}{%GREEDYDATA:greedydata}", TimeoutChecker.watchdog);
|
||||||
|
|
||||||
|
TimeValue timeout = TimeValue.timeValueMillis(1);
|
||||||
|
try (TimeoutChecker timeoutChecker = new TimeoutChecker("grok captures test", timeout, scheduler)) {
|
||||||
|
|
||||||
|
ElasticsearchTimeoutException e = expectThrows(ElasticsearchTimeoutException.class,
|
||||||
|
() -> timeoutChecker.grokCaptures(grok, randomAlphaOfLength(1000000), "should timeout"));
|
||||||
|
assertEquals("Aborting grok captures test during [should timeout] as it has taken longer than the timeout of [" +
|
||||||
|
timeout + "]", e.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -17,10 +17,10 @@ public class TimestampFormatFinderTests extends FileStructureTestCase {
|
|||||||
|
|
||||||
public void testFindFirstMatchGivenNoMatch() {
|
public void testFindFirstMatchGivenNoMatch() {
|
||||||
|
|
||||||
assertNull(TimestampFormatFinder.findFirstMatch(""));
|
assertNull(TimestampFormatFinder.findFirstMatch("", NOOP_TIMEOUT_CHECKER));
|
||||||
assertNull(TimestampFormatFinder.findFirstMatch("no timestamps in here"));
|
assertNull(TimestampFormatFinder.findFirstMatch("no timestamps in here", NOOP_TIMEOUT_CHECKER));
|
||||||
assertNull(TimestampFormatFinder.findFirstMatch(":::"));
|
assertNull(TimestampFormatFinder.findFirstMatch(":::", NOOP_TIMEOUT_CHECKER));
|
||||||
assertNull(TimestampFormatFinder.findFirstMatch("/+"));
|
assertNull(TimestampFormatFinder.findFirstMatch("/+", NOOP_TIMEOUT_CHECKER));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testFindFirstMatchGivenOnlyIso8601() {
|
public void testFindFirstMatchGivenOnlyIso8601() {
|
||||||
@ -132,23 +132,23 @@ public class TimestampFormatFinderTests extends FileStructureTestCase {
|
|||||||
public void testFindFirstMatchGivenOnlySystemDate() {
|
public void testFindFirstMatchGivenOnlySystemDate() {
|
||||||
|
|
||||||
assertEquals(new TimestampMatch(26, "", "UNIX_MS", "UNIX_MS", "\\b\\d{13}\\b", "POSINT", ""),
|
assertEquals(new TimestampMatch(26, "", "UNIX_MS", "UNIX_MS", "\\b\\d{13}\\b", "POSINT", ""),
|
||||||
TimestampFormatFinder.findFirstMatch("1526400896374"));
|
TimestampFormatFinder.findFirstMatch("1526400896374", NOOP_TIMEOUT_CHECKER));
|
||||||
assertEquals(new TimestampMatch(26, "", "UNIX_MS", "UNIX_MS", "\\b\\d{13}\\b", "POSINT", ""),
|
assertEquals(new TimestampMatch(26, "", "UNIX_MS", "UNIX_MS", "\\b\\d{13}\\b", "POSINT", ""),
|
||||||
TimestampFormatFinder.findFirstFullMatch("1526400896374"));
|
TimestampFormatFinder.findFirstFullMatch("1526400896374", NOOP_TIMEOUT_CHECKER));
|
||||||
|
|
||||||
assertEquals(new TimestampMatch(27, "", "UNIX", "UNIX", "\\b\\d{10}\\.\\d{3,9}\\b", "NUMBER", ""),
|
assertEquals(new TimestampMatch(27, "", "UNIX", "UNIX", "\\b\\d{10}\\.\\d{3,9}\\b", "NUMBER", ""),
|
||||||
TimestampFormatFinder.findFirstMatch("1526400896.736"));
|
TimestampFormatFinder.findFirstMatch("1526400896.736", NOOP_TIMEOUT_CHECKER));
|
||||||
assertEquals(new TimestampMatch(27, "", "UNIX", "UNIX", "\\b\\d{10}\\.\\d{3,9}\\b", "NUMBER", ""),
|
assertEquals(new TimestampMatch(27, "", "UNIX", "UNIX", "\\b\\d{10}\\.\\d{3,9}\\b", "NUMBER", ""),
|
||||||
TimestampFormatFinder.findFirstFullMatch("1526400896.736"));
|
TimestampFormatFinder.findFirstFullMatch("1526400896.736", NOOP_TIMEOUT_CHECKER));
|
||||||
assertEquals(new TimestampMatch(28, "", "UNIX", "UNIX", "\\b\\d{10}\\b", "POSINT", ""),
|
assertEquals(new TimestampMatch(28, "", "UNIX", "UNIX", "\\b\\d{10}\\b", "POSINT", ""),
|
||||||
TimestampFormatFinder.findFirstMatch("1526400896"));
|
TimestampFormatFinder.findFirstMatch("1526400896", NOOP_TIMEOUT_CHECKER));
|
||||||
assertEquals(new TimestampMatch(28, "", "UNIX", "UNIX", "\\b\\d{10}\\b", "POSINT", ""),
|
assertEquals(new TimestampMatch(28, "", "UNIX", "UNIX", "\\b\\d{10}\\b", "POSINT", ""),
|
||||||
TimestampFormatFinder.findFirstFullMatch("1526400896"));
|
TimestampFormatFinder.findFirstFullMatch("1526400896", NOOP_TIMEOUT_CHECKER));
|
||||||
|
|
||||||
assertEquals(new TimestampMatch(29, "", "TAI64N", "TAI64N", "\\b[0-9A-Fa-f]{24}\\b", "BASE16NUM", ""),
|
assertEquals(new TimestampMatch(29, "", "TAI64N", "TAI64N", "\\b[0-9A-Fa-f]{24}\\b", "BASE16NUM", ""),
|
||||||
TimestampFormatFinder.findFirstMatch("400000005afb159a164ac980"));
|
TimestampFormatFinder.findFirstMatch("400000005afb159a164ac980", NOOP_TIMEOUT_CHECKER));
|
||||||
assertEquals(new TimestampMatch(29, "", "TAI64N", "TAI64N", "\\b[0-9A-Fa-f]{24}\\b", "BASE16NUM", ""),
|
assertEquals(new TimestampMatch(29, "", "TAI64N", "TAI64N", "\\b[0-9A-Fa-f]{24}\\b", "BASE16NUM", ""),
|
||||||
TimestampFormatFinder.findFirstFullMatch("400000005afb159a164ac980"));
|
TimestampFormatFinder.findFirstFullMatch("400000005afb159a164ac980", NOOP_TIMEOUT_CHECKER));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testFindFirstMatchGivenRealLogMessages() {
|
public void testFindFirstMatchGivenRealLogMessages() {
|
||||||
@ -157,45 +157,47 @@ public class TimestampFormatFinderTests extends FileStructureTestCase {
|
|||||||
"\\b\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2},\\d{3}", "TIMESTAMP_ISO8601",
|
"\\b\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2},\\d{3}", "TIMESTAMP_ISO8601",
|
||||||
"][INFO ][o.e.e.NodeEnvironment ] [node-0] heap size [3.9gb], compressed ordinary object pointers [true]"),
|
"][INFO ][o.e.e.NodeEnvironment ] [node-0] heap size [3.9gb], compressed ordinary object pointers [true]"),
|
||||||
TimestampFormatFinder.findFirstMatch("[2018-05-11T17:07:29,553][INFO ][o.e.e.NodeEnvironment ] [node-0] " +
|
TimestampFormatFinder.findFirstMatch("[2018-05-11T17:07:29,553][INFO ][o.e.e.NodeEnvironment ] [node-0] " +
|
||||||
"heap size [3.9gb], compressed ordinary object pointers [true]"));
|
"heap size [3.9gb], compressed ordinary object pointers [true]", NOOP_TIMEOUT_CHECKER));
|
||||||
|
|
||||||
assertEquals(new TimestampMatch(23, "192.168.62.101 - - [", "dd/MMM/YYYY:HH:mm:ss Z", "dd/MMM/yyyy:HH:mm:ss XX",
|
assertEquals(new TimestampMatch(23, "192.168.62.101 - - [", "dd/MMM/YYYY:HH:mm:ss Z", "dd/MMM/yyyy:HH:mm:ss XX",
|
||||||
"\\b\\d{2}/[A-Z]\\S{2}/\\d{4}:\\d{2}:\\d{2}:\\d{2} ", "HTTPDATE",
|
"\\b\\d{2}/[A-Z]\\S{2}/\\d{4}:\\d{2}:\\d{2}:\\d{2} ", "HTTPDATE",
|
||||||
"] \"POST //apiserv:8080/engine/v2/jobs HTTP/1.1\" 201 42 \"-\" \"curl/7.46.0\" 384"),
|
"] \"POST //apiserv:8080/engine/v2/jobs HTTP/1.1\" 201 42 \"-\" \"curl/7.46.0\" 384"),
|
||||||
TimestampFormatFinder.findFirstMatch("192.168.62.101 - - [29/Jun/2016:12:11:31 +0000] " +
|
TimestampFormatFinder.findFirstMatch("192.168.62.101 - - [29/Jun/2016:12:11:31 +0000] " +
|
||||||
"\"POST //apiserv:8080/engine/v2/jobs HTTP/1.1\" 201 42 \"-\" \"curl/7.46.0\" 384"));
|
"\"POST //apiserv:8080/engine/v2/jobs HTTP/1.1\" 201 42 \"-\" \"curl/7.46.0\" 384", NOOP_TIMEOUT_CHECKER));
|
||||||
|
|
||||||
assertEquals(new TimestampMatch(24, "", "MMM dd, YYYY h:mm:ss a", "MMM dd, yyyy h:mm:ss a",
|
assertEquals(new TimestampMatch(24, "", "MMM dd, YYYY h:mm:ss a", "MMM dd, yyyy h:mm:ss a",
|
||||||
"\\b[A-Z]\\S{2,8} \\d{1,2}, \\d{4} \\d{1,2}:\\d{2}:\\d{2} [AP]M\\b", "CATALINA_DATESTAMP",
|
"\\b[A-Z]\\S{2,8} \\d{1,2}, \\d{4} \\d{1,2}:\\d{2}:\\d{2} [AP]M\\b", "CATALINA_DATESTAMP",
|
||||||
" org.apache.tomcat.util.http.Parameters processParameters"),
|
" org.apache.tomcat.util.http.Parameters processParameters"),
|
||||||
TimestampFormatFinder.findFirstMatch("Aug 29, 2009 12:03:57 AM org.apache.tomcat.util.http.Parameters processParameters"));
|
TimestampFormatFinder.findFirstMatch("Aug 29, 2009 12:03:57 AM org.apache.tomcat.util.http.Parameters processParameters",
|
||||||
|
NOOP_TIMEOUT_CHECKER));
|
||||||
|
|
||||||
assertEquals(new TimestampMatch(22, "", Arrays.asList("MMM dd HH:mm:ss", "MMM d HH:mm:ss"),
|
assertEquals(new TimestampMatch(22, "", Arrays.asList("MMM dd HH:mm:ss", "MMM d HH:mm:ss"),
|
||||||
Arrays.asList("MMM dd HH:mm:ss", "MMM d HH:mm:ss"),
|
Arrays.asList("MMM dd HH:mm:ss", "MMM d HH:mm:ss"),
|
||||||
"\\b[A-Z]\\S{2,8} {1,2}\\d{1,2} \\d{2}:\\d{2}:\\d{2}\\b", "SYSLOGTIMESTAMP", " esxi1.acme.com Vpxa: " +
|
"\\b[A-Z]\\S{2,8} {1,2}\\d{1,2} \\d{2}:\\d{2}:\\d{2}\\b", "SYSLOGTIMESTAMP", " esxi1.acme.com Vpxa: " +
|
||||||
"[3CB3FB90 verbose 'vpxavpxaInvtVm' opID=WFU-33d82c31] [VpxaInvtVmChangeListener] Guest DiskInfo Changed"),
|
"[3CB3FB90 verbose 'vpxavpxaInvtVm' opID=WFU-33d82c31] [VpxaInvtVmChangeListener] Guest DiskInfo Changed"),
|
||||||
TimestampFormatFinder.findFirstMatch("Oct 19 17:04:44 esxi1.acme.com Vpxa: [3CB3FB90 verbose 'vpxavpxaInvtVm' " +
|
TimestampFormatFinder.findFirstMatch("Oct 19 17:04:44 esxi1.acme.com Vpxa: [3CB3FB90 verbose 'vpxavpxaInvtVm' " +
|
||||||
"opID=WFU-33d82c31] [VpxaInvtVmChangeListener] Guest DiskInfo Changed"));
|
"opID=WFU-33d82c31] [VpxaInvtVmChangeListener] Guest DiskInfo Changed", NOOP_TIMEOUT_CHECKER));
|
||||||
|
|
||||||
assertEquals(new TimestampMatch(10, "559550912540598297\t", "ISO8601", "ISO8601", "\\b\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}",
|
assertEquals(new TimestampMatch(10, "559550912540598297\t", "ISO8601", "ISO8601", "\\b\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}",
|
||||||
"TIMESTAMP_ISO8601",
|
"TIMESTAMP_ISO8601",
|
||||||
"\t2016-04-20T21:06:53Z\t38545844\tserv02nw07\t192.168.114.28\tAuthpriv\tInfo\tsshd\tsubsystem request for sftp"),
|
"\t2016-04-20T21:06:53Z\t38545844\tserv02nw07\t192.168.114.28\tAuthpriv\tInfo\tsshd\tsubsystem request for sftp"),
|
||||||
TimestampFormatFinder.findFirstMatch("559550912540598297\t2016-04-20T14:06:53\t2016-04-20T21:06:53Z\t38545844\tserv02nw07\t" +
|
TimestampFormatFinder.findFirstMatch("559550912540598297\t2016-04-20T14:06:53\t2016-04-20T21:06:53Z\t38545844\tserv02nw07\t" +
|
||||||
"192.168.114.28\tAuthpriv\tInfo\tsshd\tsubsystem request for sftp"));
|
"192.168.114.28\tAuthpriv\tInfo\tsshd\tsubsystem request for sftp", NOOP_TIMEOUT_CHECKER));
|
||||||
|
|
||||||
assertEquals(new TimestampMatch(22, "", Arrays.asList("MMM dd HH:mm:ss", "MMM d HH:mm:ss"),
|
assertEquals(new TimestampMatch(22, "", Arrays.asList("MMM dd HH:mm:ss", "MMM d HH:mm:ss"),
|
||||||
Arrays.asList("MMM dd HH:mm:ss", "MMM d HH:mm:ss"),
|
Arrays.asList("MMM dd HH:mm:ss", "MMM d HH:mm:ss"),
|
||||||
"\\b[A-Z]\\S{2,8} {1,2}\\d{1,2} \\d{2}:\\d{2}:\\d{2}\\b", "SYSLOGTIMESTAMP",
|
"\\b[A-Z]\\S{2,8} {1,2}\\d{1,2} \\d{2}:\\d{2}:\\d{2}\\b", "SYSLOGTIMESTAMP",
|
||||||
" dnsserv named[22529]: error (unexpected RCODE REFUSED) resolving 'www.elastic.co/A/IN': 95.110.68.206#53"),
|
" dnsserv named[22529]: error (unexpected RCODE REFUSED) resolving 'www.elastic.co/A/IN': 95.110.68.206#53"),
|
||||||
TimestampFormatFinder.findFirstMatch("Sep 8 11:55:35 dnsserv named[22529]: error (unexpected RCODE REFUSED) resolving " +
|
TimestampFormatFinder.findFirstMatch("Sep 8 11:55:35 dnsserv named[22529]: error (unexpected RCODE REFUSED) resolving " +
|
||||||
"'www.elastic.co/A/IN': 95.110.68.206#53"));
|
"'www.elastic.co/A/IN': 95.110.68.206#53", NOOP_TIMEOUT_CHECKER));
|
||||||
|
|
||||||
assertEquals(new TimestampMatch(3, "", "YYYY-MM-dd HH:mm:ss.SSSSSS", "yyyy-MM-dd HH:mm:ss.SSSSSS",
|
assertEquals(new TimestampMatch(3, "", "YYYY-MM-dd HH:mm:ss.SSSSSS", "yyyy-MM-dd HH:mm:ss.SSSSSS",
|
||||||
"\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3}", "TIMESTAMP_ISO8601",
|
"\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3}", "TIMESTAMP_ISO8601",
|
||||||
"|INFO |VirtualServer |1 |client 'User1'(id:2) was added to channelgroup 'Channel Admin'(id:5) by client " +
|
"|INFO |VirtualServer |1 |client 'User1'(id:2) was added to channelgroup 'Channel Admin'(id:5) by client " +
|
||||||
"'User1'(id:2) in channel '3er Instanz'(id:2)"),
|
"'User1'(id:2) in channel '3er Instanz'(id:2)"),
|
||||||
TimestampFormatFinder.findFirstMatch("2018-01-06 19:22:20.106822|INFO |VirtualServer |1 |client " +
|
TimestampFormatFinder.findFirstMatch("2018-01-06 19:22:20.106822|INFO |VirtualServer |1 |client " +
|
||||||
" 'User1'(id:2) was added to channelgroup 'Channel Admin'(id:5) by client 'User1'(id:2) in channel '3er Instanz'(id:2)"));
|
" 'User1'(id:2) was added to channelgroup 'Channel Admin'(id:5) by client 'User1'(id:2) in channel '3er Instanz'(id:2)",
|
||||||
|
NOOP_TIMEOUT_CHECKER));
|
||||||
|
|
||||||
// Differs from the above as the required format is specified
|
// Differs from the above as the required format is specified
|
||||||
assertEquals(new TimestampMatch(3, "", "YYYY-MM-dd HH:mm:ss.SSSSSS", "yyyy-MM-dd HH:mm:ss.SSSSSS",
|
assertEquals(new TimestampMatch(3, "", "YYYY-MM-dd HH:mm:ss.SSSSSS", "yyyy-MM-dd HH:mm:ss.SSSSSS",
|
||||||
@ -204,12 +206,12 @@ public class TimestampFormatFinderTests extends FileStructureTestCase {
|
|||||||
"'User1'(id:2) in channel '3er Instanz'(id:2)"),
|
"'User1'(id:2) in channel '3er Instanz'(id:2)"),
|
||||||
TimestampFormatFinder.findFirstMatch("2018-01-06 19:22:20.106822|INFO |VirtualServer |1 |client " +
|
TimestampFormatFinder.findFirstMatch("2018-01-06 19:22:20.106822|INFO |VirtualServer |1 |client " +
|
||||||
" 'User1'(id:2) was added to channelgroup 'Channel Admin'(id:5) by client 'User1'(id:2) in channel '3er Instanz'(id:2)",
|
" 'User1'(id:2) was added to channelgroup 'Channel Admin'(id:5) by client 'User1'(id:2) in channel '3er Instanz'(id:2)",
|
||||||
randomFrom("YYYY-MM-dd HH:mm:ss.SSSSSS", "yyyy-MM-dd HH:mm:ss.SSSSSS")));
|
randomFrom("YYYY-MM-dd HH:mm:ss.SSSSSS", "yyyy-MM-dd HH:mm:ss.SSSSSS"), NOOP_TIMEOUT_CHECKER));
|
||||||
|
|
||||||
// Non-matching required format specified
|
// Non-matching required format specified
|
||||||
assertNull(TimestampFormatFinder.findFirstMatch("2018-01-06 19:22:20.106822|INFO |VirtualServer |1 |client " +
|
assertNull(TimestampFormatFinder.findFirstMatch("2018-01-06 19:22:20.106822|INFO |VirtualServer |1 |client " +
|
||||||
" 'User1'(id:2) was added to channelgroup 'Channel Admin'(id:5) by client 'User1'(id:2) in channel '3er Instanz'(id:2)",
|
" 'User1'(id:2) was added to channelgroup 'Channel Admin'(id:5) by client 'User1'(id:2) in channel '3er Instanz'(id:2)",
|
||||||
randomFrom("UNIX", "EEE MMM dd YYYY HH:mm zzz")));
|
randomFrom("UNIX", "EEE MMM dd YYYY HH:mm zzz"), NOOP_TIMEOUT_CHECKER));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testAdjustRequiredFormat() {
|
public void testAdjustRequiredFormat() {
|
||||||
@ -246,18 +248,20 @@ public class TimestampFormatFinderTests extends FileStructureTestCase {
|
|||||||
|
|
||||||
private void validateTimestampMatch(TimestampMatch expected, String text, long expectedEpochMs) {
|
private void validateTimestampMatch(TimestampMatch expected, String text, long expectedEpochMs) {
|
||||||
|
|
||||||
assertEquals(expected, TimestampFormatFinder.findFirstMatch(text));
|
assertEquals(expected, TimestampFormatFinder.findFirstMatch(text, NOOP_TIMEOUT_CHECKER));
|
||||||
assertEquals(expected, TimestampFormatFinder.findFirstFullMatch(text));
|
assertEquals(expected, TimestampFormatFinder.findFirstFullMatch(text, NOOP_TIMEOUT_CHECKER));
|
||||||
assertEquals(expected, TimestampFormatFinder.findFirstMatch(text, expected.candidateIndex));
|
assertEquals(expected, TimestampFormatFinder.findFirstMatch(text, expected.candidateIndex, NOOP_TIMEOUT_CHECKER));
|
||||||
assertEquals(expected, TimestampFormatFinder.findFirstFullMatch(text, expected.candidateIndex));
|
assertEquals(expected, TimestampFormatFinder.findFirstFullMatch(text, expected.candidateIndex, NOOP_TIMEOUT_CHECKER));
|
||||||
assertNull(TimestampFormatFinder.findFirstMatch(text, Integer.MAX_VALUE));
|
assertNull(TimestampFormatFinder.findFirstMatch(text, Integer.MAX_VALUE, NOOP_TIMEOUT_CHECKER));
|
||||||
assertNull(TimestampFormatFinder.findFirstFullMatch(text, Integer.MAX_VALUE));
|
assertNull(TimestampFormatFinder.findFirstFullMatch(text, Integer.MAX_VALUE, NOOP_TIMEOUT_CHECKER));
|
||||||
assertEquals(expected, TimestampFormatFinder.findFirstMatch(text, randomFrom(expected.jodaTimestampFormats)));
|
assertEquals(expected, TimestampFormatFinder.findFirstMatch(text, randomFrom(expected.jodaTimestampFormats), NOOP_TIMEOUT_CHECKER));
|
||||||
assertEquals(expected, TimestampFormatFinder.findFirstFullMatch(text, randomFrom(expected.jodaTimestampFormats)));
|
assertEquals(expected, TimestampFormatFinder.findFirstFullMatch(text, randomFrom(expected.jodaTimestampFormats),
|
||||||
assertEquals(expected, TimestampFormatFinder.findFirstMatch(text, randomFrom(expected.javaTimestampFormats)));
|
NOOP_TIMEOUT_CHECKER));
|
||||||
assertEquals(expected, TimestampFormatFinder.findFirstFullMatch(text, randomFrom(expected.javaTimestampFormats)));
|
assertEquals(expected, TimestampFormatFinder.findFirstMatch(text, randomFrom(expected.javaTimestampFormats), NOOP_TIMEOUT_CHECKER));
|
||||||
assertNull(TimestampFormatFinder.findFirstMatch(text, "wrong format"));
|
assertEquals(expected, TimestampFormatFinder.findFirstFullMatch(text, randomFrom(expected.javaTimestampFormats),
|
||||||
assertNull(TimestampFormatFinder.findFirstFullMatch(text, "wrong format"));
|
NOOP_TIMEOUT_CHECKER));
|
||||||
|
assertNull(TimestampFormatFinder.findFirstMatch(text, "wrong format", NOOP_TIMEOUT_CHECKER));
|
||||||
|
assertNull(TimestampFormatFinder.findFirstFullMatch(text, "wrong format", NOOP_TIMEOUT_CHECKER));
|
||||||
|
|
||||||
validateJodaTimestampFormats(expected.jodaTimestampFormats, text, expectedEpochMs);
|
validateJodaTimestampFormats(expected.jodaTimestampFormats, text, expectedEpochMs);
|
||||||
validateJavaTimestampFormats(expected.javaTimestampFormats, text, expectedEpochMs);
|
validateJavaTimestampFormats(expected.javaTimestampFormats, text, expectedEpochMs);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user