[ML] Interrupt Grok in file structure finder timeout (#36588)
The file structure finder has timeout functionality, but prior to this change it would not interrupt a single long-running Grok match attempt. This commit hooks into the ThreadWatchdog facility provided by the Grok library to interrupt individual Grok matches that may be running at the time the file structure finder timeout expires.
This commit is contained in:
parent
a4b32f1143
commit
690b10a4a1
|
@ -34,8 +34,9 @@ public final class FileStructureUtils {
|
|||
|
||||
private static final int NUM_TOP_HITS = 10;
|
||||
// NUMBER Grok pattern doesn't support scientific notation, so we extend it
|
||||
private static final Grok NUMBER_GROK = new Grok(Grok.getBuiltinPatterns(), "^%{NUMBER}(?:[eE][+-]?[0-3]?[0-9]{1,2})?$");
|
||||
private static final Grok IP_GROK = new Grok(Grok.getBuiltinPatterns(), "^%{IP}$");
|
||||
private static final Grok NUMBER_GROK = new Grok(Grok.getBuiltinPatterns(), "^%{NUMBER}(?:[eE][+-]?[0-3]?[0-9]{1,2})?$",
|
||||
TimeoutChecker.watchdog);
|
||||
private static final Grok IP_GROK = new Grok(Grok.getBuiltinPatterns(), "^%{IP}$", TimeoutChecker.watchdog);
|
||||
private static final int KEYWORD_MAX_LEN = 256;
|
||||
private static final int KEYWORD_MAX_SPACES = 5;
|
||||
|
||||
|
@ -69,7 +70,7 @@ public final class FileStructureUtils {
|
|||
}
|
||||
|
||||
// Accept the first match from the first sample that is compatible with all the other samples
|
||||
for (Tuple<String, TimestampMatch> candidate : findCandidates(explanation, sampleRecords, overrides)) {
|
||||
for (Tuple<String, TimestampMatch> candidate : findCandidates(explanation, sampleRecords, overrides, timeoutChecker)) {
|
||||
|
||||
boolean allGood = true;
|
||||
for (Map<String, ?> sampleRecord : sampleRecords.subList(1, sampleRecords.size())) {
|
||||
|
@ -87,7 +88,8 @@ public final class FileStructureUtils {
|
|||
|
||||
timeoutChecker.check("timestamp field determination");
|
||||
|
||||
TimestampMatch match = TimestampFormatFinder.findFirstFullMatch(fieldValue.toString(), overrides.getTimestampFormat());
|
||||
TimestampMatch match = TimestampFormatFinder.findFirstFullMatch(fieldValue.toString(), overrides.getTimestampFormat(),
|
||||
timeoutChecker);
|
||||
if (match == null || match.candidateIndex != candidate.v2().candidateIndex) {
|
||||
if (overrides.getTimestampFormat() != null) {
|
||||
throw new IllegalArgumentException("Specified timestamp format [" + overrides.getTimestampFormat() +
|
||||
|
@ -111,7 +113,7 @@ public final class FileStructureUtils {
|
|||
}
|
||||
|
||||
private static List<Tuple<String, TimestampMatch>> findCandidates(List<String> explanation, List<Map<String, ?>> sampleRecords,
|
||||
FileStructureOverrides overrides) {
|
||||
FileStructureOverrides overrides, TimeoutChecker timeoutChecker) {
|
||||
|
||||
assert sampleRecords.isEmpty() == false;
|
||||
Map<String, ?> firstRecord = sampleRecords.get(0);
|
||||
|
@ -130,7 +132,8 @@ public final class FileStructureUtils {
|
|||
if (onlyConsiderField == null || onlyConsiderField.equals(fieldName)) {
|
||||
Object value = field.getValue();
|
||||
if (value != null) {
|
||||
TimestampMatch match = TimestampFormatFinder.findFirstFullMatch(value.toString(), overrides.getTimestampFormat());
|
||||
TimestampMatch match = TimestampFormatFinder.findFirstFullMatch(value.toString(), overrides.getTimestampFormat(),
|
||||
timeoutChecker);
|
||||
if (match != null) {
|
||||
Tuple<String, TimestampMatch> candidate = new Tuple<>(fieldName, match);
|
||||
candidates.add(candidate);
|
||||
|
@ -211,7 +214,7 @@ public final class FileStructureUtils {
|
|||
}
|
||||
|
||||
Collection<String> fieldValuesAsStrings = fieldValues.stream().map(Object::toString).collect(Collectors.toList());
|
||||
Map<String, String> mapping = guessScalarMapping(explanation, fieldName, fieldValuesAsStrings);
|
||||
Map<String, String> mapping = guessScalarMapping(explanation, fieldName, fieldValuesAsStrings, timeoutChecker);
|
||||
timeoutChecker.check("mapping determination");
|
||||
return new Tuple<>(mapping, calculateFieldStats(fieldValuesAsStrings, timeoutChecker));
|
||||
}
|
||||
|
@ -238,10 +241,12 @@ public final class FileStructureUtils {
|
|||
* @param fieldValues Values of the field for which mappings are to be guessed. The guessed
|
||||
* mapping will be compatible with all the provided values. Must not be
|
||||
* empty.
|
||||
* @param timeoutChecker Will abort the operation if its timeout is exceeded.
|
||||
* @return The sub-section of the index mappings most appropriate for the field,
|
||||
* for example <code>{ "type" : "keyword" }</code>.
|
||||
*/
|
||||
static Map<String, String> guessScalarMapping(List<String> explanation, String fieldName, Collection<String> fieldValues) {
|
||||
static Map<String, String> guessScalarMapping(List<String> explanation, String fieldName, Collection<String> fieldValues,
|
||||
TimeoutChecker timeoutChecker) {
|
||||
|
||||
assert fieldValues.isEmpty() == false;
|
||||
|
||||
|
@ -251,11 +256,12 @@ public final class FileStructureUtils {
|
|||
|
||||
// This checks if a date mapping would be appropriate, and, if so, finds the correct format
|
||||
Iterator<String> iter = fieldValues.iterator();
|
||||
TimestampMatch timestampMatch = TimestampFormatFinder.findFirstFullMatch(iter.next());
|
||||
TimestampMatch timestampMatch = TimestampFormatFinder.findFirstFullMatch(iter.next(), timeoutChecker);
|
||||
while (timestampMatch != null && iter.hasNext()) {
|
||||
// To be mapped as type date all the values must match the same timestamp format - it is
|
||||
// not acceptable for all values to be dates, but with different formats
|
||||
if (timestampMatch.equals(TimestampFormatFinder.findFirstFullMatch(iter.next(), timestampMatch.candidateIndex)) == false) {
|
||||
if (timestampMatch.equals(TimestampFormatFinder.findFirstFullMatch(iter.next(), timestampMatch.candidateIndex,
|
||||
timeoutChecker)) == false) {
|
||||
timestampMatch = null;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -453,7 +453,7 @@ public final class GrokPatternCreator {
|
|||
this.fieldName = fieldName;
|
||||
// The (?m) here has the Ruby meaning, which is equivalent to (?s) in Java
|
||||
grok = new Grok(Grok.getBuiltinPatterns(), "(?m)%{DATA:" + PREFACE + "}" + preBreak +
|
||||
"%{" + grokPatternName + ":" + VALUE + "}" + postBreak + "%{GREEDYDATA:" + EPILOGUE + "}");
|
||||
"%{" + grokPatternName + ":" + VALUE + "}" + postBreak + "%{GREEDYDATA:" + EPILOGUE + "}", TimeoutChecker.watchdog);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -472,7 +472,7 @@ public final class GrokPatternCreator {
|
|||
TimeoutChecker timeoutChecker) {
|
||||
Collection<String> values = new ArrayList<>();
|
||||
for (String snippet : snippets) {
|
||||
Map<String, Object> captures = grok.captures(snippet);
|
||||
Map<String, Object> captures = timeoutChecker.grokCaptures(grok, snippet, "full message Grok pattern field extraction");
|
||||
// If the pattern doesn't match then captures will be null
|
||||
if (captures == null) {
|
||||
throw new IllegalStateException("[%{" + grokPatternName + "}] does not match snippet [" + snippet + "]");
|
||||
|
@ -480,14 +480,13 @@ public final class GrokPatternCreator {
|
|||
prefaces.add(captures.getOrDefault(PREFACE, "").toString());
|
||||
values.add(captures.getOrDefault(VALUE, "").toString());
|
||||
epilogues.add(captures.getOrDefault(EPILOGUE, "").toString());
|
||||
timeoutChecker.check("full message Grok pattern field extraction");
|
||||
}
|
||||
String adjustedFieldName = buildFieldName(fieldNameCountStore, fieldName);
|
||||
if (mappings != null) {
|
||||
Map<String, String> fullMappingType = Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, mappingType);
|
||||
if ("date".equals(mappingType)) {
|
||||
assert values.isEmpty() == false;
|
||||
TimestampMatch timestampMatch = TimestampFormatFinder.findFirstFullMatch(values.iterator().next());
|
||||
TimestampMatch timestampMatch = TimestampFormatFinder.findFirstFullMatch(values.iterator().next(), timeoutChecker);
|
||||
if (timestampMatch != null) {
|
||||
fullMappingType = timestampMatch.getEsDateMappingTypeWithFormat();
|
||||
}
|
||||
|
@ -548,7 +547,7 @@ public final class GrokPatternCreator {
|
|||
throw new IllegalStateException("Cannot process KV matches until a field name has been determined");
|
||||
}
|
||||
Grok grok = new Grok(Grok.getBuiltinPatterns(), "(?m)%{DATA:" + PREFACE + "}\\b" +
|
||||
fieldName + "=%{USER:" + VALUE + "}%{GREEDYDATA:" + EPILOGUE + "}");
|
||||
fieldName + "=%{USER:" + VALUE + "}%{GREEDYDATA:" + EPILOGUE + "}", TimeoutChecker.watchdog);
|
||||
Collection<String> values = new ArrayList<>();
|
||||
for (String snippet : snippets) {
|
||||
Map<String, Object> captures = grok.captures(snippet);
|
||||
|
@ -563,7 +562,8 @@ public final class GrokPatternCreator {
|
|||
}
|
||||
String adjustedFieldName = buildFieldName(fieldNameCountStore, fieldName);
|
||||
if (mappings != null) {
|
||||
mappings.put(adjustedFieldName, FileStructureUtils.guessScalarMapping(explanation, adjustedFieldName, values));
|
||||
mappings.put(adjustedFieldName,
|
||||
FileStructureUtils.guessScalarMapping(explanation, adjustedFieldName, values, timeoutChecker));
|
||||
timeoutChecker.check("mapping determination");
|
||||
}
|
||||
if (fieldStats != null) {
|
||||
|
@ -610,7 +610,7 @@ public final class GrokPatternCreator {
|
|||
private FullMatchGrokPatternCandidate(String grokPattern, String timeField) {
|
||||
this.grokPattern = grokPattern;
|
||||
this.timeField = timeField;
|
||||
grok = new Grok(Grok.getBuiltinPatterns(), grokPattern);
|
||||
grok = new Grok(Grok.getBuiltinPatterns(), grokPattern, TimeoutChecker.watchdog);
|
||||
}
|
||||
|
||||
public String getTimeField() {
|
||||
|
@ -640,7 +640,8 @@ public final class GrokPatternCreator {
|
|||
Map<String, Collection<String>> valuesPerField = new HashMap<>();
|
||||
|
||||
for (String sampleMessage : sampleMessages) {
|
||||
Map<String, Object> captures = grok.captures(sampleMessage);
|
||||
Map<String, Object> captures = timeoutChecker.grokCaptures(grok, sampleMessage,
|
||||
"full message Grok pattern field extraction");
|
||||
// If the pattern doesn't match then captures will be null
|
||||
if (captures == null) {
|
||||
throw new IllegalStateException("[" + grokPattern + "] does not match snippet [" + sampleMessage + "]");
|
||||
|
@ -658,7 +659,6 @@ public final class GrokPatternCreator {
|
|||
}
|
||||
});
|
||||
}
|
||||
timeoutChecker.check("full message Grok pattern field extraction");
|
||||
}
|
||||
|
||||
for (Map.Entry<String, Collection<String>> valuesForField : valuesPerField.entrySet()) {
|
||||
|
@ -667,7 +667,7 @@ public final class GrokPatternCreator {
|
|||
// Exclude the time field because that will be dropped and replaced with @timestamp
|
||||
if (fieldName.equals(timeField) == false) {
|
||||
mappings.put(fieldName,
|
||||
FileStructureUtils.guessScalarMapping(explanation, fieldName, valuesForField.getValue()));
|
||||
FileStructureUtils.guessScalarMapping(explanation, fieldName, valuesForField.getValue(), timeoutChecker));
|
||||
timeoutChecker.check("mapping determination");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -155,7 +155,7 @@ public class TextLogFileStructureFinder implements FileStructureFinder {
|
|||
int remainingLines = sampleLines.length;
|
||||
double differenceBetweenTwoHighestWeights = 0.0;
|
||||
for (String sampleLine : sampleLines) {
|
||||
TimestampMatch match = TimestampFormatFinder.findFirstMatch(sampleLine, overrides.getTimestampFormat());
|
||||
TimestampMatch match = TimestampFormatFinder.findFirstMatch(sampleLine, overrides.getTimestampFormat(), timeoutChecker);
|
||||
if (match != null) {
|
||||
TimestampMatch pureMatch = new TimestampMatch(match.candidateIndex, "", match.jodaTimestampFormats,
|
||||
match.javaTimestampFormats, match.simplePattern, match.grokPatternName, "");
|
||||
|
|
|
@ -6,14 +6,19 @@
|
|||
package org.elasticsearch.xpack.ml.filestructurefinder;
|
||||
|
||||
import org.elasticsearch.ElasticsearchTimeoutException;
|
||||
import org.elasticsearch.common.collect.Tuple;
|
||||
import org.elasticsearch.common.unit.TimeValue;
|
||||
import org.elasticsearch.common.util.concurrent.FutureUtils;
|
||||
import org.elasticsearch.grok.Grok;
|
||||
import org.elasticsearch.grok.ThreadWatchdog;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.ScheduledExecutorService;
|
||||
import java.util.concurrent.ScheduledFuture;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
/**
|
||||
* This class can be used to keep track of when a long running operation started and
|
||||
|
@ -32,9 +37,13 @@ import java.util.concurrent.TimeUnit;
|
|||
*/
|
||||
public class TimeoutChecker implements Closeable {
|
||||
|
||||
private static final TimeoutCheckerWatchdog timeoutCheckerWatchdog = new TimeoutCheckerWatchdog();
|
||||
public static final ThreadWatchdog watchdog = timeoutCheckerWatchdog;
|
||||
|
||||
private final String operation;
|
||||
private final ScheduledFuture<?> future;
|
||||
private final TimeValue timeout;
|
||||
private final Thread checkedThread;
|
||||
private final ScheduledFuture<?> future;
|
||||
private volatile boolean timeoutExceeded;
|
||||
|
||||
/**
|
||||
|
@ -48,6 +57,8 @@ public class TimeoutChecker implements Closeable {
|
|||
public TimeoutChecker(String operation, TimeValue timeout, ScheduledExecutorService scheduler) {
|
||||
this.operation = operation;
|
||||
this.timeout = timeout;
|
||||
this.checkedThread = Thread.currentThread();
|
||||
timeoutCheckerWatchdog.add(checkedThread, timeout);
|
||||
this.future = (timeout != null) ? scheduler.schedule(this::setTimeoutExceeded, timeout.nanos(), TimeUnit.NANOSECONDS) : null;
|
||||
}
|
||||
|
||||
|
@ -57,6 +68,7 @@ public class TimeoutChecker implements Closeable {
|
|||
@Override
|
||||
public void close() {
|
||||
FutureUtils.cancel(future);
|
||||
timeoutCheckerWatchdog.remove(checkedThread);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -72,7 +84,80 @@ public class TimeoutChecker implements Closeable {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Wrapper around {@link Grok#captures} that translates any timeout exception
|
||||
* to the style thrown by this class's {@link #check} method.
|
||||
* @param grok The grok pattern from which captures are to be extracted.
|
||||
* @param text The text to match and extract values from.
|
||||
* @param where Which stage of the operation is currently in progress?
|
||||
* @return A map containing field names and their respective coerced values that matched.
|
||||
* @throws ElasticsearchTimeoutException If the operation is found to have taken longer than the permitted time.
|
||||
*/
|
||||
public Map<String, Object> grokCaptures(Grok grok, String text, String where) {
|
||||
|
||||
try {
|
||||
return grok.captures(text);
|
||||
} finally {
|
||||
// If a timeout has occurred then this check will overwrite any timeout exception thrown by Grok.captures() and this
|
||||
// is intentional - the exception from this class makes more sense in the context of the find file structure API
|
||||
check(where);
|
||||
}
|
||||
}
|
||||
|
||||
private void setTimeoutExceeded() {
|
||||
timeoutExceeded = true;
|
||||
timeoutCheckerWatchdog.interruptLongRunningThreadIfRegistered(checkedThread);
|
||||
}
|
||||
|
||||
/**
|
||||
* An implementation of the type of watchdog used by the {@link Grok} class to interrupt
|
||||
* matching operations that take too long. Rather than have a timeout per match operation
|
||||
* like the {@link ThreadWatchdog.Default} implementation, the interruption is governed by
|
||||
* a {@link TimeoutChecker} associated with the thread doing the matching.
|
||||
*/
|
||||
static class TimeoutCheckerWatchdog implements ThreadWatchdog {
|
||||
|
||||
final ConcurrentHashMap<Thread, Tuple<AtomicBoolean, TimeValue>> registry = new ConcurrentHashMap<>();
|
||||
|
||||
void add(Thread thread, TimeValue timeout) {
|
||||
Tuple<AtomicBoolean, TimeValue> previousValue = registry.put(thread, new Tuple<>(new AtomicBoolean(false), timeout));
|
||||
assert previousValue == null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void register() {
|
||||
Tuple<AtomicBoolean, TimeValue> value = registry.get(Thread.currentThread());
|
||||
if (value != null) {
|
||||
boolean wasFalse = value.v1().compareAndSet(false, true);
|
||||
assert wasFalse;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public long maxExecutionTimeInMillis() {
|
||||
Tuple<AtomicBoolean, TimeValue> value = registry.get(Thread.currentThread());
|
||||
return value != null ? value.v2().getMillis() : Long.MAX_VALUE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void unregister() {
|
||||
Tuple<AtomicBoolean, TimeValue> value = registry.get(Thread.currentThread());
|
||||
if (value != null) {
|
||||
boolean wasTrue = value.v1().compareAndSet(true, false);
|
||||
assert wasTrue;
|
||||
}
|
||||
}
|
||||
|
||||
void remove(Thread thread) {
|
||||
Tuple<AtomicBoolean, TimeValue> previousValue = registry.remove(thread);
|
||||
assert previousValue != null;
|
||||
}
|
||||
|
||||
void interruptLongRunningThreadIfRegistered(Thread thread) {
|
||||
Tuple<AtomicBoolean, TimeValue> value = registry.get(thread);
|
||||
if (value.v1().get()) {
|
||||
thread.interrupt();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -173,20 +173,22 @@ public final class TimestampFormatFinder {
|
|||
/**
|
||||
* Find the first timestamp format that matches part of the supplied value.
|
||||
* @param text The value that the returned timestamp format must exist within.
|
||||
* @param timeoutChecker Will abort the operation if its timeout is exceeded.
|
||||
* @return The timestamp format, or <code>null</code> if none matches.
|
||||
*/
|
||||
public static TimestampMatch findFirstMatch(String text) {
|
||||
return findFirstMatch(text, 0);
|
||||
public static TimestampMatch findFirstMatch(String text, TimeoutChecker timeoutChecker) {
|
||||
return findFirstMatch(text, 0, timeoutChecker);
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the first timestamp format that matches part of the supplied value.
|
||||
* @param text The value that the returned timestamp format must exist within.
|
||||
* @param requiredFormat A timestamp format that any returned match must support.
|
||||
* @param timeoutChecker Will abort the operation if its timeout is exceeded.
|
||||
* @return The timestamp format, or <code>null</code> if none matches.
|
||||
*/
|
||||
public static TimestampMatch findFirstMatch(String text, String requiredFormat) {
|
||||
return findFirstMatch(text, 0, requiredFormat);
|
||||
public static TimestampMatch findFirstMatch(String text, String requiredFormat, TimeoutChecker timeoutChecker) {
|
||||
return findFirstMatch(text, 0, requiredFormat, timeoutChecker);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -194,10 +196,11 @@ public final class TimestampFormatFinder {
|
|||
* excluding a specified number of candidate formats.
|
||||
* @param text The value that the returned timestamp format must exist within.
|
||||
* @param ignoreCandidates The number of candidate formats to exclude from the search.
|
||||
* @param timeoutChecker Will abort the operation if its timeout is exceeded.
|
||||
* @return The timestamp format, or <code>null</code> if none matches.
|
||||
*/
|
||||
public static TimestampMatch findFirstMatch(String text, int ignoreCandidates) {
|
||||
return findFirstMatch(text, ignoreCandidates, null);
|
||||
public static TimestampMatch findFirstMatch(String text, int ignoreCandidates, TimeoutChecker timeoutChecker) {
|
||||
return findFirstMatch(text, ignoreCandidates, null, timeoutChecker);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -206,9 +209,10 @@ public final class TimestampFormatFinder {
|
|||
* @param text The value that the returned timestamp format must exist within.
|
||||
* @param ignoreCandidates The number of candidate formats to exclude from the search.
|
||||
* @param requiredFormat A timestamp format that any returned match must support.
|
||||
* @param timeoutChecker Will abort the operation if its timeout is exceeded.
|
||||
* @return The timestamp format, or <code>null</code> if none matches.
|
||||
*/
|
||||
public static TimestampMatch findFirstMatch(String text, int ignoreCandidates, String requiredFormat) {
|
||||
public static TimestampMatch findFirstMatch(String text, int ignoreCandidates, String requiredFormat, TimeoutChecker timeoutChecker) {
|
||||
if (ignoreCandidates >= ORDERED_CANDIDATE_FORMATS.size()) {
|
||||
return null;
|
||||
}
|
||||
|
@ -229,7 +233,8 @@ public final class TimestampFormatFinder {
|
|||
}
|
||||
}
|
||||
if (quicklyRuledOut == false) {
|
||||
Map<String, Object> captures = candidate.strictSearchGrok.captures(text);
|
||||
Map<String, Object> captures = timeoutChecker.grokCaptures(candidate.strictSearchGrok, text,
|
||||
"timestamp format determination");
|
||||
if (captures != null) {
|
||||
String preface = captures.getOrDefault(PREFACE, "").toString();
|
||||
String epilogue = captures.getOrDefault(EPILOGUE, "").toString();
|
||||
|
@ -246,20 +251,22 @@ public final class TimestampFormatFinder {
|
|||
/**
|
||||
* Find the best timestamp format for matching an entire field value.
|
||||
* @param text The value that the returned timestamp format must match in its entirety.
|
||||
* @param timeoutChecker Will abort the operation if its timeout is exceeded.
|
||||
* @return The timestamp format, or <code>null</code> if none matches.
|
||||
*/
|
||||
public static TimestampMatch findFirstFullMatch(String text) {
|
||||
return findFirstFullMatch(text, 0);
|
||||
public static TimestampMatch findFirstFullMatch(String text, TimeoutChecker timeoutChecker) {
|
||||
return findFirstFullMatch(text, 0, timeoutChecker);
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the best timestamp format for matching an entire field value.
|
||||
* @param text The value that the returned timestamp format must match in its entirety.
|
||||
* @param requiredFormat A timestamp format that any returned match must support.
|
||||
* @param timeoutChecker Will abort the operation if its timeout is exceeded.
|
||||
* @return The timestamp format, or <code>null</code> if none matches.
|
||||
*/
|
||||
public static TimestampMatch findFirstFullMatch(String text, String requiredFormat) {
|
||||
return findFirstFullMatch(text, 0, requiredFormat);
|
||||
public static TimestampMatch findFirstFullMatch(String text, String requiredFormat, TimeoutChecker timeoutChecker) {
|
||||
return findFirstFullMatch(text, 0, requiredFormat, timeoutChecker);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -267,10 +274,11 @@ public final class TimestampFormatFinder {
|
|||
* excluding a specified number of candidate formats.
|
||||
* @param text The value that the returned timestamp format must match in its entirety.
|
||||
* @param ignoreCandidates The number of candidate formats to exclude from the search.
|
||||
* @param timeoutChecker Will abort the operation if its timeout is exceeded.
|
||||
* @return The timestamp format, or <code>null</code> if none matches.
|
||||
*/
|
||||
public static TimestampMatch findFirstFullMatch(String text, int ignoreCandidates) {
|
||||
return findFirstFullMatch(text, ignoreCandidates, null);
|
||||
public static TimestampMatch findFirstFullMatch(String text, int ignoreCandidates, TimeoutChecker timeoutChecker) {
|
||||
return findFirstFullMatch(text, ignoreCandidates, null, timeoutChecker);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -279,9 +287,11 @@ public final class TimestampFormatFinder {
|
|||
* @param text The value that the returned timestamp format must match in its entirety.
|
||||
* @param ignoreCandidates The number of candidate formats to exclude from the search.
|
||||
* @param requiredFormat A timestamp format that any returned match must support.
|
||||
* @param timeoutChecker Will abort the operation if its timeout is exceeded.
|
||||
* @return The timestamp format, or <code>null</code> if none matches.
|
||||
*/
|
||||
public static TimestampMatch findFirstFullMatch(String text, int ignoreCandidates, String requiredFormat) {
|
||||
public static TimestampMatch findFirstFullMatch(String text, int ignoreCandidates, String requiredFormat,
|
||||
TimeoutChecker timeoutChecker) {
|
||||
if (ignoreCandidates >= ORDERED_CANDIDATE_FORMATS.size()) {
|
||||
return null;
|
||||
}
|
||||
|
@ -290,7 +300,8 @@ public final class TimestampFormatFinder {
|
|||
for (CandidateTimestampFormat candidate : ORDERED_CANDIDATE_FORMATS.subList(ignoreCandidates, ORDERED_CANDIDATE_FORMATS.size())) {
|
||||
if (adjustedRequiredFormat == null || candidate.jodaTimestampFormats.contains(adjustedRequiredFormat) ||
|
||||
candidate.javaTimestampFormats.contains(adjustedRequiredFormat)) {
|
||||
Map<String, Object> captures = candidate.strictFullMatchGrok.captures(text);
|
||||
Map<String, Object> captures = timeoutChecker.grokCaptures(candidate.strictFullMatchGrok, text,
|
||||
"timestamp format determination");
|
||||
if (captures != null) {
|
||||
return makeTimestampMatch(candidate, index, "", text, "");
|
||||
}
|
||||
|
@ -540,8 +551,8 @@ public final class TimestampFormatFinder {
|
|||
this.simplePattern = Pattern.compile(simpleRegex, Pattern.MULTILINE);
|
||||
// The (?m) here has the Ruby meaning, which is equivalent to (?s) in Java
|
||||
this.strictSearchGrok = new Grok(Grok.getBuiltinPatterns(), "(?m)%{DATA:" + PREFACE + "}" + strictGrokPattern +
|
||||
"%{GREEDYDATA:" + EPILOGUE + "}");
|
||||
this.strictFullMatchGrok = new Grok(Grok.getBuiltinPatterns(), "^" + strictGrokPattern + "$");
|
||||
"%{GREEDYDATA:" + EPILOGUE + "}", TimeoutChecker.watchdog);
|
||||
this.strictFullMatchGrok = new Grok(Grok.getBuiltinPatterns(), "^" + strictGrokPattern + "$", TimeoutChecker.watchdog);
|
||||
this.standardGrokPatternName = standardGrokPatternName;
|
||||
assert quickRuleOutIndices.stream()
|
||||
.noneMatch(quickRuleOutIndex -> quickRuleOutIndex < 0 || quickRuleOutIndex >= QUICK_RULE_OUT_PATTERNS.size());
|
||||
|
|
|
@ -7,6 +7,7 @@ package org.elasticsearch.xpack.ml.filestructurefinder;
|
|||
|
||||
import org.elasticsearch.ElasticsearchTimeoutException;
|
||||
import org.elasticsearch.common.unit.TimeValue;
|
||||
import org.elasticsearch.grok.Grok;
|
||||
import org.junit.After;
|
||||
import org.junit.Before;
|
||||
|
||||
|
@ -57,4 +58,35 @@ public class TimeoutCheckerTests extends FileStructureTestCase {
|
|||
});
|
||||
}
|
||||
}
|
||||
|
||||
public void testWatchdog() {
|
||||
|
||||
assertFalse(Thread.interrupted());
|
||||
|
||||
TimeValue timeout = TimeValue.timeValueMillis(1);
|
||||
try (TimeoutChecker timeoutChecker = new TimeoutChecker("watchdog test", timeout, scheduler)) {
|
||||
|
||||
TimeoutChecker.watchdog.register();
|
||||
try {
|
||||
expectThrows(InterruptedException.class, () -> Thread.sleep(10000));
|
||||
} finally {
|
||||
TimeoutChecker.watchdog.unregister();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testGrokCaptures() {
|
||||
|
||||
assertFalse(Thread.interrupted());
|
||||
Grok grok = new Grok(Grok.getBuiltinPatterns(), "{%DATA:data}{%GREEDYDATA:greedydata}", TimeoutChecker.watchdog);
|
||||
|
||||
TimeValue timeout = TimeValue.timeValueMillis(1);
|
||||
try (TimeoutChecker timeoutChecker = new TimeoutChecker("grok captures test", timeout, scheduler)) {
|
||||
|
||||
ElasticsearchTimeoutException e = expectThrows(ElasticsearchTimeoutException.class,
|
||||
() -> timeoutChecker.grokCaptures(grok, randomAlphaOfLength(1000000), "should timeout"));
|
||||
assertEquals("Aborting grok captures test during [should timeout] as it has taken longer than the timeout of [" +
|
||||
timeout + "]", e.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,10 +17,10 @@ public class TimestampFormatFinderTests extends FileStructureTestCase {
|
|||
|
||||
public void testFindFirstMatchGivenNoMatch() {
|
||||
|
||||
assertNull(TimestampFormatFinder.findFirstMatch(""));
|
||||
assertNull(TimestampFormatFinder.findFirstMatch("no timestamps in here"));
|
||||
assertNull(TimestampFormatFinder.findFirstMatch(":::"));
|
||||
assertNull(TimestampFormatFinder.findFirstMatch("/+"));
|
||||
assertNull(TimestampFormatFinder.findFirstMatch("", NOOP_TIMEOUT_CHECKER));
|
||||
assertNull(TimestampFormatFinder.findFirstMatch("no timestamps in here", NOOP_TIMEOUT_CHECKER));
|
||||
assertNull(TimestampFormatFinder.findFirstMatch(":::", NOOP_TIMEOUT_CHECKER));
|
||||
assertNull(TimestampFormatFinder.findFirstMatch("/+", NOOP_TIMEOUT_CHECKER));
|
||||
}
|
||||
|
||||
public void testFindFirstMatchGivenOnlyIso8601() {
|
||||
|
@ -132,23 +132,23 @@ public class TimestampFormatFinderTests extends FileStructureTestCase {
|
|||
public void testFindFirstMatchGivenOnlySystemDate() {
|
||||
|
||||
assertEquals(new TimestampMatch(26, "", "UNIX_MS", "UNIX_MS", "\\b\\d{13}\\b", "POSINT", ""),
|
||||
TimestampFormatFinder.findFirstMatch("1526400896374"));
|
||||
TimestampFormatFinder.findFirstMatch("1526400896374", NOOP_TIMEOUT_CHECKER));
|
||||
assertEquals(new TimestampMatch(26, "", "UNIX_MS", "UNIX_MS", "\\b\\d{13}\\b", "POSINT", ""),
|
||||
TimestampFormatFinder.findFirstFullMatch("1526400896374"));
|
||||
TimestampFormatFinder.findFirstFullMatch("1526400896374", NOOP_TIMEOUT_CHECKER));
|
||||
|
||||
assertEquals(new TimestampMatch(27, "", "UNIX", "UNIX", "\\b\\d{10}\\.\\d{3,9}\\b", "NUMBER", ""),
|
||||
TimestampFormatFinder.findFirstMatch("1526400896.736"));
|
||||
TimestampFormatFinder.findFirstMatch("1526400896.736", NOOP_TIMEOUT_CHECKER));
|
||||
assertEquals(new TimestampMatch(27, "", "UNIX", "UNIX", "\\b\\d{10}\\.\\d{3,9}\\b", "NUMBER", ""),
|
||||
TimestampFormatFinder.findFirstFullMatch("1526400896.736"));
|
||||
TimestampFormatFinder.findFirstFullMatch("1526400896.736", NOOP_TIMEOUT_CHECKER));
|
||||
assertEquals(new TimestampMatch(28, "", "UNIX", "UNIX", "\\b\\d{10}\\b", "POSINT", ""),
|
||||
TimestampFormatFinder.findFirstMatch("1526400896"));
|
||||
TimestampFormatFinder.findFirstMatch("1526400896", NOOP_TIMEOUT_CHECKER));
|
||||
assertEquals(new TimestampMatch(28, "", "UNIX", "UNIX", "\\b\\d{10}\\b", "POSINT", ""),
|
||||
TimestampFormatFinder.findFirstFullMatch("1526400896"));
|
||||
TimestampFormatFinder.findFirstFullMatch("1526400896", NOOP_TIMEOUT_CHECKER));
|
||||
|
||||
assertEquals(new TimestampMatch(29, "", "TAI64N", "TAI64N", "\\b[0-9A-Fa-f]{24}\\b", "BASE16NUM", ""),
|
||||
TimestampFormatFinder.findFirstMatch("400000005afb159a164ac980"));
|
||||
TimestampFormatFinder.findFirstMatch("400000005afb159a164ac980", NOOP_TIMEOUT_CHECKER));
|
||||
assertEquals(new TimestampMatch(29, "", "TAI64N", "TAI64N", "\\b[0-9A-Fa-f]{24}\\b", "BASE16NUM", ""),
|
||||
TimestampFormatFinder.findFirstFullMatch("400000005afb159a164ac980"));
|
||||
TimestampFormatFinder.findFirstFullMatch("400000005afb159a164ac980", NOOP_TIMEOUT_CHECKER));
|
||||
}
|
||||
|
||||
public void testFindFirstMatchGivenRealLogMessages() {
|
||||
|
@ -157,45 +157,47 @@ public class TimestampFormatFinderTests extends FileStructureTestCase {
|
|||
"\\b\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2},\\d{3}", "TIMESTAMP_ISO8601",
|
||||
"][INFO ][o.e.e.NodeEnvironment ] [node-0] heap size [3.9gb], compressed ordinary object pointers [true]"),
|
||||
TimestampFormatFinder.findFirstMatch("[2018-05-11T17:07:29,553][INFO ][o.e.e.NodeEnvironment ] [node-0] " +
|
||||
"heap size [3.9gb], compressed ordinary object pointers [true]"));
|
||||
"heap size [3.9gb], compressed ordinary object pointers [true]", NOOP_TIMEOUT_CHECKER));
|
||||
|
||||
assertEquals(new TimestampMatch(23, "192.168.62.101 - - [", "dd/MMM/YYYY:HH:mm:ss Z", "dd/MMM/yyyy:HH:mm:ss XX",
|
||||
"\\b\\d{2}/[A-Z]\\S{2}/\\d{4}:\\d{2}:\\d{2}:\\d{2} ", "HTTPDATE",
|
||||
"] \"POST //apiserv:8080/engine/v2/jobs HTTP/1.1\" 201 42 \"-\" \"curl/7.46.0\" 384"),
|
||||
TimestampFormatFinder.findFirstMatch("192.168.62.101 - - [29/Jun/2016:12:11:31 +0000] " +
|
||||
"\"POST //apiserv:8080/engine/v2/jobs HTTP/1.1\" 201 42 \"-\" \"curl/7.46.0\" 384"));
|
||||
"\"POST //apiserv:8080/engine/v2/jobs HTTP/1.1\" 201 42 \"-\" \"curl/7.46.0\" 384", NOOP_TIMEOUT_CHECKER));
|
||||
|
||||
assertEquals(new TimestampMatch(24, "", "MMM dd, YYYY h:mm:ss a", "MMM dd, yyyy h:mm:ss a",
|
||||
"\\b[A-Z]\\S{2,8} \\d{1,2}, \\d{4} \\d{1,2}:\\d{2}:\\d{2} [AP]M\\b", "CATALINA_DATESTAMP",
|
||||
" org.apache.tomcat.util.http.Parameters processParameters"),
|
||||
TimestampFormatFinder.findFirstMatch("Aug 29, 2009 12:03:57 AM org.apache.tomcat.util.http.Parameters processParameters"));
|
||||
TimestampFormatFinder.findFirstMatch("Aug 29, 2009 12:03:57 AM org.apache.tomcat.util.http.Parameters processParameters",
|
||||
NOOP_TIMEOUT_CHECKER));
|
||||
|
||||
assertEquals(new TimestampMatch(22, "", Arrays.asList("MMM dd HH:mm:ss", "MMM d HH:mm:ss"),
|
||||
Arrays.asList("MMM dd HH:mm:ss", "MMM d HH:mm:ss"),
|
||||
"\\b[A-Z]\\S{2,8} {1,2}\\d{1,2} \\d{2}:\\d{2}:\\d{2}\\b", "SYSLOGTIMESTAMP", " esxi1.acme.com Vpxa: " +
|
||||
"[3CB3FB90 verbose 'vpxavpxaInvtVm' opID=WFU-33d82c31] [VpxaInvtVmChangeListener] Guest DiskInfo Changed"),
|
||||
TimestampFormatFinder.findFirstMatch("Oct 19 17:04:44 esxi1.acme.com Vpxa: [3CB3FB90 verbose 'vpxavpxaInvtVm' " +
|
||||
"opID=WFU-33d82c31] [VpxaInvtVmChangeListener] Guest DiskInfo Changed"));
|
||||
"opID=WFU-33d82c31] [VpxaInvtVmChangeListener] Guest DiskInfo Changed", NOOP_TIMEOUT_CHECKER));
|
||||
|
||||
assertEquals(new TimestampMatch(10, "559550912540598297\t", "ISO8601", "ISO8601", "\\b\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}",
|
||||
"TIMESTAMP_ISO8601",
|
||||
"\t2016-04-20T21:06:53Z\t38545844\tserv02nw07\t192.168.114.28\tAuthpriv\tInfo\tsshd\tsubsystem request for sftp"),
|
||||
TimestampFormatFinder.findFirstMatch("559550912540598297\t2016-04-20T14:06:53\t2016-04-20T21:06:53Z\t38545844\tserv02nw07\t" +
|
||||
"192.168.114.28\tAuthpriv\tInfo\tsshd\tsubsystem request for sftp"));
|
||||
"192.168.114.28\tAuthpriv\tInfo\tsshd\tsubsystem request for sftp", NOOP_TIMEOUT_CHECKER));
|
||||
|
||||
assertEquals(new TimestampMatch(22, "", Arrays.asList("MMM dd HH:mm:ss", "MMM d HH:mm:ss"),
|
||||
Arrays.asList("MMM dd HH:mm:ss", "MMM d HH:mm:ss"),
|
||||
"\\b[A-Z]\\S{2,8} {1,2}\\d{1,2} \\d{2}:\\d{2}:\\d{2}\\b", "SYSLOGTIMESTAMP",
|
||||
" dnsserv named[22529]: error (unexpected RCODE REFUSED) resolving 'www.elastic.co/A/IN': 95.110.68.206#53"),
|
||||
TimestampFormatFinder.findFirstMatch("Sep 8 11:55:35 dnsserv named[22529]: error (unexpected RCODE REFUSED) resolving " +
|
||||
"'www.elastic.co/A/IN': 95.110.68.206#53"));
|
||||
"'www.elastic.co/A/IN': 95.110.68.206#53", NOOP_TIMEOUT_CHECKER));
|
||||
|
||||
assertEquals(new TimestampMatch(3, "", "YYYY-MM-dd HH:mm:ss.SSSSSS", "yyyy-MM-dd HH:mm:ss.SSSSSS",
|
||||
"\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3}", "TIMESTAMP_ISO8601",
|
||||
"|INFO |VirtualServer |1 |client 'User1'(id:2) was added to channelgroup 'Channel Admin'(id:5) by client " +
|
||||
"'User1'(id:2) in channel '3er Instanz'(id:2)"),
|
||||
TimestampFormatFinder.findFirstMatch("2018-01-06 19:22:20.106822|INFO |VirtualServer |1 |client " +
|
||||
" 'User1'(id:2) was added to channelgroup 'Channel Admin'(id:5) by client 'User1'(id:2) in channel '3er Instanz'(id:2)"));
|
||||
" 'User1'(id:2) was added to channelgroup 'Channel Admin'(id:5) by client 'User1'(id:2) in channel '3er Instanz'(id:2)",
|
||||
NOOP_TIMEOUT_CHECKER));
|
||||
|
||||
// Differs from the above as the required format is specified
|
||||
assertEquals(new TimestampMatch(3, "", "YYYY-MM-dd HH:mm:ss.SSSSSS", "yyyy-MM-dd HH:mm:ss.SSSSSS",
|
||||
|
@ -204,12 +206,12 @@ public class TimestampFormatFinderTests extends FileStructureTestCase {
|
|||
"'User1'(id:2) in channel '3er Instanz'(id:2)"),
|
||||
TimestampFormatFinder.findFirstMatch("2018-01-06 19:22:20.106822|INFO |VirtualServer |1 |client " +
|
||||
" 'User1'(id:2) was added to channelgroup 'Channel Admin'(id:5) by client 'User1'(id:2) in channel '3er Instanz'(id:2)",
|
||||
randomFrom("YYYY-MM-dd HH:mm:ss.SSSSSS", "yyyy-MM-dd HH:mm:ss.SSSSSS")));
|
||||
randomFrom("YYYY-MM-dd HH:mm:ss.SSSSSS", "yyyy-MM-dd HH:mm:ss.SSSSSS"), NOOP_TIMEOUT_CHECKER));
|
||||
|
||||
// Non-matching required format specified
|
||||
assertNull(TimestampFormatFinder.findFirstMatch("2018-01-06 19:22:20.106822|INFO |VirtualServer |1 |client " +
|
||||
" 'User1'(id:2) was added to channelgroup 'Channel Admin'(id:5) by client 'User1'(id:2) in channel '3er Instanz'(id:2)",
|
||||
randomFrom("UNIX", "EEE MMM dd YYYY HH:mm zzz")));
|
||||
randomFrom("UNIX", "EEE MMM dd YYYY HH:mm zzz"), NOOP_TIMEOUT_CHECKER));
|
||||
}
|
||||
|
||||
public void testAdjustRequiredFormat() {
|
||||
|
@ -246,18 +248,20 @@ public class TimestampFormatFinderTests extends FileStructureTestCase {
|
|||
|
||||
private void validateTimestampMatch(TimestampMatch expected, String text, long expectedEpochMs) {
|
||||
|
||||
assertEquals(expected, TimestampFormatFinder.findFirstMatch(text));
|
||||
assertEquals(expected, TimestampFormatFinder.findFirstFullMatch(text));
|
||||
assertEquals(expected, TimestampFormatFinder.findFirstMatch(text, expected.candidateIndex));
|
||||
assertEquals(expected, TimestampFormatFinder.findFirstFullMatch(text, expected.candidateIndex));
|
||||
assertNull(TimestampFormatFinder.findFirstMatch(text, Integer.MAX_VALUE));
|
||||
assertNull(TimestampFormatFinder.findFirstFullMatch(text, Integer.MAX_VALUE));
|
||||
assertEquals(expected, TimestampFormatFinder.findFirstMatch(text, randomFrom(expected.jodaTimestampFormats)));
|
||||
assertEquals(expected, TimestampFormatFinder.findFirstFullMatch(text, randomFrom(expected.jodaTimestampFormats)));
|
||||
assertEquals(expected, TimestampFormatFinder.findFirstMatch(text, randomFrom(expected.javaTimestampFormats)));
|
||||
assertEquals(expected, TimestampFormatFinder.findFirstFullMatch(text, randomFrom(expected.javaTimestampFormats)));
|
||||
assertNull(TimestampFormatFinder.findFirstMatch(text, "wrong format"));
|
||||
assertNull(TimestampFormatFinder.findFirstFullMatch(text, "wrong format"));
|
||||
assertEquals(expected, TimestampFormatFinder.findFirstMatch(text, NOOP_TIMEOUT_CHECKER));
|
||||
assertEquals(expected, TimestampFormatFinder.findFirstFullMatch(text, NOOP_TIMEOUT_CHECKER));
|
||||
assertEquals(expected, TimestampFormatFinder.findFirstMatch(text, expected.candidateIndex, NOOP_TIMEOUT_CHECKER));
|
||||
assertEquals(expected, TimestampFormatFinder.findFirstFullMatch(text, expected.candidateIndex, NOOP_TIMEOUT_CHECKER));
|
||||
assertNull(TimestampFormatFinder.findFirstMatch(text, Integer.MAX_VALUE, NOOP_TIMEOUT_CHECKER));
|
||||
assertNull(TimestampFormatFinder.findFirstFullMatch(text, Integer.MAX_VALUE, NOOP_TIMEOUT_CHECKER));
|
||||
assertEquals(expected, TimestampFormatFinder.findFirstMatch(text, randomFrom(expected.jodaTimestampFormats), NOOP_TIMEOUT_CHECKER));
|
||||
assertEquals(expected, TimestampFormatFinder.findFirstFullMatch(text, randomFrom(expected.jodaTimestampFormats),
|
||||
NOOP_TIMEOUT_CHECKER));
|
||||
assertEquals(expected, TimestampFormatFinder.findFirstMatch(text, randomFrom(expected.javaTimestampFormats), NOOP_TIMEOUT_CHECKER));
|
||||
assertEquals(expected, TimestampFormatFinder.findFirstFullMatch(text, randomFrom(expected.javaTimestampFormats),
|
||||
NOOP_TIMEOUT_CHECKER));
|
||||
assertNull(TimestampFormatFinder.findFirstMatch(text, "wrong format", NOOP_TIMEOUT_CHECKER));
|
||||
assertNull(TimestampFormatFinder.findFirstFullMatch(text, "wrong format", NOOP_TIMEOUT_CHECKER));
|
||||
|
||||
validateJodaTimestampFormats(expected.jodaTimestampFormats, text, expectedEpochMs);
|
||||
validateJavaTimestampFormats(expected.javaTimestampFormats, text, expectedEpochMs);
|
||||
|
|
Loading…
Reference in New Issue