diff --git a/x-pack/docs/en/rest-api/ml/get-category.asciidoc b/x-pack/docs/en/rest-api/ml/get-category.asciidoc index 37d0a95c14c..9e69083355b 100644 --- a/x-pack/docs/en/rest-api/ml/get-category.asciidoc +++ b/x-pack/docs/en/rest-api/ml/get-category.asciidoc @@ -62,11 +62,11 @@ roles provide these privileges. For more information, see ==== Examples The following example gets information about one category for the -`it_ops_new_logs` job: +`esxi_log` job: [source,js] -------------------------------------------------- -GET _xpack/ml/anomaly_detectors/it_ops_new_logs/results/categories +GET _xpack/ml/anomaly_detectors/esxi_log/results/categories { "page":{ "size": 1 @@ -83,14 +83,18 @@ In this example, the API returns the following information: "count": 11, "categories": [ { - "job_id": "it_ops_new_logs", - "category_id": 1, - "terms": "Actual Transaction Already Voided Reversed hostname dbserver.acme.com physicalhost esxserver1.acme.com vmhost app1.acme.com", - "regex": ".*?Actual.+?Transaction.+?Already.+?Voided.+?Reversed.+?hostname.+?dbserver.acme.com.+?physicalhost.+?esxserver1.acme.com.+?vmhost.+?app1.acme.com.*", - "max_matching_length": 137, - "examples": [ - "Actual Transaction Already Voided / Reversed;hostname=dbserver.acme.com;physicalhost=esxserver1.acme.com;vmhost=app1.acme.com" - ] + "job_id" : "esxi_log", + "category_id" : 1, + "terms" : "Vpxa verbose vpxavpxaInvtVm opID VpxaInvtVmChangeListener Guest DiskInfo Changed", + "regex" : ".*?Vpxa.+?verbose.+?vpxavpxaInvtVm.+?opID.+?VpxaInvtVmChangeListener.+?Guest.+?DiskInfo.+?Changed.*", + "max_matching_length": 154, + "examples" : [ + "Oct 19 17:04:44 esxi1.acme.com Vpxa: [3CB3FB90 verbose 'vpxavpxaInvtVm' opID=WFU-33d82c31] [VpxaInvtVmChangeListener] Guest DiskInfo Changed", + "Oct 19 17:04:45 esxi2.acme.com Vpxa: [3CA66B90 verbose 'vpxavpxaInvtVm' opID=WFU-33927856] [VpxaInvtVmChangeListener] Guest DiskInfo Changed", + "Oct 19 17:04:51 esxi1.acme.com Vpxa: [FFDBAB90 verbose 'vpxavpxaInvtVm' opID=WFU-25e0d447] [VpxaInvtVmChangeListener] Guest DiskInfo Changed", + "Oct 19 17:04:58 esxi2.acme.com Vpxa: [FFDDBB90 verbose 'vpxavpxaInvtVm' opID=WFU-bbff0134] [VpxaInvtVmChangeListener] Guest DiskInfo Changed" + ], + "grok_pattern" : ".*?%{SYSLOGTIMESTAMP:timestamp}.+?Vpxa.+?%{BASE16NUM:field}.+?verbose.+?vpxavpxaInvtVm.+?opID.+?VpxaInvtVmChangeListener.+?Guest.+?DiskInfo.+?Changed.*" } ] } diff --git a/x-pack/docs/en/rest-api/ml/resultsresource.asciidoc b/x-pack/docs/en/rest-api/ml/resultsresource.asciidoc index fba6522141b..c28ed72aedb 100644 --- a/x-pack/docs/en/rest-api/ml/resultsresource.asciidoc +++ b/x-pack/docs/en/rest-api/ml/resultsresource.asciidoc @@ -405,6 +405,13 @@ A category resource has the following properties: `examples`:: (array) A list of examples of actual values that matched the category. +`grok_pattern`:: + experimental[] (string) A Grok pattern that could be used in Logstash or an + Ingest Pipeline to extract fields from messages that match the category. This + field is experimental and may be changed or removed in a future release. The + Grok patterns that are found are not optimal, but are often a good starting + point for manual tweaking. + `job_id`:: (string) The unique identifier for the job that these results belong to. 
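For readers wondering how the new `grok_pattern` might be consumed outside Logstash or an ingest pipeline, here is a minimal standalone sketch (not part of this change). It applies the pattern returned in the `esxi_log` example above to one of that category's example messages using the same `org.elasticsearch.grok.Grok` class that this change links into the ML plugin; the class name `GrokPatternDemo` and the `main` method are illustrative only, and the constructor, `getBuiltinPatterns()` and `captures()` calls mirror those used by `GrokPatternCreator` later in this diff.

[source,java]
--------------------------------------------------
import org.elasticsearch.grok.Grok;

import java.util.Map;

// Illustrative only: applies the grok_pattern from the esxi_log category above to one of its examples.
public class GrokPatternDemo {
    public static void main(String[] args) {
        String grokPattern = ".*?%{SYSLOGTIMESTAMP:timestamp}.+?Vpxa.+?%{BASE16NUM:field}.+?verbose.+?"
                + "vpxavpxaInvtVm.+?opID.+?VpxaInvtVmChangeListener.+?Guest.+?DiskInfo.+?Changed.*";
        String message = "Oct 19 17:04:44 esxi1.acme.com Vpxa: [3CB3FB90 verbose 'vpxavpxaInvtVm' "
                + "opID=WFU-33d82c31] [VpxaInvtVmChangeListener] Guest DiskInfo Changed";

        // Same calls GrokPatternCreator uses below to build and evaluate candidate patterns.
        Grok grok = new Grok(Grok.getBuiltinPatterns(), grokPattern);
        Map<String, Object> fields = grok.captures(message);

        // Expected to print something like {timestamp=Oct 19 17:04:44, field=3CB3FB90};
        // as the docs note, the generic "field" names are meant to be renamed by hand.
        System.out.println(fields);
    }
}
--------------------------------------------------

In practice the pattern would more typically be pasted into a Logstash grok filter or an ingest pipeline grok processor after that manual tweaking.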
diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/results/CategoryDefinition.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/results/CategoryDefinition.java index 98c38241856..90d01f66f63 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/results/CategoryDefinition.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/results/CategoryDefinition.java @@ -5,6 +5,7 @@ */ package org.elasticsearch.xpack.core.ml.job.results; +import org.elasticsearch.Version; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; @@ -34,6 +35,7 @@ public class CategoryDefinition implements ToXContentObject, Writeable { public static final ParseField REGEX = new ParseField("regex"); public static final ParseField MAX_MATCHING_LENGTH = new ParseField("max_matching_length"); public static final ParseField EXAMPLES = new ParseField("examples"); + public static final ParseField GROK_PATTERN = new ParseField("grok_pattern"); // Used for QueryPage public static final ParseField RESULTS_FIELD = new ParseField("categories"); @@ -51,6 +53,7 @@ public class CategoryDefinition implements ToXContentObject, Writeable { parser.declareString(CategoryDefinition::setRegex, REGEX); parser.declareLong(CategoryDefinition::setMaxMatchingLength, MAX_MATCHING_LENGTH); parser.declareStringArray(CategoryDefinition::setExamples, EXAMPLES); + parser.declareString(CategoryDefinition::setGrokPattern, GROK_PATTERN); return parser; } @@ -61,6 +64,7 @@ public class CategoryDefinition implements ToXContentObject, Writeable { private String regex = ""; private long maxMatchingLength = 0L; private final Set examples; + private String grokPattern; public CategoryDefinition(String jobId) { this.jobId = jobId; @@ -74,6 +78,9 @@ public class CategoryDefinition implements ToXContentObject, Writeable { regex = in.readString(); maxMatchingLength = in.readLong(); examples = new TreeSet<>(in.readList(StreamInput::readString)); + if (in.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) { + grokPattern = in.readOptionalString(); + } } @Override @@ -84,6 +91,9 @@ public class CategoryDefinition implements ToXContentObject, Writeable { out.writeString(regex); out.writeLong(maxMatchingLength); out.writeStringList(new ArrayList<>(examples)); + if (out.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) { + out.writeOptionalString(grokPattern); + } } public String getJobId() { @@ -139,6 +149,14 @@ public class CategoryDefinition implements ToXContentObject, Writeable { examples.add(example); } + public String getGrokPattern() { + return grokPattern; + } + + public void setGrokPattern(String grokPattern) { + this.grokPattern = grokPattern; + } + @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(); @@ -148,6 +166,9 @@ public class CategoryDefinition implements ToXContentObject, Writeable { builder.field(REGEX.getPreferredName(), regex); builder.field(MAX_MATCHING_LENGTH.getPreferredName(), maxMatchingLength); builder.field(EXAMPLES.getPreferredName(), examples); + if (grokPattern != null) { + builder.field(GROK_PATTERN.getPreferredName(), grokPattern); + } builder.endObject(); return builder; } @@ -166,11 +187,12 @@ public class CategoryDefinition implements ToXContentObject, Writeable { && Objects.equals(this.terms, that.terms) && Objects.equals(this.regex, that.regex) && 
Objects.equals(this.maxMatchingLength, that.maxMatchingLength) - && Objects.equals(this.examples, that.examples); + && Objects.equals(this.examples, that.examples) + && Objects.equals(this.grokPattern, that.grokPattern); } @Override public int hashCode() { - return Objects.hash(jobId, categoryId, terms, regex, maxMatchingLength, examples); + return Objects.hash(jobId, categoryId, terms, regex, maxMatchingLength, examples, grokPattern); } } diff --git a/x-pack/plugin/ml/build.gradle b/x-pack/plugin/ml/build.gradle index d9d4882b00e..8b991555c06 100644 --- a/x-pack/plugin/ml/build.gradle +++ b/x-pack/plugin/ml/build.gradle @@ -46,6 +46,7 @@ dependencies { testCompile project(path: xpackModule('security'), configuration: 'testArtifacts') // ml deps + compile project(':libs:grok') compile 'net.sf.supercsv:super-csv:2.4.0' nativeBundle "org.elasticsearch.ml:ml-cpp:${project.version}@zip" testCompile 'org.ini4j:ini4j:0.5.2' diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportGetCategoriesAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportGetCategoriesAction.java index 25d0cc0cdf8..abf3a330529 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportGetCategoriesAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportGetCategoriesAction.java @@ -41,7 +41,7 @@ public class TransportGetCategoriesAction extends HandledTransportAction listener.onResponse(new GetCategoriesAction.Response(r)), listener::onFailure, client); } } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/categorization/GrokPatternCreator.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/categorization/GrokPatternCreator.java new file mode 100644 index 00000000000..04280261b26 --- /dev/null +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/categorization/GrokPatternCreator.java @@ -0,0 +1,243 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.job.categorization; + +import org.elasticsearch.common.logging.Loggers; +import org.elasticsearch.grok.Grok; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + + +/** + * Creates Grok patterns that will match all the examples in a given category_definition. + * + * The choice of field names is quite primitive. The intention is that a human will edit these. + */ +public final class GrokPatternCreator { + + private static String PREFACE = "preface"; + private static String EPILOGUE = "epilogue"; + + /** + * The first match in this list will be chosen, so it needs to be ordered + * such that more generic patterns come after more specific patterns. 
+ */ + private static final List ORDERED_CANDIDATE_GROK_PATTERNS = Arrays.asList( + new GrokPatternCandidate("TIMESTAMP_ISO8601", "timestamp"), + new GrokPatternCandidate("DATESTAMP_RFC822", "timestamp"), + new GrokPatternCandidate("DATESTAMP_RFC2822", "timestamp"), + new GrokPatternCandidate("DATESTAMP_OTHER", "timestamp"), + new GrokPatternCandidate("DATESTAMP_EVENTLOG", "timestamp"), + new GrokPatternCandidate("SYSLOGTIMESTAMP", "timestamp"), + new GrokPatternCandidate("HTTPDATE", "timestamp"), + new GrokPatternCandidate("CATALINA_DATESTAMP", "timestamp"), + new GrokPatternCandidate("TOMCAT_DATESTAMP", "timestamp"), + new GrokPatternCandidate("CISCOTIMESTAMP", "timestamp"), + new GrokPatternCandidate("DATE", "date"), + new GrokPatternCandidate("TIME", "time"), + new GrokPatternCandidate("LOGLEVEL", "loglevel"), + new GrokPatternCandidate("URI", "uri"), + new GrokPatternCandidate("UUID", "uuid"), + new GrokPatternCandidate("MAC", "macaddress"), + // Can't use \b as the breaks, because slashes are not "word" characters + new GrokPatternCandidate("PATH", "path", "(? examples) { + + // The first string in this array will end up being the empty string, and it doesn't correspond + // to an "in between" bit. Although it could be removed for "neatness", it actually makes the + // loops below slightly neater if it's left in. + // + // E.g., ".*?cat.+?sat.+?mat.*" -> [ "", "cat", "sat", "mat" ] + String[] fixedRegexBits = regex.split("\\.[*+]\\??"); + + // Create a pattern that will capture the bits in between the fixed parts of the regex + // + // E.g., ".*?cat.+?sat.+?mat.*" -> Pattern (.*?)cat(.+?)sat(.+?)mat(.*) + Pattern exampleProcessor = Pattern.compile(regex.replaceAll("(\\.[*+]\\??)", "($1)"), Pattern.DOTALL); + + List> groupsMatchesFromExamples = new ArrayList<>(fixedRegexBits.length); + for (int i = 0; i < fixedRegexBits.length; ++i) { + groupsMatchesFromExamples.add(new ArrayList<>(examples.size())); + } + for (String example : examples) { + Matcher matcher = exampleProcessor.matcher(example); + if (matcher.matches()) { + assert matcher.groupCount() == fixedRegexBits.length; + // E.g., if the input regex was ".*?cat.+?sat.+?mat.*" then the example + // "the cat sat on the mat" will result in "the ", " ", " on the ", and "" + // being added to the 4 "in between" collections in that order + for (int groupNum = 1; groupNum <= matcher.groupCount(); ++groupNum) { + groupsMatchesFromExamples.get(groupNum - 1).add(matcher.group(groupNum)); + } + } else { + // We should never get here. If we do it implies a bug in the original categorization, + // as it's produced a regex that doesn't match the examples. 
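As an aside for reviewers, the splitting and capturing steps above can be exercised in isolation. The following hypothetical standalone sketch (class name `RegexSplitDemo`, plain JDK regex only, no Grok involvement) reproduces the `split` and `replaceAll` calls from `findBestGrokMatchFromExamples` on the ".*?cat.+?sat.+?mat.*" example used in the comments:

[source,java]
--------------------------------------------------
import java.util.Arrays;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

// Illustrative only: shows what the split/capture preprocessing yields for one example string.
public class RegexSplitDemo {
    public static void main(String[] args) {
        String regex = ".*?cat.+?sat.+?mat.*";

        // Fixed parts of the category regex; element 0 is the empty string, as the comments above note.
        String[] fixedRegexBits = regex.split("\\.[*+]\\??");
        System.out.println(Arrays.toString(fixedRegexBits));   // [, cat, sat, mat]

        // Wrap each wildcard in a capture group so the variable "in between" bits can be collected.
        Pattern exampleProcessor = Pattern.compile(regex.replaceAll("(\\.[*+]\\??)", "($1)"), Pattern.DOTALL);

        Matcher matcher = exampleProcessor.matcher("the cat sat on the mat");
        if (matcher.matches()) {
            // Prints the four "in between" bits in order: "the ", " ", " on the ", ""
            for (int groupNum = 1; groupNum <= matcher.groupCount(); ++groupNum) {
                System.out.println("[" + matcher.group(groupNum) + "]");
            }
        }
        // GrokPatternCreator then tries to cover each such collection with a candidate Grok pattern.
    }
}
--------------------------------------------------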
+ assert matcher.matches() : exampleProcessor.pattern() + " did not match " + example; + Loggers.getLogger(GrokPatternCreator.class).error("[{}] Pattern [{}] did not match example [{}]", jobId, + exampleProcessor.pattern(), example); + } + } + + Map fieldNameCountStore = new HashMap<>(); + StringBuilder overallGrokPatternBuilder = new StringBuilder(); + // Finally, for each collection of "in between" bits we look for the best Grok pattern and incorporate + // it into the overall Grok pattern that will match the each example in its entirety + for (int inBetweenBitNum = 0; inBetweenBitNum < groupsMatchesFromExamples.size(); ++inBetweenBitNum) { + // Remember (from the first comment in this method) that the first element in this array is + // always the empty string + overallGrokPatternBuilder.append(fixedRegexBits[inBetweenBitNum]); + appendBestGrokMatchForStrings(fieldNameCountStore, overallGrokPatternBuilder, inBetweenBitNum == 0, + inBetweenBitNum == fixedRegexBits.length - 1, groupsMatchesFromExamples.get(inBetweenBitNum)); + } + return overallGrokPatternBuilder.toString(); + } + + /** + * Given a collection of strings, work out which (if any) of the grok patterns we're allowed + * to use matches it best. Then append the appropriate grok language to represent that finding + * onto the supplied string builder. + */ + static void appendBestGrokMatchForStrings(Map fieldNameCountStore, StringBuilder overallGrokPatternBuilder, + boolean isFirst, boolean isLast, Collection mustMatchStrings) { + + GrokPatternCandidate bestCandidate = null; + if (mustMatchStrings.isEmpty() == false) { + for (GrokPatternCandidate candidate : ORDERED_CANDIDATE_GROK_PATTERNS) { + if (mustMatchStrings.stream().allMatch(candidate.grok::match)) { + bestCandidate = candidate; + break; + } + } + } + + if (bestCandidate == null) { + if (isLast) { + overallGrokPatternBuilder.append(".*"); + } else if (isFirst || mustMatchStrings.stream().anyMatch(String::isEmpty)) { + overallGrokPatternBuilder.append(".*?"); + } else { + overallGrokPatternBuilder.append(".+?"); + } + } else { + Collection prefaces = new ArrayList<>(); + Collection epilogues = new ArrayList<>(); + populatePrefacesAndEpilogues(mustMatchStrings, bestCandidate.grok, prefaces, epilogues); + appendBestGrokMatchForStrings(fieldNameCountStore, overallGrokPatternBuilder, isFirst, false, prefaces); + overallGrokPatternBuilder.append("%{").append(bestCandidate.grokPatternName).append(':') + .append(buildFieldName(fieldNameCountStore, bestCandidate.fieldName)).append('}'); + appendBestGrokMatchForStrings(fieldNameCountStore, overallGrokPatternBuilder, false, isLast, epilogues); + } + } + + /** + * Given a collection of strings, and a grok pattern that matches some part of them all, + * return collections of the bits that come before (prefaces) and after (epilogues) the + * bit that matches. + */ + static void populatePrefacesAndEpilogues(Collection matchingStrings, Grok grok, Collection prefaces, + Collection epilogues) { + for (String s : matchingStrings) { + Map captures = grok.captures(s); + // If the pattern doesn't match then captures will be null. But we expect this + // method to only be called after validating that the pattern does match. + assert captures != null; + prefaces.add(captures.getOrDefault(PREFACE, "").toString()); + epilogues.add(captures.getOrDefault(EPILOGUE, "").toString()); + } + } + + /** + * The first time a particular field name is passed, simply return it. + * The second time return it with "2" appended. 
+ * The third time return it with "3" appended. + * Etc. + */ + static String buildFieldName(Map fieldNameCountStore, String fieldName) { + Integer numberSeen = fieldNameCountStore.compute(fieldName, (k, v) -> 1 + ((v == null) ? 0 : v)); + if (numberSeen > 1) { + return fieldName + numberSeen; + } else { + return fieldName; + } + } + + static class GrokPatternCandidate { + + final String grokPatternName; + final String fieldName; + final Grok grok; + + /** + * Pre/post breaks default to \b, but this may not be appropriate for Grok patterns that start or + * end with a non "word" character (i.e. letter, number or underscore). For such patterns use one + * of the other constructors. + * + * In cases where the Grok pattern defined by Logstash already includes conditions on what must + * come before and after the match, use one of the other constructors and specify an empty string + * for the pre and/or post breaks. + * @param grokPatternName Name of the Grok pattern to try to match - must match one defined in Logstash. + * @param fieldName Name of the field to extract from the match. + */ + GrokPatternCandidate(String grokPatternName, String fieldName) { + this(grokPatternName, fieldName, "\\b", "\\b"); + } + + GrokPatternCandidate(String grokPatternName, String fieldName, String preBreak) { + this(grokPatternName, fieldName, preBreak, "\\b"); + } + + /** + * @param grokPatternName Name of the Grok pattern to try to match - must match one defined in Logstash. + * @param fieldName Name of the field to extract from the match. + * @param preBreak Only consider the match if it's broken from the previous text by this. + * @param postBreak Only consider the match if it's broken from the following text by this. + */ + GrokPatternCandidate(String grokPatternName, String fieldName, String preBreak, String postBreak) { + this.grokPatternName = grokPatternName; + this.fieldName = fieldName; + this.grok = new Grok(Grok.getBuiltinPatterns(), "%{DATA:" + PREFACE + "}" + preBreak + "%{" + grokPatternName + ":this}" + + postBreak + "%{GREEDYDATA:" + EPILOGUE + "}"); + } + } +} diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/persistence/JobProvider.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/persistence/JobProvider.java index 4b15ef36e6a..d7b10fb622b 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/persistence/JobProvider.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/persistence/JobProvider.java @@ -98,6 +98,7 @@ import org.elasticsearch.xpack.core.ml.job.results.Result; import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper; import org.elasticsearch.xpack.core.ml.utils.MlIndicesUtils; import org.elasticsearch.xpack.core.security.support.Exceptions; +import org.elasticsearch.xpack.ml.job.categorization.GrokPatternCreator; import org.elasticsearch.xpack.ml.job.persistence.InfluencersQueryBuilder.InfluencersQuery; import org.elasticsearch.xpack.ml.job.process.autodetect.params.AutodetectParams; @@ -486,7 +487,7 @@ public class JobProvider { } } - private T parseGetHit(GetResponse getResponse, BiFunction objectParser, + private T parseGetHit(GetResponse getResponse, BiFunction objectParser, Consumer errorHandler) { BytesReference source = getResponse.getSourceAsBytesRef(); @@ -626,10 +627,11 @@ public class JobProvider { * Get a page of {@linkplain CategoryDefinition}s for the given jobId. 
* Uses a supplied client, so may run as the currently authenticated user * @param jobId the job id + * @param augment Should the category definition be augmented with a Grok pattern? * @param from Skip the first N categories. This parameter is for paging * @param size Take only this number of categories */ - public void categoryDefinitions(String jobId, Long categoryId, Integer from, Integer size, + public void categoryDefinitions(String jobId, Long categoryId, boolean augment, Integer from, Integer size, Consumer> handler, Consumer errorHandler, Client client) { if (categoryId != null && (from != null || size != null)) { @@ -663,6 +665,9 @@ public class JobProvider { XContentParser parser = XContentFactory.xContent(XContentHelper.xContentType(source)) .createParser(NamedXContentRegistry.EMPTY, LoggingDeprecationHandler.INSTANCE, stream)) { CategoryDefinition categoryDefinition = CategoryDefinition.LENIENT_PARSER.apply(parser, null); + if (augment) { + augmentWithGrokPattern(categoryDefinition); + } results.add(categoryDefinition); } catch (IOException e) { throw new ElasticsearchParseException("failed to parse category definition", e); @@ -674,6 +679,17 @@ public class JobProvider { }, e -> errorHandler.accept(mapAuthFailure(e, jobId, GetCategoriesAction.NAME))), client::search); } + void augmentWithGrokPattern(CategoryDefinition categoryDefinition) { + List examples = categoryDefinition.getExamples(); + String regex = categoryDefinition.getRegex(); + if (examples.isEmpty() || regex.isEmpty()) { + categoryDefinition.setGrokPattern(""); + } else { + categoryDefinition.setGrokPattern(GrokPatternCreator.findBestGrokMatchFromExamples(categoryDefinition.getJobId(), + regex, examples)); + } + } + /** * Search for anomaly records with the parameters in the * {@link RecordsQueryBuilder} diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/integration/AutodetectResultProcessorIT.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/integration/AutodetectResultProcessorIT.java index 484d1648fbb..09bb3f75916 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/integration/AutodetectResultProcessorIT.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/integration/AutodetectResultProcessorIT.java @@ -461,7 +461,7 @@ public class AutodetectResultProcessorIT extends MlSingleNodeTestCase { AtomicReference errorHolder = new AtomicReference<>(); AtomicReference> resultHolder = new AtomicReference<>(); CountDownLatch latch = new CountDownLatch(1); - jobProvider.categoryDefinitions(JOB_ID, categoryId, null, null, r -> { + jobProvider.categoryDefinitions(JOB_ID, categoryId, false, null, null, r -> { resultHolder.set(r); latch.countDown(); }, e -> { diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/categorization/GrokPatternCreatorTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/categorization/GrokPatternCreatorTests.java new file mode 100644 index 00000000000..4189dc35f0c --- /dev/null +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/categorization/GrokPatternCreatorTests.java @@ -0,0 +1,232 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ +package org.elasticsearch.xpack.ml.job.categorization; + +import org.elasticsearch.grok.Grok; +import org.elasticsearch.test.ESTestCase; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; + +import static org.hamcrest.Matchers.containsInAnyOrder; + +public class GrokPatternCreatorTests extends ESTestCase { + + public void testBuildFieldName() { + Map fieldNameCountStore = new HashMap<>(); + assertEquals("field", GrokPatternCreator.buildFieldName(fieldNameCountStore, "field")); + assertEquals("field2", GrokPatternCreator.buildFieldName(fieldNameCountStore, "field")); + assertEquals("field3", GrokPatternCreator.buildFieldName(fieldNameCountStore, "field")); + assertEquals("timestamp", GrokPatternCreator.buildFieldName(fieldNameCountStore, "timestamp")); + assertEquals("field4", GrokPatternCreator.buildFieldName(fieldNameCountStore, "field")); + assertEquals("uri", GrokPatternCreator.buildFieldName(fieldNameCountStore, "uri")); + assertEquals("timestamp2", GrokPatternCreator.buildFieldName(fieldNameCountStore, "timestamp")); + assertEquals("field5", GrokPatternCreator.buildFieldName(fieldNameCountStore, "field")); + } + + public void testPopulatePrefacesAndEpiloguesGivenTimestamp() { + + Collection matchingStrings = Arrays.asList("[2018-01-25T15:33:23] DEBUG ", + "[2018-01-24T12:33:23] ERROR ", + "junk [2018-01-22T07:33:23] INFO ", + "[2018-01-21T03:33:23] DEBUG "); + Grok grok = new GrokPatternCreator.GrokPatternCandidate("TIMESTAMP_ISO8601", "timestamp").grok; + Collection prefaces = new ArrayList<>(); + Collection epilogues = new ArrayList<>(); + + GrokPatternCreator.populatePrefacesAndEpilogues(matchingStrings, grok, prefaces, epilogues); + + assertThat(prefaces, containsInAnyOrder("[", "[", "junk [", "[")); + assertThat(epilogues, containsInAnyOrder("] DEBUG ", "] ERROR ", "] INFO ", "] DEBUG ")); + } + + public void testPopulatePrefacesAndEpiloguesGivenEmailAddress() { + + Collection matchingStrings = Arrays.asList("before alice@acme.com after", + "abc bob@acme.com xyz", + "carol@acme.com"); + Grok grok = new GrokPatternCreator.GrokPatternCandidate("EMAILADDRESS", "email").grok; + Collection prefaces = new ArrayList<>(); + Collection epilogues = new ArrayList<>(); + + GrokPatternCreator.populatePrefacesAndEpilogues(matchingStrings, grok, prefaces, epilogues); + + assertThat(prefaces, containsInAnyOrder("before ", "abc ", "")); + assertThat(epilogues, containsInAnyOrder(" after", " xyz", "")); + } + + public void testAppendBestGrokMatchForStringsGivenTimestampsAndLogLevels() { + + Collection mustMatchStrings = Arrays.asList("[2018-01-25T15:33:23] DEBUG ", + "[2018-01-24T12:33:23] ERROR ", + "junk [2018-01-22T07:33:23] INFO ", + "[2018-01-21T03:33:23] DEBUG "); + + Map fieldNameCountStore = new HashMap<>(); + StringBuilder overallGrokPatternBuilder = new StringBuilder(); + + GrokPatternCreator.appendBestGrokMatchForStrings(fieldNameCountStore, overallGrokPatternBuilder, false, false, mustMatchStrings); + + assertEquals(".+?%{TIMESTAMP_ISO8601:timestamp}.+?%{LOGLEVEL:loglevel}.+?", overallGrokPatternBuilder.toString()); + } + + public void testAppendBestGrokMatchForStringsGivenNumbersInBrackets() { + + Collection mustMatchStrings = Arrays.asList("(-2)", + " (-3)", + " (4)", + " (-5) "); + + Map fieldNameCountStore = new HashMap<>(); + StringBuilder overallGrokPatternBuilder = new StringBuilder(); + + GrokPatternCreator.appendBestGrokMatchForStrings(fieldNameCountStore, overallGrokPatternBuilder, 
false, false, mustMatchStrings); + + assertEquals(".+?%{NUMBER:field}.+?", overallGrokPatternBuilder.toString()); + } + + public void testAppendBestGrokMatchForStringsGivenNegativeNumbersWithoutBreak() { + + Collection mustMatchStrings = Arrays.asList("before-2 ", + "prior to-3", + "-4"); + + Map fieldNameCountStore = new HashMap<>(); + StringBuilder overallGrokPatternBuilder = new StringBuilder(); + + GrokPatternCreator.appendBestGrokMatchForStrings(fieldNameCountStore, overallGrokPatternBuilder, false, false, mustMatchStrings); + + // It seems sensible that we don't detect these suffices as either base 10 or base 16 numbers + assertEquals(".+?", overallGrokPatternBuilder.toString()); + } + + public void testAppendBestGrokMatchForStringsGivenHexNumbers() { + + Collection mustMatchStrings = Arrays.asList(" abc", + " 123", + " -123", + "1f is hex"); + + Map fieldNameCountStore = new HashMap<>(); + StringBuilder overallGrokPatternBuilder = new StringBuilder(); + + GrokPatternCreator.appendBestGrokMatchForStrings(fieldNameCountStore, overallGrokPatternBuilder, false, false, mustMatchStrings); + + assertEquals(".*?%{BASE16NUM:field}.*?", overallGrokPatternBuilder.toString()); + } + + public void testAppendBestGrokMatchForStringsGivenHostnamesWithNumbers() { + + Collection mustMatchStrings = Arrays.asList(" fieldNameCountStore = new HashMap<>(); + StringBuilder overallGrokPatternBuilder = new StringBuilder(); + + GrokPatternCreator.appendBestGrokMatchForStrings(fieldNameCountStore, overallGrokPatternBuilder, false, false, mustMatchStrings); + + // We don't want the .1. in the middle to get detected as a hex number + assertEquals(".+?", overallGrokPatternBuilder.toString()); + } + + public void testAppendBestGrokMatchForStringsGivenEmailAddresses() { + + Collection mustMatchStrings = Arrays.asList("before alice@acme.com after", + "abc bob@acme.com xyz", + "carol@acme.com"); + + Map fieldNameCountStore = new HashMap<>(); + StringBuilder overallGrokPatternBuilder = new StringBuilder(); + + GrokPatternCreator.appendBestGrokMatchForStrings(fieldNameCountStore, overallGrokPatternBuilder, false, false, mustMatchStrings); + + assertEquals(".*?%{EMAILADDRESS:email}.*?", overallGrokPatternBuilder.toString()); + } + + public void testAppendBestGrokMatchForStringsGivenUris() { + + Collection mustMatchStrings = Arrays.asList("main site https://www.elastic.co/ with trailing slash", + "https://www.elastic.co/guide/en/x-pack/current/ml-configuring-categories.html#ml-configuring-categories is a section", + "download today from https://www.elastic.co/downloads"); + + Map fieldNameCountStore = new HashMap<>(); + StringBuilder overallGrokPatternBuilder = new StringBuilder(); + + GrokPatternCreator.appendBestGrokMatchForStrings(fieldNameCountStore, overallGrokPatternBuilder, false, false, mustMatchStrings); + + assertEquals(".*?%{URI:uri}.*?", overallGrokPatternBuilder.toString()); + } + + public void testAppendBestGrokMatchForStringsGivenPaths() { + + Collection mustMatchStrings = Arrays.asList("on Mac /Users/dave", + "on Windows C:\\Users\\dave", + "on Linux /home/dave"); + + Map fieldNameCountStore = new HashMap<>(); + StringBuilder overallGrokPatternBuilder = new StringBuilder(); + + GrokPatternCreator.appendBestGrokMatchForStrings(fieldNameCountStore, overallGrokPatternBuilder, false, false, mustMatchStrings); + + assertEquals(".+?%{PATH:path}.*?", overallGrokPatternBuilder.toString()); + } + + public void testFindBestGrokMatchFromExamplesGivenNamedLogs() { + + String regex = 
".*?linux.+?named.+?error.+?unexpected.+?RCODE.+?REFUSED.+?resolving.*"; + Collection examples = Arrays.asList( + "Sep 8 11:55:06 linux named[22529]: error (unexpected RCODE REFUSED) resolving 'elastic.slack.com/A/IN': 95.110.64.205#53", + "Sep 8 11:55:08 linux named[22529]: error (unexpected RCODE REFUSED) resolving 'slack-imgs.com/A/IN': 95.110.64.205#53", + "Sep 8 11:55:35 linux named[22529]: error (unexpected RCODE REFUSED) resolving 'www.elastic.co/A/IN': 95.110.68.206#53", + "Sep 8 11:55:42 linux named[22529]: error (unexpected RCODE REFUSED) resolving 'b.akamaiedge.net/A/IN': 95.110.64.205#53"); + + assertEquals(".*?%{SYSLOGTIMESTAMP:timestamp}.+?linux.+?named.+?%{NUMBER:field}.+?error.+?" + + "unexpected.+?RCODE.+?REFUSED.+?resolving.+?%{QUOTEDSTRING:field2}.+?%{IP:ipaddress}.+?%{NUMBER:field3}.*", + GrokPatternCreator.findBestGrokMatchFromExamples("foo", regex, examples)); + } + + public void testFindBestGrokMatchFromExamplesGivenCatalinaLogs() { + + String regex = ".*?org\\.apache\\.tomcat\\.util\\.http\\.Parameters.+?processParameters.+?WARNING.+?Parameters.+?" + + "Invalid.+?chunk.+?ignored.*"; + // The embedded newline ensures the regular expressions we're using are compiled with Pattern.DOTALL + Collection examples = Arrays.asList( + "Aug 29, 2009 12:03:33 AM org.apache.tomcat.util.http.Parameters processParameters\nWARNING: Parameters: " + + "Invalid chunk ignored.", + "Aug 29, 2009 12:03:40 AM org.apache.tomcat.util.http.Parameters processParameters\nWARNING: Parameters: " + + "Invalid chunk ignored.", + "Aug 29, 2009 12:03:45 AM org.apache.tomcat.util.http.Parameters processParameters\nWARNING: Parameters: " + + "Invalid chunk ignored.", + "Aug 29, 2009 12:03:57 AM org.apache.tomcat.util.http.Parameters processParameters\nWARNING: Parameters: " + + "Invalid chunk ignored."); + + assertEquals(".*?%{CATALINA_DATESTAMP:timestamp}.+?org\\.apache\\.tomcat\\.util\\.http\\.Parameters.+?processParameters.+?" + + "WARNING.+?Parameters.+?Invalid.+?chunk.+?ignored.*", + GrokPatternCreator.findBestGrokMatchFromExamples("foo", regex, examples)); + } + + public void testFindBestGrokMatchFromExamplesGivenMultiTimestampLogs() { + + String regex = ".*?Authpriv.+?Info.+?sshd.+?subsystem.+?request.+?for.+?sftp.*"; + // Two timestamps: one local, one UTC + Collection examples = Arrays.asList( + "559550912540598297\t2016-04-20T14:06:53\t2016-04-20T21:06:53Z\t38545844\tserv02nw07\t192.168.114.28\tAuthpriv\t" + + "Info\tsshd\tsubsystem request for sftp", + "559550912548986880\t2016-04-20T14:06:53\t2016-04-20T21:06:53Z\t9049724\tserv02nw03\t10.120.48.147\tAuthpriv\t" + + "Info\tsshd\tsubsystem request for sftp", + "559550912548986887\t2016-04-20T14:06:53\t2016-04-20T21:06:53Z\t884343\tserv02tw03\t192.168.121.189\tAuthpriv\t" + + "Info\tsshd\tsubsystem request for sftp", + "559550912603512850\t2016-04-20T14:06:53\t2016-04-20T21:06:53Z\t8907014\tserv02nw01\t192.168.118.208\tAuthpriv\t" + + "Info\tsshd\tsubsystem request for sftp"); + + assertEquals(".*?%{NUMBER:field}.+?%{TIMESTAMP_ISO8601:timestamp}.+?%{TIMESTAMP_ISO8601:timestamp2}.+?%{NUMBER:field2}.+?" 
+ + "%{IP:ipaddress}.+?Authpriv.+?Info.+?sshd.+?subsystem.+?request.+?for.+?sftp.*", + GrokPatternCreator.findBestGrokMatchFromExamples("foo", regex, examples)); + } +} diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/persistence/JobProviderTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/persistence/JobProviderTests.java index 485fe44a95f..9fea904a99f 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/persistence/JobProviderTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/persistence/JobProviderTests.java @@ -61,7 +61,6 @@ import java.util.Date; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.concurrent.ExecutionException; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Consumer; @@ -235,8 +234,7 @@ public class JobProviderTests extends ESTestCase { }); } - public void testBuckets_OneBucketNoInterim() - throws InterruptedException, ExecutionException, IOException { + public void testBuckets_OneBucketNoInterim() throws IOException { String jobId = "TestJobIdentification"; Date now = new Date(); List> source = new ArrayList<>(); @@ -268,8 +266,7 @@ public class JobProviderTests extends ESTestCase { ".*")); } - public void testBuckets_OneBucketInterim() - throws InterruptedException, ExecutionException, IOException { + public void testBuckets_OneBucketInterim() throws IOException { String jobId = "TestJobIdentification"; Date now = new Date(); List> source = new ArrayList<>(); @@ -302,8 +299,7 @@ public class JobProviderTests extends ESTestCase { assertFalse(queryString.matches("(?s).*is_interim.*")); } - public void testBuckets_UsingBuilder() - throws InterruptedException, ExecutionException, IOException { + public void testBuckets_UsingBuilder() throws IOException { String jobId = "TestJobIdentification"; Date now = new Date(); List> source = new ArrayList<>(); @@ -339,8 +335,7 @@ public class JobProviderTests extends ESTestCase { assertFalse(queryString.matches("(?s).*is_interim.*")); } - public void testBucket_NoBucketNoExpand() - throws InterruptedException, ExecutionException, IOException { + public void testBucket_NoBucketNoExpand() throws IOException { String jobId = "TestJobIdentification"; Long timestamp = 98765432123456789L; List> source = new ArrayList<>(); @@ -357,8 +352,7 @@ public class JobProviderTests extends ESTestCase { assertEquals(ResourceNotFoundException.class, holder[0].getClass()); } - public void testBucket_OneBucketNoExpand() - throws InterruptedException, ExecutionException, IOException { + public void testBucket_OneBucketNoExpand() throws IOException { String jobId = "TestJobIdentification"; Date now = new Date(); List> source = new ArrayList<>(); @@ -384,7 +378,7 @@ public class JobProviderTests extends ESTestCase { assertEquals(now, b.getTimestamp()); } - public void testRecords() throws InterruptedException, ExecutionException, IOException { + public void testRecords() throws IOException { String jobId = "TestJobIdentification"; Date now = new Date(); List> source = new ArrayList<>(); @@ -431,8 +425,7 @@ public class JobProviderTests extends ESTestCase { assertEquals("irrascible", records.get(1).getFunction()); } - public void testRecords_UsingBuilder() - throws InterruptedException, ExecutionException, IOException { + public void testRecords_UsingBuilder() throws IOException { String jobId = "TestJobIdentification"; Date now = new Date(); List> source = new ArrayList<>(); @@ -485,7 
+478,7 @@ public class JobProviderTests extends ESTestCase { assertEquals("irrascible", records.get(1).getFunction()); } - public void testBucketRecords() throws InterruptedException, ExecutionException, IOException { + public void testBucketRecords() throws IOException { String jobId = "TestJobIdentification"; Date now = new Date(); Bucket bucket = mock(Bucket.class); @@ -532,7 +525,7 @@ public class JobProviderTests extends ESTestCase { assertEquals("irrascible", records.get(1).getFunction()); } - public void testexpandBucket() throws InterruptedException, ExecutionException, IOException { + public void testexpandBucket() throws IOException { String jobId = "TestJobIdentification"; Date now = new Date(); Bucket bucket = new Bucket("foo", now, 22); @@ -559,8 +552,7 @@ public class JobProviderTests extends ESTestCase { assertEquals(400L, records); } - public void testCategoryDefinitions() - throws InterruptedException, ExecutionException, IOException { + public void testCategoryDefinitions() throws IOException { String jobId = "TestJobIdentification"; String terms = "the terms and conditions are not valid here"; List> source = new ArrayList<>(); @@ -580,15 +572,14 @@ public class JobProviderTests extends ESTestCase { JobProvider provider = createProvider(client); @SuppressWarnings({"unchecked", "rawtypes"}) QueryPage[] holder = new QueryPage[1]; - provider.categoryDefinitions(jobId, null, from, size, r -> holder[0] = r, + provider.categoryDefinitions(jobId, null, false, from, size, r -> holder[0] = r, e -> {throw new RuntimeException(e);}, client); QueryPage categoryDefinitions = holder[0]; assertEquals(1L, categoryDefinitions.count()); assertEquals(terms, categoryDefinitions.results().get(0).getTerms()); } - public void testCategoryDefinition() - throws InterruptedException, ExecutionException, IOException { + public void testCategoryDefinition() throws IOException { String jobId = "TestJobIdentification"; String terms = "the terms and conditions are not valid here"; @@ -603,14 +594,14 @@ public class JobProviderTests extends ESTestCase { JobProvider provider = createProvider(client); @SuppressWarnings({"unchecked", "rawtypes"}) QueryPage[] holder = new QueryPage[1]; - provider.categoryDefinitions(jobId, categoryId, null, null, + provider.categoryDefinitions(jobId, categoryId, false, null, null, r -> holder[0] = r, e -> {throw new RuntimeException(e);}, client); QueryPage categoryDefinitions = holder[0]; assertEquals(1L, categoryDefinitions.count()); assertEquals(terms, categoryDefinitions.results().get(0).getTerms()); } - public void testInfluencers_NoInterim() throws InterruptedException, ExecutionException, IOException { + public void testInfluencers_NoInterim() throws IOException { String jobId = "TestJobIdentificationForInfluencers"; Date now = new Date(); List> source = new ArrayList<>(); @@ -670,7 +661,7 @@ public class JobProviderTests extends ESTestCase { assertEquals(5.0, records.get(1).getInitialInfluencerScore(), 0.00001); } - public void testInfluencers_WithInterim() throws InterruptedException, ExecutionException, IOException { + public void testInfluencers_WithInterim() throws IOException { String jobId = "TestJobIdentificationForInfluencers"; Date now = new Date(); List> source = new ArrayList<>(); @@ -730,7 +721,7 @@ public class JobProviderTests extends ESTestCase { assertEquals(5.0, records.get(1).getInitialInfluencerScore(), 0.00001); } - public void testModelSnapshots() throws InterruptedException, ExecutionException, IOException { + public void testModelSnapshots() 
throws IOException { String jobId = "TestJobIdentificationForInfluencers"; Date now = new Date(); List> source = new ArrayList<>(); @@ -851,8 +842,7 @@ public class JobProviderTests extends ESTestCase { return getResponse; } - private static SearchResponse createSearchResponse(List> source) - throws IOException { + private static SearchResponse createSearchResponse(List> source) throws IOException { SearchResponse response = mock(SearchResponse.class); List list = new ArrayList<>(); diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/results/CategoryDefinitionTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/results/CategoryDefinitionTests.java index fdaa2850823..ee7d4ad4b7a 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/results/CategoryDefinitionTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/results/CategoryDefinitionTests.java @@ -25,6 +25,9 @@ public class CategoryDefinitionTests extends AbstractSerializingTestCase