diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/SearchIT.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/SearchIT.java index 3151e9badb6..9c9c5425f00 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/SearchIT.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/SearchIT.java @@ -59,9 +59,6 @@ import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.aggregations.BucketOrder; import org.elasticsearch.search.aggregations.bucket.range.Range; import org.elasticsearch.search.aggregations.bucket.range.RangeAggregationBuilder; -import org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms; -import org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsAggregationBuilder; -import org.elasticsearch.search.aggregations.bucket.significant.heuristics.PercentageScore; import org.elasticsearch.search.aggregations.bucket.terms.Terms; import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder; import org.elasticsearch.search.aggregations.matrix.stats.MatrixStats; @@ -270,33 +267,6 @@ public class SearchIT extends ESRestHighLevelClientTestCase { assertEquals(2, type2.getDocCount()); assertEquals(0, type2.getAggregations().asList().size()); } - - public void testSearchWithSignificantTermsAgg() throws IOException { - SearchRequest searchRequest = new SearchRequest(); - SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); - searchSourceBuilder.query(new MatchQueryBuilder("num","50")); - searchSourceBuilder.aggregation(new SignificantTermsAggregationBuilder("agg1", ValueType.STRING) - .field("type.keyword") - .minDocCount(1) - .significanceHeuristic(new PercentageScore())); - searchSourceBuilder.size(0); - searchRequest.source(searchSourceBuilder); - SearchResponse searchResponse = execute(searchRequest, highLevelClient()::search, highLevelClient()::searchAsync); - assertSearchHeader(searchResponse); - 
assertNull(searchResponse.getSuggest()); - assertEquals(Collections.emptyMap(), searchResponse.getProfileResults()); - assertEquals(0, searchResponse.getHits().getHits().length); - assertEquals(0f, searchResponse.getHits().getMaxScore(), 0f); - SignificantTerms significantTermsAgg = searchResponse.getAggregations().get("agg1"); - assertEquals("agg1", significantTermsAgg.getName()); - assertEquals(1, significantTermsAgg.getBuckets().size()); - SignificantTerms.Bucket type1 = significantTermsAgg.getBucketByKey("type1"); - assertEquals(1, type1.getDocCount()); - assertEquals(1, type1.getSubsetDf()); - assertEquals(1, type1.getSubsetSize()); - assertEquals(3, type1.getSupersetDf()); - assertEquals(1d/3d, type1.getSignificanceScore(), 0d); - } public void testSearchWithRangeAgg() throws IOException { { diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java new file mode 100644 index 00000000000..97bee813938 --- /dev/null +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java @@ -0,0 +1,121 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.client.documentation; + +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.LatchedActionListener; +import org.elasticsearch.client.ESRestHighLevelClientTestCase; +import org.elasticsearch.client.RequestOptions; +import org.elasticsearch.client.RestHighLevelClient; +import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.protocol.xpack.ml.PutJobRequest; +import org.elasticsearch.protocol.xpack.ml.PutJobResponse; +import org.elasticsearch.protocol.xpack.ml.job.config.AnalysisConfig; +import org.elasticsearch.protocol.xpack.ml.job.config.DataDescription; +import org.elasticsearch.protocol.xpack.ml.job.config.Detector; +import org.elasticsearch.protocol.xpack.ml.job.config.Job; + +import java.util.Collections; +import java.util.Date; +import java.util.List; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; + +import static org.hamcrest.Matchers.greaterThan; + +public class MlClientDocumentationIT extends ESRestHighLevelClientTestCase { + + public void testCreateJob() throws Exception { + RestHighLevelClient client = highLevelClient(); + + //tag::x-pack-ml-put-job-detector + Detector.Builder detectorBuilder = new Detector.Builder() + .setFunction("sum") // <1> + .setFieldName("total") // <2> + .setDetectorDescription("Sum of total"); // <3> + //end::x-pack-ml-put-job-detector + + //tag::x-pack-ml-put-job-analysis-config + List<Detector> detectors = Collections.singletonList(detectorBuilder.build()); // <1> + AnalysisConfig.Builder analysisConfigBuilder = new AnalysisConfig.Builder(detectors) // <2> + .setBucketSpan(TimeValue.timeValueMinutes(10)); // <3> + //end::x-pack-ml-put-job-analysis-config + + //tag::x-pack-ml-put-job-data-description + DataDescription.Builder dataDescriptionBuilder = new DataDescription.Builder() + .setTimeField("timestamp"); // <1> + 
//end::x-pack-ml-put-job-data-description + + { + String id = "job_1"; + + //tag::x-pack-ml-put-job-config + Job.Builder jobBuilder = new Job.Builder(id) // <1> + .setAnalysisConfig(analysisConfigBuilder) // <2> + .setDataDescription(dataDescriptionBuilder) // <3> + .setDescription("Total sum of requests"); // <4> + //end::x-pack-ml-put-job-config + + //tag::x-pack-ml-put-job-request + PutJobRequest request = new PutJobRequest(jobBuilder.build()); // <1> + //end::x-pack-ml-put-job-request + + //tag::x-pack-ml-put-job-execute + PutJobResponse response = client.machineLearning().putJob(request, RequestOptions.DEFAULT); + //end::x-pack-ml-put-job-execute + + //tag::x-pack-ml-put-job-response + Date createTime = response.getResponse().getCreateTime(); // <1> + //end::x-pack-ml-put-job-response + assertThat(createTime.getTime(), greaterThan(0L)); + } + { + String id = "job_2"; + Job.Builder jobBuilder = new Job.Builder(id) + .setAnalysisConfig(analysisConfigBuilder) + .setDataDescription(dataDescriptionBuilder) + .setDescription("Total sum of requests"); + + PutJobRequest request = new PutJobRequest(jobBuilder.build()); + // tag::x-pack-ml-put-job-execute-listener + ActionListener<PutJobResponse> listener = new ActionListener<PutJobResponse>() { + @Override + public void onResponse(PutJobResponse response) { + // <1> + } + + @Override + public void onFailure(Exception e) { + // <2> + } + }; + // end::x-pack-ml-put-job-execute-listener + + // Replace the empty listener by a blocking listener in test + final CountDownLatch latch = new CountDownLatch(1); + listener = new LatchedActionListener<>(listener, latch); + + // tag::x-pack-ml-put-job-execute-async + client.machineLearning().putJobAsync(request, RequestOptions.DEFAULT, listener); // <1> + // end::x-pack-ml-put-job-execute-async + + assertTrue(latch.await(30L, TimeUnit.SECONDS)); + } + } +} diff --git a/distribution/bwc/build.gradle b/distribution/bwc/build.gradle index b84bf1df2fe..b515c606cc3 100644 --- a/distribution/bwc/build.gradle +++ 
b/distribution/bwc/build.gradle @@ -157,7 +157,7 @@ subprojects { environment('JAVA_HOME', getJavaHome(it, 8)) } else if ("6.2".equals(bwcBranch)) { environment('JAVA_HOME', getJavaHome(it, 9)) - } else if (["6.3", "6.x"].contains(bwcBranch)) { + } else if (["6.3", "6.4", "6.x"].contains(bwcBranch)) { environment('JAVA_HOME', getJavaHome(it, 10)) } else { environment('JAVA_HOME', project.compilerJavaHome) diff --git a/docs/java-rest/high-level/ml/put_job.asciidoc b/docs/java-rest/high-level/ml/put_job.asciidoc new file mode 100644 index 00000000000..d51bb63d405 --- /dev/null +++ b/docs/java-rest/high-level/ml/put_job.asciidoc @@ -0,0 +1,161 @@ +[[java-rest-high-x-pack-ml-put-job]] +=== Put Job API + +The Put Job API can be used to create a new {ml} job +in the cluster. The API accepts a `PutJobRequest` object +as a request and returns a `PutJobResponse`. + +[[java-rest-high-x-pack-ml-put-job-request]] +==== Put Job Request + +A `PutJobRequest` requires the following argument: + +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests}/MlClientDocumentationIT.java[x-pack-ml-put-job-request] +-------------------------------------------------- +<1> The configuration of the {ml} job to create as a `Job` + +[[java-rest-high-x-pack-ml-put-job-config]] +==== Job Configuration + +The `Job` object contains all the details about the {ml} job +configuration. 
+ +A `Job` requires the following arguments: + +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests}/MlClientDocumentationIT.java[x-pack-ml-put-job-config] +-------------------------------------------------- +<1> The job ID +<2> An analysis configuration +<3> A data description +<4> Optionally, a human-readable description + +[[java-rest-high-x-pack-ml-put-job-analysis-config]] +==== Analysis Configuration + +The analysis configuration of the {ml} job is defined in the `AnalysisConfig`. +`AnalysisConfig` reflects all the configuration +settings that can be defined using the REST API. + +Using the REST API, we could define this analysis configuration: + +[source,js] +-------------------------------------------------- +"analysis_config" : { + "bucket_span" : "10m", + "detectors" : [ + { + "detector_description" : "Sum of total", + "function" : "sum", + "field_name" : "total" + } + ] +} +-------------------------------------------------- +// NOTCONSOLE + +Using the `AnalysisConfig` object and the high level REST client, the list +of detectors must be built first. 
+ +An example of building a `Detector` instance is as follows: + +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests}/MlClientDocumentationIT.java[x-pack-ml-put-job-detector] +-------------------------------------------------- +<1> The function to use +<2> The field to apply the function to +<3> Optionally, a human-readable description + +Then the same configuration would be: + +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests}/MlClientDocumentationIT.java[x-pack-ml-put-job-analysis-config] +-------------------------------------------------- +<1> Create a list of detectors +<2> Pass the list of detectors to the analysis config builder constructor +<3> The bucket span + +[[java-rest-high-x-pack-ml-put-job-data-description]] +==== Data Description + +After defining the analysis config, the next thing to define is the +data description, using a `DataDescription` instance. `DataDescription` +reflects all the configuration settings that can be defined using the +REST API. + +Using the REST API, we could define this data description: + +[source,js] +-------------------------------------------------- +"data_description" : { + "time_field" : "timestamp" +} +-------------------------------------------------- +// NOTCONSOLE + +Using the `DataDescription` object and the high level REST client, the same +configuration would be: + +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests}/MlClientDocumentationIT.java[x-pack-ml-put-job-data-description] +-------------------------------------------------- +<1> The time field + +[[java-rest-high-x-pack-ml-put-job-execution]] +==== Execution + +The Put Job API can be executed through a `MachineLearningClient` +instance. 
Such an instance can be retrieved from a `RestHighLevelClient` +using the `machineLearning()` method: + +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests}/MlClientDocumentationIT.java[x-pack-ml-put-job-execute] +-------------------------------------------------- + +[[java-rest-high-x-pack-ml-put-job-response]] +==== Response + +The returned `PutJobResponse` returns the full representation of +the new {ml} job if it has been successfully created. This will +contain the creation time and other fields initialized using +default values: + +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests}/MlClientDocumentationIT.java[x-pack-ml-put-job-response] +-------------------------------------------------- +<1> The creation time is a field that was not passed in the `Job` object in the request + +[[java-rest-high-x-pack-ml-put-job-async]] +==== Asynchronous Execution + +This request can be executed asynchronously: + +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests}/MlClientDocumentationIT.java[x-pack-ml-put-job-execute-async] +-------------------------------------------------- +<1> The `PutJobRequest` to execute and the `ActionListener` to use when +the execution completes + +The asynchronous method does not block and returns immediately. Once it is +completed the `ActionListener` is called back using the `onResponse` method +if the execution successfully completed or using the `onFailure` method if +it failed. 
+ +A typical listener for `PutJobResponse` looks like: + +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests}/MlClientDocumentationIT.java[x-pack-ml-put-job-execute-listener] +-------------------------------------------------- +<1> Called when the execution is successfully completed. The response is +provided as an argument +<2> Called in case of failure. The raised exception is provided as an argument diff --git a/docs/java-rest/high-level/supported-apis.asciidoc b/docs/java-rest/high-level/supported-apis.asciidoc index 9d7d66434f7..808546f2c27 100644 --- a/docs/java-rest/high-level/supported-apis.asciidoc +++ b/docs/java-rest/high-level/supported-apis.asciidoc @@ -200,6 +200,14 @@ include::licensing/put-license.asciidoc[] include::licensing/get-license.asciidoc[] include::licensing/delete-license.asciidoc[] +== Machine Learning APIs + +The Java High Level REST Client supports the following Machine Learning APIs: + +* <<java-rest-high-x-pack-ml-put-job>> + +include::ml/put_job.asciidoc[] + == Migration APIs The Java High Level REST Client supports the following Migration APIs: diff --git a/libs/dissect/build.gradle b/libs/dissect/build.gradle new file mode 100644 index 00000000000..c09a2a4ebd1 --- /dev/null +++ b/libs/dissect/build.gradle @@ -0,0 +1,50 @@ +import org.elasticsearch.gradle.precommit.PrecommitTasks + +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +archivesBaseName = 'elasticsearch-dissect' + +dependencies { + if (isEclipse == false || project.path == ":libs:dissect-tests") { + testCompile("org.elasticsearch.test:framework:${version}") { + exclude group: 'org.elasticsearch', module: 'dissect' + } + } + testCompile "com.fasterxml.jackson.core:jackson-core:${versions.jackson}" + testCompile("com.fasterxml.jackson.core:jackson-annotations:${versions.jackson}") + testCompile("com.fasterxml.jackson.core:jackson-databind:${versions.jackson}") +} + +forbiddenApisMain { + signaturesURLs = [PrecommitTasks.getResource('/forbidden/jdk-signatures.txt')] +} + +if (isEclipse) { + // in eclipse the project is under a fake root, we need to change around the source sets + sourceSets { + if (project.path == ":libs:dissect") { + main.java.srcDirs = ['java'] + main.resources.srcDirs = ['resources'] + } else { + test.java.srcDirs = ['java'] + test.resources.srcDirs = ['resources'] + } + } +} diff --git a/libs/dissect/src/main/eclipse-build.gradle b/libs/dissect/src/main/eclipse-build.gradle new file mode 100644 index 00000000000..c2b72bd21e1 --- /dev/null +++ b/libs/dissect/src/main/eclipse-build.gradle @@ -0,0 +1,3 @@ + +// this is just shell gradle file for eclipse to have separate projects for dissect src and tests +apply from: '../../build.gradle' diff --git a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectException.java b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectException.java new file mode 100644 index 00000000000..a2f1ab33640 --- /dev/null +++ 
b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectException.java @@ -0,0 +1,57 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.dissect; + +/** + * Parent class for all dissect related exceptions. Consumers may catch this exception or more specific child exceptions. 
+ */ +public abstract class DissectException extends RuntimeException { + DissectException(String message) { + super(message); + } + + /** + * Error while parsing a dissect pattern + */ + static class PatternParse extends DissectException { + PatternParse(String pattern, String reason) { + super("Unable to parse pattern: " + pattern + " Reason: " + reason); + } + } + + /** + * Error while parsing a dissect key + */ + static class KeyParse extends DissectException { + KeyParse(String key, String reason) { + super("Unable to parse key: " + key + " Reason: " + reason); + } + } + + /** + * Unable to find a match between pattern and source string + */ + static class FindMatch extends DissectException { + FindMatch(String pattern, String source) { + super("Unable to find match for dissect pattern: " + pattern + " against source: " + source); + + } + } +} diff --git a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectKey.java b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectKey.java new file mode 100644 index 00000000000..67a6842182d --- /dev/null +++ b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectKey.java @@ -0,0 +1,191 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.dissect; + +import java.util.EnumSet; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + *

A Key of a dissect pattern. This class models the name and modifiers and provides some validation.

+ *

For dissect pattern of {@code %{a} %{+a} %{b}} the dissect keys are: + *

    + *
  • {@code a}
  • + *
  • {@code +a}
  • + *
  • {@code b}
  • + *
+ * This class represents a single key. + *

A single key is composed of a name and its modifiers. For the key {@code +a}, {@code a} is the name and {@code +} is the modifier. + * @see DissectParser + */ +public final class DissectKey { + private static final Pattern LEFT_MODIFIER_PATTERN = Pattern.compile("([+*&?])(.*?)(->)?$", Pattern.DOTALL); + private static final Pattern RIGHT_PADDING_PATTERN = Pattern.compile("^(.*?)(->)?$", Pattern.DOTALL); + private static final Pattern APPEND_WITH_ORDER_PATTERN = Pattern.compile("[+](.*?)(/)([0-9]+)(->)?$", Pattern.DOTALL); + private final Modifier modifier; + private boolean skip; + private boolean skipRightPadding; + private int appendPosition; + private String name; + + /** + * Constructor - parses the String key into its name and modifier(s) + * + * @param key The key without the leading %{ or trailing }, for example {@code a->} + */ + DissectKey(String key) { + skip = key == null || key.isEmpty(); + modifier = Modifier.findModifier(key); + switch (modifier) { + case NONE: + Matcher matcher = RIGHT_PADDING_PATTERN.matcher(key); + while (matcher.find()) { + name = matcher.group(1); + skipRightPadding = matcher.group(2) != null; + } + skip = name.isEmpty(); + break; + case NAMED_SKIP: + matcher = LEFT_MODIFIER_PATTERN.matcher(key); + while (matcher.find()) { + name = matcher.group(2); + skipRightPadding = matcher.group(3) != null; + } + skip = true; + break; + case APPEND: + matcher = LEFT_MODIFIER_PATTERN.matcher(key); + while (matcher.find()) { + name = matcher.group(2); + skipRightPadding = matcher.group(3) != null; + } + break; + case FIELD_NAME: + matcher = LEFT_MODIFIER_PATTERN.matcher(key); + while (matcher.find()) { + name = matcher.group(2); + skipRightPadding = matcher.group(3) != null; + } + break; + case FIELD_VALUE: + matcher = LEFT_MODIFIER_PATTERN.matcher(key); + while (matcher.find()) { + name = matcher.group(2); + skipRightPadding = matcher.group(3) != null; + } + break; + case APPEND_WITH_ORDER: + matcher = 
APPEND_WITH_ORDER_PATTERN.matcher(key); + while (matcher.find()) { + name = matcher.group(1); + appendPosition = Short.valueOf(matcher.group(3)); + skipRightPadding = matcher.group(4) != null; + } + break; + } + + if (name == null || (name.isEmpty() && !skip)) { + throw new DissectException.KeyParse(key, "The key name could not be determined"); + } + } + + /** + * Copy constructor to explicitly override the modifier. + * @param key The key to copy (except for the modifier) + * @param modifier the modifier to use for this copy + */ + DissectKey(DissectKey key, DissectKey.Modifier modifier){ + this.modifier = modifier; + this.skipRightPadding = key.skipRightPadding; + this.skip = key.skip; + this.name = key.name; + this.appendPosition = key.appendPosition; + } + + Modifier getModifier() { + return modifier; + } + + boolean skip() { + return skip; + } + + boolean skipRightPadding() { + return skipRightPadding; + } + + int getAppendPosition() { + return appendPosition; + } + + String getName() { + return name; + } + + //generated + @Override + public String toString() { + return "DissectKey{" + + "modifier=" + modifier + + ", skip=" + skip + + ", appendPosition=" + appendPosition + + ", name='" + name + '\'' + + '}'; + } + + public enum Modifier { + NONE(""), APPEND_WITH_ORDER("/"), APPEND("+"), FIELD_NAME("*"), FIELD_VALUE("&"), NAMED_SKIP("?"); + + private static final Pattern MODIFIER_PATTERN = Pattern.compile("[/+*&?]"); + + private final String modifier; + + @Override + public String toString() { + return modifier; + } + + Modifier(final String modifier) { + this.modifier = modifier; + } + + //package private for testing + static Modifier fromString(String modifier) { + return EnumSet.allOf(Modifier.class).stream().filter(km -> km.modifier.equals(modifier)) + .findFirst().orElseThrow(() -> new IllegalArgumentException("Found invalid modifier.")); //throw should never happen + } + + private static Modifier findModifier(String key) { + Modifier modifier = Modifier.NONE; 
+ if (key != null && !key.isEmpty()) { + Matcher matcher = MODIFIER_PATTERN.matcher(key); + int matches = 0; + while (matcher.find()) { + Modifier priorModifier = modifier; + modifier = Modifier.fromString(matcher.group()); + if (++matches > 1 && !(APPEND.equals(priorModifier) && APPEND_WITH_ORDER.equals(modifier))) { + throw new DissectException.KeyParse(key, "multiple modifiers are not allowed."); + } + } + } + return modifier; + } + } +} diff --git a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectMatch.java b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectMatch.java new file mode 100644 index 00000000000..9217413e075 --- /dev/null +++ b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectMatch.java @@ -0,0 +1,198 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.dissect; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +/** + * Represents the matches of a {@link DissectParser#parse(String)}. Handles the appending and referencing based on the key instruction. 
+ */ +final class DissectMatch { + + private final String appendSeparator; + private final Map<String, String> results; + private final Map<String, String> simpleResults; + private final Map<String, ReferenceResult> referenceResults; + private final Map<String, AppendResult> appendResults; + private int implicitAppendOrder = -1000; + private final int maxMatches; + private final int maxResults; + private final int appendCount; + private final int referenceCount; + private final int simpleCount; + private int matches = 0; + + DissectMatch(String appendSeparator, int maxMatches, int maxResults, int appendCount, int referenceCount) { + if (maxMatches <= 0 || maxResults <= 0) { + throw new IllegalArgumentException("Expected results are zero, can not construct DissectMatch");//should never happen + } + this.maxMatches = maxMatches; + this.maxResults = maxResults; + this.appendCount = appendCount; + this.referenceCount = referenceCount; + this.appendSeparator = appendSeparator; + results = new HashMap<>(maxResults); + this.simpleCount = maxMatches - referenceCount - appendCount; + simpleResults = simpleCount <= 0 ? null : new HashMap<>(simpleCount); + referenceResults = referenceCount <= 0 ? null : new HashMap<>(referenceCount); + appendResults = appendCount <= 0 ? 
null : new HashMap<>(appendCount); + } + + /** + * Add the key/value that was found as result of the parsing + * @param key the {@link DissectKey} + * @param value the discovered value for the key + */ + void add(DissectKey key, String value) { + matches++; + if (key.skip()) { + return; + } + switch (key.getModifier()) { + case NONE: + simpleResults.put(key.getName(), value); + break; + case APPEND: + appendResults.computeIfAbsent(key.getName(), k -> new AppendResult(appendSeparator)).addValue(value, implicitAppendOrder++); + break; + case APPEND_WITH_ORDER: + appendResults.computeIfAbsent(key.getName(), + k -> new AppendResult(appendSeparator)).addValue(value, key.getAppendPosition()); + break; + case FIELD_NAME: + referenceResults.computeIfAbsent(key.getName(), k -> new ReferenceResult()).setKey(value); + break; + case FIELD_VALUE: + referenceResults.computeIfAbsent(key.getName(), k -> new ReferenceResult()).setValue(value); + break; + } + } + + boolean fullyMatched() { + return matches == maxMatches; + } + + /** + * Checks if results are valid. + * @param results the results to check + * @return true if all dissect keys have been matched and the results are of the expected size. + */ + boolean isValid(Map<String, String> results) { + return fullyMatched() && results.size() == maxResults; + } + + /** + * Gets all the current matches. Pass the results of this to isValid to determine if a fully successful match has occurred. + * + * @return the map of the results. + */ + Map<String, String> getResults() { + results.clear(); + if (simpleCount > 0) { + results.putAll(simpleResults); + } + if (referenceCount > 0) { + referenceResults.forEach((k, v) -> results.put(v.getKey(), v.getValue())); + } + if (appendCount > 0) { + appendResults.forEach((k, v) -> results.put(k, v.getAppendResult())); + } + + return results; + } + + /** + * a result that will need to be part of an append operation. 
+ */ + private static final class AppendResult { + private final List<AppendValue> values = new ArrayList<>(); + private final String appendSeparator; + + private AppendResult(String appendSeparator) { + this.appendSeparator = appendSeparator; + } + + private void addValue(String value, int order) { + values.add(new AppendValue(value, order)); + } + + private String getAppendResult() { + Collections.sort(values); + return values.stream().map(AppendValue::getValue).collect(Collectors.joining(appendSeparator)); + } + } + + /** + * An appendable value that can be sorted based on the provided order + */ + private static final class AppendValue implements Comparable<AppendValue> { + private final String value; + private final int order; + + private AppendValue(String value, int order) { + this.value = value; + this.order = order; + } + + private String getValue() { + return value; + } + + private int getOrder() { + return order; + } + + @Override + public int compareTo(AppendValue o) { + return Integer.compare(this.order, o.getOrder()); + } + } + + /** + * A result that needs to be converted to a key/value reference + */ + private static final class ReferenceResult { + + private String key; + + private String getKey() { + return key; + } + + private String getValue() { + return value; + } + + private String value; + + private void setValue(String value) { + this.value = value; + } + + private void setKey(String key) { + this.key = key; + } + } +} diff --git a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java new file mode 100644 index 00000000000..407d73134b6 --- /dev/null +++ b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java @@ -0,0 +1,310 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. 
Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.dissect; + +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.EnumSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.Function; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +/** + *

Splits (dissects) a string into its parts based on a pattern.

A dissect pattern is composed of a set of keys and delimiters. + * For example the dissect pattern:

%{a} %{b},%{c}
has 3 keys (a,b,c) and two delimiters (space and comma). This pattern will + * match a string of the form:
foo bar,baz
and will result in a key/value pairing of
a=foo, b=bar, and c=baz.
+ *

Matches are all or nothing. For example, the same pattern will NOT match

foo bar baz
since all of the delimiters did not + * match. (the comma did not match) + *

Dissect patterns can optionally have modifiers. These modifiers instruct the parser to change its behavior. For example the + * dissect pattern of

%{a},%{b}:%{c}
would not match
foo,bar,baz
since the colon never matches. + *

Modifiers appear to the left or the right of the key name. The supported modifiers are: + *

    + *
  • {@code ->} Instructs the parser to ignore repeating delimiters to the right of the key. Example:
    + * pattern: {@code %{a->} %{b} %{c}}
    + * string: {@code foo         bar baz}
    + * result: {@code a=foo, b=bar, c=baz}
    + * 
  • + *
  • {@code +} Instructs the parser to append this key's value to the value of the prior key with the same name. + * Example:
    + * pattern: {@code %{a} %{+a} %{+a}}
    + * string: {@code foo bar baz}
    + * result: {@code a=foobarbaz}
    + * 
  • + *
  • {@code /} Instructs the parser to append this key's value to the value of a key based on the order specified after the + * {@code /}. Requires the {@code +} modifier to also be present in the key. Example:
    + * pattern: {@code %{a} %{+a/2} %{+a/1}}
    + * string: {@code foo bar baz}
    + * result: {@code a=foobazbar}
    + * 
    + *
  • + *
  • {@code *} Instructs the parser to ignore the name of this key and instead use the matched value of this key as the key name. + * Requires another key with the same name and the {@code &} modifier to be the value. Example:
    + * pattern: {@code %{*a} %{b} %{&a}}
    + * string: {@code foo bar baz}
    + * result: {@code foo=baz, b=bar}
    + * 
  • + *
  • {@code &} Instructs the parser to ignore this key and place the matched value to a key of the same name with the {@code *} modifier. + * Requires another key with the same name and the {@code *} modifier. + * Example:
    + * pattern: {@code %{*a} %{b} %{&a}}
    + * string: {@code foo bar baz}
    + * result: {@code foo=baz, b=bar}
    + * 
  • + *
  • {@code ?} Instructs the parser to ignore this key. The key name exists only for the purpose of human readability. Example + *
    + *  pattern: {@code %{a} %{?skipme} %{c}}
    + *  string: {@code foo bar baz}
    + *  result: {@code a=foo, c=baz}
    + * 
    + *
+ *

Empty key names are also supported. They behave just like the {@code ?} modifier, except the name is not required. + * The result will simply be ignored. Example + *

+ * pattern: {@code %{a} %{} %{c}}
+ * string: {@code foo bar baz}
+ * result: {@code a=foo, c=baz}
+ * 
+ + *

+ * Inspired by the Logstash Dissect Filter by Guy Boertje + */ +public final class DissectParser { + private static final Pattern LEADING_DELIMITER_PATTERN = Pattern.compile("^(.*?)%"); + private static final Pattern KEY_DELIMITER_FIELD_PATTERN = Pattern.compile("%\\{([^}]*?)}([^%]*)", Pattern.DOTALL); + private static final EnumSet ASSOCIATE_MODIFIERS = EnumSet.of( + DissectKey.Modifier.FIELD_NAME, + DissectKey.Modifier.FIELD_VALUE); + private static final EnumSet APPEND_MODIFIERS = EnumSet.of( + DissectKey.Modifier.APPEND, + DissectKey.Modifier.APPEND_WITH_ORDER); + private static final Function KEY_NAME = val -> val.getKey().getName(); + private final List matchPairs; + private final String pattern; + private String leadingDelimiter = ""; + private final int maxMatches; + private final int maxResults; + private final int appendCount; + private final int referenceCount; + private final String appendSeparator; + + public DissectParser(String pattern, String appendSeparator) { + this.pattern = pattern; + this.appendSeparator = appendSeparator == null ? 
"" : appendSeparator; + Matcher matcher = LEADING_DELIMITER_PATTERN.matcher(pattern); + while (matcher.find()) { + leadingDelimiter = matcher.group(1); + } + List matchPairs = new ArrayList<>(); + matcher = KEY_DELIMITER_FIELD_PATTERN.matcher(pattern.substring(leadingDelimiter.length())); + while (matcher.find()) { + DissectKey key = new DissectKey(matcher.group(1)); + String delimiter = matcher.group(2); + matchPairs.add(new DissectPair(key, delimiter)); + } + this.maxMatches = matchPairs.size(); + this.maxResults = Long.valueOf(matchPairs.stream() + .filter(dissectPair -> !dissectPair.getKey().skip()).map(KEY_NAME).distinct().count()).intValue(); + if (this.maxMatches == 0 || maxResults == 0) { + throw new DissectException.PatternParse(pattern, "Unable to find any keys or delimiters."); + } + //append validation - look through all of the keys to see if there are any keys that need to participate in an append operation + // but don't have the '+' defined + Set appendKeyNames = matchPairs.stream() + .filter(dissectPair -> APPEND_MODIFIERS.contains(dissectPair.getKey().getModifier())) + .map(KEY_NAME).distinct().collect(Collectors.toSet()); + if (appendKeyNames.size() > 0) { + List modifiedMatchPairs = new ArrayList<>(matchPairs.size()); + for (DissectPair p : matchPairs) { + if (p.getKey().getModifier().equals(DissectKey.Modifier.NONE) && appendKeyNames.contains(p.getKey().getName())) { + modifiedMatchPairs.add(new DissectPair(new DissectKey(p.getKey(), DissectKey.Modifier.APPEND), p.getDelimiter())); + } else { + modifiedMatchPairs.add(p); + } + } + matchPairs = modifiedMatchPairs; + } + appendCount = appendKeyNames.size(); + + //reference validation - ensure that '*' and '&' come in pairs + Map> referenceGroupings = matchPairs.stream() + .filter(dissectPair -> ASSOCIATE_MODIFIERS.contains(dissectPair.getKey().getModifier())) + .collect(Collectors.groupingBy(KEY_NAME)); + for (Map.Entry> entry : referenceGroupings.entrySet()) { + if (entry.getValue().size() != 2) 
{ + throw new DissectException.PatternParse(pattern, "Found invalid key/reference associations: '" + + entry.getValue().stream().map(KEY_NAME).collect(Collectors.joining(",")) + + "' Please ensure each '*' is matched with a matching '&"); + } + } + + referenceCount = referenceGroupings.size() * 2; + this.matchPairs = Collections.unmodifiableList(matchPairs); + } + + + /** + *

Entry point to dissect a string into it's parts.

+ * + * @param inputString The string to dissect + * @return the key/value Map of the results + * @throws DissectException if unable to dissect a pair into it's parts. + */ + public Map parse(String inputString) { + /** + * + * This implements a naive string matching algorithm. The string is walked left to right, comparing each byte against + * another string's bytes looking for matches. If the bytes match, then a second cursor looks ahead to see if all the bytes + * of the other string matches. If they all match, record it and advances the primary cursor to the match point. If it can not match + * all of the bytes then progress the main cursor. Repeat till the end of the input string. Since the string being searching for + * (the delimiter) is generally small and rare the naive approach is efficient. + * + * In this case the the string that is walked is the input string, and the string being searched for is the current delimiter. + * For example for a dissect pattern of {@code %{a},%{b}:%{c}} the delimiters (comma then colon) are searched for in the + * input string. At class construction the list of keys+delimiters are found (dissectPairs), which allows the use of that ordered + * list to know which delimiter to use for the search. The delimiters is progressed once the current delimiter is matched. + * + * There are two special cases that requires additional parsing beyond the standard naive algorithm. Consecutive delimiters should + * results in a empty matches unless the {@code ->} is provided. For example given the dissect pattern of + * {@code %{a},%{b},%{c},%{d}} and input string of {@code foo,,,} the match should be successful with empty values for b,c and d. + * However, if the key modifier {@code ->}, is present it will simply skip over any delimiters just to the right of the key + * without assigning any values. For example {@code %{a->},{%b}} will match the input string of {@code foo,,,,,,bar} with a=foo and + * b=bar. 
+ * + */ + DissectMatch dissectMatch = new DissectMatch(appendSeparator, maxMatches, maxResults, appendCount, referenceCount); + Iterator it = matchPairs.iterator(); + //ensure leading delimiter matches + if (inputString != null && inputString.length() > leadingDelimiter.length() + && leadingDelimiter.equals(inputString.substring(0, leadingDelimiter.length()))) { + byte[] input = inputString.getBytes(StandardCharsets.UTF_8); + //grab the first key/delimiter pair + DissectPair dissectPair = it.next(); + DissectKey key = dissectPair.getKey(); + byte[] delimiter = dissectPair.getDelimiter().getBytes(StandardCharsets.UTF_8); + //start dissection after the first delimiter + int i = leadingDelimiter.length(); + int valueStart = i; + int lookAheadMatches; + //start walking the input string byte by byte, look ahead for matches where needed + //if a match is found jump forward to the end of the match + for (; i < input.length; i++) { + lookAheadMatches = 0; + //potential match between delimiter and input string + if (delimiter.length > 0 && input[i] == delimiter[0]) { + //look ahead to see if the entire delimiter matches the input string + for (int j = 0; j < delimiter.length; j++) { + if (i + j < input.length && input[i + j] == delimiter[j]) { + lookAheadMatches++; + } + } + //found a full delimiter match + if (lookAheadMatches == delimiter.length) { + //record the key/value tuple + byte[] value = Arrays.copyOfRange(input, valueStart, i); + dissectMatch.add(key, new String(value, StandardCharsets.UTF_8)); + //jump to the end of the match + i += lookAheadMatches; + //look for consecutive delimiters (e.g. 
a,,,,d,e) + while (i < input.length) { + lookAheadMatches = 0; + for (int j = 0; j < delimiter.length; j++) { + if (i + j < input.length && input[i + j] == delimiter[j]) { + lookAheadMatches++; + } + } + //found consecutive delimiters + if (lookAheadMatches == delimiter.length) { + //jump to the end of the match + i += lookAheadMatches; + if (!key.skipRightPadding()) { + //progress the keys/delimiter if possible + if (!it.hasNext()) { + break; //the while loop + } + dissectPair = it.next(); + key = dissectPair.getKey(); + //add the key with an empty value for the empty delimiter + dissectMatch.add(key, ""); + } + } else { + break; //the while loop + } + } + //progress the keys/delimiter if possible + if (!it.hasNext()) { + break; //the for loop + } + dissectPair = it.next(); + key = dissectPair.getKey(); + delimiter = dissectPair.getDelimiter().getBytes(StandardCharsets.UTF_8); + //i is always one byte after the last found delimiter, aka the start of the next value + valueStart = i; + } + } + } + //the last key, grab the rest of the input (unless consecutive delimiters already grabbed the last key) + //and there is no trailing delimiter + if (!dissectMatch.fullyMatched() && delimiter.length == 0 ) { + byte[] value = Arrays.copyOfRange(input, valueStart, input.length); + String valueString = new String(value, StandardCharsets.UTF_8); + dissectMatch.add(key, valueString); + } + } + Map results = dissectMatch.getResults(); + + if (!dissectMatch.isValid(results)) { + throw new DissectException.FindMatch(pattern, inputString); + } + return results; + } + + /** + * A tuple class to hold the dissect key and delimiter + */ + private class DissectPair { + + private final DissectKey key; + private final String delimiter; + + private DissectPair(DissectKey key, String delimiter) { + this.key = key; + this.delimiter = delimiter; + } + + private DissectKey getKey() { + return key; + } + + private String getDelimiter() { + return delimiter; + } + } + +} + + + diff --git 
a/libs/dissect/src/test/eclipse-build.gradle b/libs/dissect/src/test/eclipse-build.gradle new file mode 100644 index 00000000000..56d632f23b1 --- /dev/null +++ b/libs/dissect/src/test/eclipse-build.gradle @@ -0,0 +1,7 @@ + +// this is just shell gradle file for eclipse to have separate projects for dissect src and tests +apply from: '../../build.gradle' + +dependencies { + testCompile project(':libs:dissect') +} diff --git a/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectKeyTests.java b/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectKeyTests.java new file mode 100644 index 00000000000..0f3f7ed041d --- /dev/null +++ b/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectKeyTests.java @@ -0,0 +1,178 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.dissect; + +import org.elasticsearch.test.ESTestCase; +import org.hamcrest.CoreMatchers; + +import java.util.EnumSet; +import java.util.List; +import java.util.stream.Collectors; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; + +public class DissectKeyTests extends ESTestCase { + + public void testNoModifier() { + String keyName = randomAlphaOfLengthBetween(1, 10); + DissectKey dissectKey = new DissectKey(keyName); + assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.NONE)); + assertThat(dissectKey.skip(), is(false)); + assertThat(dissectKey.skipRightPadding(), is(false)); + assertThat(dissectKey.getAppendPosition(), equalTo(0)); + assertThat(dissectKey.getName(), equalTo(keyName)); + } + + public void testAppendModifier() { + String keyName = randomAlphaOfLengthBetween(1, 10); + DissectKey dissectKey = new DissectKey("+" + keyName); + assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.APPEND)); + assertThat(dissectKey.skip(), is(false)); + assertThat(dissectKey.skipRightPadding(), is(false)); + assertThat(dissectKey.getAppendPosition(), equalTo(0)); + assertThat(dissectKey.getName(), equalTo(keyName)); + } + + public void testAppendWithOrderModifier() { + String keyName = randomAlphaOfLengthBetween(1, 10); + int length = randomIntBetween(1, 100); + DissectKey dissectKey = new DissectKey("+" + keyName + "/" + length); + assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.APPEND_WITH_ORDER)); + assertThat(dissectKey.skip(), is(false)); + assertThat(dissectKey.skipRightPadding(), is(false)); + assertThat(dissectKey.getAppendPosition(), equalTo(length)); + assertThat(dissectKey.getName(), equalTo(keyName)); + } + + public void testAppendWithOrderModifierNoName() { + int length = randomIntBetween(1, 100); + DissectException e = expectThrows(DissectException.class, () -> new DissectKey("+/" + length)); + assertThat(e.getMessage(), 
CoreMatchers.containsString("Unable to parse key")); + } + + public void testOrderModifierWithoutAppend() { + String keyName = randomAlphaOfLengthBetween(1, 10); + int length = randomIntBetween(1, 100); + DissectException e = expectThrows(DissectException.class, () -> new DissectKey(keyName + "/" + length)); + assertThat(e.getMessage(), CoreMatchers.containsString("Unable to parse key")); + } + + public void testFieldNameModifier() { + String keyName = randomAlphaOfLengthBetween(1, 10); + DissectKey dissectKey = new DissectKey("*" + keyName); + assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.FIELD_NAME)); + assertThat(dissectKey.skip(), is(false)); + assertThat(dissectKey.skipRightPadding(), is(false)); + assertThat(dissectKey.getAppendPosition(), equalTo(0)); + assertThat(dissectKey.getName(), equalTo(keyName)); + } + + public void testFieldValueModifiers() { + String keyName = randomAlphaOfLengthBetween(1, 10); + DissectKey dissectKey = new DissectKey("&" + keyName); + assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.FIELD_VALUE)); + assertThat(dissectKey.skip(), is(false)); + assertThat(dissectKey.skipRightPadding(), is(false)); + assertThat(dissectKey.getAppendPosition(), equalTo(0)); + assertThat(dissectKey.getName(), equalTo(keyName)); + } + + public void testRightPaddingModifiers() { + String keyName = randomAlphaOfLengthBetween(1, 10); + DissectKey dissectKey = new DissectKey(keyName + "->"); + assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.NONE)); + assertThat(dissectKey.skip(), is(false)); + assertThat(dissectKey.skipRightPadding(), is(true)); + assertThat(dissectKey.getAppendPosition(), equalTo(0)); + assertThat(dissectKey.getName(), equalTo(keyName)); + + dissectKey = new DissectKey("*" + keyName + "->"); + assertThat(dissectKey.skipRightPadding(), is(true)); + + dissectKey = new DissectKey("&" + keyName + "->"); + assertThat(dissectKey.skipRightPadding(), is(true)); + + dissectKey = new DissectKey("+" 
+ keyName + "->"); + assertThat(dissectKey.skipRightPadding(), is(true)); + + dissectKey = new DissectKey("?" + keyName + "->"); + assertThat(dissectKey.skipRightPadding(), is(true)); + + dissectKey = new DissectKey("+" + keyName + "/2->"); + assertThat(dissectKey.skipRightPadding(), is(true)); + } + + public void testMultipleLeftModifiers() { + String keyName = randomAlphaOfLengthBetween(1, 10); + List validModifiers = EnumSet.allOf(DissectKey.Modifier.class).stream() + .filter(m -> !m.equals(DissectKey.Modifier.NONE)) + .map(DissectKey.Modifier::toString) + .collect(Collectors.toList()); + String modifier1 = randomFrom(validModifiers); + String modifier2 = randomFrom(validModifiers); + DissectException e = expectThrows(DissectException.class, () -> new DissectKey(modifier1 + modifier2 + keyName)); + assertThat(e.getMessage(), CoreMatchers.containsString("Unable to parse key")); + } + + public void testSkipKey() { + String keyName = ""; + DissectKey dissectKey = new DissectKey(keyName); + assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.NONE)); + assertThat(dissectKey.skip(), is(true)); + assertThat(dissectKey.skipRightPadding(), is(false)); + assertThat(dissectKey.getAppendPosition(), equalTo(0)); + assertThat(dissectKey.getName(), equalTo(keyName)); + } + public void testNamedSkipKey() { + String keyName = "myname"; + DissectKey dissectKey = new DissectKey("?" 
+keyName); + assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.NAMED_SKIP)); + assertThat(dissectKey.skip(), is(true)); + assertThat(dissectKey.skipRightPadding(), is(false)); + assertThat(dissectKey.getAppendPosition(), equalTo(0)); + assertThat(dissectKey.getName(), equalTo(keyName)); + } + + public void testSkipKeyWithPadding() { + String keyName = ""; + DissectKey dissectKey = new DissectKey(keyName + "->"); + assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.NONE)); + assertThat(dissectKey.skip(), is(true)); + assertThat(dissectKey.skipRightPadding(), is(true)); + assertThat(dissectKey.getAppendPosition(), equalTo(0)); + assertThat(dissectKey.getName(), equalTo(keyName)); + } + public void testNamedEmptySkipKeyWithPadding() { + String keyName = ""; + DissectKey dissectKey = new DissectKey("?" +keyName + "->"); + assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.NAMED_SKIP)); + assertThat(dissectKey.skip(), is(true)); + assertThat(dissectKey.skipRightPadding(), is(true)); + assertThat(dissectKey.getAppendPosition(), equalTo(0)); + assertThat(dissectKey.getName(), equalTo(keyName)); + } + + public void testInvalidModifiers() { + //should never happen due to regex + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> DissectKey.Modifier.fromString("x")); + assertThat(e.getMessage(), CoreMatchers.containsString("invalid modifier")); + } +} diff --git a/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectMatchTests.java b/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectMatchTests.java new file mode 100644 index 00000000000..d562afb6363 --- /dev/null +++ b/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectMatchTests.java @@ -0,0 +1,93 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. 
Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.dissect; + +import org.elasticsearch.common.collect.MapBuilder; +import org.elasticsearch.test.ESTestCase; + +import java.nio.charset.StandardCharsets; +import java.util.Map; +import java.util.stream.IntStream; + +import static org.hamcrest.Matchers.equalTo; + +public class DissectMatchTests extends ESTestCase { + + public void testIllegalArgs() { + expectThrows(IllegalArgumentException.class, () -> new DissectMatch("", 0, 1, 0, 0)); + expectThrows(IllegalArgumentException.class, () -> new DissectMatch("", 1, 0, 0, 0)); + } + + public void testValidAndFullyMatched() { + int expectedMatches = randomIntBetween(1, 26); + DissectMatch dissectMatch = new DissectMatch("", expectedMatches, expectedMatches, 0, 0); + IntStream.range(97, 97 + expectedMatches) //allow for a-z values + .forEach(i -> dissectMatch.add(new DissectKey(new String(new byte[]{(byte) i}, StandardCharsets.UTF_8)), "")); + assertThat(dissectMatch.fullyMatched(), equalTo(true)); + assertThat(dissectMatch.isValid(dissectMatch.getResults()), equalTo(true)); + } + + public void testNotValidAndFullyMatched() { + int expectedMatches = randomIntBetween(1, 26); + DissectMatch dissectMatch = new DissectMatch("", expectedMatches, expectedMatches, 0, 0); + IntStream.range(97, 97 + expectedMatches - 1) //allow for a-z values + .forEach(i -> dissectMatch.add(new DissectKey(new String(new 
byte[]{(byte) i}, StandardCharsets.UTF_8)), "")); + assertThat(dissectMatch.fullyMatched(), equalTo(false)); + assertThat(dissectMatch.isValid(dissectMatch.getResults()), equalTo(false)); + } + + public void testGetResultsIdempotent(){ + int expectedMatches = randomIntBetween(1, 26); + DissectMatch dissectMatch = new DissectMatch("", expectedMatches, expectedMatches, 0, 0); + IntStream.range(97, 97 + expectedMatches) //allow for a-z values + .forEach(i -> dissectMatch.add(new DissectKey(new String(new byte[]{(byte) i}, StandardCharsets.UTF_8)), "")); + assertThat(dissectMatch.getResults(), equalTo(dissectMatch.getResults())); + } + + public void testAppend(){ + DissectMatch dissectMatch = new DissectMatch("-", 3, 1, 3, 0); + dissectMatch.add(new DissectKey("+a"), "x"); + dissectMatch.add(new DissectKey("+a"), "y"); + dissectMatch.add(new DissectKey("+a"), "z"); + Map results = dissectMatch.getResults(); + assertThat(dissectMatch.isValid(results), equalTo(true)); + assertThat(results, equalTo(MapBuilder.newMapBuilder().put("a", "x-y-z").map())); + } + + public void testAppendWithOrder(){ + DissectMatch dissectMatch = new DissectMatch("-", 3, 1, 3, 0); + dissectMatch.add(new DissectKey("+a/3"), "x"); + dissectMatch.add(new DissectKey("+a"), "y"); + dissectMatch.add(new DissectKey("+a/1"), "z"); + Map results = dissectMatch.getResults(); + assertThat(dissectMatch.isValid(results), equalTo(true)); + assertThat(results, equalTo(MapBuilder.newMapBuilder().put("a", "y-z-x").map())); + } + + public void testReference(){ + DissectMatch dissectMatch = new DissectMatch("-", 2, 1, 0, 1); + dissectMatch.add(new DissectKey("&a"), "x"); + dissectMatch.add(new DissectKey("*a"), "y"); + Map results = dissectMatch.getResults(); + assertThat(dissectMatch.isValid(results), equalTo(true)); + assertThat(results, equalTo(MapBuilder.newMapBuilder().put("y", "x").map())); + } + +} diff --git a/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectParserTests.java 
b/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectParserTests.java new file mode 100644 index 00000000000..c22cec98eb7 --- /dev/null +++ b/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectParserTests.java @@ -0,0 +1,386 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.dissect; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.elasticsearch.test.ESTestCase; +import org.hamcrest.CoreMatchers; +import org.hamcrest.Matchers; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +import static com.carrotsearch.randomizedtesting.RandomizedTest.randomAsciiAlphanumOfLengthBetween; + +public class DissectParserTests extends ESTestCase { + + public void testJavaDocExamples() { + assertMatch("%{a} %{b},%{c}", "foo bar,baz", Arrays.asList("a", "b", "c"), Arrays.asList("foo", "bar", "baz")); + assertMiss("%{a},%{b}:%{c}", "foo,bar,baz"); + assertMatch("%{a->} %{b} %{c}", "foo bar baz", Arrays.asList("a", "b", "c"), Arrays.asList("foo", "bar", "baz")); + assertMatch("%{a} %{+a} %{+a}", "foo bar baz", Arrays.asList("a"), Arrays.asList("foobarbaz")); + assertMatch("%{a} %{+a/2} %{+a/1}", "foo bar baz", Arrays.asList("a"), Arrays.asList("foobazbar")); + assertMatch("%{*a} %{b} %{&a}", "foo bar baz", Arrays.asList("foo", "b"), Arrays.asList("baz", "bar")); + assertMatch("%{a} %{} %{c}", "foo bar baz", Arrays.asList("a", "c"), Arrays.asList("foo", "baz")); + assertMatch("%{a} %{?skipme} %{c}", "foo bar baz", Arrays.asList("a", "c"), Arrays.asList("foo", "baz")); + assertMatch("%{a},%{b},%{c},%{d}", "foo,,,", Arrays.asList("a", "b", "c", "d"), Arrays.asList("foo", "", "", "")); + assertMatch("%{a->},%{b}", "foo,,,,,,bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar")); + } + + /** + * Borrowed from Logstash's test cases: + * https://github.com/logstash-plugins/logstash-filter-dissect/blob/master/src/test/java/org/logstash/dissect/DissectorTest.java + * Append Note - Logstash appends with the delimiter as the separator between values, this uses a user defined separator + */ + public void testLogstashSpecs() { + assertMatch("%{a} %{b->} 
%{c}", "foo bar baz", Arrays.asList("a", "b", "c"), Arrays.asList("foo", "bar", "baz")); + assertMiss("%{a}%{b} %{c}", null); + assertMiss("%{a} %{b}%{c} %{d}", "foo bar baz"); + assertMiss("%{a} %{b} %{c}%{d}", "foo bar baz quux"); + assertMatch("%{a} %{b->} %{c}", "foo bar baz", Arrays.asList("a", "b", "c"), Arrays.asList("foo", "bar", "baz")); + assertMatch("%{a} %{} %{c}", "foo bar baz", Arrays.asList("a", "c"), Arrays.asList("foo", "baz")); + assertMatch("%{a} %{b} %{+b} %{z}", "foo bar baz quux", Arrays.asList("a", "b", "z"), Arrays.asList("foo", "bar baz", "quux"), " "); + assertMatch("%{a}------->%{b}", "foo------->bar baz quux", Arrays.asList("a", "b"), Arrays.asList("foo", "bar baz quux")); + assertMatch("%{a}------->%{}", "foo------->bar baz quux", Arrays.asList("a"), Arrays.asList("foo")); + assertMatch("%{a} » %{b}»%{c}€%{d}", "foo » bar»baz€quux", + Arrays.asList("a", "b", "c", "d"), Arrays.asList("foo", "bar", "baz", "quux")); + assertMatch("%{a} %{b} %{+a}", "foo bar baz quux", Arrays.asList("a", "b"), Arrays.asList("foo baz quux", "bar"), " "); + //Logstash supports implicit ordering based anchored by the the key without the '+' + //This implementation will only honor implicit ordering for appending right to left else explicit order (/N) is required. + //The results of this test differ from Logstash. 
+ assertMatch("%{+a} %{a} %{+a} %{b}", "December 31 1999 quux", + Arrays.asList("a", "b"), Arrays.asList("December 31 1999", "quux"), " "); + //Same test as above, but with same result as Logstash using explicit ordering in the pattern + assertMatch("%{+a/1} %{a} %{+a/2} %{b}", "December 31 1999 quux", + Arrays.asList("a", "b"), Arrays.asList("31 December 1999", "quux"), " "); + assertMatch("%{+a/2} %{+a/4} %{+a/1} %{+a/3}", "bar quux foo baz", Arrays.asList("a"), Arrays.asList("foo bar baz quux"), " "); + assertMatch("%{+a} %{b}", "foo bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar")); + assertMatch("%{+a} %{b} %{+a} %{c}", "foo bar baz quux", + Arrays.asList("a", "b", "c"), Arrays.asList("foo baz", "bar", "quux"), " "); + assertMatch("%{} %{syslog_timestamp} %{hostname} %{rt}: %{reason} %{+reason} %{src_ip}/%{src_port}->%{dst_ip}/%{dst_port} " + + "%{polrt} %{+polrt} %{+polrt} %{from_zone} %{to_zone} %{rest}", + "42 2016-05-25T14:47:23Z host.name.com RT_FLOW - RT_FLOW_SESSION_DENY: session denied 2.2.2.20/60000->1.1.1.10/8090 None " + + "6(0) DEFAULT-DENY ZONE-UNTRUST ZONE-DMZ UNKNOWN UNKNOWN N/A(N/A) ge-0/0/0.0", + Arrays.asList("syslog_timestamp", "hostname", "rt", "reason", "src_ip", "src_port", "dst_ip", "dst_port", "polrt" + , "from_zone", "to_zone", "rest"), + Arrays.asList("2016-05-25T14:47:23Z", "host.name.com", "RT_FLOW - RT_FLOW_SESSION_DENY", "session denied", "2.2.2.20", "60000" + , "1.1.1.10", "8090", "None 6(0) DEFAULT-DENY", "ZONE-UNTRUST", "ZONE-DMZ", "UNKNOWN UNKNOWN N/A(N/A) ge-0/0/0.0"), " "); + assertBadKey("%{+/2}"); + assertBadKey("%{&+a_field}"); + assertMatch("%{a->} %{b->}---%{c}", "foo bar------------baz", + Arrays.asList("a", "b", "c"), Arrays.asList("foo", "bar", "baz")); + assertMatch("%{->}-%{a}", "-----666", Arrays.asList("a"), Arrays.asList("666")); + assertMatch("%{?skipme->}-%{a}", "-----666", Arrays.asList("a"), Arrays.asList("666")); + assertMatch("%{a},%{b},%{c},%{d},%{e},%{f}", "111,,333,,555,666", + 
Arrays.asList("a", "b", "c", "d", "e", "f"), Arrays.asList("111", "", "333", "", "555", "666")); + assertMatch("%{a}.࿏.%{b}", "⟳༒.࿏.༒⟲", Arrays.asList("a", "b"), Arrays.asList("⟳༒", "༒⟲")); + assertMatch("%{a}", "子", Arrays.asList("a"), Arrays.asList("子")); + assertMatch("%{a}{\n}%{b}", "aaa{\n}bbb", Arrays.asList("a", "b"), Arrays.asList("aaa", "bbb")); + assertMiss("MACHINE[%{a}] %{b}", "1234567890 MACHINE[foo] bar"); + assertMiss("%{a} %{b} %{c}", "foo:bar:baz"); + assertMatch("/var/%{key1}/log/%{key2}.log", "/var/foo/log/bar.log", Arrays.asList("key1", "key2"), Arrays.asList("foo", "bar")); + assertMatch("%{a->} %{b}-.-%{c}-%{d}-..-%{e}-%{f}-%{g}-%{h}", "foo bar-.-baz-1111-..-22-333-4444-55555", + Arrays.asList("a", "b", "c", "d", "e", "f", "g", "h"), + Arrays.asList("foo", "bar", "baz", "1111", "22", "333", "4444", "55555")); + } + + public void testBasicMatch() { + String valueFirstInput = ""; + String keyFirstPattern = ""; + String delimiterFirstInput = ""; + String delimiterFirstPattern = ""; + //parallel arrays + List expectedKeys = Arrays.asList(generateRandomStringArray(100, 10, false, false)); + List expectedValues = new ArrayList<>(expectedKeys.size()); + for (String key : expectedKeys) { + String value = randomAsciiAlphanumOfLengthBetween(1, 100); + String delimiter = Integer.toString(randomInt()); //int to ensures values and delimiters don't overlap, else validation can fail + keyFirstPattern += "%{" + key + "}" + delimiter; + valueFirstInput += value + delimiter; + delimiterFirstPattern += delimiter + "%{" + key + "}"; + delimiterFirstInput += delimiter + value; + expectedValues.add(value); + } + assertMatch(keyFirstPattern, valueFirstInput, expectedKeys, expectedValues); + assertMatch(delimiterFirstPattern, delimiterFirstInput, expectedKeys, expectedValues); + } + + public void testBasicMatchUnicode() { + String valueFirstInput = ""; + String keyFirstPattern = ""; + String delimiterFirstInput = ""; + String delimiterFirstPattern = ""; + //parallel 
arrays + List expectedKeys = new ArrayList<>(); + List expectedValues = new ArrayList<>(); + for (int i = 0; i < randomIntBetween(1, 100); i++) { + String key = randomAsciiAlphanumOfLengthBetween(1, 100); + String value = randomRealisticUnicodeOfCodepointLengthBetween(1, 100); + String delimiter = Integer.toString(randomInt()); //int to ensures values and delimiters don't overlap, else validation can fail + keyFirstPattern += "%{" + key + "}" + delimiter; + valueFirstInput += value + delimiter; + delimiterFirstPattern += delimiter + "%{" + key + "}"; + delimiterFirstInput += delimiter + value; + expectedKeys.add(key); + expectedValues.add(value); + } + assertMatch(keyFirstPattern, valueFirstInput, expectedKeys, expectedValues); + assertMatch(delimiterFirstPattern, delimiterFirstInput, expectedKeys, expectedValues); + } + + public void testMatchUnicode() { + assertMatch("%{a} %{b}", "foo 子", Arrays.asList("a", "b"), Arrays.asList("foo", "子")); + assertMatch("%{a}࿏%{b} %{c}", "⟳༒࿏༒⟲ 子", Arrays.asList("a", "b", "c"), Arrays.asList("⟳༒", "༒⟲", "子")); + assertMatch("%{a}࿏%{+a} %{+a}", "⟳༒࿏༒⟲ 子", Arrays.asList("a"), Arrays.asList("⟳༒༒⟲子")); + assertMatch("%{a}࿏%{+a/2} %{+a/1}", "⟳༒࿏༒⟲ 子", Arrays.asList("a"), Arrays.asList("⟳༒子༒⟲")); + assertMatch("%{a->}࿏%{b}", "⟳༒࿏࿏࿏࿏࿏༒⟲", Arrays.asList("a", "b"), Arrays.asList("⟳༒", "༒⟲")); + assertMatch("%{*a}࿏%{&a}", "⟳༒࿏༒⟲", Arrays.asList("⟳༒"), Arrays.asList("༒⟲")); + assertMatch("%{}࿏%{a}", "⟳༒࿏༒⟲", Arrays.asList("a"), Arrays.asList("༒⟲")); + } + + public void testMatchRemainder() { + assertMatch("%{a}", "foo bar the rest", Arrays.asList("a"), Arrays.asList("foo bar the rest")); + assertMatch("%{a} %{b}", "foo bar the rest", Arrays.asList("a", "b"), Arrays.asList("foo", "bar the rest")); + assertMatch("%{} %{b}", "foo bar the rest", Arrays.asList("b"), Arrays.asList("bar the rest")); + assertMatch("%{a} %{b->}", "foo bar the rest", Arrays.asList("a", "b"), Arrays.asList("foo", "bar the rest")); + assertMatch("%{*a} %{&a}", "foo 
bar the rest", Arrays.asList("foo"), Arrays.asList("bar the rest")); + assertMatch("%{a} %{+a}", "foo bar the rest", Arrays.asList("a"), Arrays.asList("foo bar the rest"), " "); + } + + public void testAppend() { + assertMatch("%{a} %{+a} %{+a}", "foo bar baz", Arrays.asList("a"), Arrays.asList("foobarbaz")); + assertMatch("%{a} %{+a} %{b} %{+b}", "foo bar baz lol", Arrays.asList("a", "b"), Arrays.asList("foobar", "bazlol")); + assertMatch("%{a} %{+a/2} %{+a/1}", "foo bar baz", Arrays.asList("a"), Arrays.asList("foobazbar")); + assertMatch("%{a} %{+a/2} %{+a/1}", "foo bar baz", Arrays.asList("a"), Arrays.asList("foo baz bar"), " "); + } + + public void testAssociate() { + assertMatch("%{*a} %{&a}", "foo bar", Arrays.asList("foo"), Arrays.asList("bar")); + assertMatch("%{&a} %{*a}", "foo bar", Arrays.asList("bar"), Arrays.asList("foo")); + assertMatch("%{*a} %{&a} %{*b} %{&b}", "foo bar baz lol", Arrays.asList("foo", "baz"), Arrays.asList("bar", "lol")); + assertMatch("%{*a} %{&a} %{c} %{*b} %{&b}", "foo bar x baz lol", + Arrays.asList("foo", "baz", "c"), Arrays.asList("bar", "lol", "x")); + assertBadPattern("%{*a} %{a}"); + assertBadPattern("%{a} %{&a}"); + assertMiss("%{*a} %{&a} {a} %{*b} %{&b}", "foo bar x baz lol"); + } + + public void testAppendAndAssociate() { + assertMatch("%{a} %{+a} %{*b} %{&b}", "foo bar baz lol", Arrays.asList("a", "baz"), Arrays.asList("foobar", "lol")); + assertMatch("%{a->} %{+a/2} %{+a/1} %{*b} %{&b}", "foo bar baz lol x", + Arrays.asList("a", "lol"), Arrays.asList("foobazbar", "x")); + } + + public void testEmptyKey() { + assertMatch("%{} %{b}", "foo bar", Arrays.asList("b"), Arrays.asList("bar")); + assertMatch("%{a} %{}", "foo bar", Arrays.asList("a"), Arrays.asList("foo")); + assertMatch("%{->} %{b}", "foo bar", Arrays.asList("b"), Arrays.asList("bar")); + assertMatch("%{->} %{b}", " bar", Arrays.asList("b"), Arrays.asList("bar")); + assertMatch("%{a} %{->}", "foo bar ", Arrays.asList("a"), Arrays.asList("foo")); + } + + public 
void testNamedSkipKey() { + assertMatch("%{?foo} %{b}", "foo bar", Arrays.asList("b"), Arrays.asList("bar")); + assertMatch("%{?} %{b}", "foo bar", Arrays.asList("b"), Arrays.asList("bar")); + assertMatch("%{a} %{?bar}", "foo bar", Arrays.asList("a"), Arrays.asList("foo")); + assertMatch("%{?foo->} %{b}", "foo bar", Arrays.asList("b"), Arrays.asList("bar")); + assertMatch("%{?->} %{b}", "foo bar", Arrays.asList("b"), Arrays.asList("bar")); + assertMatch("%{?foo->} %{b}", " bar", Arrays.asList("b"), Arrays.asList("bar")); + assertMatch("%{a} %{->?bar}", "foo bar ", Arrays.asList("a"), Arrays.asList("foo")); + assertMatch("%{a} %{?skipme} %{?skipme}", "foo bar baz", Arrays.asList("a"), Arrays.asList("foo")); + assertMatch("%{a} %{?} %{?}", "foo bar baz", Arrays.asList("a"), Arrays.asList("foo")); + } + + public void testConsecutiveDelimiters() { + //leading + assertMatch("%{->},%{a}", ",,,,,foo", Arrays.asList("a"), Arrays.asList("foo")); + assertMatch("%{a->},%{b}", ",,,,,foo", Arrays.asList("a", "b"), Arrays.asList("", "foo")); + //trailing + assertMatch("%{a->},", "foo,,,,,", Arrays.asList("a"), Arrays.asList("foo")); + assertMatch("%{a} %{b},", "foo bar,,,,,", Arrays.asList("a", "b"), Arrays.asList("foo", "bar")); + assertMatch("%{a} %{b->},", "foo bar,,,,,", Arrays.asList("a", "b"), Arrays.asList("foo", "bar")); + //middle + assertMatch("%{a->},%{b}", "foo,,,,,bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar")); + assertMatch("%{a->} %{b}", "foo bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar")); + assertMatch("%{a->}x%{b}", "fooxxxxxbar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar")); + assertMatch("%{a->} xyz%{b}", "foo xyz xyz xyz xyz xyzbar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar")); + //skipped with empty values + assertMatch("%{a},%{b},%{c},%{d}", "foo,,,", Arrays.asList("a", "b", "c", "d"), Arrays.asList("foo", "", "", "")); + assertMatch("%{a},%{b},%{c},%{d}", "foo,,bar,baz", Arrays.asList("a", "b", "c", "d"), 
Arrays.asList("foo", "", "bar", "baz")); + assertMatch("%{a},%{b},%{c},%{d}", "foo,,,baz", Arrays.asList("a", "b", "c", "d"), Arrays.asList("foo", "", "", "baz")); + assertMatch("%{a},%{b},%{c},%{d}", ",bar,,baz", Arrays.asList("a", "b", "c", "d"), Arrays.asList("", "bar", "", "baz")); + assertMatch("%{->},%{a->},%{b}", ",,,bar,,baz", Arrays.asList("a", "b"), Arrays.asList("bar", "baz")); + } + + public void testAppendWithConsecutiveDelimiters() { + assertMatch("%{+a/1},%{+a/3}-%{+a/2} %{b}", "foo,bar----baz lol", Arrays.asList("a", "b"), Arrays.asList("foobar", "")); + assertMatch("%{+a/1},%{+a/3->}-%{+a/2} %{b}", "foo,bar----baz lol", Arrays.asList("a", "b"), Arrays.asList("foobazbar", "lol")); + } + + public void testSkipRightPadding() { + assertMatch("%{a->} %{b}", "foo bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar")); + assertMatch("%{a->} %{b}", "foo bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar")); + assertMatch("%{->} %{a}", "foo bar", Arrays.asList("a"), Arrays.asList("bar")); + assertMatch("%{a->} %{+a->} %{*b->} %{&b->} %{c}", "foo bar baz lol x", + Arrays.asList("a", "baz", "c"), Arrays.asList("foobar", "lol", "x")); + } + + public void testTrimmedEnd() { + assertMatch("%{a} %{b}", "foo bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar")); + assertMatch("%{a} %{b->} ", "foo bar ", Arrays.asList("a", "b"), Arrays.asList("foo", "bar")); + //only whitespace is trimmed in the absence of trailing characters + assertMatch("%{a} %{b->}", "foo bar,,,,,,", Arrays.asList("a", "b"), Arrays.asList("foo", "bar,,,,,,")); + //consecutive delimiters + right padding can be used to skip over the trailing delimiters + assertMatch("%{a} %{b->},", "foo bar,,,,,,", Arrays.asList("a", "b"), Arrays.asList("foo", "bar")); + } + + public void testLeadingDelimiter() { + assertMatch(",,,%{a} %{b}", ",,,foo bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar")); + assertMatch(",%{a} %{b}", ",,foo bar", Arrays.asList("a", "b"), Arrays.asList(",foo", 
"bar")); + } + + /** + * Runtime errors + */ + public void testMiss() { + assertMiss("%{a}%{b}", "foo"); + assertMiss("%{a},%{b}", "foo bar"); + assertMiss("%{a}, %{b}", "foo,bar"); + assertMiss("x%{a},%{b}", "foo,bar"); + assertMiss("x%{},%{b}", "foo,bar"); + assertMiss("leading_delimiter_long%{a}", "foo"); + assertMiss("%{a}trailing_delimiter_long", "foo"); + assertMiss("leading_delimiter_long%{a}trailing_delimiter_long", "foo"); + assertMiss("%{a}x", "foo"); + assertMiss("%{a},%{b}x", "foo,bar"); + } + + /** + * Construction errors + */ + public void testBadPatternOrKey() { + assertBadPattern(""); + assertBadPattern("{}"); + assertBadPattern("%{*a} %{&b}"); + assertBadKey("%{*}"); + assertBadKey("%{++}"); + } + + public void testSyslog() { + assertMatch("%{timestamp} %{+timestamp} %{+timestamp} %{logsource} %{program}[%{pid}]: %{message}", + "Mar 16 00:01:25 evita postfix/smtpd[1713]: connect from camomile.cloud9.net[168.100.1.3]", + Arrays.asList("timestamp", "logsource", "program", "pid", "message"), + Arrays.asList("Mar 16 00:01:25", "evita", "postfix/smtpd", "1713", "connect from camomile.cloud9.net[168.100.1.3]"), " "); + } + + public void testApacheLog() { + assertMatch("%{clientip} %{ident} %{auth} [%{timestamp}] \"%{verb} %{request} HTTP/%{httpversion}\" %{response} %{bytes}" + + " \"%{referrer}\" \"%{agent}\" %{->}", + "31.184.238.164 - - [24/Jul/2014:05:35:37 +0530] \"GET /logs/access.log HTTP/1.0\" 200 69849 " + + "\"http://8rursodiol.enjin.com\" \"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) " + + "Chrome/30.0.1599.12785 YaBrowser/13.12.1599.12785 Safari/537.36\" \"www.dlwindianrailways.com\"", + Arrays.asList("clientip", "ident", "auth", "timestamp", "verb", "request", "httpversion", "response", "bytes", + "referrer", "agent"), + Arrays.asList("31.184.238.164", "-", "-", "24/Jul/2014:05:35:37 +0530", "GET", "/logs/access.log", "1.0", "200", "69849", + "http://8rursodiol.enjin.com", "Mozilla/5.0 (Windows NT 6.1; WOW64) 
AppleWebKit/537.36" + + " (KHTML, like Gecko) Chrome/30.0.1599.12785 YaBrowser/13.12.1599.12785 Safari/537.36")); + } + + /** + * Shared specification between Beats, Logstash, and Ingest node + */ + public void testJsonSpecification() throws Exception { + ObjectMapper mapper = new ObjectMapper(); + JsonNode rootNode = mapper.readTree(this.getClass().getResourceAsStream("/specification/tests.json")); + Iterator tests = rootNode.elements(); + while (tests.hasNext()) { + JsonNode test = tests.next(); + boolean skip = test.path("skip").asBoolean(); + if (!skip) { + String name = test.path("name").asText(); + logger.debug("Running Json specification: " + name); + String pattern = test.path("tok").asText(); + String input = test.path("msg").asText(); + String append = test.path("append").asText(); + boolean fail = test.path("fail").asBoolean(); + Iterator> expected = test.path("expected").fields(); + List expectedKeys = new ArrayList<>(); + List expectedValues = new ArrayList<>(); + expected.forEachRemaining(entry -> { + expectedKeys.add(entry.getKey()); + expectedValues.add(entry.getValue().asText()); + }); + if (fail) { + assertFail(pattern, input); + } else { + assertMatch(pattern, input, expectedKeys, expectedValues, append); + } + } + } + } + + private DissectException assertFail(String pattern, String input){ + return expectThrows(DissectException.class, () -> new DissectParser(pattern, null).parse(input)); + } + + private void assertMiss(String pattern, String input) { + DissectException e = assertFail(pattern, input); + assertThat(e.getMessage(), CoreMatchers.containsString("Unable to find match for dissect pattern")); + assertThat(e.getMessage(), CoreMatchers.containsString(pattern)); + assertThat(e.getMessage(), input == null ? 
CoreMatchers.containsString("null") : CoreMatchers.containsString(input)); + } + + private void assertBadPattern(String pattern) { + DissectException e = assertFail(pattern, null); + assertThat(e.getMessage(), CoreMatchers.containsString("Unable to parse pattern")); + assertThat(e.getMessage(), CoreMatchers.containsString(pattern)); + } + + private void assertBadKey(String pattern, String key) { + DissectException e = assertFail(pattern, null); + assertThat(e.getMessage(), CoreMatchers.containsString("Unable to parse key")); + assertThat(e.getMessage(), CoreMatchers.containsString(key)); + } + + private void assertBadKey(String pattern) { + assertBadKey(pattern, pattern.replace("%{", "").replace("}", "")); + } + + private void assertMatch(String pattern, String input, List expectedKeys, List expectedValues) { + assertMatch(pattern, input, expectedKeys, expectedValues, null); + } + + private void assertMatch(String pattern, String input, List expectedKeys, List expectedValues, String appendSeperator) { + Map results = new DissectParser(pattern, appendSeperator).parse(input); + List foundKeys = new ArrayList<>(results.keySet()); + List foundValues = new ArrayList<>(results.values()); + Collections.sort(foundKeys); + Collections.sort(foundValues); + Collections.sort(expectedKeys); + Collections.sort(expectedValues); + assertThat(foundKeys, Matchers.equalTo(expectedKeys)); + assertThat(foundValues, Matchers.equalTo(expectedValues)); + } +} diff --git a/libs/dissect/src/test/resources/specification/tests.json b/libs/dissect/src/test/resources/specification/tests.json new file mode 100644 index 00000000000..1cb85ce6519 --- /dev/null +++ b/libs/dissect/src/test/resources/specification/tests.json @@ -0,0 +1,363 @@ +[ + { + "name": "When all the defined fields are captured by we have remaining data", + "tok": "level=%{level} ts=%{timestamp} caller=%{caller} msg=\"%{message}\"", + "msg": "level=info ts=2018-06-27T17:19:13.036579993Z caller=main.go:222 msg=\"Starting OK\" 
version=\"(version=2.3.1, branch=HEAD, revision=188ca45bd85ce843071e768d855722a9d9dabe03)\"}", + "expected": { + "caller": "main.go:222", + "level": "info", + "message": "Starting OK", + "timestamp": "2018-06-27T17:19:13.036579993Z" + }, + "skip": false, + "fail": false, + "append": "" + }, + { + "name": "Complex stack trace", + "tok": "%{day}-%{month}-%{year} %{hour} %{severity} [%{thread_id}] %{origin} %{message}", + "msg": "18-Apr-2018 06:53:20.411 INFO [http-nio-8080-exec-1] org.apache.coyote.http11.Http11Processor.service Error parsing HTTP request header\n Note: further occurrences of HTTP header parsing errors will be logged at DEBUG level.\n java.lang.IllegalArgumentException: Invalid character found in method name. HTTP method names must be tokens\n at org.apache.coyote.http11.Http11InputBuffer.parseRequestLine(Http11InputBuffer.java:426)\n at org.apache.coyote.http11.Http11Processor.service(Http11Processor.java:687)\n at org.apache.coyote.AbstractProcessorLight.process(AbstractProcessorLight.java:66)\n at org.apache.coyote.AbstractProtocol$ConnectionHandler.process(AbstractProtocol.java:790)\n at org.apache.tomcat.util.net.NioEndpoint$SocketProcessor.doRun(NioEndpoint.java:1459)\n at org.apache.tomcat.util.net.SocketProcessorBase.run(SocketProcessorBase.java:49)\n at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\n at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\n at org.apache.tomcat.util.threads.TaskThread$WrappingRunnable.run(TaskThread.java:61)\n at java.lang.Thread.run(Thread.java:748)", + "expected": { + "day": "18", + "hour": "06:53:20.411", + "message": "Error parsing HTTP request header\n Note: further occurrences of HTTP header parsing errors will be logged at DEBUG level.\n java.lang.IllegalArgumentException: Invalid character found in method name. 
HTTP method names must be tokens\n at org.apache.coyote.http11.Http11InputBuffer.parseRequestLine(Http11InputBuffer.java:426)\n at org.apache.coyote.http11.Http11Processor.service(Http11Processor.java:687)\n at org.apache.coyote.AbstractProcessorLight.process(AbstractProcessorLight.java:66)\n at org.apache.coyote.AbstractProtocol$ConnectionHandler.process(AbstractProtocol.java:790)\n at org.apache.tomcat.util.net.NioEndpoint$SocketProcessor.doRun(NioEndpoint.java:1459)\n at org.apache.tomcat.util.net.SocketProcessorBase.run(SocketProcessorBase.java:49)\n at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\n at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\n at org.apache.tomcat.util.threads.TaskThread$WrappingRunnable.run(TaskThread.java:61)\n at java.lang.Thread.run(Thread.java:748)", + "month": "Apr", + "origin": "org.apache.coyote.http11.Http11Processor.service", + "severity": "INFO", + "thread_id": "http-nio-8080-exec-1", + "year": "2018" + }, + "skip": false, + "fail": false, + "append": "" + }, + { + "name": "success when delimiter found at the beginning and end of the string", + "tok": "/var/log/%{key}.log", + "msg": "/var/log/foobar.log", + "expected": { + "key": "foobar" + }, + "skip": false, + "fail": false, + "append": "" + }, + { + "name": "fails when delimiter is not found at the beginning of the string", + "tok": "/var/log/%{key}.log", + "msg": "foobar", + "expected": null, + "skip": false, + "fail": true, + "append": "" + }, + { + "name": "fails when delimiter is not found after the key", + "tok": "/var/log/%{key}.log", + "msg": "/var/log/foobar", + "expected": null, + "skip": false, + "fail": true, + "append": "" + }, + { + "name": "simple dissect", + "tok": "%{key}", + "msg": "foobar", + "expected": { + "key": "foobar" + }, + "skip": false, + "fail": false, + "append": "" + }, + { + "name": "dissect two replacement", + "tok": "%{key1} %{key2}", + "msg": "foo bar", + "expected": { + 
"key1": "foo", + "key2": "bar" + }, + "skip": false, + "fail": false, + "append": "" + }, + { + "name": "fail on partial match", + "tok": "%{key1} %{key2} %{key3}", + "msg": "foo bar", + "expected": null, + "skip": false, + "fail": true, + "append": "" + }, + { + "name": "one level dissect not end of string", + "tok": "/var/%{key}/log", + "msg": "/var/foobar/log", + "expected": { + "key": "foobar" + }, + "skip": false, + "fail": false, + "append": "" + }, + { + "name": "one level dissect", + "tok": "/var/%{key}", + "msg": "/var/foobar/log", + "expected": { + "key": "foobar/log" + }, + "skip": false, + "fail": false, + "append": "" + }, + { + "name": "multiple keys dissect end of string", + "tok": "/var/%{key}/log/%{key1}", + "msg": "/var/foobar/log/apache", + "expected": { + "key": "foobar", + "key1": "apache" + }, + "skip": false, + "fail": false, + "append": "" + }, + { + "name": "multiple keys not end of string", + "tok": "/var/%{key}/log/%{key1}.log", + "msg": "/var/foobar/log/apache.log", + "expected": { + "key": "foobar", + "key1": "apache" + }, + "skip": false, + "fail": false, + "append": "" + }, + { + "name": "append with order", + "tok": "%{+key/3} %{+key/1} %{+key/2}", + "msg": "1 2 3", + "expected": { + "key": "231" + }, + "skip": false, + "fail": false, + "append": "" + }, + { + "name": "append with order and separator", + "tok": "%{+key/3} %{+key/1} %{+key/2}", + "msg": "1 2 3", + "expected": { + "key": "2::3::1" + }, + "skip": false, + "fail": false, + "append": "::" + }, + { + "name": "append with order and right padding", + "tok": "%{+key/3} %{+key/1-\u003e} %{+key/2}", + "msg": "1 2 3", + "expected": { + "key": "231" + }, + "skip": false, + "fail": false, + "append": "" + }, + { + "name": "simple append", + "tok": "%{key}-%{+key}-%{+key}", + "msg": "1-2-3", + "expected": { + "key": "123" + }, + "skip": false, + "fail": false, + "append": "" + }, + { + "name": "simple append with separator", + "tok": "%{key}-%{+key}-%{+key}", + "msg": "1-2-3", + 
"expected": { + "key": "1,2,3" + }, + "skip": false, + "fail": false, + "append": "," + }, + { + "name": "reference field", + "tok": "%{*key} %{\u0026key}", + "msg": "hello world", + "expected": { + "hello": "world" + }, + "skip": false, + "fail": false, + "append": "" + }, + { + "name": "reference field alt order", + "tok": "%{\u0026key} %{*key}", + "msg": "hello world", + "expected": { + "world": "hello" + }, + "skip": false, + "fail": false, + "append": "" + }, + { + "name": "nameless skip field", + "tok": "%{} %{key}", + "msg": "hello world", + "expected": { + "key": "world" + }, + "skip": false, + "fail": false, + "append": "" + }, + { + "name": "named skip field", + "tok": "%{?skipme} %{key}", + "msg": "hello world", + "expected": { + "key": "world" + }, + "skip": false, + "fail": false, + "append": "" + }, + { + "name": "reference without pairing", + "tok": "%{key} %{\u0026key}", + "msg": "hello world", + "expected": null, + "skip": false, + "fail": true, + "append": "" + }, + { + "name": "missing fields (consecutive delimiters)", + "tok": "%{name},%{addr1},%{addr2},%{addr3},%{city},%{zip}", + "msg": "Jane Doe,4321 Fifth Avenue,,,New York,87432", + "expected": { + "addr1": "4321 Fifth Avenue", + "addr2": "", + "addr3": "", + "city": "New York", + "name": "Jane Doe", + "zip": "87432" + }, + "skip": false, + "fail": false, + "append": "" + }, + { + "name": "missing fields with right padding (consecutive delimiters)", + "tok": "%{name},%{addr1-\u003e},%{city},%{zip}", + "msg": "Jane Doe,4321 Fifth Avenue,,,New York,87432", + "expected": { + "addr1": "4321 Fifth Avenue", + "city": "New York", + "name": "Jane Doe", + "zip": "87432" + }, + "skip": false, + "fail": false, + "append": "" + }, + { + "name": "ignore right padding", + "tok": "%{id} %{function-\u003e} %{server}", + "msg": "00000043 ViewReceive machine-321", + "expected": { + "function": "ViewReceive", + "id": "00000043", + "server": "machine-321" + }, + "skip": false, + "fail": false, + "append": "" + 
}, + { + "name": "padding on the last key need a delimiter", + "tok": "%{id} %{function} %{server-\u003e} ", + "msg": "00000043 ViewReceive machine-321 ", + "expected": { + "function": "ViewReceive", + "id": "00000043", + "server": "machine-321" + }, + "skip": false, + "fail": false, + "append": "" + }, + { + "name": "ignore left padding", + "tok": "%{id-\u003e} %{function} %{server}", + "msg": "00000043 ViewReceive machine-321", + "expected": { + "function": "ViewReceive", + "id": "00000043", + "server": "machine-321" + }, + "skip": false, + "fail": false, + "append": "" + }, + { + "name": "when the delimiters contains `{` and `}`", + "tok": "{%{a}}{%{b}} %{rest}", + "msg": "{c}{d} anything", + "expected": { + "a": "c", + "b": "d", + "rest": "anything" + }, + "skip": false, + "fail": false, + "append": "" + }, + { + "name": "no keys defined", + "tok": "anything", + "msg": "anything", + "expected": null, + "skip": false, + "fail": true, + "append": "" + }, + { + "name": "invalid key", + "tok": "%{some?thing}", + "msg": "anything", + "expected": null, + "skip": false, + "fail": true, + "append": "" + }, + { + "name": "matches non-ascii", + "tok": "%{a}࿏%{b} %{c}", + "msg": "⟳༒࿏༒⟲ 子", + "expected": { + "a": "⟳༒", + "b": "༒⟲", + "c": "子" + }, + "skip": false, + "fail": false, + "append": "" + } + +] \ No newline at end of file diff --git a/modules/lang-painless/spi/src/main/java/org/elasticsearch/painless/spi/WhitelistLoader.java b/modules/lang-painless/spi/src/main/java/org/elasticsearch/painless/spi/WhitelistLoader.java index c59023b8622..a4a0076626a 100644 --- a/modules/lang-painless/spi/src/main/java/org/elasticsearch/painless/spi/WhitelistLoader.java +++ b/modules/lang-painless/spi/src/main/java/org/elasticsearch/painless/spi/WhitelistLoader.java @@ -53,7 +53,7 @@ public final class WhitelistLoader { * a Painless type name with the exception that any dollar symbols used as part of inner classes will * be replaced with dot symbols. *
  • short Java type name - The text after the final dot symbol of any specified Java class. A - * short type Java name may be excluded by using the 'only_fqn' token during Painless class parsing + * short Java type name may be excluded by using the 'no_import' token during Painless class parsing * as described later.
  • * * @@ -65,7 +65,7 @@ public final class WhitelistLoader { *
  • Primitive types may be specified starting with 'class' and followed by the Java type name, * an opening bracket, a newline, a closing bracket, and a final newline.
  • *
  • Complex types may be specified starting with 'class' and followed the fully-qualified Java - class name, optionally followed by an 'only_fqn' token, an opening bracket, a newline, + class name, optionally followed by a 'no_import' token, an opening bracket, a newline, * constructor/method/field specifications, a closing bracket, and a final newline. Within a complex * type the following may be parsed: *