EQL: [Tests] Add correctness integration tests (#63644) (#63735)

Add a new Gradle module under eql/qa which runs and validates a set of
queries over a 4M-event dataset (restored from a snapshot residing in a
GCS bucket). The expected results are provided by running the exact same set
of queries with Python EQL against the same dataset.

Co-authored-by: Marios Trivyzas <matriv@users.noreply.github.com>
(cherry picked from commit 1cf789e5fcfb0f364f665bfaac021e24a4c2f556)

Co-authored-by: Mark Vieira <portugee@gmail.com>
Marios Trivyzas 2020-10-15 15:28:26 +02:00 committed by GitHub
parent eb3ae9a204
commit 095f979060
7 changed files with 1147 additions and 0 deletions


@@ -0,0 +1,68 @@
## ES EQL Integration correctness tests
### Description
Python EQL runs a series of queries against a specific dataset and the output of those queries (including results and
timing) becomes the `queries.toml` file of this module.
The dataset is stored as a snapshot in a GCS bucket. This module starts up an ES node, restores the data, executes
the queries and asserts that the results match those provided along with each query statement in the `queries.toml` file.
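The exact contents of `queries.toml` are not shown in this commit (the file diff is suppressed); for illustration, an
entry roughly follows the shape below. The field names are the ones the spec loader of this module reads, while the
query and the values are made up:
```toml
[[queries]]
queryNo = 2
count = 1
expected_event_ids = [75273, 75274]
filter_counts = [1581, 426]
filters = [
    'process where serial_event_id < 80000',
    'library where true'
]
time = 169.34
query = '''
sequence
  [process where serial_event_id < 80000]
  [library where true]
'''
```
The `filters` and `filter_counts` entries are only used when the debug mode described further below is enabled.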
### Running the tests
To be able to run the tests locally, set the environment variable `eql_test_credentials_file` to point to
a local file holding the service account credentials which allow access to the GCS bucket where the dataset resides.
E.g.:
```shell script
export eql_test_credentials_file=/Users/username/credentials.gcs.json
```
To run the tests you can issue:
```shell script
./gradlew -p x-pack/plugin/eql/qa/correctness check
```
or simply run:
```shell script
./gradlew -p x-pack/plugin/eql check
```
**If the `eql_test_credentials_file` environment variable is not set, the correctness tests will not be executed.**
*For every query, an `INFO` line is logged showing the response time for that query, e.g.:*
```
org.elasticsearch.xpack.eql.EsEQLCorrectnessIT > test {2} STANDARD_OUT
[2020-10-15T11:55:02,870][INFO ][o.e.x.e.EsEQLCorrectnessIT] [test] [2] before test
[2020-10-15T11:55:03,070][INFO ][o.e.x.e.EsEQLCorrectnessIT] [test] QueryNo: 2, took: 169ms
[2020-10-15T11:55:03,083][INFO ][o.e.x.e.EsEQLCorrectnessIT] [test] [2] after test
```
*At the end of a successful run, an `INFO` line is logged showing the total response time across all executed
queries, e.g.:*
```
[2020-10-15T06:39:55,826][INFO ][o.e.x.e.EsEQLCorrectnessIT] [suite] Total time: 24563 ms
```
#### Run a specific query
To run a single query from the set, use the following command, replacing `<queryNo>` (which
can be found in the `queries.toml` file) with the number of the desired query:
```shell script
./gradlew ':x-pack:plugin:eql:qa:correctness:javaRestTest' --tests "org.elasticsearch.xpack.eql.EsEQLCorrectnessIT.test {<queryNo>}"
```
#### Debug queries
To check that the filtering subqueries of a sequence query yield the expected results (and thus pinpoint whether a
possible failure lies in the sequence algorithm), enable the debug mode with a system property:
```shell script
./gradlew -p x-pack/plugin/eql/qa/correctness check -Dtests.eql_correctness_debug=true
```
or
```shell script
./gradlew ':x-pack:plugin:eql:qa:correctness:javaRestTest' --tests "org.elasticsearch.xpack.eql.EsEQLCorrectnessIT.test {<queryNo>}" -Dtests.eql_correctness_debug=true
```


@@ -0,0 +1,38 @@
apply plugin: 'elasticsearch.java-rest-test'
apply plugin: 'elasticsearch.build'
test.enabled = false
restResources {
restApi {
includeCore '_common', 'bulk', 'indices', 'snapshot'
includeXpack 'eql', 'indices'
}
}
dependencies {
javaRestTestImplementation project(':test:framework')
javaRestTestImplementation project(path: xpackModule('core'), configuration: 'default')
javaRestTestImplementation project(path: xpackModule('core'), configuration: 'testArtifacts')
javaRestTestImplementation project(xpackModule('ql:test'))
javaRestTestImplementation 'io.ous:jtoml:2.0.0'
}
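// Pick up the GCS service account credentials from the `eql_test_credentials_file` environment variable
// (or the `eql.test.credentials.file` system property) so they can be added to the test cluster keystore below.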
File serviceAccountFile = (System.getenv("eql_test_credentials_file") ?: System.getProperty("eql.test.credentials.file")) as File
testClusters.all {
plugin ':plugins:repository-gcs'
if (serviceAccountFile) {
keystore 'gcs.client.eql_test.credentials_file', serviceAccountFile
}
testDistribution = 'DEFAULT'
setting 'xpack.license.self_generated.type', 'basic'
jvmArgs '-Xms4g', '-Xmx4g'
}
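// Run the correctness tests only when a credentials file is available; otherwise they are skipped.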
tasks.named('javaRestTest').configure {
onlyIf { serviceAccountFile }
testLogging {
showStandardStreams = true
}
}


@@ -0,0 +1,102 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.eql;
import java.util.Objects;
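/**
 * A single query specification read from {@code queries.toml}: the EQL query, the expected sequence count,
 * the expected event ids, the expected per-filter hit counts and the recorded execution time.
 */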
public class EqlSpec {
private int queryNo;
private String query;
private long seqCount;
private long[] expectedEventIds;
private long[] filterCounts;
private String[] filters;
private double time;
public int queryNo() {
return queryNo;
}
public void queryNo(int queryNo) {
this.queryNo = queryNo;
}
public String query() {
return query;
}
public void query(String query) {
this.query = query;
}
public long seqCount() {
return seqCount;
}
public void seqCount(long seqCount) {
this.seqCount = seqCount;
}
public long[] expectedEventIds() {
return expectedEventIds;
}
public void expectedEventIds(long[] expectedEventIds) {
this.expectedEventIds = expectedEventIds;
}
public long[] filterCounts() {
return filterCounts;
}
public void filterCounts(long[] filterCounts) {
this.filterCounts = filterCounts;
}
public String[] filters() {
return filters;
}
public void filters(String[] filters) {
this.filters = filters;
}
public double time() {
return time;
}
public void time(double time) {
this.time = time;
}
public EqlSpec(int queryNo) {
this.queryNo = queryNo;
}
@Override
public String toString() {
return queryNo + "";
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
EqlSpec eqlSpec = (EqlSpec) o;
return queryNo == eqlSpec.queryNo;
}
@Override
public int hashCode() {
return Objects.hash(queryNo);
}
}


@@ -0,0 +1,89 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.eql;
import io.ous.jtoml.JToml;
import io.ous.jtoml.Toml;
import io.ous.jtoml.TomlTable;
import org.elasticsearch.common.Strings;
import java.io.InputStream;
import java.util.Collection;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
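/**
 * Reads {@link EqlSpec} entries from a TOML stream ({@code queries.toml}), rejecting specs that have no
 * query or that reuse an already seen {@code queryNo}.
 */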
public class EqlSpecLoader {
private static void validateAndAddSpec(Set<EqlSpec> specs, EqlSpec spec) {
if (Strings.isNullOrEmpty(spec.query())) {
throw new IllegalArgumentException("Read a test without a query value");
}
if (specs.contains(spec)) {
throw new IllegalArgumentException("Read a test query with the same queryNo");
}
specs.add(spec);
}
private static String getTrimmedString(TomlTable table, String key) {
String s = table.getString(key);
if (s != null) {
return s.trim();
}
return null;
}
public static Collection<EqlSpec> readFromStream(InputStream is) throws Exception {
Set<EqlSpec> testSpecs = new LinkedHashSet<>();
EqlSpec spec;
Toml toml = JToml.parse(is);
List<TomlTable> queries = toml.getArrayTable("queries");
for (TomlTable table : queries) {
spec = new EqlSpec(table.getLong("queryNo").intValue());
spec.seqCount(table.getLong("count"));
List<?> arr = table.getList("expected_event_ids");
if (arr != null) {
long[] expectedEventIds = new long[arr.size()];
int i = 0;
for (Object obj : arr) {
expectedEventIds[i++] = (Long) obj;
}
spec.expectedEventIds(expectedEventIds);
}
arr = table.getList("filter_counts");
if (arr != null) {
long[] filterCounts = new long[arr.size()];
int i = 0;
for (Object obj : arr) {
filterCounts[i++] = (Long) obj;
}
spec.filterCounts(filterCounts);
}
arr = table.getList("filters");
if (arr != null) {
String[] filters = new String[arr.size()];
int i = 0;
for (Object obj : arr) {
filters[i++] = (String) obj;
}
spec.filters(filters);
}
spec.query(getTrimmedString(table, "query"));
spec.time(table.getDouble("time"));
validateAndAddSpec(testSpecs, spec);
}
return testSpecs;
}
}


@@ -0,0 +1,213 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.eql;
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
import org.apache.http.HttpHost;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.util.TimeUnits;
import org.elasticsearch.action.admin.cluster.repositories.put.PutRepositoryRequest;
import org.elasticsearch.action.admin.cluster.snapshots.restore.RestoreSnapshotRequest;
import org.elasticsearch.client.HttpAsyncResponseConsumerFactory;
import org.elasticsearch.client.Request;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.ResponseException;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestClientBuilder;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.eql.EqlSearchRequest;
import org.elasticsearch.client.eql.EqlSearchResponse;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.test.junit.annotations.TestLogging;
import org.elasticsearch.test.rest.ESRestTestCase;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Properties;
import static org.elasticsearch.xpack.ql.TestUtils.assertNoSearchContexts;
@TimeoutSuite(millis = 30 * TimeUnits.MINUTE)
@TestLogging(value = "org.elasticsearch.xpack.eql.EsEQLCorrectnessIT:INFO", reason = "Log query execution time")
public class EsEQLCorrectnessIT extends ESRestTestCase {
private static final String PARAM_FORMATTING = "%1$s";
private static final String QUERIES_FILENAME = "queries.toml";
private static final String PROPERTIES_FILENAME = "config.properties";
private static Properties CFG;
private static RestHighLevelClient highLevelClient;
private static RequestOptions COMMON_REQUEST_OPTIONS;
private static long totalTime = 0;
private static final Logger LOGGER = LogManager.getLogger(EsEQLCorrectnessIT.class);
@BeforeClass
public static void init() throws IOException {
try (InputStream is = EsEQLCorrectnessIT.class.getClassLoader().getResourceAsStream(PROPERTIES_FILENAME)) {
CFG = new Properties();
CFG.load(is);
}
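// Allow buffering large responses in heap (up to ~1GB), as some queries return a large number of events.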
RequestOptions.Builder builder = RequestOptions.DEFAULT.toBuilder();
builder.setHttpAsyncResponseConsumerFactory(
new HttpAsyncResponseConsumerFactory.HeapBufferedResponseConsumerFactory(1000 * 1024 * 1024)
);
COMMON_REQUEST_OPTIONS = builder.build();
}
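// Registers the GCS repository and restores the snapshot only if the dataset index is not present yet, so the
// expensive restore runs once and the data is reused across the parameterized test runs.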
@Before
public void restoreDataFromGcsRepo() throws Exception {
if (client().performRequest(new Request("HEAD", "/" + CFG.getProperty("index_name"))).getStatusLine().getStatusCode() == 404) {
highLevelClient().snapshot()
.createRepository(
new PutRepositoryRequest(CFG.getProperty("gcs_repo_name")).type("gcs")
.settings(
Settings.builder()
.put("bucket", CFG.getProperty("gcs_bucket_name"))
.put("base_path", CFG.getProperty("gcs_base_path"))
.put("client", CFG.getProperty("gcs_client_name"))
.build()
),
RequestOptions.DEFAULT
);
highLevelClient().snapshot()
.restore(
new RestoreSnapshotRequest(CFG.getProperty("gcs_repo_name"), CFG.getProperty("gcs_snapshot_name")).waitForCompletion(
true
),
RequestOptions.DEFAULT
);
}
}
@After
public void checkSearchContent() throws Exception {
assertNoSearchContexts(client());
}
@AfterClass
public static void logTotalExecutionTime() {
LOGGER.info("Total time: {} ms", totalTime);
}
@AfterClass
public static void wipeTestData() throws IOException {
try {
adminClient().performRequest(new Request("DELETE", "/*"));
} catch (ResponseException e) {
// 404 here just means we had no indexes
if (e.getResponse().getStatusLine().getStatusCode() != 404) {
throw e;
}
}
}
@Override
protected boolean preserveClusterUponCompletion() {
// Need to preserve data between parameterized tests runs
return true;
}
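// Use very generous client timeouts: the snapshot restore and some of the queries can take a long time.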
@Override
protected RestClient buildClient(Settings settings, HttpHost[] hosts) throws IOException {
RestClientBuilder builder = RestClient.builder(hosts);
configureClient(builder, settings);
builder.setRequestConfigCallback(
requestConfigBuilder -> requestConfigBuilder.setConnectTimeout(30000000)
.setConnectionRequestTimeout(30000000)
.setSocketTimeout(30000000)
);
builder.setStrictDeprecationMode(true);
return builder.build();
}
private final EqlSpec spec;
public EsEQLCorrectnessIT(EqlSpec spec) {
this.spec = spec;
}
private RestHighLevelClient highLevelClient() {
if (highLevelClient == null) {
highLevelClient = new RestHighLevelClient(client(), ignore -> {}, Collections.emptyList()) {
};
}
return highLevelClient;
}
@ParametersFactory(shuffle = false, argumentFormatting = PARAM_FORMATTING)
public static Iterable<Object[]> parameters() throws Exception {
Collection<EqlSpec> specs;
try (InputStream is = EsEQLCorrectnessIT.class.getClassLoader().getResourceAsStream(QUERIES_FILENAME)) {
specs = EqlSpecLoader.readFromStream(is);
}
assertFalse("Found 0 queries for testing", specs.isEmpty());
List<Object[]> params = new ArrayList<>(specs.size());
for (EqlSpec spec : specs) {
params.add(new Object[] { spec });
}
return params;
}
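// Executes the EQL query of the spec and verifies that the sequence count and the event ids of every sequence
// match the expectations produced by Python EQL.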
// To enable test of subqueries (filtering) results: -Dtests.eql_correctness_debug=true
public void test() throws Exception {
boolean debugMode = Boolean.parseBoolean(System.getProperty("tests.eql_correctness_debug", "false"));
int queryNo = spec.queryNo();
if (debugMode) {
for (int i = 0; i < spec.filters().length; i++) {
String filterQuery = spec.filters()[i];
EqlSearchRequest eqlSearchRequest = new EqlSearchRequest(CFG.getProperty("index_name"), filterQuery);
eqlSearchRequest.eventCategoryField("event_type");
eqlSearchRequest.size(100000);
EqlSearchResponse response = highLevelClient().eql().search(eqlSearchRequest, COMMON_REQUEST_OPTIONS);
assertEquals(
"Failed to match filter counts for query No: " + queryNo + " filterCount: " + i,
spec.filterCounts()[i],
response.hits().events().size()
);
}
}
EqlSearchRequest eqlSearchRequest = new EqlSearchRequest(CFG.getProperty("index_name"), spec.query());
eqlSearchRequest.eventCategoryField("event_type");
eqlSearchRequest.tiebreakerField("serial_id");
eqlSearchRequest.size(Integer.parseInt(CFG.getProperty("size")));
eqlSearchRequest.fetchSize(Integer.parseInt(CFG.getProperty("fetch_size")));
EqlSearchResponse response = highLevelClient().eql().search(eqlSearchRequest, RequestOptions.DEFAULT);
long responseTime = response.took();
LOGGER.info("QueryNo: {}, took: {}ms", queryNo, responseTime);
totalTime += responseTime;
assertEquals(
"Failed to match sequence count for query No: " + queryNo + " : " + spec.query() + System.lineSeparator(),
spec.seqCount(),
response.hits().sequences().size()
);
int expectedEventIdIdx = 0;
for (EqlSearchResponse.Sequence seq : response.hits().sequences()) {
for (EqlSearchResponse.Event event : seq.events()) {
assertEquals(
"Failed to match event ids for query No: " + queryNo + " : " + spec.query() + System.lineSeparator(),
spec.expectedEventIds()[expectedEventIdIdx++],
((Integer) event.sourceAsMap().get("serial_id")).longValue()
);
}
}
}
}


@@ -0,0 +1,14 @@
#
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License;
# you may not use this file except in compliance with the Elastic License.
#
index_name=mitre
fetch_size=1000
size=2000
gcs_repo_name=eql_correctness_gcs_repo
gcs_snapshot_name=mitre-snapshot
gcs_bucket_name=matriv-gcs
gcs_base_path=mitre-data
gcs_client_name=eql_test

File diff suppressed because one or more lines are too long