EQL: transform query AST into queryDSL (#52432)

(cherry picked from commit 94cef29df259319dfe2a3bf92d3f1a42d7e45781)
This commit is contained in:
Costin Leau 2020-02-25 17:43:26 +02:00 committed by Costin Leau
parent 638f3e4183
commit a8911802d3
8 changed files with 462 additions and 8 deletions

View File

@ -0,0 +1,74 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.eql.execution.search;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.StoredFieldsContext;
import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
import org.elasticsearch.xpack.eql.querydsl.container.QueryContainer;
import org.elasticsearch.xpack.ql.execution.search.QlSourceBuilder;
import java.util.List;
import static java.util.Collections.singletonList;
import static org.elasticsearch.index.query.QueryBuilders.boolQuery;
public abstract class SourceGenerator {
private SourceGenerator() {}
private static final List<String> NO_STORED_FIELD = singletonList(StoredFieldsContext._NONE_);
public static SearchSourceBuilder sourceBuilder(QueryContainer container, QueryBuilder filter, Integer size) {
QueryBuilder finalQuery = null;
// add the source
if (container.query() != null) {
if (filter != null) {
finalQuery = boolQuery().must(container.query().asBuilder()).filter(filter);
} else {
finalQuery = container.query().asBuilder();
}
} else {
if (filter != null) {
finalQuery = boolQuery().filter(filter);
}
}
final SearchSourceBuilder source = new SearchSourceBuilder();
source.query(finalQuery);
QlSourceBuilder sortBuilder = new QlSourceBuilder();
// Iterate through all the columns requested, collecting the fields that
// need to be retrieved from the result documents
// NB: the sortBuilder takes care of eliminating duplicates
container.fields().forEach(f -> f.v1().collectFields(sortBuilder));
sortBuilder.build(source);
optimize(sortBuilder, source);
return source;
}
private static void optimize(QlSourceBuilder qlSource, SearchSourceBuilder builder) {
if (qlSource.noSource()) {
disableSource(builder);
}
}
private static void optimize(QueryContainer query, SearchSourceBuilder builder) {
if (query.shouldTrackHits()) {
builder.trackTotalHits(true);
}
}
private static void disableSource(SearchSourceBuilder builder) {
builder.fetchSource(FetchSourceContext.DO_NOT_FETCH_SOURCE);
if (builder.storedFields() == null) {
builder.storedFields(NO_STORED_FIELD);
}
}
}

View File

@ -77,4 +77,8 @@ public class EsQueryExec extends LeafExec {
public String nodeString() {
return nodeName() + "[" + index + "," + queryContainer + "]";
}
public QueryContainer queryContainer() {
return queryContainer;
}
}

View File

@ -6,10 +6,17 @@
package org.elasticsearch.xpack.eql.planner;
import org.elasticsearch.xpack.eql.plan.physical.EsQueryExec;
import org.elasticsearch.xpack.eql.plan.physical.FilterExec;
import org.elasticsearch.xpack.eql.plan.physical.PhysicalPlan;
import org.elasticsearch.xpack.eql.querydsl.container.QueryContainer;
import org.elasticsearch.xpack.ql.expression.Attribute;
import org.elasticsearch.xpack.ql.planner.ExpressionTranslators;
import org.elasticsearch.xpack.ql.querydsl.query.Query;
import org.elasticsearch.xpack.ql.rule.Rule;
import org.elasticsearch.xpack.ql.rule.RuleExecutor;
import static java.util.Collections.emptyList;
import java.util.Arrays;
class QueryFolder extends RuleExecutor<PhysicalPlan> {
@ -19,6 +26,59 @@ class QueryFolder extends RuleExecutor<PhysicalPlan> {
@Override
protected Iterable<RuleExecutor<PhysicalPlan>.Batch> batches() {
return emptyList();
Batch fold = new Batch("Fold queries",
new FoldFilter()
);
Batch finish = new Batch("Finish query", Limiter.ONCE,
new PlanOutputToQueryRef()
);
return Arrays.asList(fold, finish);
}
private static class FoldFilter extends FoldingRule<FilterExec> {
@Override
protected PhysicalPlan rule(FilterExec plan) {
if (plan.child() instanceof EsQueryExec) {
EsQueryExec exec = (EsQueryExec) plan.child();
QueryContainer qContainer = exec.queryContainer();
Query query = ExpressionTranslators.toQuery(plan.condition());
if (qContainer.query() != null || query != null) {
query = ExpressionTranslators.and(plan.source(), qContainer.query(), query);
}
qContainer = qContainer.with(query);
return exec.with(qContainer);
}
return plan;
}
}
private static class PlanOutputToQueryRef extends FoldingRule<EsQueryExec> {
@Override
protected PhysicalPlan rule(EsQueryExec exec) {
QueryContainer qContainer = exec.queryContainer();
for (Attribute attr : exec.output()) {
qContainer = qContainer.addColumn(attr);
}
// after all attributes have been resolved
return exec.with(qContainer);
}
}
abstract static class FoldingRule<SubPlan extends PhysicalPlan> extends Rule<SubPlan, PhysicalPlan> {
@Override
public final PhysicalPlan apply(PhysicalPlan plan) {
return plan.transformUp(this::rule, typeToken());
}
@Override
protected abstract PhysicalPlan rule(SubPlan plan);
}
}

View File

@ -0,0 +1,40 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.eql.querydsl.container;
import org.elasticsearch.xpack.ql.execution.search.FieldExtraction;
import org.elasticsearch.xpack.ql.execution.search.QlSourceBuilder;
import org.elasticsearch.xpack.ql.expression.gen.pipeline.Pipe;
public class ComputedRef implements FieldExtraction {
private final Pipe processor;
public ComputedRef(Pipe processor) {
this.processor = processor;
}
public Pipe processor() {
return processor;
}
@Override
public boolean supportedByAggsOnlyQuery() {
return processor.supportedByAggsOnlyQuery();
}
@Override
public void collectFields(QlSourceBuilder sourceBuilder) {
processor.collectFields(sourceBuilder);
}
@Override
public String toString() {
return processor + "(" + processor + ")";
}
}

View File

@ -5,9 +5,174 @@
*/
package org.elasticsearch.xpack.eql.querydsl.container;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.json.JsonXContent;
import org.elasticsearch.xpack.eql.EqlIllegalArgumentException;
import org.elasticsearch.xpack.eql.execution.search.SourceGenerator;
import org.elasticsearch.xpack.ql.execution.search.FieldExtraction;
import org.elasticsearch.xpack.ql.expression.Attribute;
import org.elasticsearch.xpack.ql.expression.AttributeMap;
import org.elasticsearch.xpack.ql.expression.Expression;
import org.elasticsearch.xpack.ql.expression.Expressions;
import org.elasticsearch.xpack.ql.expression.FieldAttribute;
import org.elasticsearch.xpack.ql.expression.gen.pipeline.ConstantInput;
import org.elasticsearch.xpack.ql.querydsl.query.Query;
import org.elasticsearch.xpack.ql.type.DataTypes;
import java.io.IOException;
import java.util.List;
import java.util.Objects;
import static java.util.Collections.emptyList;
import static org.elasticsearch.xpack.ql.util.CollectionUtils.combine;
public class QueryContainer {
private final Query query;
// attributes found in the tree
private final AttributeMap<Expression> attributes;
// list of fields available in the output
private final List<Tuple<FieldExtraction, String>> fields;
private final boolean trackHits;
private final boolean includeFrozen;
public QueryContainer() {
this(null, emptyList(), AttributeMap.emptyAttributeMap(), false, false);
}
private QueryContainer(Query query, List<Tuple<FieldExtraction, String>> fields, AttributeMap<Expression> attributes, boolean trackHits,
boolean includeFrozen) {
this.query = query;
this.fields = fields;
this.attributes = attributes;
this.trackHits = trackHits;
this.includeFrozen = includeFrozen;
}
public QueryContainer withFrozen() {
throw new UnsupportedOperationException();
}
public Query query() {
return query;
}
public List<Tuple<FieldExtraction, String>> fields() {
return fields;
}
public boolean shouldTrackHits() {
return trackHits;
}
public QueryContainer with(Query q) {
return new QueryContainer(q, fields, attributes, trackHits, includeFrozen);
}
public QueryContainer addColumn(Attribute attr) {
Expression expression = attributes.getOrDefault(attr, attr);
Tuple<QueryContainer, FieldExtraction> tuple = asFieldExtraction(attr);
return tuple.v1().addColumn(tuple.v2(), Expressions.id(expression));
}
private Tuple<QueryContainer, FieldExtraction> asFieldExtraction(Attribute attr) {
// resolve it Expression
Expression expression = attributes.getOrDefault(attr, attr);
if (expression instanceof FieldAttribute) {
FieldAttribute fa = (FieldAttribute) expression;
if (fa.isNested()) {
throw new UnsupportedOperationException("Nested not yet supported");
}
return new Tuple<>(this, topHitFieldRef(fa));
}
if (expression.foldable()) {
return new Tuple<>(this, new ComputedRef(new ConstantInput(expression.source(), expression, expression.fold())));
}
throw new EqlIllegalArgumentException("Unknown output attribute {}", attr);
}
//
// reference methods
//
private FieldExtraction topHitFieldRef(FieldAttribute fieldAttr) {
FieldAttribute actualField = fieldAttr;
FieldAttribute rootField = fieldAttr;
StringBuilder fullFieldName = new StringBuilder(fieldAttr.field().getName());
// Only if the field is not an alias (in which case it will be taken out from docvalue_fields if it's isAggregatable()),
// go up the tree of parents until a non-object (and non-nested) type of field is found and use that specific parent
// as the field to extract data from, from _source. We do it like this because sub-fields are not in the _source, only
// the root field to which those sub-fields belong to, are. Instead of "text_field.keyword_subfield" for _source extraction,
// we use "text_field", because there is no source for "keyword_subfield".
/*
* "text_field": {
* "type": "text",
* "fields": {
* "keyword_subfield": {
* "type": "keyword"
* }
* }
* }
*/
if (fieldAttr.field().isAlias() == false) {
while (actualField.parent() != null
&& actualField.parent().field().getDataType() != DataTypes.OBJECT
&& actualField.parent().field().getDataType() != DataTypes.NESTED
&& actualField.field().getDataType().hasDocValues() == false) {
actualField = actualField.parent();
}
}
while (rootField.parent() != null) {
fullFieldName.insert(0, ".").insert(0, rootField.parent().field().getName());
rootField = rootField.parent();
}
return new SearchHitFieldRef(actualField.name(), fullFieldName.toString(), fieldAttr.field().getDataType(),
fieldAttr.field().isAggregatable(), fieldAttr.field().isAlias());
}
public QueryContainer addColumn(FieldExtraction ref, String id) {
return new QueryContainer(query, combine(fields, new Tuple<>(ref, id)), attributes, trackHits, includeFrozen);
}
@Override
public int hashCode() {
return Objects.hash(query, attributes, fields, trackHits, includeFrozen);
}
@Override
public boolean equals(Object obj) {
if (this == obj) {
return true;
}
if (obj == null || getClass() != obj.getClass()) {
return false;
}
QueryContainer other = (QueryContainer) obj;
return Objects.equals(query, other.query)
&& Objects.equals(attributes, other.attributes)
&& Objects.equals(fields, other.fields)
&& Objects.equals(trackHits, other.trackHits)
&& Objects.equals(includeFrozen, other.includeFrozen);
}
@Override
public String toString() {
try (XContentBuilder builder = JsonXContent.contentBuilder()) {
builder.humanReadable(true).prettyPrint();
SourceGenerator.sourceBuilder(this, null, null).toXContent(builder, ToXContent.EMPTY_PARAMS);
return Strings.toString(builder);
} catch (IOException e) {
throw new EqlIllegalArgumentException("error rendering", e);
}
}
}

View File

@ -0,0 +1,92 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.eql.querydsl.container;
import org.elasticsearch.xpack.ql.execution.search.FieldExtraction;
import org.elasticsearch.xpack.ql.execution.search.QlSourceBuilder;
import org.elasticsearch.xpack.ql.type.DataType;
import static org.elasticsearch.xpack.ql.type.DataTypes.DATETIME;
import static org.elasticsearch.xpack.ql.type.DataTypes.KEYWORD;
// NB: this class is taken from SQL - it hasn't been ported over to QL
// since at this stage is unclear whether the whole FieldExtraction infrastructure
// needs porting or just the field extraction
public class SearchHitFieldRef implements FieldExtraction {
private final String name;
private final String fullFieldName; // path included. If field full path is a.b.c, full field name is "a.b.c" and name is "c"
private final DataType dataType;
private final boolean docValue;
private final String hitName;
public SearchHitFieldRef(String name, String fullFieldName, DataType dataType, boolean useDocValueInsteadOfSource, boolean isAlias) {
this(name, fullFieldName, dataType, useDocValueInsteadOfSource, isAlias, null);
}
public SearchHitFieldRef(String name, String fullFieldName, DataType dataType, boolean useDocValueInsteadOfSource, boolean isAlias,
String hitName) {
this.name = name;
this.fullFieldName = fullFieldName;
this.dataType = dataType;
// these field types can only be extracted from docvalue_fields (ie, values already computed by Elasticsearch)
// because, for us to be able to extract them from _source, we would need the mapping of those fields (which we don't have)
this.docValue = isAlias ? useDocValueInsteadOfSource : (hasDocValues(dataType) ? useDocValueInsteadOfSource : false);
this.hitName = hitName;
}
public String hitName() {
return hitName;
}
public String name() {
return name;
}
public String fullFieldName() {
return fullFieldName;
}
public DataType getDataType() {
return dataType;
}
public boolean useDocValue() {
return docValue;
}
@Override
public void collectFields(QlSourceBuilder sourceBuilder) {
// nested fields are handled by inner hits
if (hitName != null) {
return;
}
if (docValue) {
sourceBuilder.addDocField(name, format(dataType));
} else {
sourceBuilder.addSourceField(name);
}
}
@Override
public final boolean supportedByAggsOnlyQuery() {
return false;
}
@Override
public String toString() {
return name;
}
private static boolean hasDocValues(DataType dataType) {
return dataType == KEYWORD || dataType == DATETIME;
}
private static String format(DataType dataType) {
return dataType == DATETIME ? "epoch_millis" : null;
}
}

View File

@ -13,12 +13,14 @@ import org.elasticsearch.xpack.eql.analysis.Verifier;
import org.elasticsearch.xpack.eql.expression.function.EqlFunctionRegistry;
import org.elasticsearch.xpack.eql.optimizer.Optimizer;
import org.elasticsearch.xpack.eql.parser.EqlParser;
import org.elasticsearch.xpack.eql.plan.physical.EsQueryExec;
import org.elasticsearch.xpack.eql.plan.physical.PhysicalPlan;
import org.elasticsearch.xpack.ql.QlClientException;
import org.elasticsearch.xpack.ql.index.EsIndex;
import org.elasticsearch.xpack.ql.index.IndexResolution;
import static org.elasticsearch.xpack.ql.type.DataTypes.KEYWORD;
import static org.elasticsearch.xpack.ql.type.TypesTests.loadMapping;
import static org.hamcrest.Matchers.containsString;
public class QueryFolderTests extends ESTestCase {
@ -39,7 +41,20 @@ public class QueryFolderTests extends ESTestCase {
return plan(index, eql);
}
public void testBasicPlan() throws Exception {
expectThrows(QlClientException.class, "not yet implemented", () -> plan("process where true"));
public void testBasicPlan() {
PhysicalPlan p = plan("process where true");
assertEquals(EsQueryExec.class, p.getClass());
EsQueryExec eqe = (EsQueryExec) p;
assertEquals(22, eqe.output().size());
assertEquals(KEYWORD, eqe.output().get(0).dataType());
String query = eqe.queryContainer().toString().replaceAll("\\s+", "");
// test query term
assertThat(query, containsString("\"term\":{\"event_type\":{\"value\":\"process\""));
// test field source extraction
assertThat(query, containsString("\"_source\":{\"includes\":["));
assertThat(query, containsString("\"pid\""));
// test docvalue extraction
assertThat(query, containsString("{\"field\":\"command_line\"}"));
assertThat(query, containsString("{\"field\":\"timestamp\",\"format\":\"epoch_millis\"}"));
}
}

View File

@ -70,6 +70,10 @@ public final class ExpressionTranslators {
new Scalars()
);
public static Query toQuery(Expression e) {
return toQuery(e, new QlTranslatorHandler());
}
public static Query toQuery(Expression e, TranslatorHandler handler) {
Query translation = null;
for (ExpressionTranslator<?> translator : QUERY_TRANSLATORS) {