SQL: fix COUNT DISTINCT filtering (#37176)
* Use `_count` aggregation value only for not-DISTINCT COUNT function calls * COUNT DISTINCT will use the _exact_ version of a field (the `keyword` sub-field for example), if there is one
This commit is contained in:
parent
b0665963e8
commit
3fad9d25f6
|
@ -110,6 +110,8 @@ aggCountWithAlias
|
|||
SELECT gender g, COUNT(*) c FROM "test_emp" GROUP BY g ORDER BY gender;
|
||||
countDistinct
|
||||
SELECT COUNT(DISTINCT hire_date) AS count FROM test_emp;
|
||||
countDistinctAndCountSimpleWithAlias
|
||||
SELECT COUNT(*) cnt, COUNT(DISTINCT first_name) as names, gender FROM test_emp GROUP BY gender ORDER BY gender;
|
||||
|
||||
aggCountAliasAndWhereClauseMultiGroupBy
|
||||
SELECT gender g, languages l, COUNT(*) c FROM "test_emp" WHERE emp_no < 10020 GROUP BY gender, languages ORDER BY gender, languages;
|
||||
|
@ -121,6 +123,8 @@ aggCountWithAliasMultiGroupBy
|
|||
SELECT gender g, languages l, COUNT(*) c FROM "test_emp" GROUP BY g, l ORDER BY gender, languages;
|
||||
aggCountWithAliasMultiGroupByDifferentOrder
|
||||
SELECT gender g, languages l, COUNT(*) c FROM "test_emp" GROUP BY g, l ORDER BY languages ASC, gender DESC;
|
||||
aggCountDistinctWithAliasAndGroupBy
|
||||
SELECT COUNT(*) cnt, COUNT(DISTINCT first_name) as names, gender FROM test_emp GROUP BY gender ORDER BY gender;
|
||||
|
||||
|
||||
|
||||
|
@ -161,12 +165,20 @@ aggCountStarAndHavingBetween
|
|||
SELECT gender g, COUNT(*) c FROM "test_emp" GROUP BY g HAVING c BETWEEN 10 AND 70 ORDER BY gender ASC;
|
||||
aggCountStarAndHavingBetweenWithLimit
|
||||
SELECT gender g, COUNT(*) c FROM "test_emp" GROUP BY g HAVING c BETWEEN 10 AND 70 ORDER BY gender LIMIT 1;
|
||||
aggCountDistinctAndHavingBetweenWithLimit
|
||||
SELECT gender g, COUNT(DISTINCT first_name) c FROM "test_emp" GROUP BY g HAVING c BETWEEN 40 AND 50 ORDER BY gender LIMIT 1;
|
||||
aggCountOnColumnAndHavingOnAliasAndFunction
|
||||
SELECT gender g, COUNT(gender) c FROM "test_emp" GROUP BY g HAVING c > 10 AND COUNT(gender) < 70 ORDER BY gender;
|
||||
aggCountOnColumnAndHavingOnAliasAndFunctionWildcard -> COUNT(*/1) vs COUNT(gender)
|
||||
SELECT gender g, COUNT(gender) c FROM "test_emp" GROUP BY g HAVING c > 10 AND COUNT(*) < 70 ORDER BY gender;
|
||||
aggCountOnColumnAndHavingOnAliasAndFunctionConstant
|
||||
SELECT gender g, COUNT(gender) c FROM "test_emp" GROUP BY g HAVING c > 10 AND COUNT(1) < 70 ORDER BY gender;
|
||||
aggDistinctCountWithAliasAndHaving
|
||||
SELECT COUNT(*) c, COUNT(DISTINCT first_name) AS names, gender FROM test_emp GROUP BY gender HAVING names > 40 ORDER BY gender;
|
||||
aggDistinctCountWithFunctionWildcardAndHaving
|
||||
SELECT COUNT(*) c, COUNT(DISTINCT first_name) AS names, gender FROM test_emp GROUP BY gender HAVING names < 50 AND c < 50 ORDER BY gender;
|
||||
aggDistinctCountWithFunctionWildcardAndFunctionConstantAndHaving
|
||||
SELECT COUNT(*) c, COUNT(DISTINCT first_name) AS names, COUNT(123) AS c123, gender FROM test_emp GROUP BY gender HAVING names < 50 AND c < 50 AND c123 < 50 ORDER BY gender;
|
||||
|
||||
aggCountAndHavingMultiGroupBy
|
||||
SELECT gender g, languages l, COUNT(*) c FROM "test_emp" GROUP BY g, l HAVING COUNT(*) > 10 ORDER BY gender, l;
|
||||
|
@ -195,6 +207,8 @@ aggCountOnColumnAndHavingOnAliasAndFunctionWildcardMultiGroupBy -> COUNT(*/1) vs
|
|||
SELECT gender g, languages l, COUNT(gender) c FROM "test_emp" GROUP BY g, l HAVING c > 10 AND COUNT(*) < 70 ORDER BY gender, languages;
|
||||
aggCountOnColumnAndHavingOnAliasAndFunctionConstantMultiGroupBy
|
||||
SELECT gender g, languages l, COUNT(gender) c FROM "test_emp" GROUP BY g, l HAVING c > 10 AND COUNT(1) < 70 ORDER BY gender, languages;
|
||||
aggCountOnDistinctColumnAndHavingOnAliasAndFunctionConstantMultiGroupBy
|
||||
SELECT gender g, languages l, COUNT(DISTINCT last_name) c FROM "test_emp" GROUP BY g, l HAVING c > 5 AND COUNT(1) < 70 ORDER BY gender, languages;
|
||||
|
||||
|
||||
// MIN
|
||||
|
|
|
@ -61,6 +61,9 @@ public class Count extends AggregateFunction {
|
|||
|
||||
@Override
|
||||
public AggregateFunctionAttribute toAttribute() {
|
||||
if (!distinct()) {
|
||||
return new AggregateFunctionAttribute(source(), name(), dataType(), id(), functionId(), "_count");
|
||||
}
|
||||
return super.toAttribute();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -429,7 +429,13 @@ final class QueryTranslator {
|
|||
static String field(AggregateFunction af) {
|
||||
Expression arg = af.field();
|
||||
if (arg instanceof FieldAttribute) {
|
||||
return ((FieldAttribute) arg).name();
|
||||
FieldAttribute field = (FieldAttribute) arg;
|
||||
// COUNT(DISTINCT) uses cardinality aggregation which works on exact values (not changed by analyzers or normalizers)
|
||||
if (af instanceof Count && ((Count) af).distinct()) {
|
||||
// use the `keyword` version of the field, if there is one
|
||||
return field.isInexact() ? field.exactAttribute().name() : field.name();
|
||||
}
|
||||
return field.name();
|
||||
}
|
||||
if (arg instanceof Literal) {
|
||||
return String.valueOf(((Literal) arg).value());
|
||||
|
|
|
@ -5,6 +5,8 @@
|
|||
*/
|
||||
package org.elasticsearch.xpack.sql.planner;
|
||||
|
||||
import org.elasticsearch.search.aggregations.AggregationBuilder;
|
||||
import org.elasticsearch.search.aggregations.metrics.CardinalityAggregationBuilder;
|
||||
import org.elasticsearch.test.ESTestCase;
|
||||
import org.elasticsearch.xpack.sql.SqlIllegalArgumentException;
|
||||
import org.elasticsearch.xpack.sql.TestUtils;
|
||||
|
@ -19,11 +21,14 @@ import org.elasticsearch.xpack.sql.expression.function.FunctionRegistry;
|
|||
import org.elasticsearch.xpack.sql.expression.function.grouping.Histogram;
|
||||
import org.elasticsearch.xpack.sql.expression.function.scalar.math.MathProcessor.MathOperation;
|
||||
import org.elasticsearch.xpack.sql.expression.gen.script.ScriptTemplate;
|
||||
import org.elasticsearch.xpack.sql.optimizer.Optimizer;
|
||||
import org.elasticsearch.xpack.sql.parser.SqlParser;
|
||||
import org.elasticsearch.xpack.sql.plan.logical.Aggregate;
|
||||
import org.elasticsearch.xpack.sql.plan.logical.Filter;
|
||||
import org.elasticsearch.xpack.sql.plan.logical.LogicalPlan;
|
||||
import org.elasticsearch.xpack.sql.plan.logical.Project;
|
||||
import org.elasticsearch.xpack.sql.plan.physical.EsQueryExec;
|
||||
import org.elasticsearch.xpack.sql.plan.physical.PhysicalPlan;
|
||||
import org.elasticsearch.xpack.sql.planner.QueryTranslator.QueryTranslation;
|
||||
import org.elasticsearch.xpack.sql.querydsl.agg.AggFilter;
|
||||
import org.elasticsearch.xpack.sql.querydsl.query.ExistsQuery;
|
||||
|
@ -41,6 +46,7 @@ import org.elasticsearch.xpack.sql.util.DateUtils;
|
|||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
|
@ -55,6 +61,8 @@ public class QueryTranslatorTests extends ESTestCase {
|
|||
|
||||
private static SqlParser parser;
|
||||
private static Analyzer analyzer;
|
||||
private static Optimizer optimizer;
|
||||
private static Planner planner;
|
||||
|
||||
@BeforeClass
|
||||
public static void init() {
|
||||
|
@ -64,6 +72,8 @@ public class QueryTranslatorTests extends ESTestCase {
|
|||
EsIndex test = new EsIndex("test", mapping);
|
||||
IndexResolution getIndexResult = IndexResolution.valid(test);
|
||||
analyzer = new Analyzer(TestUtils.TEST_CFG, new FunctionRegistry(), getIndexResult, new Verifier(new Metrics()));
|
||||
optimizer = new Optimizer();
|
||||
planner = new Planner();
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
|
@ -76,6 +86,10 @@ public class QueryTranslatorTests extends ESTestCase {
|
|||
return analyzer.analyze(parser.createStatement(sql), true);
|
||||
}
|
||||
|
||||
private PhysicalPlan optimizeAndPlan(String sql) {
|
||||
return planner.plan(optimizer.optimize(plan(sql)), true);
|
||||
}
|
||||
|
||||
public void testTermEqualityAnalyzer() {
|
||||
LogicalPlan p = plan("SELECT some.string FROM test WHERE some.string = 'value'");
|
||||
assertTrue(p instanceof Project);
|
||||
|
@ -433,6 +447,7 @@ public class QueryTranslatorTests extends ESTestCase {
|
|||
scriptTemplate.toString());
|
||||
assertEquals("[{v=int}, {v=10}]", scriptTemplate.params().toString());
|
||||
}
|
||||
|
||||
public void testGroupByDateHistogram() {
|
||||
LogicalPlan p = plan("SELECT MAX(int) FROM test GROUP BY HISTOGRAM(int, 1000)");
|
||||
assertTrue(p instanceof Aggregate);
|
||||
|
@ -448,7 +463,6 @@ public class QueryTranslatorTests extends ESTestCase {
|
|||
assertEquals(DataType.INTEGER, field.dataType());
|
||||
}
|
||||
|
||||
|
||||
public void testGroupByHistogram() {
|
||||
LogicalPlan p = plan("SELECT MAX(int) FROM test GROUP BY HISTOGRAM(date, INTERVAL 2 YEARS)");
|
||||
assertTrue(p instanceof Aggregate);
|
||||
|
@ -463,4 +477,23 @@ public class QueryTranslatorTests extends ESTestCase {
|
|||
assertEquals(FieldAttribute.class, field.getClass());
|
||||
assertEquals(DataType.DATE, field.dataType());
|
||||
}
|
||||
|
||||
public void testCountDistinctCardinalityFolder() {
|
||||
PhysicalPlan p = optimizeAndPlan("SELECT COUNT(DISTINCT keyword) cnt FROM test GROUP BY bool HAVING cnt = 0");
|
||||
assertEquals(EsQueryExec.class, p.getClass());
|
||||
EsQueryExec ee = (EsQueryExec) p;
|
||||
assertEquals(1, ee.output().size());
|
||||
assertThat(ee.output().get(0).toString(), startsWith("cnt{a->"));
|
||||
|
||||
Collection<AggregationBuilder> subAggs = ee.queryContainer().aggs().asAggBuilder().getSubAggregations();
|
||||
assertEquals(1, subAggs.size());
|
||||
assertTrue(subAggs.toArray()[0] instanceof CardinalityAggregationBuilder);
|
||||
|
||||
CardinalityAggregationBuilder cardinalityAgg = (CardinalityAggregationBuilder) subAggs.toArray()[0];
|
||||
assertEquals("keyword", cardinalityAgg.field());
|
||||
assertThat(ee.queryContainer().aggs().asAggBuilder().toString().replaceAll("\\s+", ""),
|
||||
endsWith("{\"buckets_path\":{\"a0\":\"" + cardinalityAgg.getName() +"\"},\"script\":{"
|
||||
+ "\"source\":\"InternalSqlScriptUtils.nullSafeFilter(InternalSqlScriptUtils.eq(params.a0,params.v0))\","
|
||||
+ "\"lang\":\"painless\",\"params\":{\"v0\":0}},\"gap_policy\":\"skip\"}}}}}"));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue