SQL: fix COUNT DISTINCT filtering (#37176)
* Use `_count` aggregation value only for not-DISTINCT COUNT function calls * COUNT DISTINCT will use the _exact_ version of a field (the `keyword` sub-field for example), if there is one
This commit is contained in:
parent
b0665963e8
commit
3fad9d25f6
|
@ -110,6 +110,8 @@ aggCountWithAlias
|
||||||
SELECT gender g, COUNT(*) c FROM "test_emp" GROUP BY g ORDER BY gender;
|
SELECT gender g, COUNT(*) c FROM "test_emp" GROUP BY g ORDER BY gender;
|
||||||
countDistinct
|
countDistinct
|
||||||
SELECT COUNT(DISTINCT hire_date) AS count FROM test_emp;
|
SELECT COUNT(DISTINCT hire_date) AS count FROM test_emp;
|
||||||
|
countDistinctAndCountSimpleWithAlias
|
||||||
|
SELECT COUNT(*) cnt, COUNT(DISTINCT first_name) as names, gender FROM test_emp GROUP BY gender ORDER BY gender;
|
||||||
|
|
||||||
aggCountAliasAndWhereClauseMultiGroupBy
|
aggCountAliasAndWhereClauseMultiGroupBy
|
||||||
SELECT gender g, languages l, COUNT(*) c FROM "test_emp" WHERE emp_no < 10020 GROUP BY gender, languages ORDER BY gender, languages;
|
SELECT gender g, languages l, COUNT(*) c FROM "test_emp" WHERE emp_no < 10020 GROUP BY gender, languages ORDER BY gender, languages;
|
||||||
|
@ -121,6 +123,8 @@ aggCountWithAliasMultiGroupBy
|
||||||
SELECT gender g, languages l, COUNT(*) c FROM "test_emp" GROUP BY g, l ORDER BY gender, languages;
|
SELECT gender g, languages l, COUNT(*) c FROM "test_emp" GROUP BY g, l ORDER BY gender, languages;
|
||||||
aggCountWithAliasMultiGroupByDifferentOrder
|
aggCountWithAliasMultiGroupByDifferentOrder
|
||||||
SELECT gender g, languages l, COUNT(*) c FROM "test_emp" GROUP BY g, l ORDER BY languages ASC, gender DESC;
|
SELECT gender g, languages l, COUNT(*) c FROM "test_emp" GROUP BY g, l ORDER BY languages ASC, gender DESC;
|
||||||
|
aggCountDistinctWithAliasAndGroupBy
|
||||||
|
SELECT COUNT(*) cnt, COUNT(DISTINCT first_name) as names, gender FROM test_emp GROUP BY gender ORDER BY gender;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -161,12 +165,20 @@ aggCountStarAndHavingBetween
|
||||||
SELECT gender g, COUNT(*) c FROM "test_emp" GROUP BY g HAVING c BETWEEN 10 AND 70 ORDER BY gender ASC;
|
SELECT gender g, COUNT(*) c FROM "test_emp" GROUP BY g HAVING c BETWEEN 10 AND 70 ORDER BY gender ASC;
|
||||||
aggCountStarAndHavingBetweenWithLimit
|
aggCountStarAndHavingBetweenWithLimit
|
||||||
SELECT gender g, COUNT(*) c FROM "test_emp" GROUP BY g HAVING c BETWEEN 10 AND 70 ORDER BY gender LIMIT 1;
|
SELECT gender g, COUNT(*) c FROM "test_emp" GROUP BY g HAVING c BETWEEN 10 AND 70 ORDER BY gender LIMIT 1;
|
||||||
|
aggCountDistinctAndHavingBetweenWithLimit
|
||||||
|
SELECT gender g, COUNT(DISTINCT first_name) c FROM "test_emp" GROUP BY g HAVING c BETWEEN 40 AND 50 ORDER BY gender LIMIT 1;
|
||||||
aggCountOnColumnAndHavingOnAliasAndFunction
|
aggCountOnColumnAndHavingOnAliasAndFunction
|
||||||
SELECT gender g, COUNT(gender) c FROM "test_emp" GROUP BY g HAVING c > 10 AND COUNT(gender) < 70 ORDER BY gender;
|
SELECT gender g, COUNT(gender) c FROM "test_emp" GROUP BY g HAVING c > 10 AND COUNT(gender) < 70 ORDER BY gender;
|
||||||
aggCountOnColumnAndHavingOnAliasAndFunctionWildcard -> COUNT(*/1) vs COUNT(gender)
|
aggCountOnColumnAndHavingOnAliasAndFunctionWildcard -> COUNT(*/1) vs COUNT(gender)
|
||||||
SELECT gender g, COUNT(gender) c FROM "test_emp" GROUP BY g HAVING c > 10 AND COUNT(*) < 70 ORDER BY gender;
|
SELECT gender g, COUNT(gender) c FROM "test_emp" GROUP BY g HAVING c > 10 AND COUNT(*) < 70 ORDER BY gender;
|
||||||
aggCountOnColumnAndHavingOnAliasAndFunctionConstant
|
aggCountOnColumnAndHavingOnAliasAndFunctionConstant
|
||||||
SELECT gender g, COUNT(gender) c FROM "test_emp" GROUP BY g HAVING c > 10 AND COUNT(1) < 70 ORDER BY gender;
|
SELECT gender g, COUNT(gender) c FROM "test_emp" GROUP BY g HAVING c > 10 AND COUNT(1) < 70 ORDER BY gender;
|
||||||
|
aggDistinctCountWithAliasAndHaving
|
||||||
|
SELECT COUNT(*) c, COUNT(DISTINCT first_name) AS names, gender FROM test_emp GROUP BY gender HAVING names > 40 ORDER BY gender;
|
||||||
|
aggDistinctCountWithFunctionWildcardAndHaving
|
||||||
|
SELECT COUNT(*) c, COUNT(DISTINCT first_name) AS names, gender FROM test_emp GROUP BY gender HAVING names < 50 AND c < 50 ORDER BY gender;
|
||||||
|
aggDistinctCountWithFunctionWildcardAndFunctionConstantAndHaving
|
||||||
|
SELECT COUNT(*) c, COUNT(DISTINCT first_name) AS names, COUNT(123) AS c123, gender FROM test_emp GROUP BY gender HAVING names < 50 AND c < 50 AND c123 < 50 ORDER BY gender;
|
||||||
|
|
||||||
aggCountAndHavingMultiGroupBy
|
aggCountAndHavingMultiGroupBy
|
||||||
SELECT gender g, languages l, COUNT(*) c FROM "test_emp" GROUP BY g, l HAVING COUNT(*) > 10 ORDER BY gender, l;
|
SELECT gender g, languages l, COUNT(*) c FROM "test_emp" GROUP BY g, l HAVING COUNT(*) > 10 ORDER BY gender, l;
|
||||||
|
@ -195,6 +207,8 @@ aggCountOnColumnAndHavingOnAliasAndFunctionWildcardMultiGroupBy -> COUNT(*/1) vs
|
||||||
SELECT gender g, languages l, COUNT(gender) c FROM "test_emp" GROUP BY g, l HAVING c > 10 AND COUNT(*) < 70 ORDER BY gender, languages;
|
SELECT gender g, languages l, COUNT(gender) c FROM "test_emp" GROUP BY g, l HAVING c > 10 AND COUNT(*) < 70 ORDER BY gender, languages;
|
||||||
aggCountOnColumnAndHavingOnAliasAndFunctionConstantMultiGroupBy
|
aggCountOnColumnAndHavingOnAliasAndFunctionConstantMultiGroupBy
|
||||||
SELECT gender g, languages l, COUNT(gender) c FROM "test_emp" GROUP BY g, l HAVING c > 10 AND COUNT(1) < 70 ORDER BY gender, languages;
|
SELECT gender g, languages l, COUNT(gender) c FROM "test_emp" GROUP BY g, l HAVING c > 10 AND COUNT(1) < 70 ORDER BY gender, languages;
|
||||||
|
aggCountOnDistinctColumnAndHavingOnAliasAndFunctionConstantMultiGroupBy
|
||||||
|
SELECT gender g, languages l, COUNT(DISTINCT last_name) c FROM "test_emp" GROUP BY g, l HAVING c > 5 AND COUNT(1) < 70 ORDER BY gender, languages;
|
||||||
|
|
||||||
|
|
||||||
// MIN
|
// MIN
|
||||||
|
|
|
@ -61,6 +61,9 @@ public class Count extends AggregateFunction {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public AggregateFunctionAttribute toAttribute() {
|
public AggregateFunctionAttribute toAttribute() {
|
||||||
|
if (!distinct()) {
|
||||||
return new AggregateFunctionAttribute(source(), name(), dataType(), id(), functionId(), "_count");
|
return new AggregateFunctionAttribute(source(), name(), dataType(), id(), functionId(), "_count");
|
||||||
}
|
}
|
||||||
|
return super.toAttribute();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -429,7 +429,13 @@ final class QueryTranslator {
|
||||||
static String field(AggregateFunction af) {
|
static String field(AggregateFunction af) {
|
||||||
Expression arg = af.field();
|
Expression arg = af.field();
|
||||||
if (arg instanceof FieldAttribute) {
|
if (arg instanceof FieldAttribute) {
|
||||||
return ((FieldAttribute) arg).name();
|
FieldAttribute field = (FieldAttribute) arg;
|
||||||
|
// COUNT(DISTINCT) uses cardinality aggregation which works on exact values (not changed by analyzers or normalizers)
|
||||||
|
if (af instanceof Count && ((Count) af).distinct()) {
|
||||||
|
// use the `keyword` version of the field, if there is one
|
||||||
|
return field.isInexact() ? field.exactAttribute().name() : field.name();
|
||||||
|
}
|
||||||
|
return field.name();
|
||||||
}
|
}
|
||||||
if (arg instanceof Literal) {
|
if (arg instanceof Literal) {
|
||||||
return String.valueOf(((Literal) arg).value());
|
return String.valueOf(((Literal) arg).value());
|
||||||
|
|
|
@ -5,6 +5,8 @@
|
||||||
*/
|
*/
|
||||||
package org.elasticsearch.xpack.sql.planner;
|
package org.elasticsearch.xpack.sql.planner;
|
||||||
|
|
||||||
|
import org.elasticsearch.search.aggregations.AggregationBuilder;
|
||||||
|
import org.elasticsearch.search.aggregations.metrics.CardinalityAggregationBuilder;
|
||||||
import org.elasticsearch.test.ESTestCase;
|
import org.elasticsearch.test.ESTestCase;
|
||||||
import org.elasticsearch.xpack.sql.SqlIllegalArgumentException;
|
import org.elasticsearch.xpack.sql.SqlIllegalArgumentException;
|
||||||
import org.elasticsearch.xpack.sql.TestUtils;
|
import org.elasticsearch.xpack.sql.TestUtils;
|
||||||
|
@ -19,11 +21,14 @@ import org.elasticsearch.xpack.sql.expression.function.FunctionRegistry;
|
||||||
import org.elasticsearch.xpack.sql.expression.function.grouping.Histogram;
|
import org.elasticsearch.xpack.sql.expression.function.grouping.Histogram;
|
||||||
import org.elasticsearch.xpack.sql.expression.function.scalar.math.MathProcessor.MathOperation;
|
import org.elasticsearch.xpack.sql.expression.function.scalar.math.MathProcessor.MathOperation;
|
||||||
import org.elasticsearch.xpack.sql.expression.gen.script.ScriptTemplate;
|
import org.elasticsearch.xpack.sql.expression.gen.script.ScriptTemplate;
|
||||||
|
import org.elasticsearch.xpack.sql.optimizer.Optimizer;
|
||||||
import org.elasticsearch.xpack.sql.parser.SqlParser;
|
import org.elasticsearch.xpack.sql.parser.SqlParser;
|
||||||
import org.elasticsearch.xpack.sql.plan.logical.Aggregate;
|
import org.elasticsearch.xpack.sql.plan.logical.Aggregate;
|
||||||
import org.elasticsearch.xpack.sql.plan.logical.Filter;
|
import org.elasticsearch.xpack.sql.plan.logical.Filter;
|
||||||
import org.elasticsearch.xpack.sql.plan.logical.LogicalPlan;
|
import org.elasticsearch.xpack.sql.plan.logical.LogicalPlan;
|
||||||
import org.elasticsearch.xpack.sql.plan.logical.Project;
|
import org.elasticsearch.xpack.sql.plan.logical.Project;
|
||||||
|
import org.elasticsearch.xpack.sql.plan.physical.EsQueryExec;
|
||||||
|
import org.elasticsearch.xpack.sql.plan.physical.PhysicalPlan;
|
||||||
import org.elasticsearch.xpack.sql.planner.QueryTranslator.QueryTranslation;
|
import org.elasticsearch.xpack.sql.planner.QueryTranslator.QueryTranslation;
|
||||||
import org.elasticsearch.xpack.sql.querydsl.agg.AggFilter;
|
import org.elasticsearch.xpack.sql.querydsl.agg.AggFilter;
|
||||||
import org.elasticsearch.xpack.sql.querydsl.query.ExistsQuery;
|
import org.elasticsearch.xpack.sql.querydsl.query.ExistsQuery;
|
||||||
|
@ -41,6 +46,7 @@ import org.elasticsearch.xpack.sql.util.DateUtils;
|
||||||
import org.junit.AfterClass;
|
import org.junit.AfterClass;
|
||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
|
|
||||||
|
import java.util.Collection;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
@ -55,6 +61,8 @@ public class QueryTranslatorTests extends ESTestCase {
|
||||||
|
|
||||||
private static SqlParser parser;
|
private static SqlParser parser;
|
||||||
private static Analyzer analyzer;
|
private static Analyzer analyzer;
|
||||||
|
private static Optimizer optimizer;
|
||||||
|
private static Planner planner;
|
||||||
|
|
||||||
@BeforeClass
|
@BeforeClass
|
||||||
public static void init() {
|
public static void init() {
|
||||||
|
@ -64,6 +72,8 @@ public class QueryTranslatorTests extends ESTestCase {
|
||||||
EsIndex test = new EsIndex("test", mapping);
|
EsIndex test = new EsIndex("test", mapping);
|
||||||
IndexResolution getIndexResult = IndexResolution.valid(test);
|
IndexResolution getIndexResult = IndexResolution.valid(test);
|
||||||
analyzer = new Analyzer(TestUtils.TEST_CFG, new FunctionRegistry(), getIndexResult, new Verifier(new Metrics()));
|
analyzer = new Analyzer(TestUtils.TEST_CFG, new FunctionRegistry(), getIndexResult, new Verifier(new Metrics()));
|
||||||
|
optimizer = new Optimizer();
|
||||||
|
planner = new Planner();
|
||||||
}
|
}
|
||||||
|
|
||||||
@AfterClass
|
@AfterClass
|
||||||
|
@ -76,6 +86,10 @@ public class QueryTranslatorTests extends ESTestCase {
|
||||||
return analyzer.analyze(parser.createStatement(sql), true);
|
return analyzer.analyze(parser.createStatement(sql), true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private PhysicalPlan optimizeAndPlan(String sql) {
|
||||||
|
return planner.plan(optimizer.optimize(plan(sql)), true);
|
||||||
|
}
|
||||||
|
|
||||||
public void testTermEqualityAnalyzer() {
|
public void testTermEqualityAnalyzer() {
|
||||||
LogicalPlan p = plan("SELECT some.string FROM test WHERE some.string = 'value'");
|
LogicalPlan p = plan("SELECT some.string FROM test WHERE some.string = 'value'");
|
||||||
assertTrue(p instanceof Project);
|
assertTrue(p instanceof Project);
|
||||||
|
@ -433,6 +447,7 @@ public class QueryTranslatorTests extends ESTestCase {
|
||||||
scriptTemplate.toString());
|
scriptTemplate.toString());
|
||||||
assertEquals("[{v=int}, {v=10}]", scriptTemplate.params().toString());
|
assertEquals("[{v=int}, {v=10}]", scriptTemplate.params().toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testGroupByDateHistogram() {
|
public void testGroupByDateHistogram() {
|
||||||
LogicalPlan p = plan("SELECT MAX(int) FROM test GROUP BY HISTOGRAM(int, 1000)");
|
LogicalPlan p = plan("SELECT MAX(int) FROM test GROUP BY HISTOGRAM(int, 1000)");
|
||||||
assertTrue(p instanceof Aggregate);
|
assertTrue(p instanceof Aggregate);
|
||||||
|
@ -448,7 +463,6 @@ public class QueryTranslatorTests extends ESTestCase {
|
||||||
assertEquals(DataType.INTEGER, field.dataType());
|
assertEquals(DataType.INTEGER, field.dataType());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public void testGroupByHistogram() {
|
public void testGroupByHistogram() {
|
||||||
LogicalPlan p = plan("SELECT MAX(int) FROM test GROUP BY HISTOGRAM(date, INTERVAL 2 YEARS)");
|
LogicalPlan p = plan("SELECT MAX(int) FROM test GROUP BY HISTOGRAM(date, INTERVAL 2 YEARS)");
|
||||||
assertTrue(p instanceof Aggregate);
|
assertTrue(p instanceof Aggregate);
|
||||||
|
@ -463,4 +477,23 @@ public class QueryTranslatorTests extends ESTestCase {
|
||||||
assertEquals(FieldAttribute.class, field.getClass());
|
assertEquals(FieldAttribute.class, field.getClass());
|
||||||
assertEquals(DataType.DATE, field.dataType());
|
assertEquals(DataType.DATE, field.dataType());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testCountDistinctCardinalityFolder() {
|
||||||
|
PhysicalPlan p = optimizeAndPlan("SELECT COUNT(DISTINCT keyword) cnt FROM test GROUP BY bool HAVING cnt = 0");
|
||||||
|
assertEquals(EsQueryExec.class, p.getClass());
|
||||||
|
EsQueryExec ee = (EsQueryExec) p;
|
||||||
|
assertEquals(1, ee.output().size());
|
||||||
|
assertThat(ee.output().get(0).toString(), startsWith("cnt{a->"));
|
||||||
|
|
||||||
|
Collection<AggregationBuilder> subAggs = ee.queryContainer().aggs().asAggBuilder().getSubAggregations();
|
||||||
|
assertEquals(1, subAggs.size());
|
||||||
|
assertTrue(subAggs.toArray()[0] instanceof CardinalityAggregationBuilder);
|
||||||
|
|
||||||
|
CardinalityAggregationBuilder cardinalityAgg = (CardinalityAggregationBuilder) subAggs.toArray()[0];
|
||||||
|
assertEquals("keyword", cardinalityAgg.field());
|
||||||
|
assertThat(ee.queryContainer().aggs().asAggBuilder().toString().replaceAll("\\s+", ""),
|
||||||
|
endsWith("{\"buckets_path\":{\"a0\":\"" + cardinalityAgg.getName() +"\"},\"script\":{"
|
||||||
|
+ "\"source\":\"InternalSqlScriptUtils.nullSafeFilter(InternalSqlScriptUtils.eq(params.a0,params.v0))\","
|
||||||
|
+ "\"lang\":\"painless\",\"params\":{\"v0\":0}},\"gap_policy\":\"skip\"}}}}}"));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue