SQL: Enable aggregations to create a separate bucket for missing values (#32832)
Enable aggregations to create a separate bucket for missing values.
This commit is contained in:
parent
c41c614527
commit
3d9ca4baee
|
@ -25,7 +25,8 @@ public class GroupByColumnKey extends GroupByKey {
|
|||
public TermsValuesSourceBuilder asValueSource() {
|
||||
return new TermsValuesSourceBuilder(id())
|
||||
.field(fieldName())
|
||||
.order(direction().asOrder());
|
||||
.order(direction().asOrder())
|
||||
.missingBucket(true);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -44,7 +44,8 @@ public class GroupByDateKey extends GroupByKey {
|
|||
return new DateHistogramValuesSourceBuilder(id())
|
||||
.field(fieldName())
|
||||
.dateHistogramInterval(new DateHistogramInterval(interval))
|
||||
.timeZone(DateTimeZone.forTimeZone(timeZone));
|
||||
.timeZone(DateTimeZone.forTimeZone(timeZone))
|
||||
.missingBucket(true);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -36,7 +36,8 @@ public class GroupByScriptKey extends GroupByKey {
|
|||
public TermsValuesSourceBuilder asValueSource() {
|
||||
TermsValuesSourceBuilder builder = new TermsValuesSourceBuilder(id())
|
||||
.script(script.toPainless())
|
||||
.order(direction().asOrder());
|
||||
.order(direction().asOrder())
|
||||
.missingBucket(true);
|
||||
|
||||
if (script.outputType().isNumeric()) {
|
||||
builder.valueType(ValueType.NUMBER);
|
||||
|
|
|
@ -42,14 +42,15 @@ public class DataLoader {
|
|||
}
|
||||
|
||||
protected static void loadEmpDatasetIntoEs(RestClient client) throws Exception {
|
||||
loadEmpDatasetIntoEs(client, "test_emp");
|
||||
loadEmpDatasetIntoEs(client, "test_emp_copy");
|
||||
loadEmpDatasetIntoEs(client, "test_emp", "employees");
|
||||
loadEmpDatasetIntoEs(client, "test_emp_copy", "employees");
|
||||
loadEmpDatasetIntoEs(client, "test_emp_with_nulls", "employees_with_nulls");
|
||||
makeAlias(client, "test_alias", "test_emp", "test_emp_copy");
|
||||
makeAlias(client, "test_alias_emp", "test_emp", "test_emp_copy");
|
||||
}
|
||||
|
||||
public static void loadDocsDatasetIntoEs(RestClient client) throws Exception {
|
||||
loadEmpDatasetIntoEs(client, "emp");
|
||||
loadEmpDatasetIntoEs(client, "emp", "employees");
|
||||
loadLibDatasetIntoEs(client, "library");
|
||||
makeAlias(client, "employees", "emp");
|
||||
}
|
||||
|
@ -62,7 +63,7 @@ public class DataLoader {
|
|||
.endObject();
|
||||
}
|
||||
|
||||
protected static void loadEmpDatasetIntoEs(RestClient client, String index) throws Exception {
|
||||
protected static void loadEmpDatasetIntoEs(RestClient client, String index, String fileName) throws Exception {
|
||||
Request request = new Request("PUT", "/" + index);
|
||||
XContentBuilder createIndex = JsonXContent.contentBuilder().startObject();
|
||||
createIndex.startObject("settings");
|
||||
|
@ -129,15 +130,18 @@ public class DataLoader {
|
|||
request = new Request("POST", "/" + index + "/emp/_bulk");
|
||||
request.addParameter("refresh", "true");
|
||||
StringBuilder bulk = new StringBuilder();
|
||||
csvToLines("employees", (titles, fields) -> {
|
||||
csvToLines(fileName, (titles, fields) -> {
|
||||
bulk.append("{\"index\":{}}\n");
|
||||
bulk.append('{');
|
||||
String emp_no = fields.get(1);
|
||||
for (int f = 0; f < fields.size(); f++) {
|
||||
if (f != 0) {
|
||||
bulk.append(',');
|
||||
// an empty value in the csv file is treated as 'null', thus skipping it in the bulk request
|
||||
if (fields.get(f).trim().length() > 0) {
|
||||
if (f != 0) {
|
||||
bulk.append(',');
|
||||
}
|
||||
bulk.append('"').append(titles.get(f)).append("\":\"").append(fields.get(f)).append('"');
|
||||
}
|
||||
bulk.append('"').append(titles.get(f)).append("\":\"").append(fields.get(f)).append('"');
|
||||
}
|
||||
// append department
|
||||
List<List<String>> list = dep_emp.get(emp_no);
|
||||
|
|
|
@ -25,7 +25,10 @@ public abstract class SqlSpecTestCase extends SpecBaseIntegrationTestCase {
|
|||
private String query;
|
||||
|
||||
@ClassRule
|
||||
public static LocalH2 H2 = new LocalH2((c) -> c.createStatement().execute("RUNSCRIPT FROM 'classpath:/setup_test_emp.sql'"));
|
||||
public static LocalH2 H2 = new LocalH2((c) -> {
|
||||
c.createStatement().execute("RUNSCRIPT FROM 'classpath:/setup_test_emp.sql'");
|
||||
c.createStatement().execute("RUNSCRIPT FROM 'classpath:/setup_test_emp_with_nulls.sql'");
|
||||
});
|
||||
|
||||
@ParametersFactory(argumentFormatting = PARAM_FORMATTING)
|
||||
public static List<Object[]> readScriptSpec() throws Exception {
|
||||
|
@ -39,6 +42,7 @@ public abstract class SqlSpecTestCase extends SpecBaseIntegrationTestCase {
|
|||
tests.addAll(readScriptSpec("/arithmetic.sql-spec", parser));
|
||||
tests.addAll(readScriptSpec("/string-functions.sql-spec", parser));
|
||||
tests.addAll(readScriptSpec("/case-functions.sql-spec", parser));
|
||||
tests.addAll(readScriptSpec("/agg_nulls.sql-spec", parser));
|
||||
return tests;
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,14 @@
|
|||
selectGenderWithNullsAndGroupByGender
|
||||
SELECT gender, COUNT(*) count FROM test_emp_with_nulls GROUP BY gender ORDER BY gender;
|
||||
selectFirstNameWithNullsAndGroupByFirstName
|
||||
SELECT first_name FROM test_emp_with_nulls GROUP BY first_name ORDER BY first_name;
|
||||
selectCountWhereIsNull
|
||||
SELECT COUNT(*) count FROM test_emp_with_nulls WHERE first_name IS NULL;
|
||||
selectLanguagesCountWithNullsAndGroupByLanguage
|
||||
SELECT languages l, COUNT(*) c FROM test_emp_with_nulls GROUP BY languages ORDER BY languages;
|
||||
selectHireDateGroupByHireDate
|
||||
SELECT hire_date HD, COUNT(*) c FROM test_emp_with_nulls GROUP BY hire_date ORDER BY hire_date DESC;
|
||||
selectHireDateGroupByHireDate
|
||||
SELECT hire_date HD, COUNT(*) c FROM test_emp_with_nulls GROUP BY hire_date ORDER BY hire_date DESC;
|
||||
selectSalaryGroupBySalary
|
||||
SELECT salary, COUNT(*) c FROM test_emp_with_nulls GROUP BY salary ORDER BY salary DESC;
|
|
@ -86,6 +86,7 @@ test_alias | ALIAS
|
|||
test_alias_emp | ALIAS
|
||||
test_emp | BASE TABLE
|
||||
test_emp_copy | BASE TABLE
|
||||
test_emp_with_nulls | BASE TABLE
|
||||
;
|
||||
|
||||
testGroupByOnAlias
|
||||
|
@ -98,10 +99,10 @@ F | 10099.28
|
|||
;
|
||||
|
||||
testGroupByOnPattern
|
||||
SELECT gender, PERCENTILE(emp_no, 97) p1 FROM test_* GROUP BY gender;
|
||||
SELECT gender, PERCENTILE(emp_no, 97) p1 FROM test_* WHERE gender is NOT NULL GROUP BY gender;
|
||||
|
||||
gender:s | p1:d
|
||||
|
||||
F | 10099.28
|
||||
M | 10095.75
|
||||
F | 10099.32
|
||||
M | 10095.98
|
||||
;
|
|
@ -0,0 +1,101 @@
|
|||
birth_date,emp_no,first_name,gender,hire_date,languages,last_name,salary
|
||||
1953-09-02T00:00:00Z,10001,Georgi,,1986-06-26T00:00:00Z,2,Facello,57305
|
||||
1964-06-02T00:00:00Z,10002,Bezalel,,1985-11-21T00:00:00Z,5,Simmel,56371
|
||||
1959-12-03T00:00:00Z,10003,Parto,,1986-08-28T00:00:00Z,4,Bamford,61805
|
||||
1954-05-01T00:00:00Z,10004,Chirstian,,1986-12-01T00:00:00Z,5,Koblick,36174
|
||||
1955-01-21T00:00:00Z,10005,Kyoichi,,1989-09-12T00:00:00Z,1,Maliniak,63528
|
||||
1953-04-20T00:00:00Z,10006,Anneke,,1989-06-02T00:00:00Z,3,Preusig,60335
|
||||
1957-05-23T00:00:00Z,10007,Tzvetan,,1989-02-10T00:00:00Z,4,Zielinski,74572
|
||||
1958-02-19T00:00:00Z,10008,Saniya,,1994-09-15T00:00:00Z,2,Kalloufi,43906
|
||||
1952-04-19T00:00:00Z,10009,Sumant,,1985-02-18T00:00:00Z,1,Peac,66174
|
||||
1963-06-01T00:00:00Z,10010,Duangkaew,,1989-08-24T00:00:00Z,4,Piveteau,45797
|
||||
1953-11-07T00:00:00Z,10011,Mary,F,1990-01-22T00:00:00Z,5,Sluis,31120
|
||||
1960-10-04T00:00:00Z,10012,Patricio,M,1992-12-18T00:00:00Z,5,Bridgland,48942
|
||||
1963-06-07T00:00:00Z,10013,Eberhardt,M,1985-10-20T00:00:00Z,1,Terkki,48735
|
||||
1956-02-12T00:00:00Z,10014,Berni,M,1987-03-11T00:00:00Z,5,Genin,37137
|
||||
1959-08-19T00:00:00Z,10015,Guoxiang,M,1987-07-02T00:00:00Z,5,Nooteboom,25324
|
||||
1961-05-02T00:00:00Z,10016,Kazuhito,M,1995-01-27T00:00:00Z,2,Cappelletti,61358
|
||||
1958-07-06T00:00:00Z,10017,Cristinel,F,1993-08-03T00:00:00Z,2,Bouloucos,58715
|
||||
1954-06-19T00:00:00Z,10018,Kazuhide,F,1993-08-03T00:00:00Z,2,Peha,56760
|
||||
1953-01-23T00:00:00Z,10019,Lillian,M,1993-08-03T00:00:00Z,1,Haddadi,73717
|
||||
1952-12-24T00:00:00Z,10020,,M,1991-01-26T00:00:00Z,3,Warwick,40031
|
||||
1960-02-20T00:00:00Z,10021,,M,1989-12-17T00:00:00Z,5,Erde,60408
|
||||
1952-07-08T00:00:00Z,10022,,M,1995-08-22T00:00:00Z,3,Famili,48233
|
||||
1953-09-29T00:00:00Z,10023,,F,1989-12-17T00:00:00Z,2,Montemayor,47896
|
||||
1958-09-05T00:00:00Z,10024,,F,1997-05-19T00:00:00Z,3,Pettey,64675
|
||||
1958-10-31T00:00:00Z,10025,Prasadram,M,1987-08-17T00:00:00Z,5,Heyers,47411
|
||||
1953-04-03T00:00:00Z,10026,Yongqiao,M,1995-03-20T00:00:00Z,3,Berztiss,28336
|
||||
1962-07-10T00:00:00Z,10027,Divier,F,1989-07-07T00:00:00Z,5,Reistad,73851
|
||||
1963-11-26T00:00:00Z,10028,Domenick,M,1991-10-22T00:00:00Z,1,Tempesti,39356
|
||||
1956-12-13T00:00:00Z,10029,Otmar,M,1985-11-20T00:00:00Z,,Herbst,74999
|
||||
1958-07-14T00:00:00Z,10030,Elvis,M,1994-02-17T00:00:00Z,,Demeyer,67492
|
||||
1959-01-27T00:00:00Z,10031,Karsten,M,1994-02-17T00:00:00Z,,Joslin,37716
|
||||
1960-08-09T00:00:00Z,10032,Jeong,F,1990-06-20T00:00:00Z,,Reistad,62233
|
||||
1956-11-14T00:00:00Z,10033,Arif,M,1987-03-18T00:00:00Z,,Merlo,70011
|
||||
1962-12-29T00:00:00Z,10034,Bader,M,1988-09-05T00:00:00Z,,Swan,39878
|
||||
1953-02-08T00:00:00Z,10035,Alain,M,1988-09-05T00:00:00Z,,Chappelet,25945
|
||||
1959-08-10T00:00:00Z,10036,Adamantios,M,1992-01-03T00:00:00Z,,Portugali,60781
|
||||
1963-07-22T00:00:00Z,10037,Pradeep,M,1990-12-05T00:00:00Z,,Makrucki,37691
|
||||
1960-07-20T00:00:00Z,10038,Huan,M,1989-09-20T00:00:00Z,,Lortz,35222
|
||||
1959-10-01T00:00:00Z,10039,Alejandro,M,1988-01-19T00:00:00Z,,Brender,36051
|
||||
1959-09-13T00:00:00Z,10040,Weiyi,F,1993-02-14T00:00:00Z,,Meriste,37112
|
||||
1959-08-27T00:00:00Z,10041,Uri,F,1989-11-12T00:00:00Z,1,Lenart,56415
|
||||
1956-02-26T00:00:00Z,10042,Magy,F,1993-03-21T00:00:00Z,3,Stamatiou,30404
|
||||
1960-09-19T00:00:00Z,10043,Yishay,M,1990-10-20T00:00:00Z,1,Tzvieli,34341
|
||||
1961-09-21T00:00:00Z,10044,Mingsen,F,1994-05-21T00:00:00Z,1,Casley,39728
|
||||
1957-08-14T00:00:00Z,10045,Moss,M,1989-09-02T00:00:00Z,3,Shanbhogue,74970
|
||||
1960-07-23T00:00:00Z,10046,Lucien,M,1992-06-20T00:00:00Z,4,Rosenbaum,50064
|
||||
1952-06-29T00:00:00Z,10047,Zvonko,M,1989-03-31T00:00:00Z,4,Nyanchama,42716
|
||||
1963-07-11T00:00:00Z,10048,Florian,M,1985-02-24T00:00:00Z,3,Syrotiuk,26436
|
||||
1961-04-24T00:00:00Z,10049,Basil,F,1992-05-04T00:00:00Z,5,Tramer,37853
|
||||
1958-05-21T00:00:00Z,10050,Yinghua,M,1990-12-25T00:00:00Z,2,Dredge,43026
|
||||
1953-07-28T00:00:00Z,10051,Hidefumi,M,1992-10-15T00:00:00Z,3,Caine,58121
|
||||
1961-02-26T00:00:00Z,10052,Heping,M,1988-05-21T00:00:00Z,1,Nitsch,55360
|
||||
1954-09-13T00:00:00Z,10053,Sanjiv,F,1986-02-04T00:00:00Z,3,Zschoche,54462
|
||||
1957-04-04T00:00:00Z,10054,Mayumi,M,1995-03-13T00:00:00Z,4,Schueller,65367
|
||||
1956-06-06T00:00:00Z,10055,Georgy,M,1992-04-27T00:00:00Z,5,Dredge,49281
|
||||
1961-09-01T00:00:00Z,10056,Brendon,F,1990-02-01T00:00:00Z,2,Bernini,33370
|
||||
1954-05-30T00:00:00Z,10057,Ebbe,F,1992-01-15T00:00:00Z,4,Callaway,27215
|
||||
1954-10-01T00:00:00Z,10058,Berhard,M,1987-04-13T00:00:00Z,3,McFarlin,38376
|
||||
1953-09-19T00:00:00Z,10059,Alejandro,F,1991-06-26T00:00:00Z,2,McAlpine,44307
|
||||
1961-10-15T00:00:00Z,10060,Breannda,M,1987-11-02T00:00:00Z,2,Billingsley,29175
|
||||
1962-10-19T00:00:00Z,10061,Tse,M,1985-09-17T00:00:00Z,1,Herber,49095
|
||||
1961-11-02T00:00:00Z,10062,Anoosh,M,1991-08-30T00:00:00Z,3,Peyn,65030
|
||||
1952-08-06T00:00:00Z,10063,Gino,F,1989-04-08T00:00:00Z,3,Leonhardt,52121
|
||||
1959-04-07T00:00:00Z,10064,Udi,M,1985-11-20T00:00:00Z,5,Jansch,33956
|
||||
1963-04-14T00:00:00Z,10065,Satosi,M,1988-05-18T00:00:00Z,2,Awdeh,50249
|
||||
1952-11-13T00:00:00Z,10066,Kwee,M,1986-02-26T00:00:00Z,5,Schusler,31897
|
||||
1953-01-07T00:00:00Z,10067,Claudi,M,1987-03-04T00:00:00Z,2,Stavenow,52044
|
||||
1962-11-26T00:00:00Z,10068,Charlene,M,1987-08-07T00:00:00Z,3,Brattka,28941
|
||||
1960-09-06T00:00:00Z,10069,Margareta,F,1989-11-05T00:00:00Z,5,Bierman,41933
|
||||
1955-08-20T00:00:00Z,10070,Reuven,M,1985-10-14T00:00:00Z,3,Garigliano,54329
|
||||
1958-01-21T00:00:00Z,10071,Hisao,M,1987-10-01T00:00:00Z,2,Lipner,40612
|
||||
1952-05-15T00:00:00Z,10072,Hironoby,F,1988-07-21T00:00:00Z,5,Sidou,54518
|
||||
1954-02-23T00:00:00Z,10073,Shir,M,1991-12-01T00:00:00Z,4,McClurg,32568
|
||||
1955-08-28T00:00:00Z,10074,Mokhtar,F,1990-08-13T00:00:00Z,5,Bernatsky,38992
|
||||
1960-03-09T00:00:00Z,10075,Gao,F,1987-03-19T00:00:00Z,5,Dolinsky,51956
|
||||
1952-06-13T00:00:00Z,10076,Erez,F,1985-07-09T00:00:00Z,3,Ritzmann,62405
|
||||
1964-04-18T00:00:00Z,10077,Mona,M,1990-03-02T00:00:00Z,5,Azuma,46595
|
||||
1959-12-25T00:00:00Z,10078,Danel,F,1987-05-26T00:00:00Z,2,Mondadori,69904
|
||||
1961-10-05T00:00:00Z,10079,Kshitij,F,1986-03-27T00:00:00Z,2,Gils,32263
|
||||
1957-12-03T00:00:00Z,10080,Premal,M,1985-11-19T00:00:00Z,5,Baek,52833
|
||||
1960-12-17T00:00:00Z,10081,Zhongwei,M,1986-10-30T00:00:00Z,2,Rosen,50128
|
||||
1963-09-09T00:00:00Z,10082,Parviz,M,1990-01-03T00:00:00Z,4,Lortz,49818
|
||||
1959-07-23T00:00:00Z,10083,Vishv,M,1987-03-31T00:00:00Z,1,Zockler,
|
||||
1960-05-25T00:00:00Z,10084,Tuval,M,1995-12-15T00:00:00Z,1,Kalloufi,
|
||||
1962-11-07T00:00:00Z,10085,Kenroku,M,1994-04-09T00:00:00Z,5,Malabarba,
|
||||
1962-11-19T00:00:00Z,10086,Somnath,M,1990-02-16T00:00:00Z,1,Foote,
|
||||
1959-07-23T00:00:00Z,10087,Xinglin,F,1986-09-08T00:00:00Z,5,Eugenio,
|
||||
1954-02-25T00:00:00Z,10088,Jungsoon,F,1988-09-02T00:00:00Z,5,Syrzycki,
|
||||
1963-03-21T00:00:00Z,10089,Sudharsan,F,1986-08-12T00:00:00Z,4,Flasterstein,
|
||||
1961-05-30T00:00:00Z,10090,Kendra,M,1986-03-14T00:00:00Z,2,Hofting,44956
|
||||
1955-10-04T00:00:00Z,10091,Amabile,M,1992-11-18T00:00:00Z,3,Gomatam,38645
|
||||
1964-10-18T00:00:00Z,10092,Valdiodio,F,1989-09-22T00:00:00Z,1,Niizuma,25976
|
||||
1964-06-11T00:00:00Z,10093,Sailaja,M,1996-11-05T00:00:00Z,3,Desikan,45656
|
||||
1957-05-25T00:00:00Z,10094,Arumugam,F,1987-04-18T00:00:00Z,5,Ossenbruggen,66817
|
||||
1965-01-03T00:00:00Z,10095,Hilari,M,1986-07-15T00:00:00Z,4,Morton,37702
|
||||
1954-09-16T00:00:00Z,10096,Jayson,M,1990-01-14T00:00:00Z,4,Mandell,43889
|
||||
1952-02-27T00:00:00Z,10097,Remzi,M,1990-09-15T00:00:00Z,3,Waschkowski,71165
|
||||
1961-09-23T00:00:00Z,10098,Sreekrishna,F,1985-05-13T00:00:00Z,4,Servieres,44817
|
||||
1956-05-25T00:00:00Z,10099,Valter,F,1988-10-18T00:00:00Z,2,Sullins,73578
|
||||
1953-04-21T00:00:00Z,10100,Hironobu,F,1987-09-21T00:00:00Z,4,Haraldson,68431
|
|
|
@ -0,0 +1,12 @@
|
|||
DROP TABLE IF EXISTS "test_emp_with_nulls";
|
||||
CREATE TABLE "test_emp_with_nulls" (
|
||||
"birth_date" TIMESTAMP WITH TIME ZONE,
|
||||
"emp_no" INT,
|
||||
"first_name" VARCHAR(50),
|
||||
"gender" VARCHAR(1),
|
||||
"hire_date" TIMESTAMP WITH TIME ZONE,
|
||||
"languages" TINYINT,
|
||||
"last_name" VARCHAR(50),
|
||||
"salary" INT
|
||||
)
|
||||
AS SELECT * FROM CSVREAD('classpath:/employees_with_nulls.csv');
|
Loading…
Reference in New Issue