From 16f5ac5bd5ea4d8ce1eef546793c90298f1a43e0 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Sat, 27 Aug 2022 07:15:47 -0700 Subject: [PATCH] json_value adjustments (#12968) * json_value adjustments changes: * native json_value expression now has optional 3rd argument to specify type, which will cast all values to the specified type * rework how JSON_VALUE is wired up in SQL. Now we are using a custom convertlet to translate JSON_VALUE(... RETURNING type) into dedicated JSON_VALUE_BIGINT, JSON_VALUE_DOUBLE, JSON_VALUE_VARCHAR, JSON_VALUE_ANY instead of using the calcite StandardConvertletTable that wraps JSON_VALUE_ANY in a CAST, so that we preserve the typing of JSON_VALUE to pass down to the native expression as the 3rd argument * fix json_value_any to be usable by humans too, coverage * fix bug * checkstyle * checkstyle * review stuff * validate that options to json_value are the supported options rather than ignore them * remove more legacy undocumented functions --- docs/misc/math-expr.md | 4 +- .../expression/NestedDataExpressions.java | 289 ++++-------- .../expression/NestedDataExpressionsTest.java | 136 ++---- .../query/expression/TestExprMacroTable.java | 2 - .../apache/druid/guice/ExpressionModule.java | 4 - .../NestedDataOperatorConversions.java | 410 ++++++++++++------ .../calcite/planner/DruidOperatorTable.java | 6 +- .../convertlet/DruidConvertletTable.java | 2 + .../calcite/CalciteNestedDataQueryTest.java | 284 +++++++----- 9 files changed, 588 insertions(+), 549 deletions(-) diff --git a/docs/misc/math-expr.md b/docs/misc/math-expr.md index 94167800c42..8fe4aba1ab4 100644 --- a/docs/misc/math-expr.md +++ b/docs/misc/math-expr.md @@ -63,7 +63,7 @@ The following built-in functions are available. |name|description| |----|-----------| -|cast|cast(expr,'LONG' or 'DOUBLE' or 'STRING' or 'LONG_ARRAY', or 'DOUBLE_ARRAY' or 'STRING_ARRAY') returns expr with specified type. exception can be thrown. 
Scalar types may be cast to array types and will take the form of a single element list (null will still be null). | +|cast|cast(expr,'LONG' or 'DOUBLE' or 'STRING' or 'ARRAY<LONG>', or 'ARRAY<DOUBLE>' or 'ARRAY<STRING>') returns expr with specified type. exception can be thrown. Scalar types may be cast to array types and will take the form of a single element list (null will still be null). | |if|if(predicate,then,else) returns 'then' if 'predicate' evaluates to a positive number, otherwise it returns 'else' | |nvl|nvl(expr,expr-for-null) returns 'expr-for-null' if 'expr' is null (or empty string for string type) | |like|like(expr, pattern[, escape]) is equivalent to SQL `expr LIKE pattern`| @@ -232,7 +232,7 @@ JSON functions provide facilities to extract, transform, and create `COMPLEX` value from `expr` using JSONPath syntax of `path` | | json_object(expr1, expr2[, expr3, expr4 ...]) | Construct a `COMPLEX` with alternating 'key' and 'value' arguments| | parse_json(expr) | Deserialize a JSON `STRING` into a `COMPLEX`. 
If the input is not a `STRING` or it is invalid JSON, this function will result in an error.| diff --git a/processing/src/main/java/org/apache/druid/query/expression/NestedDataExpressions.java b/processing/src/main/java/org/apache/druid/query/expression/NestedDataExpressions.java index 45dc5a127fa..768d5196314 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/NestedDataExpressions.java +++ b/processing/src/main/java/org/apache/druid/query/expression/NestedDataExpressions.java @@ -23,7 +23,6 @@ package org.apache.druid.query.expression; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableList; import org.apache.druid.guice.annotations.Json; import org.apache.druid.java.util.common.IAE; import org.apache.druid.math.expr.Expr; @@ -50,9 +49,9 @@ public class NestedDataExpressions ExpressionType.fromColumnType(NestedDataComplexTypeSerde.TYPE) ); - public static class StructExprMacro implements ExprMacroTable.ExprMacro + public static class JsonObjectExprMacro implements ExprMacroTable.ExprMacro { - public static final String NAME = "struct"; + public static final String NAME = "json_object"; @Override public String name() @@ -104,17 +103,6 @@ public class NestedDataExpressions } } - public static class JsonObjectExprMacro extends StructExprMacro - { - public static final String NAME = "json_object"; - - @Override - public String name() - { - return NAME; - } - } - public static class ToJsonStringExprMacro implements ExprMacroTable.ExprMacro { public static final String NAME = "to_json_string"; @@ -138,7 +126,7 @@ public class NestedDataExpressions @Override public Expr apply(List args) { - class ToJsonStringExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr + final class ToJsonStringExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr { public ToJsonStringExpr(List args) { @@ -203,7 +191,7 @@ 
public class NestedDataExpressions @Override public Expr apply(List args) { - class ParseJsonExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr + final class ParseJsonExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr { public ParseJsonExpr(List args) { @@ -278,7 +266,7 @@ public class NestedDataExpressions @Override public Expr apply(List args) { - class ParseJsonExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr + final class ParseJsonExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr { public ParseJsonExpr(List args) { @@ -327,11 +315,9 @@ public class NestedDataExpressions } } - - - public static class GetPathExprMacro implements ExprMacroTable.ExprMacro + public static class JsonValueExprMacro implements ExprMacroTable.ExprMacro { - public static final String NAME = "get_path"; + public static final String NAME = "json_value"; @Override public String name() @@ -342,39 +328,78 @@ public class NestedDataExpressions @Override public Expr apply(List args) { - final List parts = getArg1PathPartsFromLiteral(name(), args); - class GetPathExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr - { - public GetPathExpr(List args) - { - super(name(), args); + final List parts = getJsonPathPartsFromLiteral(name(), args.get(1)); + if (args.size() == 3 && args.get(2).isLiteral()) { + final ExpressionType castTo = ExpressionType.fromString((String) args.get(2).getLiteralValue()); + if (castTo == null) { + throw new IAE("Invalid output type: [%s]", args.get(2).getLiteralValue()); } + final class JsonValueCastExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr + { + public JsonValueCastExpr(List args) + { + super(name(), args); + } - @Override - public ExprEval eval(ObjectBinding bindings) - { - ExprEval input = args.get(0).eval(bindings); - return ExprEval.bestEffortOf( - NestedPathFinder.findLiteral(unwrap(input), parts) - ); - } + @Override + public ExprEval eval(ObjectBinding bindings) + { + ExprEval input = args.get(0).eval(bindings); + return 
ExprEval.bestEffortOf( + NestedPathFinder.findLiteral(unwrap(input), parts) + ).castTo(castTo); + } - @Override - public Expr visit(Shuttle shuttle) - { - List newArgs = args.stream().map(x -> x.visit(shuttle)).collect(Collectors.toList()); - return shuttle.visit(new GetPathExpr(newArgs)); - } + @Override + public Expr visit(Shuttle shuttle) + { + List newArgs = args.stream().map(x -> x.visit(shuttle)).collect(Collectors.toList()); + return shuttle.visit(new JsonValueCastExpr(newArgs)); + } - @Nullable - @Override - public ExpressionType getOutputType(InputBindingInspector inspector) - { - // we cannot infer the output type (well we could say it is 'STRING' right now because is all we support... - return null; + @Nullable + @Override + public ExpressionType getOutputType(InputBindingInspector inspector) + { + return castTo; + } } + return new JsonValueCastExpr(args); + } else { + final class JsonValueExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr + { + + public JsonValueExpr(List args) + { + super(name(), args); + } + + @Override + public ExprEval eval(ObjectBinding bindings) + { + ExprEval input = args.get(0).eval(bindings); + return ExprEval.bestEffortOf( + NestedPathFinder.findLiteral(unwrap(input), parts) + ); + } + + @Override + public Expr visit(Shuttle shuttle) + { + List newArgs = args.stream().map(x -> x.visit(shuttle)).collect(Collectors.toList()); + return shuttle.visit(new JsonValueExpr(newArgs)); + } + + @Nullable + @Override + public ExpressionType getOutputType(InputBindingInspector inspector) + { + // we cannot infer output type because there could be anything at the path, and, we lack a proper VARIANT type + return null; + } + } + return new JsonValueExpr(args); } - return new GetPathExpr(args); } } @@ -391,8 +416,8 @@ public class NestedDataExpressions @Override public Expr apply(List args) { - final List parts = getArg1JsonPathPartsFromLiteral(name(), args); - class JsonQueryExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr + final 
List parts = getJsonPathPartsFromLiteral(name(), args.get(1)); + final class JsonQueryExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr { public JsonQueryExpr(List args) { @@ -428,114 +453,6 @@ public class NestedDataExpressions } } - public static class JsonValueExprMacro implements ExprMacroTable.ExprMacro - { - public static final String NAME = "json_value"; - - @Override - public String name() - { - return NAME; - } - - @Override - public Expr apply(List args) - { - final List parts = getArg1JsonPathPartsFromLiteral(name(), args); - class JsonValueExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr - { - public JsonValueExpr(List args) - { - super(name(), args); - } - - @Override - public ExprEval eval(ObjectBinding bindings) - { - ExprEval input = args.get(0).eval(bindings); - return ExprEval.bestEffortOf( - NestedPathFinder.findLiteral(unwrap(input), parts) - ); - } - - @Override - public Expr visit(Shuttle shuttle) - { - List newArgs = args.stream().map(x -> x.visit(shuttle)).collect(Collectors.toList()); - return shuttle.visit(new JsonValueExpr(newArgs)); - } - - @Nullable - @Override - public ExpressionType getOutputType(InputBindingInspector inspector) - { - // we cannot infer the output type (well we could say it is 'STRING' right now because is all we support... 
- return null; - } - } - return new JsonValueExpr(args); - } - } - - public static class ListPathsExprMacro implements ExprMacroTable.ExprMacro - { - public static final String NAME = "list_paths"; - - @Override - public String name() - { - return NAME; - } - - @Override - public Expr apply(List args) - { - final StructuredDataProcessor processor = new StructuredDataProcessor() - { - @Override - public int processLiteralField(String fieldName, Object fieldValue) - { - // do nothing, we only want the list of fields returned by this processor - return 0; - } - }; - - class ListPathsExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr - { - public ListPathsExpr(List args) - { - super(name(), args); - } - - @Override - public ExprEval eval(ObjectBinding bindings) - { - ExprEval input = args.get(0).eval(bindings); - StructuredDataProcessor.ProcessResults info = processor.processFields(unwrap(input)); - return ExprEval.ofType( - ExpressionType.STRING_ARRAY, - ImmutableList.copyOf(info.getLiteralFields()) - ); - } - - @Override - public Expr visit(Shuttle shuttle) - { - List newArgs = args.stream().map(x -> x.visit(shuttle)).collect(Collectors.toList()); - return shuttle.visit(new ListPathsExpr(newArgs)); - } - - @Nullable - @Override - public ExpressionType getOutputType(InputBindingInspector inspector) - { - return ExpressionType.STRING_ARRAY; - } - } - return new ListPathsExpr(args); - } - } - public static class JsonPathsExprMacro implements ExprMacroTable.ExprMacro { public static final String NAME = "json_paths"; @@ -559,7 +476,7 @@ public class NestedDataExpressions } }; - class JsonPathsExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr + final class JsonPathsExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr { public JsonPathsExpr(List args) { @@ -600,9 +517,9 @@ public class NestedDataExpressions } } - public static class ListKeysExprMacro implements ExprMacroTable.ExprMacro + public static class JsonKeysExprMacro implements ExprMacroTable.ExprMacro 
{ - public static final String NAME = "list_keys"; + public static final String NAME = "json_keys"; @Override public String name() @@ -613,10 +530,10 @@ public class NestedDataExpressions @Override public Expr apply(List args) { - final List parts = getArg1PathPartsFromLiteral(name(), args); - class ListKeysExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr + final List parts = getJsonPathPartsFromLiteral(name(), args.get(1)); + final class JsonKeysExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr { - public ListKeysExpr(List args) + public JsonKeysExpr(List args) { super(name(), args); } @@ -636,7 +553,7 @@ public class NestedDataExpressions public Expr visit(Shuttle shuttle) { List newArgs = args.stream().map(x -> x.visit(shuttle)).collect(Collectors.toList()); - return shuttle.visit(new ListKeysExpr(newArgs)); + return shuttle.visit(new JsonKeysExpr(newArgs)); } @Nullable @@ -646,18 +563,7 @@ public class NestedDataExpressions return ExpressionType.STRING_ARRAY; } } - return new ListKeysExpr(args); - } - } - - public static class JsonKeysExprMacro extends ListKeysExprMacro - { - public static final String NAME = "json_keys"; - - @Override - public String name() - { - return NAME; + return new JsonKeysExpr(args); } } @@ -676,39 +582,18 @@ public class NestedDataExpressions } - static List getArg1PathPartsFromLiteral(String fnName, List args) + static List getJsonPathPartsFromLiteral(String fnName, Expr arg) { - if (!(args.get(1).isLiteral() && args.get(1).getLiteralValue() instanceof String)) { + if (!(arg.isLiteral() && arg.getLiteralValue() instanceof String)) { throw new IAE( "Function[%s] second argument [%s] must be a literal [%s] value", fnName, - args.get(1).stringify(), - ExpressionType.STRING - ); - } - final String path = (String) args.get(1).getLiteralValue(); - List parts; - try { - parts = NestedPathFinder.parseJsonPath(path); - } - catch (IllegalArgumentException iae) { - parts = NestedPathFinder.parseJqPath(path); - } - return parts; - } 
- - static List getArg1JsonPathPartsFromLiteral(String fnName, List args) - { - if (!(args.get(1).isLiteral() && args.get(1).getLiteralValue() instanceof String)) { - throw new IAE( - "Function[%s] second argument [%s] must be a literal [%s] value", - fnName, - args.get(1).stringify(), + arg.stringify(), ExpressionType.STRING ); } final List parts = NestedPathFinder.parseJsonPath( - (String) args.get(1).getLiteralValue() + (String) arg.getLiteralValue() ); return parts; } diff --git a/processing/src/test/java/org/apache/druid/query/expression/NestedDataExpressionsTest.java b/processing/src/test/java/org/apache/druid/query/expression/NestedDataExpressionsTest.java index ce495bb97c4..82415f18c65 100644 --- a/processing/src/test/java/org/apache/druid/query/expression/NestedDataExpressionsTest.java +++ b/processing/src/test/java/org/apache/druid/query/expression/NestedDataExpressionsTest.java @@ -24,6 +24,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Supplier; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import org.apache.druid.common.config.NullHandling; import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.Pair; @@ -45,10 +46,6 @@ public class NestedDataExpressionsTest extends InitializedNullHandlingTest private static final ObjectMapper JSON_MAPPER = new DefaultObjectMapper(); private static final ExprMacroTable MACRO_TABLE = new ExprMacroTable( ImmutableList.of( - new NestedDataExpressions.StructExprMacro(), - new NestedDataExpressions.GetPathExprMacro(), - new NestedDataExpressions.ListKeysExprMacro(), - new NestedDataExpressions.ListPathsExprMacro(), new NestedDataExpressions.JsonPathsExprMacro(), new NestedDataExpressions.JsonKeysExprMacro(), new NestedDataExpressions.JsonObjectExprMacro(), @@ -84,20 +81,6 @@ public class NestedDataExpressionsTest extends InitializedNullHandlingTest 
.build() ); - @Test - public void testStructExpression() - { - Expr expr = Parser.parse("struct('x',100,'y',200,'z',300)", MACRO_TABLE); - ExprEval eval = expr.eval(inputBindings); - Assert.assertEquals(NEST, eval.value()); - - expr = Parser.parse("struct('x',array('a','b','c'),'y',struct('a','hello','b','world'))", MACRO_TABLE); - eval = expr.eval(inputBindings); - // decompose because of array equals - Assert.assertArrayEquals(new Object[]{"a", "b", "c"}, (Object[]) ((Map) eval.value()).get("x")); - Assert.assertEquals(ImmutableMap.of("a", "hello", "b", "world"), ((Map) eval.value()).get("y")); - } - @Test public void testJsonObjectExpression() { @@ -112,73 +95,30 @@ public class NestedDataExpressionsTest extends InitializedNullHandlingTest Assert.assertEquals(ImmutableMap.of("a", "hello", "b", "world"), ((Map) eval.value()).get("y")); } - @Test - public void testListKeysExpression() - { - Expr expr = Parser.parse("list_keys(nest, '.')", MACRO_TABLE); - ExprEval eval = expr.eval(inputBindings); - Assert.assertEquals(ExpressionType.STRING_ARRAY, eval.type()); - Assert.assertArrayEquals(new Object[]{"x", "y", "z"}, (Object[]) eval.value()); - - - expr = Parser.parse("list_keys(nester, '.x')", MACRO_TABLE); - eval = expr.eval(inputBindings); - Assert.assertEquals(ExpressionType.STRING_ARRAY, eval.type()); - Assert.assertArrayEquals(new Object[]{"0", "1", "2"}, (Object[]) eval.value()); - - expr = Parser.parse("list_keys(nester, '.y')", MACRO_TABLE); - eval = expr.eval(inputBindings); - Assert.assertEquals(ExpressionType.STRING_ARRAY, eval.type()); - Assert.assertArrayEquals(new Object[]{"a", "b"}, (Object[]) eval.value()); - - expr = Parser.parse("list_keys(nester, '.x.a')", MACRO_TABLE); - eval = expr.eval(inputBindings); - Assert.assertNull(eval.value()); - - expr = Parser.parse("list_keys(nester, '.x.a.b')", MACRO_TABLE); - eval = expr.eval(inputBindings); - Assert.assertNull(eval.value()); - } - - @Test - public void testListPathsExpression() - { - Expr expr = 
Parser.parse("list_paths(nest)", MACRO_TABLE); - ExprEval eval = expr.eval(inputBindings); - Assert.assertEquals(ExpressionType.STRING_ARRAY, eval.type()); - Assert.assertArrayEquals(new Object[]{".\"y\"", ".\"z\"", ".\"x\""}, (Object[]) eval.value()); - - expr = Parser.parse("list_paths(nester)", MACRO_TABLE); - eval = expr.eval(inputBindings); - Assert.assertEquals(ExpressionType.STRING_ARRAY, eval.type()); - Assert.assertArrayEquals(new Object[]{".\"x\"[0]", ".\"x\"[1]", ".\"x\"[2]", ".\"y\".\"b\"", ".\"y\".\"a\""}, (Object[]) eval.value()); - - } - @Test public void testJsonKeysExpression() { - Expr expr = Parser.parse("json_keys(nest, '.')", MACRO_TABLE); + Expr expr = Parser.parse("json_keys(nest, '$.')", MACRO_TABLE); ExprEval eval = expr.eval(inputBindings); Assert.assertEquals(ExpressionType.STRING_ARRAY, eval.type()); Assert.assertArrayEquals(new Object[]{"x", "y", "z"}, (Object[]) eval.value()); - expr = Parser.parse("json_keys(nester, '.x')", MACRO_TABLE); + expr = Parser.parse("json_keys(nester, '$.x')", MACRO_TABLE); eval = expr.eval(inputBindings); Assert.assertEquals(ExpressionType.STRING_ARRAY, eval.type()); Assert.assertArrayEquals(new Object[]{"0", "1", "2"}, (Object[]) eval.value()); - expr = Parser.parse("json_keys(nester, '.y')", MACRO_TABLE); + expr = Parser.parse("json_keys(nester, '$.y')", MACRO_TABLE); eval = expr.eval(inputBindings); Assert.assertEquals(ExpressionType.STRING_ARRAY, eval.type()); Assert.assertArrayEquals(new Object[]{"a", "b"}, (Object[]) eval.value()); - expr = Parser.parse("json_keys(nester, '.x.a')", MACRO_TABLE); + expr = Parser.parse("json_keys(nester, '$.x.a')", MACRO_TABLE); eval = expr.eval(inputBindings); Assert.assertNull(eval.value()); - expr = Parser.parse("json_keys(nester, '.x.a.b')", MACRO_TABLE); + expr = Parser.parse("json_keys(nester, '$.x.a.b')", MACRO_TABLE); eval = expr.eval(inputBindings); Assert.assertNull(eval.value()); } @@ -197,45 +137,6 @@ public class NestedDataExpressionsTest extends 
InitializedNullHandlingTest Assert.assertArrayEquals(new Object[]{"$.x[0]", "$.x[1]", "$.x[2]", "$.y.b", "$.y.a"}, (Object[]) eval.value()); } - @Test - public void testGetPathExpression() - { - Expr expr = Parser.parse("get_path(nest, '.x')", MACRO_TABLE); - ExprEval eval = expr.eval(inputBindings); - Assert.assertEquals(100L, eval.value()); - Assert.assertEquals(ExpressionType.LONG, eval.type()); - - expr = Parser.parse("get_path(nester, '.x')", MACRO_TABLE); - eval = expr.eval(inputBindings); - Assert.assertNull(eval.value()); - - expr = Parser.parse("get_path(nester, '.x[1]')", MACRO_TABLE); - eval = expr.eval(inputBindings); - Assert.assertEquals("b", eval.value()); - Assert.assertEquals(ExpressionType.STRING, eval.type()); - - expr = Parser.parse("get_path(nester, '.x[23]')", MACRO_TABLE); - eval = expr.eval(inputBindings); - Assert.assertNull(eval.value()); - - expr = Parser.parse("get_path(nester, '.x[1].b')", MACRO_TABLE); - eval = expr.eval(inputBindings); - Assert.assertNull(eval.value()); - - expr = Parser.parse("get_path(nester, '.y[1]')", MACRO_TABLE); - eval = expr.eval(inputBindings); - Assert.assertNull(eval.value()); - - expr = Parser.parse("get_path(nester, '.y.a')", MACRO_TABLE); - eval = expr.eval(inputBindings); - Assert.assertEquals("hello", eval.value()); - Assert.assertEquals(ExpressionType.STRING, eval.type()); - - expr = Parser.parse("get_path(nester, '.y.a.b.c[12]')", MACRO_TABLE); - eval = expr.eval(inputBindings); - Assert.assertNull(eval.value()); - } - @Test public void testJsonValueExpression() { @@ -270,6 +171,11 @@ public class NestedDataExpressionsTest extends InitializedNullHandlingTest Assert.assertEquals("hello", eval.value()); Assert.assertEquals(ExpressionType.STRING, eval.type()); + expr = Parser.parse("json_value(nester, '$.y.a', 'LONG')", MACRO_TABLE); + eval = expr.eval(inputBindings); + Assert.assertEquals(NullHandling.defaultLongValue(), eval.value()); + Assert.assertEquals(ExpressionType.LONG, eval.type()); + expr = 
Parser.parse("json_value(nester, '$.y.a.b.c[12]')", MACRO_TABLE); eval = expr.eval(inputBindings); Assert.assertNull(eval.value()); @@ -278,6 +184,26 @@ public class NestedDataExpressionsTest extends InitializedNullHandlingTest eval = expr.eval(inputBindings); Assert.assertEquals(1234L, eval.value()); Assert.assertEquals(ExpressionType.LONG, eval.type()); + + expr = Parser.parse("json_value(long, '$', 'STRING')", MACRO_TABLE); + eval = expr.eval(inputBindings); + Assert.assertEquals("1234", eval.value()); + Assert.assertEquals(ExpressionType.STRING, eval.type()); + + expr = Parser.parse("json_value(nest, '$.x')", MACRO_TABLE); + eval = expr.eval(inputBindings); + Assert.assertEquals(100L, eval.value()); + Assert.assertEquals(ExpressionType.LONG, eval.type()); + + expr = Parser.parse("json_value(nest, '$.x', 'DOUBLE')", MACRO_TABLE); + eval = expr.eval(inputBindings); + Assert.assertEquals(100.0, eval.value()); + Assert.assertEquals(ExpressionType.DOUBLE, eval.type()); + + expr = Parser.parse("json_value(nest, '$.x', 'STRING')", MACRO_TABLE); + eval = expr.eval(inputBindings); + Assert.assertEquals("100", eval.value()); + Assert.assertEquals(ExpressionType.STRING, eval.type()); } @Test diff --git a/processing/src/test/java/org/apache/druid/query/expression/TestExprMacroTable.java b/processing/src/test/java/org/apache/druid/query/expression/TestExprMacroTable.java index a4b3c734f9e..140bcac3fa0 100644 --- a/processing/src/test/java/org/apache/druid/query/expression/TestExprMacroTable.java +++ b/processing/src/test/java/org/apache/druid/query/expression/TestExprMacroTable.java @@ -56,8 +56,6 @@ public class TestExprMacroTable extends ExprMacroTable new HyperUniqueExpressions.HllEstimateExprMacro(), new HyperUniqueExpressions.HllRoundEstimateExprMacro(), new NestedDataExpressions.JsonObjectExprMacro(), - new NestedDataExpressions.ListKeysExprMacro(), - new NestedDataExpressions.ListPathsExprMacro(), new NestedDataExpressions.JsonKeysExprMacro(), new 
NestedDataExpressions.JsonPathsExprMacro(), new NestedDataExpressions.JsonValueExprMacro(), diff --git a/server/src/main/java/org/apache/druid/guice/ExpressionModule.java b/server/src/main/java/org/apache/druid/guice/ExpressionModule.java index 2aa29d4dd75..a9baad566a2 100644 --- a/server/src/main/java/org/apache/druid/guice/ExpressionModule.java +++ b/server/src/main/java/org/apache/druid/guice/ExpressionModule.java @@ -70,11 +70,7 @@ public class ExpressionModule implements Module .add(HyperUniqueExpressions.HllAddExprMacro.class) .add(HyperUniqueExpressions.HllEstimateExprMacro.class) .add(HyperUniqueExpressions.HllRoundEstimateExprMacro.class) - .add(NestedDataExpressions.StructExprMacro.class) .add(NestedDataExpressions.JsonObjectExprMacro.class) - .add(NestedDataExpressions.GetPathExprMacro.class) - .add(NestedDataExpressions.ListKeysExprMacro.class) - .add(NestedDataExpressions.ListPathsExprMacro.class) .add(NestedDataExpressions.JsonKeysExprMacro.class) .add(NestedDataExpressions.JsonPathsExprMacro.class) .add(NestedDataExpressions.JsonValueExprMacro.class) diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/NestedDataOperatorConversions.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/NestedDataOperatorConversions.java index 045f4dc4d31..f6d2af5e831 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/NestedDataOperatorConversions.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/NestedDataOperatorConversions.java @@ -19,19 +19,27 @@ package org.apache.druid.sql.calcite.expression.builtin; +import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; +import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.SqlDataTypeSpec; import org.apache.calcite.sql.SqlFunction; 
import org.apache.calcite.sql.SqlFunctionCategory; +import org.apache.calcite.sql.SqlNode; import org.apache.calcite.sql.SqlOperator; import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.parser.SqlParserPos; import org.apache.calcite.sql.type.OperandTypes; +import org.apache.calcite.sql.type.ReturnTypes; import org.apache.calcite.sql.type.SqlOperandCountRanges; import org.apache.calcite.sql.type.SqlReturnTypeInference; import org.apache.calcite.sql.type.SqlTypeFamily; import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.sql.type.SqlTypeTransforms; +import org.apache.calcite.sql2rel.SqlRexConvertlet; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.math.expr.Expr; import org.apache.druid.math.expr.InputBindings; @@ -42,123 +50,30 @@ import org.apache.druid.segment.nested.NestedDataComplexTypeSerde; import org.apache.druid.segment.nested.NestedPathFinder; import org.apache.druid.segment.nested.NestedPathPart; import org.apache.druid.segment.virtual.NestedFieldVirtualColumn; -import org.apache.druid.sql.calcite.expression.AliasedOperatorConversion; import org.apache.druid.sql.calcite.expression.DruidExpression; import org.apache.druid.sql.calcite.expression.Expressions; import org.apache.druid.sql.calcite.expression.OperatorConversions; import org.apache.druid.sql.calcite.expression.SqlOperatorConversion; -import org.apache.druid.sql.calcite.planner.Calcites; import org.apache.druid.sql.calcite.planner.PlannerContext; import org.apache.druid.sql.calcite.planner.UnsupportedSQLQueryException; +import org.apache.druid.sql.calcite.planner.convertlet.DruidConvertletFactory; import org.apache.druid.sql.calcite.table.RowSignatures; import javax.annotation.Nullable; +import java.util.Collections; import java.util.List; public class NestedDataOperatorConversions { + public static final DruidJsonValueConvertletFactory DRUID_JSON_VALUE_CONVERTLET_FACTORY_INSTANCE = + new 
DruidJsonValueConvertletFactory(); + public static final SqlReturnTypeInference NESTED_RETURN_TYPE_INFERENCE = opBinding -> RowSignatures.makeComplexType( opBinding.getTypeFactory(), NestedDataComplexTypeSerde.TYPE, true ); - public static class GetPathOperatorConversion implements SqlOperatorConversion - { - private static final String FUNCTION_NAME = StringUtils.toUpperCase("get_path"); - private static final SqlFunction SQL_FUNCTION = OperatorConversions - .operatorBuilder(FUNCTION_NAME) - .operandTypeChecker( - OperandTypes.sequence( - "(expr,path)", - OperandTypes.family(SqlTypeFamily.ANY), - OperandTypes.family(SqlTypeFamily.STRING) - ) - ) - .returnTypeCascadeNullable(SqlTypeName.VARCHAR) - .functionCategory(SqlFunctionCategory.USER_DEFINED_FUNCTION) - .build(); - - @Override - public SqlOperator calciteOperator() - { - return SQL_FUNCTION; - } - - @Nullable - @Override - public DruidExpression toDruidExpression( - PlannerContext plannerContext, - RowSignature rowSignature, - RexNode rexNode - ) - { - final RexCall call = (RexCall) rexNode; - - final List druidExpressions = Expressions.toDruidExpressions( - plannerContext, - rowSignature, - call.getOperands() - ); - - if (druidExpressions == null || druidExpressions.size() != 2) { - return null; - } - - final Expr pathExpr = Parser.parse(druidExpressions.get(1).getExpression(), plannerContext.getExprMacroTable()); - if (!pathExpr.isLiteral()) { - return null; - } - // pre-normalize path so that the same expressions with different jq syntax are collapsed - final String path = (String) pathExpr.eval(InputBindings.nilBindings()).value(); - final List parts; - try { - parts = NestedPathFinder.parseJqPath(path); - } - catch (IllegalArgumentException iae) { - throw new UnsupportedSQLQueryException( - "Cannot use [%s]: [%s]", - call.getOperator().getName(), - iae.getMessage() - ); - } - final String normalized = NestedPathFinder.toNormalizedJqPath(parts); - - if (druidExpressions.get(0).isSimpleExtraction()) { - - 
return DruidExpression.ofVirtualColumn( - Calcites.getColumnTypeForRelDataType(call.getType()), - (args) -> "get_path(" + args.get(0).getExpression() + ",'" + normalized + "')", - ImmutableList.of( - DruidExpression.ofColumn(NestedDataComplexTypeSerde.TYPE, druidExpressions.get(0).getDirectColumn()) - ), - (name, outputType, expression, macroTable) -> new NestedFieldVirtualColumn( - druidExpressions.get(0).getDirectColumn(), - name, - outputType, - parts, - false, - null, - null - ) - ); - } - throw new UnsupportedSQLQueryException( - "Cannot use [%s] on expression input: [%s]", - call.getOperator().getName(), - druidExpressions.get(0).getExpression() - ); - } - } - - public static class JsonGetPathAliasOperatorConversion extends AliasedOperatorConversion - { - public JsonGetPathAliasOperatorConversion() - { - super(new GetPathOperatorConversion(), StringUtils.toUpperCase("json_get_path")); - } - } - public static class JsonPathsOperatorConversion implements SqlOperatorConversion { private static final SqlFunction SQL_FUNCTION = OperatorConversions @@ -316,12 +231,266 @@ public class NestedDataOperatorConversions } } - public static class JsonValueOperatorConversion implements SqlOperatorConversion + + /** + * The {@link org.apache.calcite.sql2rel.StandardConvertletTable} converts json_value(.. RETURNING type) into + * cast(json_value_any(..), type). + * + * This is not that useful for us, so we have our own convertlet, to translate into specialized operators such + * as {@link JsonValueBigintOperatorConversion}, {@link JsonValueDoubleOperatorConversion}, or + * {@link JsonValueVarcharOperatorConversion}, before falling back to {@link JsonValueAnyOperatorConversion}. + * + * This convertlet still always wraps the function in a {@link SqlStdOperatorTable#CAST}, to smooth out type + * mismatches, such as VARCHAR(2000) vs VARCHAR or whatever else various type checkers like to complain about not + * exactly matching. 
+ */ + public static class DruidJsonValueConvertletFactory implements DruidConvertletFactory { + @Override + public SqlRexConvertlet createConvertlet(PlannerContext plannerContext) + { + return (cx, call) -> { + // we don't support modifying the behavior to be anything other than 'NULL ON EMPTY' / 'NULL ON ERROR' + Preconditions.checkArgument( + "SQLJSONVALUEEMPTYORERRORBEHAVIOR[NULL]".equals(call.operand(2).toString()), + "Unsupported JSON_VALUE parameter 'ON EMPTY' defined - please re-issue this query without this argument" + ); + Preconditions.checkArgument( + "NULL".equals(call.operand(3).toString()), + "Unsupported JSON_VALUE parameter 'ON EMPTY' defined - please re-issue this query without this argument" + ); + Preconditions.checkArgument( + "SQLJSONVALUEEMPTYORERRORBEHAVIOR[NULL]".equals(call.operand(4).toString()), + "Unsupported JSON_VALUE parameter 'ON ERROR' defined - please re-issue this query without this argument" + ); + Preconditions.checkArgument( + "NULL".equals(call.operand(5).toString()), + "Unsupported JSON_VALUE parameter 'ON ERROR' defined - please re-issue this query without this argument" + ); + SqlDataTypeSpec dataType = call.operand(6); + RelDataType sqlType = dataType.deriveType(cx.getValidator()); + SqlNode rewrite; + if (SqlTypeName.INT_TYPES.contains(sqlType.getSqlTypeName())) { + rewrite = JsonValueBigintOperatorConversion.FUNCTION.createCall( + SqlParserPos.ZERO, + call.operand(0), + call.operand(1) + ); + } else if (SqlTypeName.APPROX_TYPES.contains(sqlType.getSqlTypeName())) { + rewrite = JsonValueDoubleOperatorConversion.FUNCTION.createCall( + SqlParserPos.ZERO, + call.operand(0), + call.operand(1) + ); + } else if (SqlTypeName.STRING_TYPES.contains(sqlType.getSqlTypeName())) { + rewrite = JsonValueVarcharOperatorConversion.FUNCTION.createCall( + SqlParserPos.ZERO, + call.operand(0), + call.operand(1) + ); + } else { + // fallback to json_value_any, e.g. the 'standard' convertlet. 
+ rewrite = JsonValueAnyOperatorConversion.FUNCTION.createCall( + SqlParserPos.ZERO, + call.operand(0), + call.operand(1) + ); + } + + // always cast anyway, to prevent haters from complaining that VARCHAR doesn't match VARCHAR(2000) + SqlNode caster = SqlStdOperatorTable.CAST.createCall( + SqlParserPos.ZERO, + rewrite, + call.operand(6) + ); + return cx.convertExpression(caster); + }; + } + + @Override + public List operators() + { + return Collections.singletonList(SqlStdOperatorTable.JSON_VALUE); + } + } + + public abstract static class JsonValueReturningTypeOperatorConversion implements SqlOperatorConversion + { + private final SqlFunction function; + private final ColumnType druidType; + + public JsonValueReturningTypeOperatorConversion(SqlFunction function, ColumnType druidType) + { + this.druidType = druidType; + this.function = function; + } + @Override public SqlOperator calciteOperator() { - return SqlStdOperatorTable.JSON_VALUE; + return function; + } + + @Nullable + @Override + public DruidExpression toDruidExpression( + PlannerContext plannerContext, + RowSignature rowSignature, + RexNode rexNode + ) + { + final RexCall call = (RexCall) rexNode; + final List druidExpressions = Expressions.toDruidExpressions( + plannerContext, + rowSignature, + call.getOperands() + ); + + if (druidExpressions == null || druidExpressions.size() != 2) { + return null; + } + + final Expr pathExpr = Parser.parse(druidExpressions.get(1).getExpression(), plannerContext.getExprMacroTable()); + if (!pathExpr.isLiteral()) { + return null; + } + // pre-normalize path so that the same expressions with different jq syntax are collapsed + final String path = (String) pathExpr.eval(InputBindings.nilBindings()).value(); + final List parts; + try { + parts = NestedPathFinder.parseJsonPath(path); + } + catch (IllegalArgumentException iae) { + throw new UnsupportedSQLQueryException( + "Cannot use [%s]: [%s]", + call.getOperator().getName(), + iae.getMessage() + ); + } + final String 
jsonPath = NestedPathFinder.toNormalizedJsonPath(parts); + final DruidExpression.ExpressionGenerator builder = (args) -> + "json_value(" + args.get(0).getExpression() + ",'" + jsonPath + "', '" + druidType.asTypeString() + "')"; + + if (druidExpressions.get(0).isSimpleExtraction()) { + + return DruidExpression.ofVirtualColumn( + druidType, + builder, + ImmutableList.of( + DruidExpression.ofColumn(NestedDataComplexTypeSerde.TYPE, druidExpressions.get(0).getDirectColumn()) + ), + (name, outputType, expression, macroTable) -> new NestedFieldVirtualColumn( + druidExpressions.get(0).getDirectColumn(), + name, + outputType, + parts, + false, + null, + null + ) + ); + } + return DruidExpression.ofExpression(druidType, builder, druidExpressions); + } + + static SqlFunction buildFunction(String functionName, SqlTypeName typeName) + { + return OperatorConversions.operatorBuilder(functionName) + .operandTypeChecker( + OperandTypes.sequence( + "(expr,path)", + OperandTypes.family(SqlTypeFamily.ANY), + OperandTypes.family(SqlTypeFamily.STRING) + ) + ) + .returnTypeInference( + ReturnTypes.cascade( + opBinding -> opBinding.getTypeFactory().createSqlType(typeName), + SqlTypeTransforms.FORCE_NULLABLE + ) + ) + .functionCategory(SqlFunctionCategory.USER_DEFINED_FUNCTION) + .build(); + } + } + + public static class JsonValueBigintOperatorConversion extends JsonValueReturningTypeOperatorConversion + { + private static final SqlFunction FUNCTION = buildFunction("JSON_VALUE_BIGINT", SqlTypeName.BIGINT); + + public JsonValueBigintOperatorConversion() + { + super(FUNCTION, ColumnType.LONG); + } + } + + public static class JsonValueDoubleOperatorConversion extends JsonValueReturningTypeOperatorConversion + { + private static final SqlFunction FUNCTION = buildFunction("JSON_VALUE_DOUBLE", SqlTypeName.DOUBLE); + + public JsonValueDoubleOperatorConversion() + { + super(FUNCTION, ColumnType.DOUBLE); + } + } + + public static class JsonValueVarcharOperatorConversion extends 
JsonValueReturningTypeOperatorConversion + { + private static final SqlFunction FUNCTION = buildFunction("JSON_VALUE_VARCHAR", SqlTypeName.VARCHAR); + + public JsonValueVarcharOperatorConversion() + { + super(FUNCTION, ColumnType.STRING); + } + } + + public static class JsonValueAnyOperatorConversion implements SqlOperatorConversion + { + private static final SqlFunction FUNCTION = + OperatorConversions.operatorBuilder("JSON_VALUE_ANY") + .operandTypeChecker( + OperandTypes.or( + OperandTypes.sequence( + "(expr,path)", + OperandTypes.family(SqlTypeFamily.ANY), + OperandTypes.family(SqlTypeFamily.STRING) + ), + OperandTypes.family( + SqlTypeFamily.ANY, + SqlTypeFamily.CHARACTER, + SqlTypeFamily.ANY, + SqlTypeFamily.ANY, + SqlTypeFamily.ANY, + SqlTypeFamily.ANY, + SqlTypeFamily.ANY + ) + ) + ) + .operandTypeInference((callBinding, returnType, operandTypes) -> { + RelDataTypeFactory typeFactory = callBinding.getTypeFactory(); + if (operandTypes.length > 5) { + operandTypes[3] = typeFactory.createSqlType(SqlTypeName.ANY); + operandTypes[5] = typeFactory.createSqlType(SqlTypeName.ANY); + } + }) + .returnTypeInference( + ReturnTypes.cascade( + opBinding -> opBinding.getTypeFactory().createTypeWithNullability( + // STRING is the closest thing we have to an ANY type + // however, this should really be using SqlTypeName.ANY.. 
someday + opBinding.getTypeFactory().createSqlType(SqlTypeName.VARCHAR), + true + ), + SqlTypeTransforms.FORCE_NULLABLE + ) + ) + .functionCategory(SqlFunctionCategory.SYSTEM) + .build(); + + @Override + public SqlOperator calciteOperator() + { + return FUNCTION; } @Nullable @@ -334,22 +503,17 @@ public class NestedDataOperatorConversions { final RexCall call = (RexCall) rexNode; - // calcite puts a bunch of junk in here so the call looks something like - // JSON_VALUE(`nested`.`nest`, '$.x', SQLJSONVALUEEMPTYORERRORBEHAVIOR[NULL], NULL, SQLJSONVALUEEMPTYORERRORBEHAVIOR[NULL], NULL, VARCHAR(2000)) + // calcite parser can allow for a bunch of junk in here that we don't care about right now, so the call looks + // something like this: + // JSON_VALUE_ANY(`nested`.`nest`, '$.x', SQLJSONVALUEEMPTYORERRORBEHAVIOR[NULL], NULL, SQLJSONVALUEEMPTYORERRORBEHAVIOR[NULL], NULL) // by the time it gets here + final List druidExpressions = Expressions.toDruidExpressions( plannerContext, rowSignature, - call.getOperands().subList(0, 2) + call.getOperands().size() > 2 ? 
call.getOperands().subList(0, 2) : call.getOperands() ); - ColumnType inferredOutputType = ColumnType.STRING; - if (call.getOperands().size() == 7) { - ColumnType maybe = Calcites.getColumnTypeForRelDataType(call.getOperands().get(6).getType()); - if (maybe != null && !ColumnType.UNKNOWN_COMPLEX.equals(maybe)) { - inferredOutputType = maybe; - } - } if (druidExpressions == null || druidExpressions.size() != 2) { return null; @@ -376,10 +540,13 @@ public class NestedDataOperatorConversions final DruidExpression.ExpressionGenerator builder = (args) -> "json_value(" + args.get(0).getExpression() + ",'" + jsonPath + "')"; - if (druidExpressions.get(0).isSimpleExtraction()) { + // STRING is the closest thing we have to ANY, though maybe someday this + // can be replaced with a VARIANT type + final ColumnType columnType = ColumnType.STRING; + if (druidExpressions.get(0).isSimpleExtraction()) { return DruidExpression.ofVirtualColumn( - inferredOutputType, + columnType, builder, ImmutableList.of( DruidExpression.ofColumn(NestedDataComplexTypeSerde.TYPE, druidExpressions.get(0).getDirectColumn()) @@ -395,24 +562,7 @@ public class NestedDataOperatorConversions ) ); } - return DruidExpression.ofExpression(ColumnType.STRING, builder, druidExpressions); - } - } - - // calcite converts JSON_VALUE to JSON_VALUE_ANY so we have to wire that up too... 
- public static class JsonValueAnyOperatorConversion extends AliasedOperatorConversion - { - private static final String FUNCTION_NAME = StringUtils.toUpperCase("json_value_any"); - - public JsonValueAnyOperatorConversion() - { - super(new JsonValueOperatorConversion(), FUNCTION_NAME); - } - - @Override - public SqlOperator calciteOperator() - { - return SqlStdOperatorTable.JSON_VALUE_ANY; + return DruidExpression.ofExpression(columnType, builder, druidExpressions); } } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidOperatorTable.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidOperatorTable.java index de259402bf2..6c6044a672d 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidOperatorTable.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidOperatorTable.java @@ -299,13 +299,13 @@ public class DruidOperatorTable implements SqlOperatorTable private static final List NESTED_DATA_OPERATOR_CONVERSIONS = ImmutableList.builder() - .add(new NestedDataOperatorConversions.GetPathOperatorConversion()) - .add(new NestedDataOperatorConversions.JsonGetPathAliasOperatorConversion()) .add(new NestedDataOperatorConversions.JsonKeysOperatorConversion()) .add(new NestedDataOperatorConversions.JsonPathsOperatorConversion()) .add(new NestedDataOperatorConversions.JsonQueryOperatorConversion()) - .add(new NestedDataOperatorConversions.JsonValueOperatorConversion()) .add(new NestedDataOperatorConversions.JsonValueAnyOperatorConversion()) + .add(new NestedDataOperatorConversions.JsonValueBigintOperatorConversion()) + .add(new NestedDataOperatorConversions.JsonValueDoubleOperatorConversion()) + .add(new NestedDataOperatorConversions.JsonValueVarcharOperatorConversion()) .add(new NestedDataOperatorConversions.JsonObjectOperatorConversion()) .add(new NestedDataOperatorConversions.ToJsonStringOperatorConversion()) .add(new NestedDataOperatorConversions.ParseJsonOperatorConversion()) diff --git 
a/sql/src/main/java/org/apache/druid/sql/calcite/planner/convertlet/DruidConvertletTable.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/convertlet/DruidConvertletTable.java index 3f5652d9aa4..59ad4ef24f0 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/convertlet/DruidConvertletTable.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/convertlet/DruidConvertletTable.java @@ -28,6 +28,7 @@ import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.apache.calcite.sql2rel.SqlRexConvertlet; import org.apache.calcite.sql2rel.SqlRexConvertletTable; import org.apache.calcite.sql2rel.StandardConvertletTable; +import org.apache.druid.sql.calcite.expression.builtin.NestedDataOperatorConversions; import org.apache.druid.sql.calcite.planner.PlannerContext; import java.util.ArrayList; @@ -44,6 +45,7 @@ public class DruidConvertletTable implements SqlRexConvertletTable ImmutableList.builder() .add(CurrentTimestampAndFriendsConvertletFactory.INSTANCE) .add(TimeInIntervalConvertletFactory.INSTANCE) + .add(NestedDataOperatorConversions.DRUID_JSON_VALUE_CONVERTLET_FACTORY_INSTANCE) .build(); // Operators we don't have standard conversions for, but which can be converted into ones that do by diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java index eb4cf5ba7be..e95ba656437 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java @@ -239,6 +239,43 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest ); } + @Test + public void testGroupJsonValueAny() + { + testQuery( + "SELECT " + + "JSON_VALUE_ANY(nest, '$.x'), " + + "SUM(cnt) " + + "FROM druid.nested GROUP BY 1", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(DATA_SOURCE) + 
.setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setVirtualColumns( + new NestedFieldVirtualColumn("nest", "$.x", "v0", ColumnType.STRING) + ) + .setDimensions( + dimensions( + new DefaultDimensionSpec("v0", "d0") + ) + ) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{NullHandling.defaultStringValue(), 4L}, + new Object[]{"100", 2L}, + new Object[]{"200", 1L} + ), + RowSignature.builder() + .add("EXPR$0", ColumnType.STRING) + .add("EXPR$1", ColumnType.LONG) + .build() + ); + } + @Test public void testGroupByJsonValue() { @@ -350,102 +387,6 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest ); } - @Test - public void testGroupByGetPaths() - { - testQuery( - "SELECT " - + "GET_PATH(nest, '.x'), " - + "GET_PATH(nest, '.\"x\"'), " - + "GET_PATH(nest, '.[\"x\"]'), " - + "SUM(cnt) " - + "FROM druid.nested GROUP BY 1, 2, 3", - ImmutableList.of( - GroupByQuery.builder() - .setDataSource(DATA_SOURCE) - .setInterval(querySegmentSpec(Filtration.eternity())) - .setGranularity(Granularities.ALL) - .setVirtualColumns( - new NestedFieldVirtualColumn("nest", "$.x", "v0", ColumnType.STRING) - ) - .setDimensions( - dimensions( - new DefaultDimensionSpec("v0", "d0"), - new DefaultDimensionSpec("v0", "d1"), - new DefaultDimensionSpec("v0", "d2") - ) - ) - .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) - .setContext(QUERY_CONTEXT_DEFAULT) - .build() - ), - ImmutableList.of( - new Object[]{ - NullHandling.defaultStringValue(), - NullHandling.defaultStringValue(), - NullHandling.defaultStringValue(), - 4L - }, - new Object[]{"100", "100", "100", 2L}, - new Object[]{"200", "200", "200", 1L} - ), - RowSignature.builder() - .add("EXPR$0", ColumnType.STRING) - .add("EXPR$1", ColumnType.STRING) - .add("EXPR$2", ColumnType.STRING) - .add("EXPR$3", ColumnType.LONG) - .build() - ); - } 
- - @Test - public void testGroupByJsonGetPaths() - { - testQuery( - "SELECT " - + "JSON_GET_PATH(nest, '.x'), " - + "JSON_GET_PATH(nest, '.\"x\"'), " - + "JSON_GET_PATH(nest, '.[\"x\"]'), " - + "SUM(cnt) " - + "FROM druid.nested GROUP BY 1, 2, 3", - ImmutableList.of( - GroupByQuery.builder() - .setDataSource(DATA_SOURCE) - .setInterval(querySegmentSpec(Filtration.eternity())) - .setGranularity(Granularities.ALL) - .setVirtualColumns( - new NestedFieldVirtualColumn("nest", "$.x", "v0", ColumnType.STRING) - ) - .setDimensions( - dimensions( - new DefaultDimensionSpec("v0", "d0"), - new DefaultDimensionSpec("v0", "d1"), - new DefaultDimensionSpec("v0", "d2") - ) - ) - .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) - .setContext(QUERY_CONTEXT_DEFAULT) - .build() - ), - ImmutableList.of( - new Object[]{ - NullHandling.defaultStringValue(), - NullHandling.defaultStringValue(), - NullHandling.defaultStringValue(), - 4L - }, - new Object[]{"100", "100", "100", 2L}, - new Object[]{"200", "200", "200", 1L} - ), - RowSignature.builder() - .add("EXPR$0", ColumnType.STRING) - .add("EXPR$1", ColumnType.STRING) - .add("EXPR$2", ColumnType.STRING) - .add("EXPR$3", ColumnType.LONG) - .build() - ); - } - @Test public void testGroupByJsonValues() { @@ -2010,6 +1951,57 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest ); } + @Test + public void testReturningAndSumPathDouble() + { + testQuery( + "SELECT " + + "SUM(JSON_VALUE(nest, '$.x' RETURNING DOUBLE)) " + + "FROM druid.nested", + ImmutableList.of( + Druids.newTimeseriesQueryBuilder() + .dataSource(DATA_SOURCE) + .intervals(querySegmentSpec(Filtration.eternity())) + .granularity(Granularities.ALL) + .virtualColumns(new NestedFieldVirtualColumn("nest", "$.x", "v0", ColumnType.DOUBLE)) + .aggregators(aggregators(new DoubleSumAggregatorFactory("a0", "v0"))) + .context(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{400.0} + ), + RowSignature.builder() + 
.add("EXPR$0", ColumnType.DOUBLE) + .build() + ); + } + + @Test + public void testReturningAndSumPathDecimal() + { + testQuery( + "SELECT " + + "SUM(JSON_VALUE(nest, '$.x' RETURNING DECIMAL)) " + + "FROM druid.nested", + ImmutableList.of( + Druids.newTimeseriesQueryBuilder() + .dataSource(DATA_SOURCE) + .intervals(querySegmentSpec(Filtration.eternity())) + .granularity(Granularities.ALL) + .virtualColumns(new NestedFieldVirtualColumn("nest", "$.x", "v0", ColumnType.DOUBLE)) + .aggregators(aggregators(new DoubleSumAggregatorFactory("a0", "v0"))) + .context(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{400.0} + ), + RowSignature.builder() + .add("EXPR$0", ColumnType.DOUBLE) + .build() + ); + } @Test public void testReturningAndSumPathStrings() @@ -2043,7 +2035,7 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest cannotVectorize(); testQuery( "SELECT " - + "JSON_KEYS(nester, '.'), " + + "JSON_KEYS(nester, '$'), " + "SUM(cnt) " + "FROM druid.nested GROUP BY 1", ImmutableList.of( @@ -2054,7 +2046,7 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest .setVirtualColumns( new ExpressionVirtualColumn( "v0", - "json_keys(\"nester\",'.')", + "json_keys(\"nester\",'$')", ColumnType.STRING_ARRAY, macroTable ) @@ -2127,7 +2119,7 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest cannotVectorize(); testQuery( "SELECT " - + "JSON_KEYS(nest, '.'), " + + "JSON_KEYS(nest, '$'), " + "SUM(cnt) " + "FROM druid.nested GROUP BY 1", ImmutableList.of( @@ -2138,7 +2130,7 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest .setVirtualColumns( new ExpressionVirtualColumn( "v0", - "json_keys(\"nest\",'.')", + "json_keys(\"nest\",'$')", ColumnType.STRING_ARRAY, macroTable ) @@ -2253,7 +2245,7 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest (expected) -> { expected.expect(UnsupportedSQLQueryException.class); expected.expectMessage( - "Cannot use [JSON_VALUE_ANY]: 
[Bad format, '.array.[1]' is not a valid JSONPath path: must start with '$']"); + "Cannot use [JSON_VALUE_VARCHAR]: [Bad format, '.array.[1]' is not a valid JSONPath path: must start with '$']"); } ); } @@ -2358,6 +2350,54 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest ); } + @Test + public void testCompositionTyping() + { + testQuery( + "SELECT " + + "JSON_VALUE((JSON_OBJECT(KEY 'x' VALUE JSON_VALUE(nest, '$.x' RETURNING BIGINT))), '$.x' RETURNING BIGINT)\n" + + "FROM druid.nested", + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(DATA_SOURCE) + .intervals(querySegmentSpec(Filtration.eternity())) + .virtualColumns( + new ExpressionVirtualColumn( + "v0", + "json_value(json_object('x',\"v1\"),'$.x', 'LONG')", + ColumnType.LONG, + macroTable + ), + new NestedFieldVirtualColumn( + "nest", + "v1", + ColumnType.LONG, + null, + false, + "$.x", + false + ) + ) + .columns("v0") + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .build() + ), + ImmutableList.of( + new Object[]{100L}, + new Object[]{NullHandling.defaultLongValue()}, + new Object[]{200L}, + new Object[]{NullHandling.defaultLongValue()}, + new Object[]{NullHandling.defaultLongValue()}, + new Object[]{100L}, + new Object[]{NullHandling.defaultLongValue()} + ), + RowSignature.builder() + .add("EXPR$0", ColumnType.LONG) + .build() + ); + } + @Test public void testToJsonAndParseJson() { @@ -2584,4 +2624,46 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest ); } + + @Test + public void testJsonValueUnDocumentedButSupportedOptions() + { + testQuery( + "SELECT " + + "SUM(JSON_VALUE(nest, '$.z' RETURNING BIGINT NULL ON EMPTY NULL ON ERROR)) " + + "FROM druid.nested", + ImmutableList.of( + Druids.newTimeseriesQueryBuilder() + .dataSource(DATA_SOURCE) + .intervals(querySegmentSpec(Filtration.eternity())) + .granularity(Granularities.ALL) + .virtualColumns(new NestedFieldVirtualColumn("nest", "$.z", "v0", ColumnType.LONG)) 
+ .aggregators(aggregators(new LongSumAggregatorFactory("a0", "v0"))) + .context(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{700L} + ), + RowSignature.builder() + .add("EXPR$0", ColumnType.LONG) + .build() + ); + } + + @Test + public void testJsonValueUnsupportedOptions() + { + testQueryThrows( + "SELECT " + + "SUM(JSON_VALUE(nest, '$.z' RETURNING BIGINT ERROR ON EMPTY ERROR ON ERROR)) " + + "FROM druid.nested", + exception -> { + expectedException.expect(IllegalArgumentException.class); + expectedException.expectMessage( + "Unsupported JSON_VALUE parameter 'ON EMPTY' defined - please re-issue this query without this argument" + ); + } + ); + } }