json_value adjustments (#12968)

* json_value adjustments
changes:
* the native json_value expression now has an optional 3rd argument to specify a type; when present, all values are cast to the specified type
* rework how JSON_VALUE is wired up in SQL. We now use a custom convertlet to translate JSON_VALUE(... RETURNING type) into dedicated JSON_VALUE_BIGINT, JSON_VALUE_DOUBLE, JSON_VALUE_VARCHAR, JSON_VALUE_ANY, instead of using the Calcite StandardConvertletTable, which wraps JSON_VALUE_ANY in a CAST. This preserves the typing of JSON_VALUE so that it can be passed down to the native expression as the 3rd argument

* fix json_value_any to be usable by humans too, coverage

* fix bug

* checkstyle

* checkstyle

* review stuff

* validate that options to json_value are the supported options rather than ignoring them

* remove more legacy undocumented functions
This commit is contained in:
Clint Wylie 2022-08-27 07:15:47 -07:00 committed by GitHub
parent 7e2371bbde
commit 16f5ac5bd5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 588 additions and 549 deletions

View File

@ -63,7 +63,7 @@ The following built-in functions are available.
|name|description| |name|description|
|----|-----------| |----|-----------|
|cast|cast(expr,'LONG' or 'DOUBLE' or 'STRING' or 'LONG_ARRAY', or 'DOUBLE_ARRAY' or 'STRING_ARRAY') returns expr with specified type. exception can be thrown. Scalar types may be cast to array types and will take the form of a single element list (null will still be null). | |cast|cast(expr,'LONG' or 'DOUBLE' or 'STRING' or 'ARRAY<LONG>', or 'ARRAY<DOUBLE>' or 'ARRAY<STRING>') returns expr with specified type. exception can be thrown. Scalar types may be cast to array types and will take the form of a single element list (null will still be null). |
|if|if(predicate,then,else) returns 'then' if 'predicate' evaluates to a positive number, otherwise it returns 'else' | |if|if(predicate,then,else) returns 'then' if 'predicate' evaluates to a positive number, otherwise it returns 'else' |
|nvl|nvl(expr,expr-for-null) returns 'expr-for-null' if 'expr' is null (or empty string for string type) | |nvl|nvl(expr,expr-for-null) returns 'expr-for-null' if 'expr' is null (or empty string for string type) |
|like|like(expr, pattern[, escape]) is equivalent to SQL `expr LIKE pattern`| |like|like(expr, pattern[, escape]) is equivalent to SQL `expr LIKE pattern`|
@ -232,7 +232,7 @@ JSON functions provide facilities to extract, transform, and create `COMPLEX<jso
| function | description | | function | description |
|---|---| |---|---|
| json_value(expr, path) | Extract a Druid literal (`STRING`, `LONG`, `DOUBLE`) value from `expr` using JSONPath syntax of `path` | | json_value(expr, path[, type]) | Extract a Druid literal (`STRING`, `LONG`, `DOUBLE`) value from `expr` using JSONPath syntax of `path`. The optional `type` argument can be set to `'LONG'`,`'DOUBLE'` or `'STRING'` to cast values to that type. |
| json_query(expr, path) | Extract a `COMPLEX<json>` value from `expr` using JSONPath syntax of `path` | | json_query(expr, path) | Extract a `COMPLEX<json>` value from `expr` using JSONPath syntax of `path` |
| json_object(expr1, expr2[, expr3, expr4 ...]) | Construct a `COMPLEX<json>` with alternating 'key' and 'value' arguments| | json_object(expr1, expr2[, expr3, expr4 ...]) | Construct a `COMPLEX<json>` with alternating 'key' and 'value' arguments|
| parse_json(expr) | Deserialize a JSON `STRING` into a `COMPLEX<json>`. If the input is not a `STRING` or it is invalid JSON, this function will result in an error.| | parse_json(expr) | Deserialize a JSON `STRING` into a `COMPLEX<json>`. If the input is not a `STRING` or it is invalid JSON, this function will result in an error.|

View File

@ -23,7 +23,6 @@ package org.apache.druid.query.expression;
import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Preconditions; import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import org.apache.druid.guice.annotations.Json; import org.apache.druid.guice.annotations.Json;
import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.IAE;
import org.apache.druid.math.expr.Expr; import org.apache.druid.math.expr.Expr;
@ -50,9 +49,9 @@ public class NestedDataExpressions
ExpressionType.fromColumnType(NestedDataComplexTypeSerde.TYPE) ExpressionType.fromColumnType(NestedDataComplexTypeSerde.TYPE)
); );
public static class StructExprMacro implements ExprMacroTable.ExprMacro public static class JsonObjectExprMacro implements ExprMacroTable.ExprMacro
{ {
public static final String NAME = "struct"; public static final String NAME = "json_object";
@Override @Override
public String name() public String name()
@ -104,17 +103,6 @@ public class NestedDataExpressions
} }
} }
public static class JsonObjectExprMacro extends StructExprMacro
{
public static final String NAME = "json_object";
@Override
public String name()
{
return NAME;
}
}
public static class ToJsonStringExprMacro implements ExprMacroTable.ExprMacro public static class ToJsonStringExprMacro implements ExprMacroTable.ExprMacro
{ {
public static final String NAME = "to_json_string"; public static final String NAME = "to_json_string";
@ -138,7 +126,7 @@ public class NestedDataExpressions
@Override @Override
public Expr apply(List<Expr> args) public Expr apply(List<Expr> args)
{ {
class ToJsonStringExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr final class ToJsonStringExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr
{ {
public ToJsonStringExpr(List<Expr> args) public ToJsonStringExpr(List<Expr> args)
{ {
@ -203,7 +191,7 @@ public class NestedDataExpressions
@Override @Override
public Expr apply(List<Expr> args) public Expr apply(List<Expr> args)
{ {
class ParseJsonExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr final class ParseJsonExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr
{ {
public ParseJsonExpr(List<Expr> args) public ParseJsonExpr(List<Expr> args)
{ {
@ -278,7 +266,7 @@ public class NestedDataExpressions
@Override @Override
public Expr apply(List<Expr> args) public Expr apply(List<Expr> args)
{ {
class ParseJsonExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr final class ParseJsonExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr
{ {
public ParseJsonExpr(List<Expr> args) public ParseJsonExpr(List<Expr> args)
{ {
@ -327,11 +315,9 @@ public class NestedDataExpressions
} }
} }
public static class JsonValueExprMacro implements ExprMacroTable.ExprMacro
public static class GetPathExprMacro implements ExprMacroTable.ExprMacro
{ {
public static final String NAME = "get_path"; public static final String NAME = "json_value";
@Override @Override
public String name() public String name()
@ -342,10 +328,48 @@ public class NestedDataExpressions
@Override @Override
public Expr apply(List<Expr> args) public Expr apply(List<Expr> args)
{ {
final List<NestedPathPart> parts = getArg1PathPartsFromLiteral(name(), args); final List<NestedPathPart> parts = getJsonPathPartsFromLiteral(name(), args.get(1));
class GetPathExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr if (args.size() == 3 && args.get(2).isLiteral()) {
final ExpressionType castTo = ExpressionType.fromString((String) args.get(2).getLiteralValue());
if (castTo == null) {
throw new IAE("Invalid output type: [%s]", args.get(2).getLiteralValue());
}
final class JsonValueCastExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr
{ {
public GetPathExpr(List<Expr> args) public JsonValueCastExpr(List<Expr> args)
{
super(name(), args);
}
@Override
public ExprEval eval(ObjectBinding bindings)
{
ExprEval input = args.get(0).eval(bindings);
return ExprEval.bestEffortOf(
NestedPathFinder.findLiteral(unwrap(input), parts)
).castTo(castTo);
}
@Override
public Expr visit(Shuttle shuttle)
{
List<Expr> newArgs = args.stream().map(x -> x.visit(shuttle)).collect(Collectors.toList());
return shuttle.visit(new JsonValueCastExpr(newArgs));
}
@Nullable
@Override
public ExpressionType getOutputType(InputBindingInspector inspector)
{
return castTo;
}
}
return new JsonValueCastExpr(args);
} else {
final class JsonValueExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr
{
public JsonValueExpr(List<Expr> args)
{ {
super(name(), args); super(name(), args);
} }
@ -363,18 +387,19 @@ public class NestedDataExpressions
public Expr visit(Shuttle shuttle) public Expr visit(Shuttle shuttle)
{ {
List<Expr> newArgs = args.stream().map(x -> x.visit(shuttle)).collect(Collectors.toList()); List<Expr> newArgs = args.stream().map(x -> x.visit(shuttle)).collect(Collectors.toList());
return shuttle.visit(new GetPathExpr(newArgs)); return shuttle.visit(new JsonValueExpr(newArgs));
} }
@Nullable @Nullable
@Override @Override
public ExpressionType getOutputType(InputBindingInspector inspector) public ExpressionType getOutputType(InputBindingInspector inspector)
{ {
// we cannot infer the output type (well we could say it is 'STRING' right now because is all we support... // we cannot infer output type because there could be anything at the path, and, we lack a proper VARIANT type
return null; return null;
} }
} }
return new GetPathExpr(args); return new JsonValueExpr(args);
}
} }
} }
@ -391,8 +416,8 @@ public class NestedDataExpressions
@Override @Override
public Expr apply(List<Expr> args) public Expr apply(List<Expr> args)
{ {
final List<NestedPathPart> parts = getArg1JsonPathPartsFromLiteral(name(), args); final List<NestedPathPart> parts = getJsonPathPartsFromLiteral(name(), args.get(1));
class JsonQueryExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr final class JsonQueryExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr
{ {
public JsonQueryExpr(List<Expr> args) public JsonQueryExpr(List<Expr> args)
{ {
@ -428,114 +453,6 @@ public class NestedDataExpressions
} }
} }
public static class JsonValueExprMacro implements ExprMacroTable.ExprMacro
{
public static final String NAME = "json_value";
@Override
public String name()
{
return NAME;
}
@Override
public Expr apply(List<Expr> args)
{
final List<NestedPathPart> parts = getArg1JsonPathPartsFromLiteral(name(), args);
class JsonValueExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr
{
public JsonValueExpr(List<Expr> args)
{
super(name(), args);
}
@Override
public ExprEval eval(ObjectBinding bindings)
{
ExprEval input = args.get(0).eval(bindings);
return ExprEval.bestEffortOf(
NestedPathFinder.findLiteral(unwrap(input), parts)
);
}
@Override
public Expr visit(Shuttle shuttle)
{
List<Expr> newArgs = args.stream().map(x -> x.visit(shuttle)).collect(Collectors.toList());
return shuttle.visit(new JsonValueExpr(newArgs));
}
@Nullable
@Override
public ExpressionType getOutputType(InputBindingInspector inspector)
{
// we cannot infer the output type (well we could say it is 'STRING' right now because is all we support...
return null;
}
}
return new JsonValueExpr(args);
}
}
public static class ListPathsExprMacro implements ExprMacroTable.ExprMacro
{
public static final String NAME = "list_paths";
@Override
public String name()
{
return NAME;
}
@Override
public Expr apply(List<Expr> args)
{
final StructuredDataProcessor processor = new StructuredDataProcessor()
{
@Override
public int processLiteralField(String fieldName, Object fieldValue)
{
// do nothing, we only want the list of fields returned by this processor
return 0;
}
};
class ListPathsExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr
{
public ListPathsExpr(List<Expr> args)
{
super(name(), args);
}
@Override
public ExprEval eval(ObjectBinding bindings)
{
ExprEval input = args.get(0).eval(bindings);
StructuredDataProcessor.ProcessResults info = processor.processFields(unwrap(input));
return ExprEval.ofType(
ExpressionType.STRING_ARRAY,
ImmutableList.copyOf(info.getLiteralFields())
);
}
@Override
public Expr visit(Shuttle shuttle)
{
List<Expr> newArgs = args.stream().map(x -> x.visit(shuttle)).collect(Collectors.toList());
return shuttle.visit(new ListPathsExpr(newArgs));
}
@Nullable
@Override
public ExpressionType getOutputType(InputBindingInspector inspector)
{
return ExpressionType.STRING_ARRAY;
}
}
return new ListPathsExpr(args);
}
}
public static class JsonPathsExprMacro implements ExprMacroTable.ExprMacro public static class JsonPathsExprMacro implements ExprMacroTable.ExprMacro
{ {
public static final String NAME = "json_paths"; public static final String NAME = "json_paths";
@ -559,7 +476,7 @@ public class NestedDataExpressions
} }
}; };
class JsonPathsExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr final class JsonPathsExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr
{ {
public JsonPathsExpr(List<Expr> args) public JsonPathsExpr(List<Expr> args)
{ {
@ -600,9 +517,9 @@ public class NestedDataExpressions
} }
} }
public static class ListKeysExprMacro implements ExprMacroTable.ExprMacro public static class JsonKeysExprMacro implements ExprMacroTable.ExprMacro
{ {
public static final String NAME = "list_keys"; public static final String NAME = "json_keys";
@Override @Override
public String name() public String name()
@ -613,10 +530,10 @@ public class NestedDataExpressions
@Override @Override
public Expr apply(List<Expr> args) public Expr apply(List<Expr> args)
{ {
final List<NestedPathPart> parts = getArg1PathPartsFromLiteral(name(), args); final List<NestedPathPart> parts = getJsonPathPartsFromLiteral(name(), args.get(1));
class ListKeysExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr final class JsonKeysExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr
{ {
public ListKeysExpr(List<Expr> args) public JsonKeysExpr(List<Expr> args)
{ {
super(name(), args); super(name(), args);
} }
@ -636,7 +553,7 @@ public class NestedDataExpressions
public Expr visit(Shuttle shuttle) public Expr visit(Shuttle shuttle)
{ {
List<Expr> newArgs = args.stream().map(x -> x.visit(shuttle)).collect(Collectors.toList()); List<Expr> newArgs = args.stream().map(x -> x.visit(shuttle)).collect(Collectors.toList());
return shuttle.visit(new ListKeysExpr(newArgs)); return shuttle.visit(new JsonKeysExpr(newArgs));
} }
@Nullable @Nullable
@ -646,18 +563,7 @@ public class NestedDataExpressions
return ExpressionType.STRING_ARRAY; return ExpressionType.STRING_ARRAY;
} }
} }
return new ListKeysExpr(args); return new JsonKeysExpr(args);
}
}
public static class JsonKeysExprMacro extends ListKeysExprMacro
{
public static final String NAME = "json_keys";
@Override
public String name()
{
return NAME;
} }
} }
@ -676,39 +582,18 @@ public class NestedDataExpressions
} }
static List<NestedPathPart> getArg1PathPartsFromLiteral(String fnName, List<Expr> args) static List<NestedPathPart> getJsonPathPartsFromLiteral(String fnName, Expr arg)
{ {
if (!(args.get(1).isLiteral() && args.get(1).getLiteralValue() instanceof String)) { if (!(arg.isLiteral() && arg.getLiteralValue() instanceof String)) {
throw new IAE( throw new IAE(
"Function[%s] second argument [%s] must be a literal [%s] value", "Function[%s] second argument [%s] must be a literal [%s] value",
fnName, fnName,
args.get(1).stringify(), arg.stringify(),
ExpressionType.STRING
);
}
final String path = (String) args.get(1).getLiteralValue();
List<NestedPathPart> parts;
try {
parts = NestedPathFinder.parseJsonPath(path);
}
catch (IllegalArgumentException iae) {
parts = NestedPathFinder.parseJqPath(path);
}
return parts;
}
static List<NestedPathPart> getArg1JsonPathPartsFromLiteral(String fnName, List<Expr> args)
{
if (!(args.get(1).isLiteral() && args.get(1).getLiteralValue() instanceof String)) {
throw new IAE(
"Function[%s] second argument [%s] must be a literal [%s] value",
fnName,
args.get(1).stringify(),
ExpressionType.STRING ExpressionType.STRING
); );
} }
final List<NestedPathPart> parts = NestedPathFinder.parseJsonPath( final List<NestedPathPart> parts = NestedPathFinder.parseJsonPath(
(String) args.get(1).getLiteralValue() (String) arg.getLiteralValue()
); );
return parts; return parts;
} }

View File

@ -24,6 +24,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Supplier; import com.google.common.base.Supplier;
import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableMap;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.jackson.DefaultObjectMapper;
import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.Pair;
@ -45,10 +46,6 @@ public class NestedDataExpressionsTest extends InitializedNullHandlingTest
private static final ObjectMapper JSON_MAPPER = new DefaultObjectMapper(); private static final ObjectMapper JSON_MAPPER = new DefaultObjectMapper();
private static final ExprMacroTable MACRO_TABLE = new ExprMacroTable( private static final ExprMacroTable MACRO_TABLE = new ExprMacroTable(
ImmutableList.of( ImmutableList.of(
new NestedDataExpressions.StructExprMacro(),
new NestedDataExpressions.GetPathExprMacro(),
new NestedDataExpressions.ListKeysExprMacro(),
new NestedDataExpressions.ListPathsExprMacro(),
new NestedDataExpressions.JsonPathsExprMacro(), new NestedDataExpressions.JsonPathsExprMacro(),
new NestedDataExpressions.JsonKeysExprMacro(), new NestedDataExpressions.JsonKeysExprMacro(),
new NestedDataExpressions.JsonObjectExprMacro(), new NestedDataExpressions.JsonObjectExprMacro(),
@ -84,20 +81,6 @@ public class NestedDataExpressionsTest extends InitializedNullHandlingTest
.build() .build()
); );
@Test
public void testStructExpression()
{
Expr expr = Parser.parse("struct('x',100,'y',200,'z',300)", MACRO_TABLE);
ExprEval eval = expr.eval(inputBindings);
Assert.assertEquals(NEST, eval.value());
expr = Parser.parse("struct('x',array('a','b','c'),'y',struct('a','hello','b','world'))", MACRO_TABLE);
eval = expr.eval(inputBindings);
// decompose because of array equals
Assert.assertArrayEquals(new Object[]{"a", "b", "c"}, (Object[]) ((Map) eval.value()).get("x"));
Assert.assertEquals(ImmutableMap.of("a", "hello", "b", "world"), ((Map) eval.value()).get("y"));
}
@Test @Test
public void testJsonObjectExpression() public void testJsonObjectExpression()
{ {
@ -112,73 +95,30 @@ public class NestedDataExpressionsTest extends InitializedNullHandlingTest
Assert.assertEquals(ImmutableMap.of("a", "hello", "b", "world"), ((Map) eval.value()).get("y")); Assert.assertEquals(ImmutableMap.of("a", "hello", "b", "world"), ((Map) eval.value()).get("y"));
} }
@Test
public void testListKeysExpression()
{
Expr expr = Parser.parse("list_keys(nest, '.')", MACRO_TABLE);
ExprEval eval = expr.eval(inputBindings);
Assert.assertEquals(ExpressionType.STRING_ARRAY, eval.type());
Assert.assertArrayEquals(new Object[]{"x", "y", "z"}, (Object[]) eval.value());
expr = Parser.parse("list_keys(nester, '.x')", MACRO_TABLE);
eval = expr.eval(inputBindings);
Assert.assertEquals(ExpressionType.STRING_ARRAY, eval.type());
Assert.assertArrayEquals(new Object[]{"0", "1", "2"}, (Object[]) eval.value());
expr = Parser.parse("list_keys(nester, '.y')", MACRO_TABLE);
eval = expr.eval(inputBindings);
Assert.assertEquals(ExpressionType.STRING_ARRAY, eval.type());
Assert.assertArrayEquals(new Object[]{"a", "b"}, (Object[]) eval.value());
expr = Parser.parse("list_keys(nester, '.x.a')", MACRO_TABLE);
eval = expr.eval(inputBindings);
Assert.assertNull(eval.value());
expr = Parser.parse("list_keys(nester, '.x.a.b')", MACRO_TABLE);
eval = expr.eval(inputBindings);
Assert.assertNull(eval.value());
}
@Test
public void testListPathsExpression()
{
Expr expr = Parser.parse("list_paths(nest)", MACRO_TABLE);
ExprEval eval = expr.eval(inputBindings);
Assert.assertEquals(ExpressionType.STRING_ARRAY, eval.type());
Assert.assertArrayEquals(new Object[]{".\"y\"", ".\"z\"", ".\"x\""}, (Object[]) eval.value());
expr = Parser.parse("list_paths(nester)", MACRO_TABLE);
eval = expr.eval(inputBindings);
Assert.assertEquals(ExpressionType.STRING_ARRAY, eval.type());
Assert.assertArrayEquals(new Object[]{".\"x\"[0]", ".\"x\"[1]", ".\"x\"[2]", ".\"y\".\"b\"", ".\"y\".\"a\""}, (Object[]) eval.value());
}
@Test @Test
public void testJsonKeysExpression() public void testJsonKeysExpression()
{ {
Expr expr = Parser.parse("json_keys(nest, '.')", MACRO_TABLE); Expr expr = Parser.parse("json_keys(nest, '$.')", MACRO_TABLE);
ExprEval eval = expr.eval(inputBindings); ExprEval eval = expr.eval(inputBindings);
Assert.assertEquals(ExpressionType.STRING_ARRAY, eval.type()); Assert.assertEquals(ExpressionType.STRING_ARRAY, eval.type());
Assert.assertArrayEquals(new Object[]{"x", "y", "z"}, (Object[]) eval.value()); Assert.assertArrayEquals(new Object[]{"x", "y", "z"}, (Object[]) eval.value());
expr = Parser.parse("json_keys(nester, '.x')", MACRO_TABLE); expr = Parser.parse("json_keys(nester, '$.x')", MACRO_TABLE);
eval = expr.eval(inputBindings); eval = expr.eval(inputBindings);
Assert.assertEquals(ExpressionType.STRING_ARRAY, eval.type()); Assert.assertEquals(ExpressionType.STRING_ARRAY, eval.type());
Assert.assertArrayEquals(new Object[]{"0", "1", "2"}, (Object[]) eval.value()); Assert.assertArrayEquals(new Object[]{"0", "1", "2"}, (Object[]) eval.value());
expr = Parser.parse("json_keys(nester, '.y')", MACRO_TABLE); expr = Parser.parse("json_keys(nester, '$.y')", MACRO_TABLE);
eval = expr.eval(inputBindings); eval = expr.eval(inputBindings);
Assert.assertEquals(ExpressionType.STRING_ARRAY, eval.type()); Assert.assertEquals(ExpressionType.STRING_ARRAY, eval.type());
Assert.assertArrayEquals(new Object[]{"a", "b"}, (Object[]) eval.value()); Assert.assertArrayEquals(new Object[]{"a", "b"}, (Object[]) eval.value());
expr = Parser.parse("json_keys(nester, '.x.a')", MACRO_TABLE); expr = Parser.parse("json_keys(nester, '$.x.a')", MACRO_TABLE);
eval = expr.eval(inputBindings); eval = expr.eval(inputBindings);
Assert.assertNull(eval.value()); Assert.assertNull(eval.value());
expr = Parser.parse("json_keys(nester, '.x.a.b')", MACRO_TABLE); expr = Parser.parse("json_keys(nester, '$.x.a.b')", MACRO_TABLE);
eval = expr.eval(inputBindings); eval = expr.eval(inputBindings);
Assert.assertNull(eval.value()); Assert.assertNull(eval.value());
} }
@ -197,45 +137,6 @@ public class NestedDataExpressionsTest extends InitializedNullHandlingTest
Assert.assertArrayEquals(new Object[]{"$.x[0]", "$.x[1]", "$.x[2]", "$.y.b", "$.y.a"}, (Object[]) eval.value()); Assert.assertArrayEquals(new Object[]{"$.x[0]", "$.x[1]", "$.x[2]", "$.y.b", "$.y.a"}, (Object[]) eval.value());
} }
@Test
public void testGetPathExpression()
{
Expr expr = Parser.parse("get_path(nest, '.x')", MACRO_TABLE);
ExprEval eval = expr.eval(inputBindings);
Assert.assertEquals(100L, eval.value());
Assert.assertEquals(ExpressionType.LONG, eval.type());
expr = Parser.parse("get_path(nester, '.x')", MACRO_TABLE);
eval = expr.eval(inputBindings);
Assert.assertNull(eval.value());
expr = Parser.parse("get_path(nester, '.x[1]')", MACRO_TABLE);
eval = expr.eval(inputBindings);
Assert.assertEquals("b", eval.value());
Assert.assertEquals(ExpressionType.STRING, eval.type());
expr = Parser.parse("get_path(nester, '.x[23]')", MACRO_TABLE);
eval = expr.eval(inputBindings);
Assert.assertNull(eval.value());
expr = Parser.parse("get_path(nester, '.x[1].b')", MACRO_TABLE);
eval = expr.eval(inputBindings);
Assert.assertNull(eval.value());
expr = Parser.parse("get_path(nester, '.y[1]')", MACRO_TABLE);
eval = expr.eval(inputBindings);
Assert.assertNull(eval.value());
expr = Parser.parse("get_path(nester, '.y.a')", MACRO_TABLE);
eval = expr.eval(inputBindings);
Assert.assertEquals("hello", eval.value());
Assert.assertEquals(ExpressionType.STRING, eval.type());
expr = Parser.parse("get_path(nester, '.y.a.b.c[12]')", MACRO_TABLE);
eval = expr.eval(inputBindings);
Assert.assertNull(eval.value());
}
@Test @Test
public void testJsonValueExpression() public void testJsonValueExpression()
{ {
@ -270,6 +171,11 @@ public class NestedDataExpressionsTest extends InitializedNullHandlingTest
Assert.assertEquals("hello", eval.value()); Assert.assertEquals("hello", eval.value());
Assert.assertEquals(ExpressionType.STRING, eval.type()); Assert.assertEquals(ExpressionType.STRING, eval.type());
expr = Parser.parse("json_value(nester, '$.y.a', 'LONG')", MACRO_TABLE);
eval = expr.eval(inputBindings);
Assert.assertEquals(NullHandling.defaultLongValue(), eval.value());
Assert.assertEquals(ExpressionType.LONG, eval.type());
expr = Parser.parse("json_value(nester, '$.y.a.b.c[12]')", MACRO_TABLE); expr = Parser.parse("json_value(nester, '$.y.a.b.c[12]')", MACRO_TABLE);
eval = expr.eval(inputBindings); eval = expr.eval(inputBindings);
Assert.assertNull(eval.value()); Assert.assertNull(eval.value());
@ -278,6 +184,26 @@ public class NestedDataExpressionsTest extends InitializedNullHandlingTest
eval = expr.eval(inputBindings); eval = expr.eval(inputBindings);
Assert.assertEquals(1234L, eval.value()); Assert.assertEquals(1234L, eval.value());
Assert.assertEquals(ExpressionType.LONG, eval.type()); Assert.assertEquals(ExpressionType.LONG, eval.type());
expr = Parser.parse("json_value(long, '$', 'STRING')", MACRO_TABLE);
eval = expr.eval(inputBindings);
Assert.assertEquals("1234", eval.value());
Assert.assertEquals(ExpressionType.STRING, eval.type());
expr = Parser.parse("json_value(nest, '$.x')", MACRO_TABLE);
eval = expr.eval(inputBindings);
Assert.assertEquals(100L, eval.value());
Assert.assertEquals(ExpressionType.LONG, eval.type());
expr = Parser.parse("json_value(nest, '$.x', 'DOUBLE')", MACRO_TABLE);
eval = expr.eval(inputBindings);
Assert.assertEquals(100.0, eval.value());
Assert.assertEquals(ExpressionType.DOUBLE, eval.type());
expr = Parser.parse("json_value(nest, '$.x', 'STRING')", MACRO_TABLE);
eval = expr.eval(inputBindings);
Assert.assertEquals("100", eval.value());
Assert.assertEquals(ExpressionType.STRING, eval.type());
} }
@Test @Test

View File

@ -56,8 +56,6 @@ public class TestExprMacroTable extends ExprMacroTable
new HyperUniqueExpressions.HllEstimateExprMacro(), new HyperUniqueExpressions.HllEstimateExprMacro(),
new HyperUniqueExpressions.HllRoundEstimateExprMacro(), new HyperUniqueExpressions.HllRoundEstimateExprMacro(),
new NestedDataExpressions.JsonObjectExprMacro(), new NestedDataExpressions.JsonObjectExprMacro(),
new NestedDataExpressions.ListKeysExprMacro(),
new NestedDataExpressions.ListPathsExprMacro(),
new NestedDataExpressions.JsonKeysExprMacro(), new NestedDataExpressions.JsonKeysExprMacro(),
new NestedDataExpressions.JsonPathsExprMacro(), new NestedDataExpressions.JsonPathsExprMacro(),
new NestedDataExpressions.JsonValueExprMacro(), new NestedDataExpressions.JsonValueExprMacro(),

View File

@ -70,11 +70,7 @@ public class ExpressionModule implements Module
.add(HyperUniqueExpressions.HllAddExprMacro.class) .add(HyperUniqueExpressions.HllAddExprMacro.class)
.add(HyperUniqueExpressions.HllEstimateExprMacro.class) .add(HyperUniqueExpressions.HllEstimateExprMacro.class)
.add(HyperUniqueExpressions.HllRoundEstimateExprMacro.class) .add(HyperUniqueExpressions.HllRoundEstimateExprMacro.class)
.add(NestedDataExpressions.StructExprMacro.class)
.add(NestedDataExpressions.JsonObjectExprMacro.class) .add(NestedDataExpressions.JsonObjectExprMacro.class)
.add(NestedDataExpressions.GetPathExprMacro.class)
.add(NestedDataExpressions.ListKeysExprMacro.class)
.add(NestedDataExpressions.ListPathsExprMacro.class)
.add(NestedDataExpressions.JsonKeysExprMacro.class) .add(NestedDataExpressions.JsonKeysExprMacro.class)
.add(NestedDataExpressions.JsonPathsExprMacro.class) .add(NestedDataExpressions.JsonPathsExprMacro.class)
.add(NestedDataExpressions.JsonValueExprMacro.class) .add(NestedDataExpressions.JsonValueExprMacro.class)

View File

@ -19,19 +19,27 @@
package org.apache.druid.sql.calcite.expression.builtin; package org.apache.druid.sql.calcite.expression.builtin;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableList;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexNode;
import org.apache.calcite.sql.SqlDataTypeSpec;
import org.apache.calcite.sql.SqlFunction; import org.apache.calcite.sql.SqlFunction;
import org.apache.calcite.sql.SqlFunctionCategory; import org.apache.calcite.sql.SqlFunctionCategory;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.SqlOperator; import org.apache.calcite.sql.SqlOperator;
import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.sql.parser.SqlParserPos;
import org.apache.calcite.sql.type.OperandTypes; import org.apache.calcite.sql.type.OperandTypes;
import org.apache.calcite.sql.type.ReturnTypes;
import org.apache.calcite.sql.type.SqlOperandCountRanges; import org.apache.calcite.sql.type.SqlOperandCountRanges;
import org.apache.calcite.sql.type.SqlReturnTypeInference; import org.apache.calcite.sql.type.SqlReturnTypeInference;
import org.apache.calcite.sql.type.SqlTypeFamily; import org.apache.calcite.sql.type.SqlTypeFamily;
import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.calcite.sql.type.SqlTypeTransforms;
import org.apache.calcite.sql2rel.SqlRexConvertlet;
import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.math.expr.Expr; import org.apache.druid.math.expr.Expr;
import org.apache.druid.math.expr.InputBindings; import org.apache.druid.math.expr.InputBindings;
@ -42,123 +50,30 @@ import org.apache.druid.segment.nested.NestedDataComplexTypeSerde;
import org.apache.druid.segment.nested.NestedPathFinder; import org.apache.druid.segment.nested.NestedPathFinder;
import org.apache.druid.segment.nested.NestedPathPart; import org.apache.druid.segment.nested.NestedPathPart;
import org.apache.druid.segment.virtual.NestedFieldVirtualColumn; import org.apache.druid.segment.virtual.NestedFieldVirtualColumn;
import org.apache.druid.sql.calcite.expression.AliasedOperatorConversion;
import org.apache.druid.sql.calcite.expression.DruidExpression; import org.apache.druid.sql.calcite.expression.DruidExpression;
import org.apache.druid.sql.calcite.expression.Expressions; import org.apache.druid.sql.calcite.expression.Expressions;
import org.apache.druid.sql.calcite.expression.OperatorConversions; import org.apache.druid.sql.calcite.expression.OperatorConversions;
import org.apache.druid.sql.calcite.expression.SqlOperatorConversion; import org.apache.druid.sql.calcite.expression.SqlOperatorConversion;
import org.apache.druid.sql.calcite.planner.Calcites;
import org.apache.druid.sql.calcite.planner.PlannerContext; import org.apache.druid.sql.calcite.planner.PlannerContext;
import org.apache.druid.sql.calcite.planner.UnsupportedSQLQueryException; import org.apache.druid.sql.calcite.planner.UnsupportedSQLQueryException;
import org.apache.druid.sql.calcite.planner.convertlet.DruidConvertletFactory;
import org.apache.druid.sql.calcite.table.RowSignatures; import org.apache.druid.sql.calcite.table.RowSignatures;
import javax.annotation.Nullable; import javax.annotation.Nullable;
import java.util.Collections;
import java.util.List; import java.util.List;
public class NestedDataOperatorConversions public class NestedDataOperatorConversions
{ {
public static final DruidJsonValueConvertletFactory DRUID_JSON_VALUE_CONVERTLET_FACTORY_INSTANCE =
new DruidJsonValueConvertletFactory();
public static final SqlReturnTypeInference NESTED_RETURN_TYPE_INFERENCE = opBinding -> RowSignatures.makeComplexType( public static final SqlReturnTypeInference NESTED_RETURN_TYPE_INFERENCE = opBinding -> RowSignatures.makeComplexType(
opBinding.getTypeFactory(), opBinding.getTypeFactory(),
NestedDataComplexTypeSerde.TYPE, NestedDataComplexTypeSerde.TYPE,
true true
); );
/**
 * SQL operator conversion for {@code GET_PATH(expr, path)}: takes any expression plus a string path written in
 * 'jq' syntax, and plans it as a {@link NestedFieldVirtualColumn} when the input is a direct column reference.
 * Any other kind of input expression is rejected with an {@link UnsupportedSQLQueryException}.
 */
public static class GetPathOperatorConversion implements SqlOperatorConversion
{
  private static final String FUNCTION_NAME = StringUtils.toUpperCase("get_path");

  private static final SqlFunction SQL_FUNCTION =
      OperatorConversions.operatorBuilder(FUNCTION_NAME)
                         .operandTypeChecker(
                             OperandTypes.sequence(
                                 "(expr,path)",
                                 OperandTypes.family(SqlTypeFamily.ANY),
                                 OperandTypes.family(SqlTypeFamily.STRING)
                             )
                         )
                         .returnTypeCascadeNullable(SqlTypeName.VARCHAR)
                         .functionCategory(SqlFunctionCategory.USER_DEFINED_FUNCTION)
                         .build();

  @Override
  public SqlOperator calciteOperator()
  {
    return SQL_FUNCTION;
  }

  /**
   * Translates the call into a virtual-column backed {@link DruidExpression}.
   *
   * @return the translated expression, or null if the operands could not be translated, there are not exactly two
   *         of them, or the path operand is not a literal
   * @throws UnsupportedSQLQueryException if the path cannot be parsed, or the input is not a direct column
   */
  @Nullable
  @Override
  public DruidExpression toDruidExpression(
      PlannerContext plannerContext,
      RowSignature rowSignature,
      RexNode rexNode
  )
  {
    final RexCall rexCall = (RexCall) rexNode;
    final List<DruidExpression> operands = Expressions.toDruidExpressions(
        plannerContext,
        rowSignature,
        rexCall.getOperands()
    );
    if (operands == null || operands.size() != 2) {
      return null;
    }

    final Expr pathLiteral = Parser.parse(operands.get(1).getExpression(), plannerContext.getExprMacroTable());
    if (!pathLiteral.isLiteral()) {
      return null;
    }

    // normalize the path up front, so that equivalent paths written with different jq syntax produce the same
    // expression
    final String rawPath = (String) pathLiteral.eval(InputBindings.nilBindings()).value();
    final List<NestedPathPart> pathParts;
    try {
      pathParts = NestedPathFinder.parseJqPath(rawPath);
    }
    catch (IllegalArgumentException iae) {
      throw new UnsupportedSQLQueryException(
          "Cannot use [%s]: [%s]",
          rexCall.getOperator().getName(),
          iae.getMessage()
      );
    }
    final String normalizedPath = NestedPathFinder.toNormalizedJqPath(pathParts);

    final DruidExpression input = operands.get(0);
    if (!input.isSimpleExtraction()) {
      // only direct column inputs are supported by this function
      throw new UnsupportedSQLQueryException(
          "Cannot use [%s] on expression input: [%s]",
          rexCall.getOperator().getName(),
          input.getExpression()
      );
    }

    return DruidExpression.ofVirtualColumn(
        Calcites.getColumnTypeForRelDataType(rexCall.getType()),
        (args) -> "get_path(" + args.get(0).getExpression() + ",'" + normalizedPath + "')",
        ImmutableList.of(
            DruidExpression.ofColumn(NestedDataComplexTypeSerde.TYPE, input.getDirectColumn())
        ),
        (name, outputType, expression, macroTable) -> new NestedFieldVirtualColumn(
            input.getDirectColumn(),
            name,
            outputType,
            pathParts,
            false,
            null,
            null
        )
    );
  }
}
/**
 * Exposes {@link GetPathOperatorConversion} under the additional SQL function name {@code JSON_GET_PATH}.
 */
public static class JsonGetPathAliasOperatorConversion extends AliasedOperatorConversion
{
public JsonGetPathAliasOperatorConversion()
{
// delegate everything to GET_PATH, only the SQL-visible name differs
super(new GetPathOperatorConversion(), StringUtils.toUpperCase("json_get_path"));
}
}
public static class JsonPathsOperatorConversion implements SqlOperatorConversion public static class JsonPathsOperatorConversion implements SqlOperatorConversion
{ {
private static final SqlFunction SQL_FUNCTION = OperatorConversions private static final SqlFunction SQL_FUNCTION = OperatorConversions
@ -316,12 +231,266 @@ public class NestedDataOperatorConversions
} }
} }
public static class JsonValueOperatorConversion implements SqlOperatorConversion
/**
* The {@link org.apache.calcite.sql2rel.StandardConvertletTable} converts json_value(.. RETURNING type) into
* cast(json_value_any(..), type).
*
* This is not that useful for us, so we have our own convertlet, to translate into specialized operators such
* as {@link JsonValueBigintOperatorConversion}, {@link JsonValueDoubleOperatorConversion}, or
* {@link JsonValueVarcharOperatorConversion}, before falling back to {@link JsonValueAnyOperatorConversion}.
*
* This convertlet still always wraps the function in a {@link SqlStdOperatorTable#CAST}, to smooth out type
* mismatches, such as VARCHAR(2000) vs VARCHAR or whatever else various type checkers like to complain about not
* exactly matching.
*/
public static class DruidJsonValueConvertletFactory implements DruidConvertletFactory
{ {
@Override
public SqlRexConvertlet createConvertlet(PlannerContext plannerContext)
{
return (cx, call) -> {
// we don't support modifying the behavior to be anything other than 'NULL ON EMPTY' / 'NULL ON ERROR'
Preconditions.checkArgument(
"SQLJSONVALUEEMPTYORERRORBEHAVIOR[NULL]".equals(call.operand(2).toString()),
"Unsupported JSON_VALUE parameter 'ON EMPTY' defined - please re-issue this query without this argument"
);
Preconditions.checkArgument(
"NULL".equals(call.operand(3).toString()),
"Unsupported JSON_VALUE parameter 'ON EMPTY' defined - please re-issue this query without this argument"
);
Preconditions.checkArgument(
"SQLJSONVALUEEMPTYORERRORBEHAVIOR[NULL]".equals(call.operand(4).toString()),
"Unsupported JSON_VALUE parameter 'ON ERROR' defined - please re-issue this query without this argument"
);
Preconditions.checkArgument(
"NULL".equals(call.operand(5).toString()),
"Unsupported JSON_VALUE parameter 'ON ERROR' defined - please re-issue this query without this argument"
);
SqlDataTypeSpec dataType = call.operand(6);
RelDataType sqlType = dataType.deriveType(cx.getValidator());
SqlNode rewrite;
if (SqlTypeName.INT_TYPES.contains(sqlType.getSqlTypeName())) {
rewrite = JsonValueBigintOperatorConversion.FUNCTION.createCall(
SqlParserPos.ZERO,
call.operand(0),
call.operand(1)
);
} else if (SqlTypeName.APPROX_TYPES.contains(sqlType.getSqlTypeName())) {
rewrite = JsonValueDoubleOperatorConversion.FUNCTION.createCall(
SqlParserPos.ZERO,
call.operand(0),
call.operand(1)
);
} else if (SqlTypeName.STRING_TYPES.contains(sqlType.getSqlTypeName())) {
rewrite = JsonValueVarcharOperatorConversion.FUNCTION.createCall(
SqlParserPos.ZERO,
call.operand(0),
call.operand(1)
);
} else {
// fallback to json_value_any, e.g. the 'standard' convertlet.
rewrite = JsonValueAnyOperatorConversion.FUNCTION.createCall(
SqlParserPos.ZERO,
call.operand(0),
call.operand(1)
);
}
// always cast anyway, to prevent haters from complaining that VARCHAR doesn't match VARCHAR(2000)
SqlNode caster = SqlStdOperatorTable.CAST.createCall(
SqlParserPos.ZERO,
rewrite,
call.operand(6)
);
return cx.convertExpression(caster);
};
}
@Override
public List<SqlOperator> operators()
{
return Collections.singletonList(SqlStdOperatorTable.JSON_VALUE);
}
}
/**
 * Base class for JSON_VALUE operator conversions that have a fixed output type. Each subclass supplies the
 * {@link SqlFunction} to bind to and the Druid {@link ColumnType} to produce; the type is passed to the native
 * json_value expression as its third argument.
 */
public abstract static class JsonValueReturningTypeOperatorConversion implements SqlOperatorConversion
{
private final SqlFunction function;
private final ColumnType druidType;
/**
 * @param function  the SQL function this conversion handles
 * @param druidType the Druid type used for the translated expression or virtual column
 */
public JsonValueReturningTypeOperatorConversion(SqlFunction function, ColumnType druidType)
{
this.druidType = druidType;
this.function = function;
}
// NOTE(review): the next four lines hold both the old and the new side of the diff on one line
@Override @Override
public SqlOperator calciteOperator() public SqlOperator calciteOperator()
{ {
return SqlStdOperatorTable.JSON_VALUE; return function;
}
/**
 * Translates a two operand (expr, path) call into a json_value expression typed as {@code druidType}.
 *
 * @return the translated expression, or null if the operands could not be translated, there are not exactly two
 *         of them, or the path operand is not a literal
 * @throws UnsupportedSQLQueryException if the path cannot be parsed as a JSONPath
 */
@Nullable
@Override
public DruidExpression toDruidExpression(
PlannerContext plannerContext,
RowSignature rowSignature,
RexNode rexNode
)
{
final RexCall call = (RexCall) rexNode;
final List<DruidExpression> druidExpressions = Expressions.toDruidExpressions(
plannerContext,
rowSignature,
call.getOperands()
);
if (druidExpressions == null || druidExpressions.size() != 2) {
return null;
}
final Expr pathExpr = Parser.parse(druidExpressions.get(1).getExpression(), plannerContext.getExprMacroTable());
if (!pathExpr.isLiteral()) {
return null;
}
// pre-normalize path so that the same expressions with different JSONPath syntax are collapsed
final String path = (String) pathExpr.eval(InputBindings.nilBindings()).value();
final List<NestedPathPart> parts;
try {
parts = NestedPathFinder.parseJsonPath(path);
}
catch (IllegalArgumentException iae) {
throw new UnsupportedSQLQueryException(
"Cannot use [%s]: [%s]",
call.getOperator().getName(),
iae.getMessage()
);
}
final String jsonPath = NestedPathFinder.toNormalizedJsonPath(parts);
// the output type is passed along as the third argument of the native expression
final DruidExpression.ExpressionGenerator builder = (args) ->
"json_value(" + args.get(0).getExpression() + ",'" + jsonPath + "', '" + druidType.asTypeString() + "')";
// a direct column input can be planned as a NestedFieldVirtualColumn instead of a generic expression
if (druidExpressions.get(0).isSimpleExtraction()) {
return DruidExpression.ofVirtualColumn(
druidType,
builder,
ImmutableList.of(
DruidExpression.ofColumn(NestedDataComplexTypeSerde.TYPE, druidExpressions.get(0).getDirectColumn())
),
(name, outputType, expression, macroTable) -> new NestedFieldVirtualColumn(
druidExpressions.get(0).getDirectColumn(),
name,
outputType,
parts,
false,
null,
null
)
);
}
return DruidExpression.ofExpression(druidType, builder, druidExpressions);
}
/**
 * Builds a user-defined function taking (ANY expr, STRING path) whose return type is {@code typeName}, forced to
 * be nullable.
 */
static SqlFunction buildFunction(String functionName, SqlTypeName typeName)
{
return OperatorConversions.operatorBuilder(functionName)
.operandTypeChecker(
OperandTypes.sequence(
"(expr,path)",
OperandTypes.family(SqlTypeFamily.ANY),
OperandTypes.family(SqlTypeFamily.STRING)
)
)
.returnTypeInference(
ReturnTypes.cascade(
opBinding -> opBinding.getTypeFactory().createSqlType(typeName),
SqlTypeTransforms.FORCE_NULLABLE
)
)
.functionCategory(SqlFunctionCategory.USER_DEFINED_FUNCTION)
.build();
}
}
/**
 * {@code JSON_VALUE_BIGINT(expr, path)}: SQL return type BIGINT, Druid type {@link ColumnType#LONG}.
 */
public static class JsonValueBigintOperatorConversion extends JsonValueReturningTypeOperatorConversion
{
private static final SqlFunction FUNCTION = buildFunction("JSON_VALUE_BIGINT", SqlTypeName.BIGINT);
public JsonValueBigintOperatorConversion()
{
super(FUNCTION, ColumnType.LONG);
}
}
/**
 * {@code JSON_VALUE_DOUBLE(expr, path)}: SQL return type DOUBLE, Druid type {@link ColumnType#DOUBLE}.
 */
public static class JsonValueDoubleOperatorConversion extends JsonValueReturningTypeOperatorConversion
{
private static final SqlFunction FUNCTION = buildFunction("JSON_VALUE_DOUBLE", SqlTypeName.DOUBLE);
public JsonValueDoubleOperatorConversion()
{
super(FUNCTION, ColumnType.DOUBLE);
}
}
/**
 * {@code JSON_VALUE_VARCHAR(expr, path)}: SQL return type VARCHAR, Druid type {@link ColumnType#STRING}.
 */
public static class JsonValueVarcharOperatorConversion extends JsonValueReturningTypeOperatorConversion
{
private static final SqlFunction FUNCTION = buildFunction("JSON_VALUE_VARCHAR", SqlTypeName.VARCHAR);
public JsonValueVarcharOperatorConversion()
{
super(FUNCTION, ColumnType.STRING);
}
}
public static class JsonValueAnyOperatorConversion implements SqlOperatorConversion
{
private static final SqlFunction FUNCTION =
OperatorConversions.operatorBuilder("JSON_VALUE_ANY")
.operandTypeChecker(
OperandTypes.or(
OperandTypes.sequence(
"(expr,path)",
OperandTypes.family(SqlTypeFamily.ANY),
OperandTypes.family(SqlTypeFamily.STRING)
),
OperandTypes.family(
SqlTypeFamily.ANY,
SqlTypeFamily.CHARACTER,
SqlTypeFamily.ANY,
SqlTypeFamily.ANY,
SqlTypeFamily.ANY,
SqlTypeFamily.ANY,
SqlTypeFamily.ANY
)
)
)
.operandTypeInference((callBinding, returnType, operandTypes) -> {
RelDataTypeFactory typeFactory = callBinding.getTypeFactory();
if (operandTypes.length > 5) {
operandTypes[3] = typeFactory.createSqlType(SqlTypeName.ANY);
operandTypes[5] = typeFactory.createSqlType(SqlTypeName.ANY);
}
})
.returnTypeInference(
ReturnTypes.cascade(
opBinding -> opBinding.getTypeFactory().createTypeWithNullability(
// STRING is the closest thing we have to an ANY type
// however, this should really be using SqlTypeName.ANY.. someday
opBinding.getTypeFactory().createSqlType(SqlTypeName.VARCHAR),
true
),
SqlTypeTransforms.FORCE_NULLABLE
)
)
.functionCategory(SqlFunctionCategory.SYSTEM)
.build();
@Override
public SqlOperator calciteOperator()
{
return FUNCTION;
} }
@Nullable @Nullable
@ -334,22 +503,17 @@ public class NestedDataOperatorConversions
{ {
final RexCall call = (RexCall) rexNode; final RexCall call = (RexCall) rexNode;
// calcite puts a bunch of junk in here so the call looks something like // calcite parser can allow for a bunch of junk in here that we don't care about right now, so the call looks
// JSON_VALUE(`nested`.`nest`, '$.x', SQLJSONVALUEEMPTYORERRORBEHAVIOR[NULL], NULL, SQLJSONVALUEEMPTYORERRORBEHAVIOR[NULL], NULL, VARCHAR(2000)) // something like this:
// JSON_VALUE_ANY(`nested`.`nest`, '$.x', SQLJSONVALUEEMPTYORERRORBEHAVIOR[NULL], NULL, SQLJSONVALUEEMPTYORERRORBEHAVIOR[NULL], NULL)
// by the time it gets here // by the time it gets here
final List<DruidExpression> druidExpressions = Expressions.toDruidExpressions( final List<DruidExpression> druidExpressions = Expressions.toDruidExpressions(
plannerContext, plannerContext,
rowSignature, rowSignature,
call.getOperands().subList(0, 2) call.getOperands().size() > 2 ? call.getOperands().subList(0, 2) : call.getOperands()
); );
ColumnType inferredOutputType = ColumnType.STRING;
if (call.getOperands().size() == 7) {
ColumnType maybe = Calcites.getColumnTypeForRelDataType(call.getOperands().get(6).getType());
if (maybe != null && !ColumnType.UNKNOWN_COMPLEX.equals(maybe)) {
inferredOutputType = maybe;
}
}
if (druidExpressions == null || druidExpressions.size() != 2) { if (druidExpressions == null || druidExpressions.size() != 2) {
return null; return null;
@ -376,10 +540,13 @@ public class NestedDataOperatorConversions
final DruidExpression.ExpressionGenerator builder = (args) -> final DruidExpression.ExpressionGenerator builder = (args) ->
"json_value(" + args.get(0).getExpression() + ",'" + jsonPath + "')"; "json_value(" + args.get(0).getExpression() + ",'" + jsonPath + "')";
if (druidExpressions.get(0).isSimpleExtraction()) { // STRING is the closest thing we have to ANY, though maybe someday this
// can be replaced with a VARIANT type
final ColumnType columnType = ColumnType.STRING;
if (druidExpressions.get(0).isSimpleExtraction()) {
return DruidExpression.ofVirtualColumn( return DruidExpression.ofVirtualColumn(
inferredOutputType, columnType,
builder, builder,
ImmutableList.of( ImmutableList.of(
DruidExpression.ofColumn(NestedDataComplexTypeSerde.TYPE, druidExpressions.get(0).getDirectColumn()) DruidExpression.ofColumn(NestedDataComplexTypeSerde.TYPE, druidExpressions.get(0).getDirectColumn())
@ -395,24 +562,7 @@ public class NestedDataOperatorConversions
) )
); );
} }
return DruidExpression.ofExpression(ColumnType.STRING, builder, druidExpressions); return DruidExpression.ofExpression(columnType, builder, druidExpressions);
}
}
// Calcite converts JSON_VALUE to JSON_VALUE_ANY, so that operator has to be wired up as well.
/**
 * Alias of {@link JsonValueOperatorConversion} that binds to {@link SqlStdOperatorTable#JSON_VALUE_ANY}.
 */
public static class JsonValueAnyOperatorConversion extends AliasedOperatorConversion
{
private static final String FUNCTION_NAME = StringUtils.toUpperCase("json_value_any");
public JsonValueAnyOperatorConversion()
{
super(new JsonValueOperatorConversion(), FUNCTION_NAME);
}
// overrides the operator with the Calcite built-in JSON_VALUE_ANY
@Override
public SqlOperator calciteOperator()
{
return SqlStdOperatorTable.JSON_VALUE_ANY;
}
} }
} }

View File

@ -299,13 +299,13 @@ public class DruidOperatorTable implements SqlOperatorTable
private static final List<SqlOperatorConversion> NESTED_DATA_OPERATOR_CONVERSIONS = private static final List<SqlOperatorConversion> NESTED_DATA_OPERATOR_CONVERSIONS =
ImmutableList.<SqlOperatorConversion>builder() ImmutableList.<SqlOperatorConversion>builder()
.add(new NestedDataOperatorConversions.GetPathOperatorConversion())
.add(new NestedDataOperatorConversions.JsonGetPathAliasOperatorConversion())
.add(new NestedDataOperatorConversions.JsonKeysOperatorConversion()) .add(new NestedDataOperatorConversions.JsonKeysOperatorConversion())
.add(new NestedDataOperatorConversions.JsonPathsOperatorConversion()) .add(new NestedDataOperatorConversions.JsonPathsOperatorConversion())
.add(new NestedDataOperatorConversions.JsonQueryOperatorConversion()) .add(new NestedDataOperatorConversions.JsonQueryOperatorConversion())
.add(new NestedDataOperatorConversions.JsonValueOperatorConversion())
.add(new NestedDataOperatorConversions.JsonValueAnyOperatorConversion()) .add(new NestedDataOperatorConversions.JsonValueAnyOperatorConversion())
.add(new NestedDataOperatorConversions.JsonValueBigintOperatorConversion())
.add(new NestedDataOperatorConversions.JsonValueDoubleOperatorConversion())
.add(new NestedDataOperatorConversions.JsonValueVarcharOperatorConversion())
.add(new NestedDataOperatorConversions.JsonObjectOperatorConversion()) .add(new NestedDataOperatorConversions.JsonObjectOperatorConversion())
.add(new NestedDataOperatorConversions.ToJsonStringOperatorConversion()) .add(new NestedDataOperatorConversions.ToJsonStringOperatorConversion())
.add(new NestedDataOperatorConversions.ParseJsonOperatorConversion()) .add(new NestedDataOperatorConversions.ParseJsonOperatorConversion())

View File

@ -28,6 +28,7 @@ import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.sql2rel.SqlRexConvertlet; import org.apache.calcite.sql2rel.SqlRexConvertlet;
import org.apache.calcite.sql2rel.SqlRexConvertletTable; import org.apache.calcite.sql2rel.SqlRexConvertletTable;
import org.apache.calcite.sql2rel.StandardConvertletTable; import org.apache.calcite.sql2rel.StandardConvertletTable;
import org.apache.druid.sql.calcite.expression.builtin.NestedDataOperatorConversions;
import org.apache.druid.sql.calcite.planner.PlannerContext; import org.apache.druid.sql.calcite.planner.PlannerContext;
import java.util.ArrayList; import java.util.ArrayList;
@ -44,6 +45,7 @@ public class DruidConvertletTable implements SqlRexConvertletTable
ImmutableList.<DruidConvertletFactory>builder() ImmutableList.<DruidConvertletFactory>builder()
.add(CurrentTimestampAndFriendsConvertletFactory.INSTANCE) .add(CurrentTimestampAndFriendsConvertletFactory.INSTANCE)
.add(TimeInIntervalConvertletFactory.INSTANCE) .add(TimeInIntervalConvertletFactory.INSTANCE)
.add(NestedDataOperatorConversions.DRUID_JSON_VALUE_CONVERTLET_FACTORY_INSTANCE)
.build(); .build();
// Operators we don't have standard conversions for, but which can be converted into ones that do by // Operators we don't have standard conversions for, but which can be converted into ones that do by

View File

@ -239,6 +239,43 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest
); );
} }
/**
 * JSON_VALUE_ANY called directly with (column, path) should group on a STRING typed nested field virtual column.
 */
@Test
public void testGroupJsonValueAny()
{
  final String sql = "SELECT "
                     + "JSON_VALUE_ANY(nest, '$.x'), "
                     + "SUM(cnt) "
                     + "FROM druid.nested GROUP BY 1";

  final ImmutableList<Object[]> expectedRows = ImmutableList.of(
      new Object[]{NullHandling.defaultStringValue(), 4L},
      new Object[]{"100", 2L},
      new Object[]{"200", 1L}
  );

  final RowSignature expectedSignature = RowSignature.builder()
                                                     .add("EXPR$0", ColumnType.STRING)
                                                     .add("EXPR$1", ColumnType.LONG)
                                                     .build();

  testQuery(
      sql,
      ImmutableList.of(
          GroupByQuery.builder()
                      .setDataSource(DATA_SOURCE)
                      .setInterval(querySegmentSpec(Filtration.eternity()))
                      .setGranularity(Granularities.ALL)
                      .setVirtualColumns(
                          new NestedFieldVirtualColumn("nest", "$.x", "v0", ColumnType.STRING)
                      )
                      .setDimensions(
                          dimensions(
                              new DefaultDimensionSpec("v0", "d0")
                          )
                      )
                      .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt")))
                      .setContext(QUERY_CONTEXT_DEFAULT)
                      .build()
      ),
      expectedRows,
      expectedSignature
  );
}
@Test @Test
public void testGroupByJsonValue() public void testGroupByJsonValue()
{ {
@ -350,102 +387,6 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest
); );
} }
/**
 * Three spellings of the same jq path passed to GET_PATH should all collapse onto a single virtual column.
 */
@Test
public void testGroupByGetPaths()
{
  final String sql = "SELECT "
                     + "GET_PATH(nest, '.x'), "
                     + "GET_PATH(nest, '.\"x\"'), "
                     + "GET_PATH(nest, '.[\"x\"]'), "
                     + "SUM(cnt) "
                     + "FROM druid.nested GROUP BY 1, 2, 3";

  final ImmutableList<Object[]> expectedRows = ImmutableList.of(
      new Object[]{
          NullHandling.defaultStringValue(),
          NullHandling.defaultStringValue(),
          NullHandling.defaultStringValue(),
          4L
      },
      new Object[]{"100", "100", "100", 2L},
      new Object[]{"200", "200", "200", 1L}
  );

  final RowSignature expectedSignature = RowSignature.builder()
                                                     .add("EXPR$0", ColumnType.STRING)
                                                     .add("EXPR$1", ColumnType.STRING)
                                                     .add("EXPR$2", ColumnType.STRING)
                                                     .add("EXPR$3", ColumnType.LONG)
                                                     .build();

  testQuery(
      sql,
      ImmutableList.of(
          GroupByQuery.builder()
                      .setDataSource(DATA_SOURCE)
                      .setInterval(querySegmentSpec(Filtration.eternity()))
                      .setGranularity(Granularities.ALL)
                      .setVirtualColumns(
                          new NestedFieldVirtualColumn("nest", "$.x", "v0", ColumnType.STRING)
                      )
                      .setDimensions(
                          dimensions(
                              new DefaultDimensionSpec("v0", "d0"),
                              new DefaultDimensionSpec("v0", "d1"),
                              new DefaultDimensionSpec("v0", "d2")
                          )
                      )
                      .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt")))
                      .setContext(QUERY_CONTEXT_DEFAULT)
                      .build()
      ),
      expectedRows,
      expectedSignature
  );
}
/**
 * Same as the GET_PATH grouping test, but through the JSON_GET_PATH alias: three spellings of one jq path should
 * share a single virtual column.
 */
@Test
public void testGroupByJsonGetPaths()
{
  final String sql = "SELECT "
                     + "JSON_GET_PATH(nest, '.x'), "
                     + "JSON_GET_PATH(nest, '.\"x\"'), "
                     + "JSON_GET_PATH(nest, '.[\"x\"]'), "
                     + "SUM(cnt) "
                     + "FROM druid.nested GROUP BY 1, 2, 3";

  final ImmutableList<Object[]> expectedRows = ImmutableList.of(
      new Object[]{
          NullHandling.defaultStringValue(),
          NullHandling.defaultStringValue(),
          NullHandling.defaultStringValue(),
          4L
      },
      new Object[]{"100", "100", "100", 2L},
      new Object[]{"200", "200", "200", 1L}
  );

  final RowSignature expectedSignature = RowSignature.builder()
                                                     .add("EXPR$0", ColumnType.STRING)
                                                     .add("EXPR$1", ColumnType.STRING)
                                                     .add("EXPR$2", ColumnType.STRING)
                                                     .add("EXPR$3", ColumnType.LONG)
                                                     .build();

  testQuery(
      sql,
      ImmutableList.of(
          GroupByQuery.builder()
                      .setDataSource(DATA_SOURCE)
                      .setInterval(querySegmentSpec(Filtration.eternity()))
                      .setGranularity(Granularities.ALL)
                      .setVirtualColumns(
                          new NestedFieldVirtualColumn("nest", "$.x", "v0", ColumnType.STRING)
                      )
                      .setDimensions(
                          dimensions(
                              new DefaultDimensionSpec("v0", "d0"),
                              new DefaultDimensionSpec("v0", "d1"),
                              new DefaultDimensionSpec("v0", "d2")
                          )
                      )
                      .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt")))
                      .setContext(QUERY_CONTEXT_DEFAULT)
                      .build()
      ),
      expectedRows,
      expectedSignature
  );
}
@Test @Test
public void testGroupByJsonValues() public void testGroupByJsonValues()
{ {
@ -2010,6 +1951,57 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest
); );
} }
/**
 * JSON_VALUE(.. RETURNING DOUBLE) inside SUM should plan as a DOUBLE typed nested field virtual column fed into a
 * double sum aggregator.
 */
@Test
public void testReturningAndSumPathDouble()
{
  final String sql = "SELECT "
                     + "SUM(JSON_VALUE(nest, '$.x' RETURNING DOUBLE)) "
                     + "FROM druid.nested";

  final ImmutableList<Object[]> expectedRows = ImmutableList.of(
      new Object[]{400.0}
  );

  final RowSignature expectedSignature = RowSignature.builder()
                                                     .add("EXPR$0", ColumnType.DOUBLE)
                                                     .build();

  testQuery(
      sql,
      ImmutableList.of(
          Druids.newTimeseriesQueryBuilder()
                .dataSource(DATA_SOURCE)
                .intervals(querySegmentSpec(Filtration.eternity()))
                .granularity(Granularities.ALL)
                .virtualColumns(new NestedFieldVirtualColumn("nest", "$.x", "v0", ColumnType.DOUBLE))
                .aggregators(aggregators(new DoubleSumAggregatorFactory("a0", "v0")))
                .context(QUERY_CONTEXT_DEFAULT)
                .build()
      ),
      expectedRows,
      expectedSignature
  );
}
/**
 * JSON_VALUE(.. RETURNING DECIMAL) inside SUM is expected to produce the same native query as RETURNING DOUBLE: a
 * DOUBLE typed nested field virtual column and a double sum aggregator.
 */
@Test
public void testReturningAndSumPathDecimal()
{
  final String sql = "SELECT "
                     + "SUM(JSON_VALUE(nest, '$.x' RETURNING DECIMAL)) "
                     + "FROM druid.nested";

  final ImmutableList<Object[]> expectedRows = ImmutableList.of(
      new Object[]{400.0}
  );

  final RowSignature expectedSignature = RowSignature.builder()
                                                     .add("EXPR$0", ColumnType.DOUBLE)
                                                     .build();

  testQuery(
      sql,
      ImmutableList.of(
          Druids.newTimeseriesQueryBuilder()
                .dataSource(DATA_SOURCE)
                .intervals(querySegmentSpec(Filtration.eternity()))
                .granularity(Granularities.ALL)
                .virtualColumns(new NestedFieldVirtualColumn("nest", "$.x", "v0", ColumnType.DOUBLE))
                .aggregators(aggregators(new DoubleSumAggregatorFactory("a0", "v0")))
                .context(QUERY_CONTEXT_DEFAULT)
                .build()
      ),
      expectedRows,
      expectedSignature
  );
}
@Test @Test
public void testReturningAndSumPathStrings() public void testReturningAndSumPathStrings()
@ -2043,7 +2035,7 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest
cannotVectorize(); cannotVectorize();
testQuery( testQuery(
"SELECT " "SELECT "
+ "JSON_KEYS(nester, '.'), " + "JSON_KEYS(nester, '$'), "
+ "SUM(cnt) " + "SUM(cnt) "
+ "FROM druid.nested GROUP BY 1", + "FROM druid.nested GROUP BY 1",
ImmutableList.of( ImmutableList.of(
@ -2054,7 +2046,7 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest
.setVirtualColumns( .setVirtualColumns(
new ExpressionVirtualColumn( new ExpressionVirtualColumn(
"v0", "v0",
"json_keys(\"nester\",'.')", "json_keys(\"nester\",'$')",
ColumnType.STRING_ARRAY, ColumnType.STRING_ARRAY,
macroTable macroTable
) )
@ -2127,7 +2119,7 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest
cannotVectorize(); cannotVectorize();
testQuery( testQuery(
"SELECT " "SELECT "
+ "JSON_KEYS(nest, '.'), " + "JSON_KEYS(nest, '$'), "
+ "SUM(cnt) " + "SUM(cnt) "
+ "FROM druid.nested GROUP BY 1", + "FROM druid.nested GROUP BY 1",
ImmutableList.of( ImmutableList.of(
@ -2138,7 +2130,7 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest
.setVirtualColumns( .setVirtualColumns(
new ExpressionVirtualColumn( new ExpressionVirtualColumn(
"v0", "v0",
"json_keys(\"nest\",'.')", "json_keys(\"nest\",'$')",
ColumnType.STRING_ARRAY, ColumnType.STRING_ARRAY,
macroTable macroTable
) )
@ -2253,7 +2245,7 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest
(expected) -> { (expected) -> {
expected.expect(UnsupportedSQLQueryException.class); expected.expect(UnsupportedSQLQueryException.class);
expected.expectMessage( expected.expectMessage(
"Cannot use [JSON_VALUE_ANY]: [Bad format, '.array.[1]' is not a valid JSONPath path: must start with '$']"); "Cannot use [JSON_VALUE_VARCHAR]: [Bad format, '.array.[1]' is not a valid JSONPath path: must start with '$']");
} }
); );
} }
@ -2358,6 +2350,54 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest
); );
} }
/**
 * A typed JSON_VALUE nested inside JSON_OBJECT, nested inside another typed JSON_VALUE: the outer call should
 * become an expression virtual column carrying the 'LONG' type argument, and the inner call a LONG typed nested
 * field virtual column.
 */
@Test
public void testCompositionTyping()
{
  final String sql =
      "SELECT "
      + "JSON_VALUE((JSON_OBJECT(KEY 'x' VALUE JSON_VALUE(nest, '$.x' RETURNING BIGINT))), '$.x' RETURNING BIGINT)\n"
      + "FROM druid.nested";

  final ImmutableList<Object[]> expectedRows = ImmutableList.of(
      new Object[]{100L},
      new Object[]{NullHandling.defaultLongValue()},
      new Object[]{200L},
      new Object[]{NullHandling.defaultLongValue()},
      new Object[]{NullHandling.defaultLongValue()},
      new Object[]{100L},
      new Object[]{NullHandling.defaultLongValue()}
  );

  final RowSignature expectedSignature = RowSignature.builder()
                                                     .add("EXPR$0", ColumnType.LONG)
                                                     .build();

  testQuery(
      sql,
      ImmutableList.of(
          Druids.newScanQueryBuilder()
                .dataSource(DATA_SOURCE)
                .intervals(querySegmentSpec(Filtration.eternity()))
                .virtualColumns(
                    new ExpressionVirtualColumn(
                        "v0",
                        "json_value(json_object('x',\"v1\"),'$.x', 'LONG')",
                        ColumnType.LONG,
                        macroTable
                    ),
                    new NestedFieldVirtualColumn(
                        "nest",
                        "v1",
                        ColumnType.LONG,
                        null,
                        false,
                        "$.x",
                        false
                    )
                )
                .columns("v0")
                .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
                .legacy(false)
                .build()
      ),
      expectedRows,
      expectedSignature
  );
}
@Test @Test
public void testToJsonAndParseJson() public void testToJsonAndParseJson()
{ {
@ -2584,4 +2624,46 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest
); );
} }
/**
 * Spelling out the default 'NULL ON EMPTY NULL ON ERROR' options explicitly should be accepted and plan like a
 * JSON_VALUE call without them.
 */
@Test
public void testJsonValueUnDocumentedButSupportedOptions()
{
  final String sql = "SELECT "
                     + "SUM(JSON_VALUE(nest, '$.z' RETURNING BIGINT NULL ON EMPTY NULL ON ERROR)) "
                     + "FROM druid.nested";

  final ImmutableList<Object[]> expectedRows = ImmutableList.of(
      new Object[]{700L}
  );

  final RowSignature expectedSignature = RowSignature.builder()
                                                     .add("EXPR$0", ColumnType.LONG)
                                                     .build();

  testQuery(
      sql,
      ImmutableList.of(
          Druids.newTimeseriesQueryBuilder()
                .dataSource(DATA_SOURCE)
                .intervals(querySegmentSpec(Filtration.eternity()))
                .granularity(Granularities.ALL)
                .virtualColumns(new NestedFieldVirtualColumn("nest", "$.z", "v0", ColumnType.LONG))
                .aggregators(aggregators(new LongSumAggregatorFactory("a0", "v0")))
                .context(QUERY_CONTEXT_DEFAULT)
                .build()
      ),
      expectedRows,
      expectedSignature
  );
}
/**
 * Any 'ON EMPTY' / 'ON ERROR' behavior other than NULL is not supported, and should be rejected with an
 * {@link IllegalArgumentException} rather than silently ignored.
 */
@Test
public void testJsonValueUnsupportedOptions()
{
  testQueryThrows(
      "SELECT "
      + "SUM(JSON_VALUE(nest, '$.z' RETURNING BIGINT ERROR ON EMPTY ERROR ON ERROR)) "
      + "FROM druid.nested",
      // configure the rule handed to the callback, instead of ignoring the argument and reaching for the field
      expected -> {
        expected.expect(IllegalArgumentException.class);
        expected.expectMessage(
            "Unsupported JSON_VALUE parameter 'ON EMPTY' defined - please re-issue this query without this argument"
        );
      }
  );
}
} }