feat: json_merge expression and sql function (#17081)

This commit is contained in:
Lasse Mammen 2024-09-17 13:57:34 +01:00 committed by GitHub
parent 8af9b4729f
commit 307b8e3357
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 278 additions and 0 deletions

View File

@ -246,6 +246,7 @@ JSON functions provide facilities to extract, transform, and create `COMPLEX<jso
| to_json_string(expr) | Convert `expr` into a JSON `STRING` value |
| json_keys(expr, path) | Get array of field names from `expr` at the specified JSONPath `path`, or null if the data does not exist or have any fields |
| json_paths(expr) | Get array of all JSONPath paths available from `expr` |
| json_merge(expr1, expr2[, expr3 ...]) | Merges two or more JSON `STRING` or `COMPLEX<json>` into one. Preserves the rightmost value when there are key overlaps. |
### JSONPath syntax

View File

@ -855,6 +855,13 @@ Returns true if the IPv6 `address` belongs to the `subnet` literal, else false.
Returns an array of field names from `expr` at the specified `path`.
## JSON_MERGE
**Function type:** [JSON](sql-json-functions.md)
`JSON_MERGE(expr1, expr2[, expr3 ...])`
Merges two or more JSON `STRING` or `COMPLEX<json>` into one. Preserves the rightmost value when there are key overlaps. Returning always a `COMPLEX<json>` type.
## JSON_OBJECT
**Function type:** [JSON](sql-json-functions.md)

View File

@ -38,6 +38,7 @@ You can use the following JSON functions to extract, transform, and create `COMP
| --- | --- |
|`JSON_KEYS(expr, path)`| Returns an array of field names from `expr` at the specified `path`.|
|`JSON_OBJECT(KEY expr1 VALUE expr2[, KEY expr3 VALUE expr4, ...])` | Constructs a new `COMPLEX<json>` object. The `KEY` expressions must evaluate to string types. The `VALUE` expressions can be composed of any input type, including other `COMPLEX<json>` values. `JSON_OBJECT` can accept colon-separated key-value pairs. The following syntax is equivalent: `JSON_OBJECT(expr1:expr2[, expr3:expr4, ...])`.|
|`JSON_MERGE(expr1, expr2[, expr3 ...])`| Merges two or more JSON `STRING` or `COMPLEX<json>` into one. Preserves the rightmost value when there are key overlaps. Returning always a `COMPLEX<json>` type.|
|`JSON_PATHS(expr)`| Returns an array of all paths which refer to literal values in `expr` in JSONPath format. |
|`JSON_QUERY(expr, path)`| Extracts a `COMPLEX<json>` value from `expr`, at the specified `path`. |
|`JSON_QUERY_ARRAY(expr, path)`| Extracts an `ARRAY<COMPLEX<json>>` value from `expr` at the specified `path`. If value is not an `ARRAY`, it gets translated into a single element `ARRAY` containing the value at `path`. The primary use of this function is to extract arrays of objects to use as inputs to other [array functions](./sql-array-functions.md).|

View File

@ -81,6 +81,7 @@ public class ExpressionModule implements Module
.add(HyperUniqueExpressions.HllEstimateExprMacro.class)
.add(HyperUniqueExpressions.HllRoundEstimateExprMacro.class)
.add(NestedDataExpressions.JsonObjectExprMacro.class)
.add(NestedDataExpressions.JsonMergeExprMacro.class)
.add(NestedDataExpressions.JsonKeysExprMacro.class)
.add(NestedDataExpressions.JsonPathsExprMacro.class)
.add(NestedDataExpressions.JsonValueExprMacro.class)

View File

@ -22,6 +22,7 @@ package org.apache.druid.query.expression;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.ObjectReader;
import org.apache.druid.guice.annotations.Json;
import org.apache.druid.math.expr.Expr;
import org.apache.druid.math.expr.ExprEval;
@ -99,6 +100,117 @@ public class NestedDataExpressions
}
}
public static class JsonMergeExprMacro implements ExprMacroTable.ExprMacro
{
public static final String NAME = "json_merge";
private final ObjectMapper jsonMapper;
@Inject
public JsonMergeExprMacro(
@Json ObjectMapper jsonMapper
)
{
this.jsonMapper = jsonMapper;
}
@Override
public String name()
{
return NAME;
}
@Override
public Expr apply(List<Expr> args)
{
if (args.size() < 2) {
throw validationFailed("must have at least two arguments");
}
final class ParseJsonExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr
{
public ParseJsonExpr(List<Expr> args)
{
super(JsonMergeExprMacro.this, args);
}
@Override
public ExprEval eval(ObjectBinding bindings)
{
ExprEval arg = args.get(0).eval(bindings);
Object obj;
if (arg.value() == null) {
throw JsonMergeExprMacro.this.validationFailed(
"invalid input expected %s but got %s instead",
ExpressionType.STRING,
arg.type()
);
}
try {
obj = jsonMapper.readValue(getArgAsJson(arg), Object.class);
}
catch (JsonProcessingException e) {
throw JsonMergeExprMacro.this.processingFailed(e, "bad string input [%s]", arg.asString());
}
ObjectReader updater = jsonMapper.readerForUpdating(obj);
for (int i = 1; i < args.size(); i++) {
ExprEval argSub = args.get(i).eval(bindings);
try {
String str = getArgAsJson(argSub);
if (str != null) {
obj = updater.readValue(str);
}
}
catch (JsonProcessingException e) {
throw JsonMergeExprMacro.this.processingFailed(e, "bad string input [%s]", argSub.asString());
}
}
return ExprEval.ofComplex(ExpressionType.NESTED_DATA, obj);
}
@Nullable
@Override
public ExpressionType getOutputType(InputBindingInspector inspector)
{
return ExpressionType.NESTED_DATA;
}
private String getArgAsJson(ExprEval arg)
{
if (arg.value() == null) {
return null;
}
if (arg.type().is(ExprType.STRING)) {
return arg.asString();
}
if (arg.type().is(ExprType.COMPLEX)) {
try {
return jsonMapper.writeValueAsString(unwrap(arg));
}
catch (JsonProcessingException e) {
throw JsonMergeExprMacro.this.processingFailed(e, "bad complex input [%s]", arg.asString());
}
}
throw JsonMergeExprMacro.this.validationFailed(
"invalid input expected %s but got %s instead",
ExpressionType.STRING,
arg.type()
);
}
}
return new ParseJsonExpr(args);
}
}
public static class ToJsonStringExprMacro implements ExprMacroTable.ExprMacro
{
public static final String NAME = "to_json_string";

View File

@ -49,6 +49,7 @@ public class NestedDataExpressionsTest extends InitializedNullHandlingTest
new NestedDataExpressions.JsonPathsExprMacro(),
new NestedDataExpressions.JsonKeysExprMacro(),
new NestedDataExpressions.JsonObjectExprMacro(),
new NestedDataExpressions.JsonMergeExprMacro(JSON_MAPPER),
new NestedDataExpressions.JsonValueExprMacro(),
new NestedDataExpressions.JsonQueryExprMacro(),
new NestedDataExpressions.JsonQueryArrayExprMacro(),
@ -112,6 +113,63 @@ public class NestedDataExpressionsTest extends InitializedNullHandlingTest
Assert.assertEquals(ImmutableMap.of("a", "hello", "b", "world"), ((Map) eval.value()).get("y"));
}
@Test
public void testJsonMergeExpression() throws JsonProcessingException
{
Expr expr = Parser.parse("json_merge('{\"a\":\"x\"}','{\"b\":\"y\"}')", MACRO_TABLE);
ExprEval eval = expr.eval(inputBindings);
Assert.assertEquals("{\"a\":\"x\",\"b\":\"y\"}", JSON_MAPPER.writeValueAsString(eval.value()));
Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type());
expr = Parser.parse("json_merge('{\"a\":\"x\"}', null)", MACRO_TABLE);
eval = expr.eval(inputBindings);
Assert.assertEquals("{\"a\":\"x\"}", JSON_MAPPER.writeValueAsString(eval.value()));
Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type());
expr = Parser.parse("json_merge('{\"a\":\"x\"}','{\"b\":\"y\"}','{\"c\":[1,2,3]}')", MACRO_TABLE);
eval = expr.eval(inputBindings);
Assert.assertEquals("{\"a\":\"x\",\"b\":\"y\",\"c\":[1,2,3]}", JSON_MAPPER.writeValueAsString(eval.value()));
Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type());
expr = Parser.parse("json_merge(json_object('a', 'x'),json_object('b', 'y'))", MACRO_TABLE);
eval = expr.eval(inputBindings);
Assert.assertEquals("{\"a\":\"x\",\"b\":\"y\"}", JSON_MAPPER.writeValueAsString(eval.value()));
Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type());
expr = Parser.parse("json_merge('{\"a\":\"x\"}',json_merge('{\"a\":\"z\"}','{\"a\":\"y\"}'))", MACRO_TABLE);
eval = expr.eval(inputBindings);
Assert.assertEquals("{\"a\":\"y\"}", JSON_MAPPER.writeValueAsString(eval.value()));
Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type());
expr = Parser.parse("json_merge('[\"a\", \"b\"]', '[\"c\", \"d\"]')", MACRO_TABLE);
eval = expr.eval(inputBindings);
Assert.assertEquals("[\"a\",\"b\",\"c\",\"d\"]", JSON_MAPPER.writeValueAsString(eval.value()));
Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type());
}
@Test
public void testJsonMergeOverflow() throws JsonProcessingException
{
Expr.ObjectBinding input1 = InputBindings.forInputSuppliers(
new ImmutableMap.Builder<String, InputBindings.InputSupplier<?>>()
.put("attr", InputBindings.inputSupplier(ExpressionType.NESTED_DATA, () -> ImmutableMap.of("key", "blah", "value", "blahblah")))
.build()
);
Expr.ObjectBinding input2 = InputBindings.forInputSuppliers(
new ImmutableMap.Builder<String, InputBindings.InputSupplier<?>>()
.put("attr", InputBindings.inputSupplier(ExpressionType.NESTED_DATA, () -> ImmutableMap.of("key", "blah2", "value", "blahblah2")))
.build()
);
Expr expr = Parser.parse("json_merge(json_object(), json_object(json_value(attr, '$.key'), json_value(attr, '$.value')))", MACRO_TABLE);
ExprEval eval = expr.eval(input1);
Assert.assertEquals("{\"blah\":\"blahblah\"}", JSON_MAPPER.writeValueAsString(eval.value()));
Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type());
eval = expr.eval(input2);
Assert.assertEquals("{\"blah2\":\"blahblah2\"}", JSON_MAPPER.writeValueAsString(eval.value()));
Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type());
}
@Test
public void testJsonKeysExpression()
{

View File

@ -780,6 +780,52 @@ public class NestedDataOperatorConversions
}
}
public static class JsonMergeOperatorConversion implements SqlOperatorConversion
{
private static final String FUNCTION_NAME = "json_merge";
private static final SqlFunction SQL_FUNCTION = OperatorConversions
.operatorBuilder(FUNCTION_NAME)
.operandTypeChecker(OperandTypes.variadic(SqlOperandCountRanges.from(1)))
.operandTypeInference((callBinding, returnType, operandTypes) -> {
RelDataTypeFactory typeFactory = callBinding.getTypeFactory();
for (int i = 0; i < operandTypes.length; i++) {
operandTypes[i] = typeFactory.createTypeWithNullability(
typeFactory.createSqlType(SqlTypeName.ANY),
true
);
}
})
.returnTypeInference(NESTED_RETURN_TYPE_INFERENCE)
.functionCategory(SqlFunctionCategory.SYSTEM)
.build();
@Override
public SqlOperator calciteOperator()
{
return SQL_FUNCTION;
}
@Nullable
@Override
public DruidExpression toDruidExpression(
PlannerContext plannerContext,
RowSignature rowSignature,
RexNode rexNode
)
{
return OperatorConversions.convertCall(
plannerContext,
rowSignature,
rexNode,
druidExpressions -> DruidExpression.ofExpression(
ColumnType.NESTED_DATA,
DruidExpression.functionCall("json_merge"),
druidExpressions
)
);
}
}
public static class ToJsonStringOperatorConversion implements SqlOperatorConversion
{
private static final String FUNCTION_NAME = "to_json_string";

View File

@ -353,6 +353,7 @@ public class DruidOperatorTable implements SqlOperatorTable
.add(new NestedDataOperatorConversions.JsonValueReturningArrayDoubleOperatorConversion())
.add(new NestedDataOperatorConversions.JsonValueReturningArrayVarcharOperatorConversion())
.add(new NestedDataOperatorConversions.JsonObjectOperatorConversion())
.add(new NestedDataOperatorConversions.JsonMergeOperatorConversion())
.add(new NestedDataOperatorConversions.ToJsonStringOperatorConversion())
.add(new NestedDataOperatorConversions.ParseJsonOperatorConversion())
.add(new NestedDataOperatorConversions.TryParseJsonOperatorConversion())

View File

@ -69,6 +69,7 @@ import org.apache.druid.segment.column.RowSignature;
import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.incremental.IncrementalIndexSchema;
import org.apache.druid.segment.join.JoinableFactoryWrapper;
import org.apache.druid.segment.nested.NestedPathField;
import org.apache.druid.segment.virtual.ExpressionVirtualColumn;
import org.apache.druid.segment.virtual.NestedFieldVirtualColumn;
import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory;
@ -4920,6 +4921,55 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest
);
}
@Test
public void testJsonMerging()
{
testQuery(
"SELECT "
+ "JSON_MERGE('{\"a\":\"x\"}',JSON_OBJECT(KEY 'x' VALUE JSON_VALUE(nest, '$.x')))\n"
+ "FROM druid.nested",
ImmutableList.of(
Druids.newScanQueryBuilder()
.dataSource(DATA_SOURCE)
.intervals(querySegmentSpec(Filtration.eternity()))
.virtualColumns(
new ExpressionVirtualColumn(
"v0",
"json_merge('{\\u0022a\\u0022:\\u0022x\\u0022}',json_object('x',\"v1\"))",
ColumnType.NESTED_DATA,
queryFramework().macroTable()
),
new NestedFieldVirtualColumn(
"nest",
"v1",
ColumnType.STRING,
ImmutableList.of(
new NestedPathField("x")
),
false,
null,
false
)
)
.columns("v0")
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
.build()
),
ImmutableList.of(
new Object[]{"{\"a\":\"x\",\"x\":\"100\"}"},
new Object[]{"{\"a\":\"x\",\"x\":null}"},
new Object[]{"{\"a\":\"x\",\"x\":\"200\"}"},
new Object[]{"{\"a\":\"x\",\"x\":null}"},
new Object[]{"{\"a\":\"x\",\"x\":null}"},
new Object[]{"{\"a\":\"x\",\"x\":\"100\"}"},
new Object[]{"{\"a\":\"x\",\"x\":null}"}
),
RowSignature.builder()
.add("EXPR$0", ColumnType.NESTED_DATA)
.build()
);
}
@Test
public void testCompositionTyping()
{

View File

@ -384,6 +384,7 @@ json_paths
json_query
json_query_array
json_value
json_merge
karlkfi
kbps
kerberos