diff --git a/docs/querying/math-expr.md b/docs/querying/math-expr.md index 38ced649c06..0893fc4e236 100644 --- a/docs/querying/math-expr.md +++ b/docs/querying/math-expr.md @@ -246,6 +246,7 @@ JSON functions provide facilities to extract, transform, and create `COMPLEX` into one. Preserves the rightmost value when there are key overlaps. | ### JSONPath syntax diff --git a/docs/querying/sql-functions.md b/docs/querying/sql-functions.md index 5cdcbb25495..6859ca67391 100644 --- a/docs/querying/sql-functions.md +++ b/docs/querying/sql-functions.md @@ -855,6 +855,13 @@ Returns true if the IPv6 `address` belongs to the `subnet` literal, else false. Returns an array of field names from `expr` at the specified `path`. +## JSON_MERGE + +**Function type:** [JSON](sql-json-functions.md) + +`JSON_MERGE(expr1, expr2[, expr3 ...])` +Merges two or more JSON `STRING` or `COMPLEX` into one. Preserves the rightmost value when there are key overlaps. Returning always a `COMPLEX` type. + ## JSON_OBJECT **Function type:** [JSON](sql-json-functions.md) diff --git a/docs/querying/sql-json-functions.md b/docs/querying/sql-json-functions.md index 199c568c29b..35b4f5e3769 100644 --- a/docs/querying/sql-json-functions.md +++ b/docs/querying/sql-json-functions.md @@ -38,6 +38,7 @@ You can use the following JSON functions to extract, transform, and create `COMP | --- | --- | |`JSON_KEYS(expr, path)`| Returns an array of field names from `expr` at the specified `path`.| |`JSON_OBJECT(KEY expr1 VALUE expr2[, KEY expr3 VALUE expr4, ...])` | Constructs a new `COMPLEX` object. The `KEY` expressions must evaluate to string types. The `VALUE` expressions can be composed of any input type, including other `COMPLEX` values. `JSON_OBJECT` can accept colon-separated key-value pairs. The following syntax is equivalent: `JSON_OBJECT(expr1:expr2[, expr3:expr4, ...])`.| +|`JSON_MERGE(expr1, expr2[, expr3 ...])`| Merges two or more JSON `STRING` or `COMPLEX` into one. Preserves the rightmost value when there are key overlaps. Returning always a `COMPLEX` type.| |`JSON_PATHS(expr)`| Returns an array of all paths which refer to literal values in `expr` in JSONPath format. | |`JSON_QUERY(expr, path)`| Extracts a `COMPLEX` value from `expr`, at the specified `path`. | |`JSON_QUERY_ARRAY(expr, path)`| Extracts an `ARRAY>` value from `expr` at the specified `path`. If value is not an `ARRAY`, it gets translated into a single element `ARRAY` containing the value at `path`. The primary use of this function is to extract arrays of objects to use as inputs to other [array functions](./sql-array-functions.md).| diff --git a/processing/src/main/java/org/apache/druid/guice/ExpressionModule.java b/processing/src/main/java/org/apache/druid/guice/ExpressionModule.java index 917cf967f14..e1064234e56 100644 --- a/processing/src/main/java/org/apache/druid/guice/ExpressionModule.java +++ b/processing/src/main/java/org/apache/druid/guice/ExpressionModule.java @@ -81,6 +81,7 @@ public class ExpressionModule implements Module .add(HyperUniqueExpressions.HllEstimateExprMacro.class) .add(HyperUniqueExpressions.HllRoundEstimateExprMacro.class) .add(NestedDataExpressions.JsonObjectExprMacro.class) + .add(NestedDataExpressions.JsonMergeExprMacro.class) .add(NestedDataExpressions.JsonKeysExprMacro.class) .add(NestedDataExpressions.JsonPathsExprMacro.class) .add(NestedDataExpressions.JsonValueExprMacro.class) diff --git a/processing/src/main/java/org/apache/druid/query/expression/NestedDataExpressions.java b/processing/src/main/java/org/apache/druid/query/expression/NestedDataExpressions.java index 873b4f83188..0926ce78e0a 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/NestedDataExpressions.java +++ b/processing/src/main/java/org/apache/druid/query/expression/NestedDataExpressions.java @@ -22,6 +22,7 @@ package org.apache.druid.query.expression; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.ObjectReader; import org.apache.druid.guice.annotations.Json; import org.apache.druid.math.expr.Expr; import org.apache.druid.math.expr.ExprEval; @@ -99,6 +100,117 @@ public class NestedDataExpressions } } + public static class JsonMergeExprMacro implements ExprMacroTable.ExprMacro + { + public static final String NAME = "json_merge"; + + private final ObjectMapper jsonMapper; + + @Inject + public JsonMergeExprMacro( + @Json ObjectMapper jsonMapper + ) + { + this.jsonMapper = jsonMapper; + } + + @Override + public String name() + { + return NAME; + } + + @Override + public Expr apply(List args) + { + if (args.size() < 2) { + throw validationFailed("must have at least two arguments"); + } + + final class ParseJsonExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr + { + public ParseJsonExpr(List args) + { + super(JsonMergeExprMacro.this, args); + } + + @Override + public ExprEval eval(ObjectBinding bindings) + { + ExprEval arg = args.get(0).eval(bindings); + Object obj; + + if (arg.value() == null) { + throw JsonMergeExprMacro.this.validationFailed( + "invalid input expected %s but got %s instead", + ExpressionType.STRING, + arg.type() + ); + } + + try { + obj = jsonMapper.readValue(getArgAsJson(arg), Object.class); + } + catch (JsonProcessingException e) { + throw JsonMergeExprMacro.this.processingFailed(e, "bad string input [%s]", arg.asString()); + } + + ObjectReader updater = jsonMapper.readerForUpdating(obj); + + for (int i = 1; i < args.size(); i++) { + ExprEval argSub = args.get(i).eval(bindings); + + try { + String str = getArgAsJson(argSub); + if (str != null) { + obj = updater.readValue(str); + } + } + catch (JsonProcessingException e) { + throw JsonMergeExprMacro.this.processingFailed(e, "bad string input [%s]", argSub.asString()); + } + } + + return ExprEval.ofComplex(ExpressionType.NESTED_DATA, obj); + } + + @Nullable + @Override + public ExpressionType getOutputType(InputBindingInspector inspector) + { + return ExpressionType.NESTED_DATA; + } + + private String getArgAsJson(ExprEval arg) + { + if (arg.value() == null) { + return null; + } + + if (arg.type().is(ExprType.STRING)) { + return arg.asString(); + } + + if (arg.type().is(ExprType.COMPLEX)) { + try { + return jsonMapper.writeValueAsString(unwrap(arg)); + } + catch (JsonProcessingException e) { + throw JsonMergeExprMacro.this.processingFailed(e, "bad complex input [%s]", arg.asString()); + } + } + + throw JsonMergeExprMacro.this.validationFailed( + "invalid input expected %s but got %s instead", + ExpressionType.STRING, + arg.type() + ); + } + } + return new ParseJsonExpr(args); + } + } + public static class ToJsonStringExprMacro implements ExprMacroTable.ExprMacro { public static final String NAME = "to_json_string"; diff --git a/processing/src/test/java/org/apache/druid/query/expression/NestedDataExpressionsTest.java b/processing/src/test/java/org/apache/druid/query/expression/NestedDataExpressionsTest.java index b14edb2d17b..c9fe553469a 100644 --- a/processing/src/test/java/org/apache/druid/query/expression/NestedDataExpressionsTest.java +++ b/processing/src/test/java/org/apache/druid/query/expression/NestedDataExpressionsTest.java @@ -49,6 +49,7 @@ public class NestedDataExpressionsTest extends InitializedNullHandlingTest new NestedDataExpressions.JsonPathsExprMacro(), new NestedDataExpressions.JsonKeysExprMacro(), new NestedDataExpressions.JsonObjectExprMacro(), + new NestedDataExpressions.JsonMergeExprMacro(JSON_MAPPER), new NestedDataExpressions.JsonValueExprMacro(), new NestedDataExpressions.JsonQueryExprMacro(), new NestedDataExpressions.JsonQueryArrayExprMacro(), @@ -112,6 +113,63 @@ public class NestedDataExpressionsTest extends InitializedNullHandlingTest Assert.assertEquals(ImmutableMap.of("a", "hello", "b", "world"), ((Map) eval.value()).get("y")); } + @Test + public void testJsonMergeExpression() throws JsonProcessingException + { + Expr expr = Parser.parse("json_merge('{\"a\":\"x\"}','{\"b\":\"y\"}')", MACRO_TABLE); + ExprEval eval = expr.eval(inputBindings); + Assert.assertEquals("{\"a\":\"x\",\"b\":\"y\"}", JSON_MAPPER.writeValueAsString(eval.value())); + Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type()); + + expr = Parser.parse("json_merge('{\"a\":\"x\"}', null)", MACRO_TABLE); + eval = expr.eval(inputBindings); + Assert.assertEquals("{\"a\":\"x\"}", JSON_MAPPER.writeValueAsString(eval.value())); + Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type()); + + expr = Parser.parse("json_merge('{\"a\":\"x\"}','{\"b\":\"y\"}','{\"c\":[1,2,3]}')", MACRO_TABLE); + eval = expr.eval(inputBindings); + Assert.assertEquals("{\"a\":\"x\",\"b\":\"y\",\"c\":[1,2,3]}", JSON_MAPPER.writeValueAsString(eval.value())); + Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type()); + + expr = Parser.parse("json_merge(json_object('a', 'x'),json_object('b', 'y'))", MACRO_TABLE); + eval = expr.eval(inputBindings); + Assert.assertEquals("{\"a\":\"x\",\"b\":\"y\"}", JSON_MAPPER.writeValueAsString(eval.value())); + Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type()); + + expr = Parser.parse("json_merge('{\"a\":\"x\"}',json_merge('{\"a\":\"z\"}','{\"a\":\"y\"}'))", MACRO_TABLE); + eval = expr.eval(inputBindings); + Assert.assertEquals("{\"a\":\"y\"}", JSON_MAPPER.writeValueAsString(eval.value())); + Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type()); + + expr = Parser.parse("json_merge('[\"a\", \"b\"]', '[\"c\", \"d\"]')", MACRO_TABLE); + eval = expr.eval(inputBindings); + Assert.assertEquals("[\"a\",\"b\",\"c\",\"d\"]", JSON_MAPPER.writeValueAsString(eval.value())); + Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type()); + } + + @Test + public void testJsonMergeOverflow() throws JsonProcessingException + { + Expr.ObjectBinding input1 = InputBindings.forInputSuppliers( + new ImmutableMap.Builder>() + .put("attr", InputBindings.inputSupplier(ExpressionType.NESTED_DATA, () -> ImmutableMap.of("key", "blah", "value", "blahblah"))) + .build() + ); + Expr.ObjectBinding input2 = InputBindings.forInputSuppliers( + new ImmutableMap.Builder>() + .put("attr", InputBindings.inputSupplier(ExpressionType.NESTED_DATA, () -> ImmutableMap.of("key", "blah2", "value", "blahblah2"))) + .build() + ); + + Expr expr = Parser.parse("json_merge(json_object(), json_object(json_value(attr, '$.key'), json_value(attr, '$.value')))", MACRO_TABLE); + ExprEval eval = expr.eval(input1); + Assert.assertEquals("{\"blah\":\"blahblah\"}", JSON_MAPPER.writeValueAsString(eval.value())); + Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type()); + eval = expr.eval(input2); + Assert.assertEquals("{\"blah2\":\"blahblah2\"}", JSON_MAPPER.writeValueAsString(eval.value())); + Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type()); + } + @Test public void testJsonKeysExpression() { diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/NestedDataOperatorConversions.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/NestedDataOperatorConversions.java index a6006046553..9c6cfb0448b 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/NestedDataOperatorConversions.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/NestedDataOperatorConversions.java @@ -780,6 +780,52 @@ public class NestedDataOperatorConversions } } + public static class JsonMergeOperatorConversion implements SqlOperatorConversion + { + private static final String FUNCTION_NAME = "json_merge"; + private static final SqlFunction SQL_FUNCTION = OperatorConversions + .operatorBuilder(FUNCTION_NAME) + .operandTypeChecker(OperandTypes.variadic(SqlOperandCountRanges.from(1))) + .operandTypeInference((callBinding, returnType, operandTypes) -> { + RelDataTypeFactory typeFactory = callBinding.getTypeFactory(); + for (int i = 0; i < operandTypes.length; i++) { + operandTypes[i] = typeFactory.createTypeWithNullability( + typeFactory.createSqlType(SqlTypeName.ANY), + true + ); + } + }) + .returnTypeInference(NESTED_RETURN_TYPE_INFERENCE) + .functionCategory(SqlFunctionCategory.SYSTEM) + .build(); + + @Override + public SqlOperator calciteOperator() + { + return SQL_FUNCTION; + } + + @Nullable + @Override + public DruidExpression toDruidExpression( + PlannerContext plannerContext, + RowSignature rowSignature, + RexNode rexNode + ) + { + return OperatorConversions.convertCall( + plannerContext, + rowSignature, + rexNode, + druidExpressions -> DruidExpression.ofExpression( + ColumnType.NESTED_DATA, + DruidExpression.functionCall("json_merge"), + druidExpressions + ) + ); + } + } + public static class ToJsonStringOperatorConversion implements SqlOperatorConversion { private static final String FUNCTION_NAME = "to_json_string"; diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidOperatorTable.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidOperatorTable.java index 27efe16270e..0fb1c9fb9ff 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidOperatorTable.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidOperatorTable.java @@ -353,6 +353,7 @@ public class DruidOperatorTable implements SqlOperatorTable .add(new NestedDataOperatorConversions.JsonValueReturningArrayDoubleOperatorConversion()) .add(new NestedDataOperatorConversions.JsonValueReturningArrayVarcharOperatorConversion()) .add(new NestedDataOperatorConversions.JsonObjectOperatorConversion()) + .add(new NestedDataOperatorConversions.JsonMergeOperatorConversion()) .add(new NestedDataOperatorConversions.ToJsonStringOperatorConversion()) .add(new NestedDataOperatorConversions.ParseJsonOperatorConversion()) .add(new NestedDataOperatorConversions.TryParseJsonOperatorConversion()) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java index 88ad05a4a27..b03b6698b1d 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java @@ -69,6 +69,7 @@ import org.apache.druid.segment.column.RowSignature; import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.incremental.IncrementalIndexSchema; import org.apache.druid.segment.join.JoinableFactoryWrapper; +import org.apache.druid.segment.nested.NestedPathField; import org.apache.druid.segment.virtual.ExpressionVirtualColumn; import org.apache.druid.segment.virtual.NestedFieldVirtualColumn; import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; @@ -4920,6 +4921,55 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest ); } + @Test + public void testJsonMerging() + { + testQuery( + "SELECT " + + "JSON_MERGE('{\"a\":\"x\"}',JSON_OBJECT(KEY 'x' VALUE JSON_VALUE(nest, '$.x')))\n" + + "FROM druid.nested", + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(DATA_SOURCE) + .intervals(querySegmentSpec(Filtration.eternity())) + .virtualColumns( + new ExpressionVirtualColumn( + "v0", + "json_merge('{\\u0022a\\u0022:\\u0022x\\u0022}',json_object('x',\"v1\"))", + ColumnType.NESTED_DATA, + queryFramework().macroTable() + ), + new NestedFieldVirtualColumn( + "nest", + "v1", + ColumnType.STRING, + ImmutableList.of( + new NestedPathField("x") + ), + false, + null, + false + ) + ) + .columns("v0") + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .build() + ), + ImmutableList.of( + new Object[]{"{\"a\":\"x\",\"x\":\"100\"}"}, + new Object[]{"{\"a\":\"x\",\"x\":null}"}, + new Object[]{"{\"a\":\"x\",\"x\":\"200\"}"}, + new Object[]{"{\"a\":\"x\",\"x\":null}"}, + new Object[]{"{\"a\":\"x\",\"x\":null}"}, + new Object[]{"{\"a\":\"x\",\"x\":\"100\"}"}, + new Object[]{"{\"a\":\"x\",\"x\":null}"} + ), + RowSignature.builder() + .add("EXPR$0", ColumnType.NESTED_DATA) + .build() + ); + } + @Test public void testCompositionTyping() { diff --git a/website/.spelling b/website/.spelling index 8175755f804..894cd40d959 100644 --- a/website/.spelling +++ b/website/.spelling @@ -384,6 +384,7 @@ json_paths json_query json_query_array json_value +json_merge karlkfi kbps kerberos