mirror of https://github.com/apache/druid.git
feat: json_merge expression and sql function (#17081)
parent 8af9b4729f
commit 307b8e3357
@@ -246,6 +246,7 @@ JSON functions provide facilities to extract, transform, and create `COMPLEX<json>`
| to_json_string(expr) | Convert `expr` into a JSON `STRING` value |
| json_keys(expr, path) | Get array of field names from `expr` at the specified JSONPath `path`, or null if the data does not exist or have any fields |
| json_paths(expr) | Get array of all JSONPath paths available from `expr` |
| json_merge(expr1, expr2[, expr3 ...]) | Merges two or more JSON `STRING` or `COMPLEX<json>` values into one. Preserves the rightmost value when there are key overlaps. |

### JSONPath syntax
@@ -855,6 +855,13 @@ Returns true if the IPv6 `address` belongs to the `subnet` literal, else false.

Returns an array of field names from `expr` at the specified `path`.

## JSON_MERGE

**Function type:** [JSON](sql-json-functions.md)

`JSON_MERGE(expr1, expr2[, expr3 ...])`

Merges two or more JSON `STRING` or `COMPLEX<json>` values into one. Preserves the rightmost value when there are key overlaps. Always returns a `COMPLEX<json>` value.
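
For illustration, a constant-only query along these lines exercises the merge semantics described above; because the rightmost value wins for the overlapping key `a`, the expected result is `{"a":"z","b":"y"}`:

```sql
SELECT JSON_MERGE('{"a":"x"}', '{"a":"z","b":"y"}') AS merged
```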

## JSON_OBJECT

**Function type:** [JSON](sql-json-functions.md)
@@ -38,6 +38,7 @@ You can use the following JSON functions to extract, transform, and create `COMPLEX<json>`
| --- | --- |
|`JSON_KEYS(expr, path)`| Returns an array of field names from `expr` at the specified `path`.|
|`JSON_OBJECT(KEY expr1 VALUE expr2[, KEY expr3 VALUE expr4, ...])` | Constructs a new `COMPLEX<json>` object. The `KEY` expressions must evaluate to string types. The `VALUE` expressions can be composed of any input type, including other `COMPLEX<json>` values. `JSON_OBJECT` can accept colon-separated key-value pairs. The following syntax is equivalent: `JSON_OBJECT(expr1:expr2[, expr3:expr4, ...])`.|
|`JSON_MERGE(expr1, expr2[, expr3 ...])`| Merges two or more JSON `STRING` or `COMPLEX<json>` values into one. Preserves the rightmost value when there are key overlaps. Always returns a `COMPLEX<json>` value. See the usage sketch after this table.|
|`JSON_PATHS(expr)`| Returns an array of all paths which refer to literal values in `expr` in JSONPath format. |
|`JSON_QUERY(expr, path)`| Extracts a `COMPLEX<json>` value from `expr` at the specified `path`. |
|`JSON_QUERY_ARRAY(expr, path)`| Extracts an `ARRAY<COMPLEX<json>>` value from `expr` at the specified `path`. If the value is not an `ARRAY`, it is translated into a single-element `ARRAY` containing the value at `path`. The primary use of this function is to extract arrays of objects to use as inputs to other [array functions](./sql-array-functions.md).|
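
The following usage sketch is modeled on the `testJsonMerging` SQL test later in this change; it merges a constant JSON document with an object built from a nested column (the `nest` column and `druid.nested` datasource are the fixtures used in that test):

```sql
SELECT
  JSON_MERGE('{"a":"x"}', JSON_OBJECT(KEY 'x' VALUE JSON_VALUE(nest, '$.x')))
FROM druid.nested
```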
@@ -81,6 +81,7 @@ public class ExpressionModule implements Module
        .add(HyperUniqueExpressions.HllEstimateExprMacro.class)
        .add(HyperUniqueExpressions.HllRoundEstimateExprMacro.class)
        .add(NestedDataExpressions.JsonObjectExprMacro.class)
        .add(NestedDataExpressions.JsonMergeExprMacro.class)
        .add(NestedDataExpressions.JsonKeysExprMacro.class)
        .add(NestedDataExpressions.JsonPathsExprMacro.class)
        .add(NestedDataExpressions.JsonValueExprMacro.class)
@@ -22,6 +22,7 @@ package org.apache.druid.query.expression;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.ObjectReader;
import org.apache.druid.guice.annotations.Json;
import org.apache.druid.math.expr.Expr;
import org.apache.druid.math.expr.ExprEval;
@@ -99,6 +100,117 @@ public class NestedDataExpressions
    }
  }

  public static class JsonMergeExprMacro implements ExprMacroTable.ExprMacro
  {
    public static final String NAME = "json_merge";

    private final ObjectMapper jsonMapper;

    @Inject
    public JsonMergeExprMacro(
        @Json ObjectMapper jsonMapper
    )
    {
      this.jsonMapper = jsonMapper;
    }

    @Override
    public String name()
    {
      return NAME;
    }

    @Override
    public Expr apply(List<Expr> args)
    {
      if (args.size() < 2) {
        throw validationFailed("must have at least two arguments");
      }

      final class ParseJsonExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr
      {
        public ParseJsonExpr(List<Expr> args)
        {
          super(JsonMergeExprMacro.this, args);
        }

        @Override
        public ExprEval eval(ObjectBinding bindings)
        {
          ExprEval arg = args.get(0).eval(bindings);
          Object obj;
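          // the first argument is the base document that later arguments are merged into; a null base is rejected below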
          if (arg.value() == null) {
            throw JsonMergeExprMacro.this.validationFailed(
                "invalid input expected %s but got %s instead",
                ExpressionType.STRING,
                arg.type()
            );
          }

          try {
            obj = jsonMapper.readValue(getArgAsJson(arg), Object.class);
          }
          catch (JsonProcessingException e) {
            throw JsonMergeExprMacro.this.processingFailed(e, "bad string input [%s]", arg.asString());
          }
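
          // readerForUpdating merges each subsequent document into obj, so later values overwrite earlier ones on key overlap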
          ObjectReader updater = jsonMapper.readerForUpdating(obj);

          for (int i = 1; i < args.size(); i++) {
            ExprEval argSub = args.get(i).eval(bindings);

            try {
              String str = getArgAsJson(argSub);
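              // a null argument after the first is skipped rather than treated as an error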
              if (str != null) {
                obj = updater.readValue(str);
              }
            }
            catch (JsonProcessingException e) {
              throw JsonMergeExprMacro.this.processingFailed(e, "bad string input [%s]", argSub.asString());
            }
          }

          return ExprEval.ofComplex(ExpressionType.NESTED_DATA, obj);
        }

        @Nullable
        @Override
        public ExpressionType getOutputType(InputBindingInspector inspector)
        {
          return ExpressionType.NESTED_DATA;
        }
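
        // STRING arguments are used as-is; COMPLEX nested data is serialized back to a JSON string; any other type fails validation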
        private String getArgAsJson(ExprEval arg)
        {
          if (arg.value() == null) {
            return null;
          }

          if (arg.type().is(ExprType.STRING)) {
            return arg.asString();
          }

          if (arg.type().is(ExprType.COMPLEX)) {
            try {
              return jsonMapper.writeValueAsString(unwrap(arg));
            }
            catch (JsonProcessingException e) {
              throw JsonMergeExprMacro.this.processingFailed(e, "bad complex input [%s]", arg.asString());
            }
          }

          throw JsonMergeExprMacro.this.validationFailed(
              "invalid input expected %s but got %s instead",
              ExpressionType.STRING,
              arg.type()
          );
        }
      }
      return new ParseJsonExpr(args);
    }
  }

  public static class ToJsonStringExprMacro implements ExprMacroTable.ExprMacro
  {
    public static final String NAME = "to_json_string";
@@ -49,6 +49,7 @@ public class NestedDataExpressionsTest extends InitializedNullHandlingTest
      new NestedDataExpressions.JsonPathsExprMacro(),
      new NestedDataExpressions.JsonKeysExprMacro(),
      new NestedDataExpressions.JsonObjectExprMacro(),
      new NestedDataExpressions.JsonMergeExprMacro(JSON_MAPPER),
      new NestedDataExpressions.JsonValueExprMacro(),
      new NestedDataExpressions.JsonQueryExprMacro(),
      new NestedDataExpressions.JsonQueryArrayExprMacro(),
@@ -112,6 +113,63 @@ public class NestedDataExpressionsTest extends InitializedNullHandlingTest
    Assert.assertEquals(ImmutableMap.of("a", "hello", "b", "world"), ((Map) eval.value()).get("y"));
  }

  @Test
  public void testJsonMergeExpression() throws JsonProcessingException
  {
    Expr expr = Parser.parse("json_merge('{\"a\":\"x\"}','{\"b\":\"y\"}')", MACRO_TABLE);
    ExprEval eval = expr.eval(inputBindings);
    Assert.assertEquals("{\"a\":\"x\",\"b\":\"y\"}", JSON_MAPPER.writeValueAsString(eval.value()));
    Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type());

    expr = Parser.parse("json_merge('{\"a\":\"x\"}', null)", MACRO_TABLE);
    eval = expr.eval(inputBindings);
    Assert.assertEquals("{\"a\":\"x\"}", JSON_MAPPER.writeValueAsString(eval.value()));
    Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type());

    expr = Parser.parse("json_merge('{\"a\":\"x\"}','{\"b\":\"y\"}','{\"c\":[1,2,3]}')", MACRO_TABLE);
    eval = expr.eval(inputBindings);
    Assert.assertEquals("{\"a\":\"x\",\"b\":\"y\",\"c\":[1,2,3]}", JSON_MAPPER.writeValueAsString(eval.value()));
    Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type());

    expr = Parser.parse("json_merge(json_object('a', 'x'),json_object('b', 'y'))", MACRO_TABLE);
    eval = expr.eval(inputBindings);
    Assert.assertEquals("{\"a\":\"x\",\"b\":\"y\"}", JSON_MAPPER.writeValueAsString(eval.value()));
    Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type());

    expr = Parser.parse("json_merge('{\"a\":\"x\"}',json_merge('{\"a\":\"z\"}','{\"a\":\"y\"}'))", MACRO_TABLE);
    eval = expr.eval(inputBindings);
    Assert.assertEquals("{\"a\":\"y\"}", JSON_MAPPER.writeValueAsString(eval.value()));
    Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type());

    expr = Parser.parse("json_merge('[\"a\", \"b\"]', '[\"c\", \"d\"]')", MACRO_TABLE);
    eval = expr.eval(inputBindings);
    Assert.assertEquals("[\"a\",\"b\",\"c\",\"d\"]", JSON_MAPPER.writeValueAsString(eval.value()));
    Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type());
  }

  @Test
  public void testJsonMergeOverflow() throws JsonProcessingException
  {
    Expr.ObjectBinding input1 = InputBindings.forInputSuppliers(
        new ImmutableMap.Builder<String, InputBindings.InputSupplier<?>>()
            .put("attr", InputBindings.inputSupplier(ExpressionType.NESTED_DATA, () -> ImmutableMap.of("key", "blah", "value", "blahblah")))
            .build()
    );
    Expr.ObjectBinding input2 = InputBindings.forInputSuppliers(
        new ImmutableMap.Builder<String, InputBindings.InputSupplier<?>>()
            .put("attr", InputBindings.inputSupplier(ExpressionType.NESTED_DATA, () -> ImmutableMap.of("key", "blah2", "value", "blahblah2")))
            .build()
    );

    Expr expr = Parser.parse("json_merge(json_object(), json_object(json_value(attr, '$.key'), json_value(attr, '$.value')))", MACRO_TABLE);
    ExprEval eval = expr.eval(input1);
    Assert.assertEquals("{\"blah\":\"blahblah\"}", JSON_MAPPER.writeValueAsString(eval.value()));
    Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type());
    eval = expr.eval(input2);
    Assert.assertEquals("{\"blah2\":\"blahblah2\"}", JSON_MAPPER.writeValueAsString(eval.value()));
    Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type());
  }

  @Test
  public void testJsonKeysExpression()
  {
@@ -780,6 +780,52 @@ public class NestedDataOperatorConversions
    }
  }

  public static class JsonMergeOperatorConversion implements SqlOperatorConversion
  {
    private static final String FUNCTION_NAME = "json_merge";
    private static final SqlFunction SQL_FUNCTION = OperatorConversions
        .operatorBuilder(FUNCTION_NAME)
        .operandTypeChecker(OperandTypes.variadic(SqlOperandCountRanges.from(1)))
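        // infer unknown operand types as nullable ANY so NULL literals and both STRING and COMPLEX<json> inputs are accepted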
        .operandTypeInference((callBinding, returnType, operandTypes) -> {
          RelDataTypeFactory typeFactory = callBinding.getTypeFactory();
          for (int i = 0; i < operandTypes.length; i++) {
            operandTypes[i] = typeFactory.createTypeWithNullability(
                typeFactory.createSqlType(SqlTypeName.ANY),
                true
            );
          }
        })
        .returnTypeInference(NESTED_RETURN_TYPE_INFERENCE)
        .functionCategory(SqlFunctionCategory.SYSTEM)
        .build();

    @Override
    public SqlOperator calciteOperator()
    {
      return SQL_FUNCTION;
    }

    @Nullable
    @Override
    public DruidExpression toDruidExpression(
        PlannerContext plannerContext,
        RowSignature rowSignature,
        RexNode rexNode
    )
    {
      return OperatorConversions.convertCall(
          plannerContext,
          rowSignature,
          rexNode,
          druidExpressions -> DruidExpression.ofExpression(
              ColumnType.NESTED_DATA,
              DruidExpression.functionCall("json_merge"),
              druidExpressions
          )
      );
    }
  }

  public static class ToJsonStringOperatorConversion implements SqlOperatorConversion
  {
    private static final String FUNCTION_NAME = "to_json_string";
@@ -353,6 +353,7 @@ public class DruidOperatorTable implements SqlOperatorTable
        .add(new NestedDataOperatorConversions.JsonValueReturningArrayDoubleOperatorConversion())
        .add(new NestedDataOperatorConversions.JsonValueReturningArrayVarcharOperatorConversion())
        .add(new NestedDataOperatorConversions.JsonObjectOperatorConversion())
        .add(new NestedDataOperatorConversions.JsonMergeOperatorConversion())
        .add(new NestedDataOperatorConversions.ToJsonStringOperatorConversion())
        .add(new NestedDataOperatorConversions.ParseJsonOperatorConversion())
        .add(new NestedDataOperatorConversions.TryParseJsonOperatorConversion())
@@ -69,6 +69,7 @@ import org.apache.druid.segment.column.RowSignature;
import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.incremental.IncrementalIndexSchema;
import org.apache.druid.segment.join.JoinableFactoryWrapper;
import org.apache.druid.segment.nested.NestedPathField;
import org.apache.druid.segment.virtual.ExpressionVirtualColumn;
import org.apache.druid.segment.virtual.NestedFieldVirtualColumn;
import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory;
@@ -4920,6 +4921,55 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest
    );
  }

  @Test
  public void testJsonMerging()
  {
    testQuery(
        "SELECT "
        + "JSON_MERGE('{\"a\":\"x\"}',JSON_OBJECT(KEY 'x' VALUE JSON_VALUE(nest, '$.x')))\n"
        + "FROM druid.nested",
        ImmutableList.of(
            Druids.newScanQueryBuilder()
                  .dataSource(DATA_SOURCE)
                  .intervals(querySegmentSpec(Filtration.eternity()))
                  .virtualColumns(
                      new ExpressionVirtualColumn(
                          "v0",
                          "json_merge('{\\u0022a\\u0022:\\u0022x\\u0022}',json_object('x',\"v1\"))",
                          ColumnType.NESTED_DATA,
                          queryFramework().macroTable()
                      ),
                      new NestedFieldVirtualColumn(
                          "nest",
                          "v1",
                          ColumnType.STRING,
                          ImmutableList.of(
                              new NestedPathField("x")
                          ),
                          false,
                          null,
                          false
                      )
                  )
                  .columns("v0")
                  .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
                  .build()
        ),
        ImmutableList.of(
            new Object[]{"{\"a\":\"x\",\"x\":\"100\"}"},
            new Object[]{"{\"a\":\"x\",\"x\":null}"},
            new Object[]{"{\"a\":\"x\",\"x\":\"200\"}"},
            new Object[]{"{\"a\":\"x\",\"x\":null}"},
            new Object[]{"{\"a\":\"x\",\"x\":null}"},
            new Object[]{"{\"a\":\"x\",\"x\":\"100\"}"},
            new Object[]{"{\"a\":\"x\",\"x\":null}"}
        ),
        RowSignature.builder()
                    .add("EXPR$0", ColumnType.NESTED_DATA)
                    .build()
    );
  }

  @Test
  public void testCompositionTyping()
  {
@@ -384,6 +384,7 @@ json_paths
json_query
json_query_array
json_value
json_merge
karlkfi
kbps
kerberos