From 72aba00e0991f301e12db77055cd43ffd2f85fe1 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 25 Aug 2022 17:11:28 -0700 Subject: [PATCH] add json function support for paths with negative array indexes (#12972) --- .../nested/NestedPathArrayElement.java | 7 +- .../virtual/NestedFieldVirtualColumn.java | 78 +++++++++++----- .../segment/nested/NestedPathFinderTest.java | 24 +++++ .../virtual/NestedFieldVirtualColumnTest.java | 1 + .../calcite/CalciteNestedDataQueryTest.java | 91 +++++++++++++++++++ 5 files changed, 177 insertions(+), 24 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedPathArrayElement.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedPathArrayElement.java index 095976735dd..5bba77fbc97 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedPathArrayElement.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedPathArrayElement.java @@ -44,7 +44,12 @@ public class NestedPathArrayElement implements NestedPathPart // handle lists or arrays because who knows what might end up here, depending on how is created if (input instanceof List) { List currentList = (List) input; - if (currentList.size() > index) { + final int currentSize = currentList.size(); + if (index < 0) { + if (currentSize + index >= 0) { + return currentList.get(currentSize + index); + } + } else if (currentList.size() > index) { return currentList.get(index); } } else if (input instanceof Object[]) { diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java index e972bb95e31..df06eb44f32 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java @@ -45,6 +45,7 @@ import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.ReadableOffset; import org.apache.druid.segment.nested.NestedDataComplexColumn; import org.apache.druid.segment.nested.NestedDataComplexTypeSerde; +import org.apache.druid.segment.nested.NestedPathArrayElement; import org.apache.druid.segment.nested.NestedPathFinder; import org.apache.druid.segment.nested.NestedPathPart; import org.apache.druid.segment.nested.StructuredData; @@ -89,6 +90,8 @@ public class NestedFieldVirtualColumn implements VirtualColumn private final List parts; private final boolean processFromRaw; + private final boolean hasNegativeArrayIndex; + @JsonCreator public NestedFieldVirtualColumn( @JsonProperty("columnName") String columnName, @@ -114,6 +117,17 @@ public class NestedFieldVirtualColumn implements VirtualColumn boolean isInputJq = useJqSyntax != null && useJqSyntax; this.parts = isInputJq ? NestedPathFinder.parseJqPath(path) : NestedPathFinder.parseJsonPath(path); } + boolean hasNegative = false; + for (NestedPathPart part : this.parts) { + if (part instanceof NestedPathArrayElement) { + NestedPathArrayElement elementPart = (NestedPathArrayElement) part; + if (elementPart.getIndex() < 0) { + hasNegative = true; + break; + } + } + } + this.hasNegativeArrayIndex = hasNegative; this.expectedType = expectedType; this.processFromRaw = processFromRaw == null ? false : processFromRaw; } @@ -192,27 +206,7 @@ public class NestedFieldVirtualColumn implements VirtualColumn // written to segment, so we fall back to processing the structured data from a column value selector on the // complex column ColumnValueSelector valueSelector = makeColumnValueSelector(dimensionSpec.getOutputName(), factory); - - class FieldDimensionSelector extends BaseSingleValueDimensionSelector - { - @Override - public void inspectRuntimeShape(RuntimeShapeInspector inspector) - { - inspector.visit("valueSelector", valueSelector); - } - - @Nullable - @Override - protected String getValue() - { - Object val = valueSelector.getObject(); - if (val == null || val instanceof String) { - return (String) val; - } - return String.valueOf(val); - } - } - return new FieldDimensionSelector(); + return new FieldDimensionSelector(valueSelector); } @Override @@ -244,6 +238,14 @@ public class NestedFieldVirtualColumn implements VirtualColumn // complex column itself didn't exist return DimensionSelector.constant(null); } + if (hasNegativeArrayIndex) { + return new FieldDimensionSelector( + new RawFieldLiteralColumnValueSelector( + column.makeColumnValueSelector(offset), + parts + ) + ); + } return column.makeDimensionSelector(parts, offset, dimensionSpec.getExtractionFn()); } @@ -265,13 +267,15 @@ public class NestedFieldVirtualColumn implements VirtualColumn // is JSON_VALUE which only returns literals, so we can use the nested columns value selector return processFromRaw ? new RawFieldColumnSelector(column.makeColumnValueSelector(offset), parts) - : column.makeColumnValueSelector(parts, offset); + : hasNegativeArrayIndex + ? new RawFieldLiteralColumnValueSelector(column.makeColumnValueSelector(offset), parts) + : column.makeColumnValueSelector(parts, offset); } @Override public boolean canVectorize(ColumnInspector inspector) { - return true; + return !hasNegativeArrayIndex; } @Nullable @@ -286,6 +290,7 @@ public class NestedFieldVirtualColumn implements VirtualColumn if (column == null) { return NilVectorSelector.create(offset); } + return column.makeSingleValueDimensionVectorSelector(parts, offset); } @@ -748,4 +753,31 @@ public class NestedFieldVirtualColumn implements VirtualColumn return StructuredData.wrap(NestedPathFinder.find(data == null ? null : data.getValue(), parts)); } } + + public static class FieldDimensionSelector extends BaseSingleValueDimensionSelector + { + private final ColumnValueSelector valueSelector; + + public FieldDimensionSelector(ColumnValueSelector valueSelector) + { + this.valueSelector = valueSelector; + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + inspector.visit("valueSelector", valueSelector); + } + + @Nullable + @Override + protected String getValue() + { + Object val = valueSelector.getObject(); + if (val == null || val instanceof String) { + return (String) val; + } + return String.valueOf(val); + } + } } diff --git a/processing/src/test/java/org/apache/druid/segment/nested/NestedPathFinderTest.java b/processing/src/test/java/org/apache/druid/segment/nested/NestedPathFinderTest.java index b60a30c8a60..161a752ce1e 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/NestedPathFinderTest.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/NestedPathFinderTest.java @@ -228,6 +228,18 @@ public class NestedPathFinderTest Assert.assertEquals(".\"x\"[1]", NestedPathFinder.toNormalizedJqPath(pathParts)); Assert.assertEquals("$.x[1]", NestedPathFinder.toNormalizedJsonPath(pathParts)); + + // { "x" : ["a", "b"]} + pathParts = NestedPathFinder.parseJsonPath("$.x[-1]"); + Assert.assertEquals(2, pathParts.size()); + + Assert.assertTrue(pathParts.get(0) instanceof NestedPathField); + Assert.assertEquals("x", pathParts.get(0).getPartIdentifier()); + Assert.assertTrue(pathParts.get(1) instanceof NestedPathArrayElement); + Assert.assertEquals("-1", pathParts.get(1).getPartIdentifier()); + Assert.assertEquals(".\"x\"[-1]", NestedPathFinder.toNormalizedJqPath(pathParts)); + Assert.assertEquals("$.x[-1]", NestedPathFinder.toNormalizedJsonPath(pathParts)); + // { "x" : ["a", "b"]} pathParts = NestedPathFinder.parseJsonPath("$['x'][1]"); Assert.assertEquals(2, pathParts.size()); @@ -422,6 +434,18 @@ public class NestedPathFinderTest Assert.assertEquals("b", NestedPathFinder.find(NESTER, pathParts)); Assert.assertEquals("b", NestedPathFinder.findStringLiteral(NESTER, pathParts)); + pathParts = NestedPathFinder.parseJqPath(".x[-1]"); + Assert.assertEquals("c", NestedPathFinder.find(NESTER, pathParts)); + Assert.assertEquals("c", NestedPathFinder.findStringLiteral(NESTER, pathParts)); + + pathParts = NestedPathFinder.parseJqPath(".x[-2]"); + Assert.assertEquals("b", NestedPathFinder.find(NESTER, pathParts)); + Assert.assertEquals("b", NestedPathFinder.findStringLiteral(NESTER, pathParts)); + + pathParts = NestedPathFinder.parseJqPath(".x[-4]"); + Assert.assertNull(NestedPathFinder.find(NESTER, pathParts)); + Assert.assertNull(NestedPathFinder.findStringLiteral(NESTER, pathParts)); + // nonexistent pathParts = NestedPathFinder.parseJqPath(".x[1].y.z"); Assert.assertNull(NestedPathFinder.find(NESTER, pathParts)); diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumnTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumnTest.java index 2e5692b6a17..581c8674da9 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumnTest.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumnTest.java @@ -91,6 +91,7 @@ public class NestedFieldVirtualColumnTest { EqualsVerifier.forClass(NestedFieldVirtualColumn.class) .withNonnullFields("columnName", "outputName") + .withIgnoredFields("hasNegativeArrayIndex") .usingGetClass() .verify(); } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java index b9315033fa1..8a05ee74f83 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java @@ -2415,4 +2415,95 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest .build() ); } + + @Test + public void testGroupByNegativeJsonPathIndex() + { + // negative array index cannot vectorize + cannotVectorize(); + testQuery( + "SELECT " + + "JSON_VALUE(nester, '$.array[-1]'), " + + "SUM(cnt) " + + "FROM druid.nested GROUP BY 1", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(DATA_SOURCE) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setVirtualColumns( + new NestedFieldVirtualColumn("nester", "$.array[-1]", "v0", ColumnType.STRING) + ) + .setDimensions( + dimensions( + new DefaultDimensionSpec("v0", "d0") + ) + ) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{NullHandling.defaultStringValue(), 5L}, + new Object[]{"b", 2L} + ), + RowSignature.builder() + .add("EXPR$0", ColumnType.STRING) + .add("EXPR$1", ColumnType.LONG) + .build() + ); + } + + @Test + public void testJsonPathNegativeIndex() + { + testQuery( + "SELECT JSON_VALUE(nester, '$.array[-1]'), JSON_QUERY(nester, '$.array[-1]'), JSON_KEYS(nester, '$.array[-1]') FROM druid.nested", + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(DATA_SOURCE) + .intervals(querySegmentSpec(Filtration.eternity())) + .virtualColumns( + new NestedFieldVirtualColumn( + "nester", + "v0", + ColumnType.STRING, + null, + false, + "$.array[-1]", + false + ), + new NestedFieldVirtualColumn( + "nester", + "v1", + NestedDataComplexTypeSerde.TYPE, + null, + true, + "$.array[-1]", + false + ), + expressionVirtualColumn("v2", "json_keys(\"nester\",'$.array[-1]')", ColumnType.STRING_ARRAY) + ) + .columns("v0", "v1", "v2") + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .build() + ), + ImmutableList.of( + new Object[]{"b", "\"b\"", null}, + new Object[]{NullHandling.defaultStringValue(), null, null}, + new Object[]{NullHandling.defaultStringValue(), null, null}, + new Object[]{NullHandling.defaultStringValue(), null, null}, + new Object[]{NullHandling.defaultStringValue(), null, null}, + new Object[]{"b", "\"b\"", null}, + new Object[]{NullHandling.defaultStringValue(), null, null} + ), + RowSignature.builder() + .add("EXPR$0", ColumnType.STRING) + .add("EXPR$1", NestedDataComplexTypeSerde.TYPE) + .add("EXPR$2", ColumnType.STRING_ARRAY) + .build() + + ); + } }