add json function support for paths with negative array indexes (#12972)

This commit is contained in:
Clint Wylie 2022-08-25 17:11:28 -07:00 committed by GitHub
parent 4c61378ad1
commit 72aba00e09
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 177 additions and 24 deletions

View File

@ -44,7 +44,12 @@ public class NestedPathArrayElement implements NestedPathPart
// handle lists or arrays because who knows what might end up here, depending on how it is created
if (input instanceof List) {
List<?> currentList = (List<?>) input;
if (currentList.size() > index) {
final int currentSize = currentList.size();
if (index < 0) {
if (currentSize + index >= 0) {
return currentList.get(currentSize + index);
}
} else if (currentList.size() > index) {
return currentList.get(index);
}
} else if (input instanceof Object[]) {

View File

@ -45,6 +45,7 @@ import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.data.ReadableOffset;
import org.apache.druid.segment.nested.NestedDataComplexColumn;
import org.apache.druid.segment.nested.NestedDataComplexTypeSerde;
import org.apache.druid.segment.nested.NestedPathArrayElement;
import org.apache.druid.segment.nested.NestedPathFinder;
import org.apache.druid.segment.nested.NestedPathPart;
import org.apache.druid.segment.nested.StructuredData;
@ -89,6 +90,8 @@ public class NestedFieldVirtualColumn implements VirtualColumn
private final List<NestedPathPart> parts;
private final boolean processFromRaw;
private final boolean hasNegativeArrayIndex;
@JsonCreator
public NestedFieldVirtualColumn(
@JsonProperty("columnName") String columnName,
@ -114,6 +117,17 @@ public class NestedFieldVirtualColumn implements VirtualColumn
boolean isInputJq = useJqSyntax != null && useJqSyntax;
this.parts = isInputJq ? NestedPathFinder.parseJqPath(path) : NestedPathFinder.parseJsonPath(path);
}
boolean hasNegative = false;
for (NestedPathPart part : this.parts) {
if (part instanceof NestedPathArrayElement) {
NestedPathArrayElement elementPart = (NestedPathArrayElement) part;
if (elementPart.getIndex() < 0) {
hasNegative = true;
break;
}
}
}
this.hasNegativeArrayIndex = hasNegative;
this.expectedType = expectedType;
this.processFromRaw = processFromRaw == null ? false : processFromRaw;
}
@ -192,27 +206,7 @@ public class NestedFieldVirtualColumn implements VirtualColumn
// written to segment, so we fall back to processing the structured data from a column value selector on the
// complex column
ColumnValueSelector valueSelector = makeColumnValueSelector(dimensionSpec.getOutputName(), factory);
class FieldDimensionSelector extends BaseSingleValueDimensionSelector
{
@Override
public void inspectRuntimeShape(RuntimeShapeInspector inspector)
{
inspector.visit("valueSelector", valueSelector);
}
@Nullable
@Override
protected String getValue()
{
Object val = valueSelector.getObject();
if (val == null || val instanceof String) {
return (String) val;
}
return String.valueOf(val);
}
}
return new FieldDimensionSelector();
return new FieldDimensionSelector(valueSelector);
}
@Override
@ -244,6 +238,14 @@ public class NestedFieldVirtualColumn implements VirtualColumn
// complex column itself didn't exist
return DimensionSelector.constant(null);
}
if (hasNegativeArrayIndex) {
return new FieldDimensionSelector(
new RawFieldLiteralColumnValueSelector(
column.makeColumnValueSelector(offset),
parts
)
);
}
return column.makeDimensionSelector(parts, offset, dimensionSpec.getExtractionFn());
}
@ -265,13 +267,15 @@ public class NestedFieldVirtualColumn implements VirtualColumn
// is JSON_VALUE which only returns literals, so we can use the nested columns value selector
return processFromRaw
? new RawFieldColumnSelector(column.makeColumnValueSelector(offset), parts)
: column.makeColumnValueSelector(parts, offset);
: hasNegativeArrayIndex
? new RawFieldLiteralColumnValueSelector(column.makeColumnValueSelector(offset), parts)
: column.makeColumnValueSelector(parts, offset);
}
@Override
public boolean canVectorize(ColumnInspector inspector)
{
return true;
return !hasNegativeArrayIndex;
}
@Nullable
@ -286,6 +290,7 @@ public class NestedFieldVirtualColumn implements VirtualColumn
if (column == null) {
return NilVectorSelector.create(offset);
}
return column.makeSingleValueDimensionVectorSelector(parts, offset);
}
@ -748,4 +753,31 @@ public class NestedFieldVirtualColumn implements VirtualColumn
return StructuredData.wrap(NestedPathFinder.find(data == null ? null : data.getValue(), parts));
}
}
/**
 * Single-value dimension selector that surfaces the objects produced by a wrapped
 * {@link ColumnValueSelector} as strings.
 */
public static class FieldDimensionSelector extends BaseSingleValueDimensionSelector
{
  private final ColumnValueSelector valueSelector;

  /**
   * @param valueSelector selector whose current object is converted to a string on every read
   */
  public FieldDimensionSelector(ColumnValueSelector valueSelector)
  {
    this.valueSelector = valueSelector;
  }

  @Override
  public void inspectRuntimeShape(RuntimeShapeInspector inspector)
  {
    inspector.visit("valueSelector", valueSelector);
  }

  /**
   * Reads the wrapped selector's current object and returns it as a string.
   *
   * @return {@code null} when the object is {@code null}, the object itself when it is already a
   *         {@link String}, otherwise {@link String#valueOf(Object)} of the object
   */
  @Nullable
  @Override
  protected String getValue()
  {
    final Object current = valueSelector.getObject();
    if (current == null) {
      return null;
    }
    // strings pass through untouched; everything else is stringified
    return current instanceof String ? (String) current : String.valueOf(current);
  }
}
}

View File

@ -228,6 +228,18 @@ public class NestedPathFinderTest
Assert.assertEquals(".\"x\"[1]", NestedPathFinder.toNormalizedJqPath(pathParts));
Assert.assertEquals("$.x[1]", NestedPathFinder.toNormalizedJsonPath(pathParts));
// { "x" : ["a", "b"]}
pathParts = NestedPathFinder.parseJsonPath("$.x[-1]");
Assert.assertEquals(2, pathParts.size());
Assert.assertTrue(pathParts.get(0) instanceof NestedPathField);
Assert.assertEquals("x", pathParts.get(0).getPartIdentifier());
Assert.assertTrue(pathParts.get(1) instanceof NestedPathArrayElement);
Assert.assertEquals("-1", pathParts.get(1).getPartIdentifier());
Assert.assertEquals(".\"x\"[-1]", NestedPathFinder.toNormalizedJqPath(pathParts));
Assert.assertEquals("$.x[-1]", NestedPathFinder.toNormalizedJsonPath(pathParts));
// { "x" : ["a", "b"]}
pathParts = NestedPathFinder.parseJsonPath("$['x'][1]");
Assert.assertEquals(2, pathParts.size());
@ -422,6 +434,18 @@ public class NestedPathFinderTest
Assert.assertEquals("b", NestedPathFinder.find(NESTER, pathParts));
Assert.assertEquals("b", NestedPathFinder.findStringLiteral(NESTER, pathParts));
pathParts = NestedPathFinder.parseJqPath(".x[-1]");
Assert.assertEquals("c", NestedPathFinder.find(NESTER, pathParts));
Assert.assertEquals("c", NestedPathFinder.findStringLiteral(NESTER, pathParts));
pathParts = NestedPathFinder.parseJqPath(".x[-2]");
Assert.assertEquals("b", NestedPathFinder.find(NESTER, pathParts));
Assert.assertEquals("b", NestedPathFinder.findStringLiteral(NESTER, pathParts));
pathParts = NestedPathFinder.parseJqPath(".x[-4]");
Assert.assertNull(NestedPathFinder.find(NESTER, pathParts));
Assert.assertNull(NestedPathFinder.findStringLiteral(NESTER, pathParts));
// nonexistent
pathParts = NestedPathFinder.parseJqPath(".x[1].y.z");
Assert.assertNull(NestedPathFinder.find(NESTER, pathParts));

View File

@ -91,6 +91,7 @@ public class NestedFieldVirtualColumnTest
{
EqualsVerifier.forClass(NestedFieldVirtualColumn.class)
.withNonnullFields("columnName", "outputName")
.withIgnoredFields("hasNegativeArrayIndex")
.usingGetClass()
.verify();
}

View File

@ -2415,4 +2415,95 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest
.build()
);
}
/**
 * Groups rows of druid.nested by the value JSON_VALUE extracts at path '$.array[-1]' of the
 * "nester" column and sums "cnt" per group, checking both the planned native query and the results.
 */
@Test
public void testGroupByNegativeJsonPathIndex()
{
// negative array index cannot vectorize
cannotVectorize();
testQuery(
"SELECT "
+ "JSON_VALUE(nester, '$.array[-1]'), "
+ "SUM(cnt) "
+ "FROM druid.nested GROUP BY 1",
// expected native plan: a group-by whose single dimension reads a virtual column built from the same path
ImmutableList.of(
GroupByQuery.builder()
.setDataSource(DATA_SOURCE)
.setInterval(querySegmentSpec(Filtration.eternity()))
.setGranularity(Granularities.ALL)
.setVirtualColumns(
new NestedFieldVirtualColumn("nester", "$.array[-1]", "v0", ColumnType.STRING)
)
.setDimensions(
dimensions(
new DefaultDimensionSpec("v0", "d0")
)
)
.setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt")))
.setContext(QUERY_CONTEXT_DEFAULT)
.build()
),
// expected rows: the default string value group sums to 5, the "b" group sums to 2
ImmutableList.of(
new Object[]{NullHandling.defaultStringValue(), 5L},
new Object[]{"b", 2L}
),
// expected output signature: a string grouping column followed by a long sum
RowSignature.builder()
.add("EXPR$0", ColumnType.STRING)
.add("EXPR$1", ColumnType.LONG)
.build()
);
}
/**
 * Runs JSON_VALUE, JSON_QUERY and JSON_KEYS with the same path '$.array[-1]' against the "nester"
 * column in one statement, checking the planned scan query, the rows, and the output signature.
 */
@Test
public void testJsonPathNegativeIndex()
{
testQuery(
"SELECT JSON_VALUE(nester, '$.array[-1]'), JSON_QUERY(nester, '$.array[-1]'), JSON_KEYS(nester, '$.array[-1]') FROM druid.nested",
// expected native plan: a scan over three virtual columns, one per selected expression
ImmutableList.of(
Druids.newScanQueryBuilder()
.dataSource(DATA_SOURCE)
.intervals(querySegmentSpec(Filtration.eternity()))
.virtualColumns(
// v0: STRING-typed nested field column (presumably backing JSON_VALUE -- confirm against the planner)
new NestedFieldVirtualColumn(
"nester",
"v0",
ColumnType.STRING,
null,
false,
"$.array[-1]",
false
),
// v1: complex-typed nested field column; differs from v0 in the expected type and the fifth argument
// NOTE(review): the fifth argument looks like the process-from-raw flag -- confirm against the constructor
new NestedFieldVirtualColumn(
"nester",
"v1",
NestedDataComplexTypeSerde.TYPE,
null,
true,
"$.array[-1]",
false
),
// v2: JSON_KEYS is planned as an expression virtual column rather than a nested field column
expressionVirtualColumn("v2", "json_keys(\"nester\",'$.array[-1]')", ColumnType.STRING_ARRAY)
)
.columns("v0", "v1", "v2")
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
.legacy(false)
.build()
),
// expected rows: two of the seven rows yield "b" (and its JSON form "\"b\""); the rest yield the
// default string value / null, and the JSON_KEYS column is null in every row
ImmutableList.of(
new Object[]{"b", "\"b\"", null},
new Object[]{NullHandling.defaultStringValue(), null, null},
new Object[]{NullHandling.defaultStringValue(), null, null},
new Object[]{NullHandling.defaultStringValue(), null, null},
new Object[]{NullHandling.defaultStringValue(), null, null},
new Object[]{"b", "\"b\"", null},
new Object[]{NullHandling.defaultStringValue(), null, null}
),
// expected output signature: string, nested complex type, string array
RowSignature.builder()
.add("EXPR$0", ColumnType.STRING)
.add("EXPR$1", NestedDataComplexTypeSerde.TYPE)
.add("EXPR$2", ColumnType.STRING_ARRAY)
.build()
);
}
}