From e7d2e8b914bf89a780490de109b8b7850b0e141f Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Mon, 17 Apr 2023 14:21:32 -0700 Subject: [PATCH] fix bug filtering nested columns with expression filters (#14096) --- .../nested/NestedCommonFormatColumn.java | 3 +- .../nested/NestedDataComplexTypeSerde.java | 3 +- .../NestedCommonFormatColumnPartSerde.java | 5 ++ .../nested/NestedDataColumnSupplierTest.java | 46 ++++++++++++------- .../NestedDataColumnSupplierV4Test.java | 21 +++++---- .../calcite/CalciteNestedDataQueryTest.java | 44 ++++++++++++++++++ 6 files changed, 95 insertions(+), 27 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumn.java index b52670c148f..4d20437c502 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumn.java @@ -136,9 +136,10 @@ public interface NestedCommonFormatColumn extends BaseColumn .setDictionaryValuesSorted(true) .setDictionaryValuesUnique(true) .setHasBitmapIndexes(true) + .setFilterable(true) .setHasNulls(hasNulls); } - return ColumnCapabilitiesImpl.createDefault().setType(logicalType).setHasNulls(hasNulls); + return ColumnCapabilitiesImpl.createDefault().setType(logicalType).setHasNulls(hasNulls).setFilterable(true); } } } diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexTypeSerde.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexTypeSerde.java index a35aa93c25b..f1f298e0889 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexTypeSerde.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexTypeSerde.java @@ -106,6 +106,7 @@ public class NestedDataComplexTypeSerde extends ComplexMetricSerde } builder.setComplexColumnSupplier(supplier); builder.setColumnFormat(new NestedColumnFormatV4()); + builder.setFilterable(true); } @Override @@ -187,7 +188,7 @@ public class NestedDataComplexTypeSerde extends ComplexMetricSerde @Override public ColumnCapabilities toColumnCapabilities() { - return ColumnCapabilitiesImpl.createDefault().setType(ColumnType.NESTED_DATA).setHasNulls(true); + return ColumnCapabilitiesImpl.createDefault().setType(ColumnType.NESTED_DATA).setHasNulls(true).setFilterable(true); } } } diff --git a/processing/src/main/java/org/apache/druid/segment/serde/NestedCommonFormatColumnPartSerde.java b/processing/src/main/java/org/apache/druid/segment/serde/NestedCommonFormatColumnPartSerde.java index 4585364fae7..13366f43bf7 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/NestedCommonFormatColumnPartSerde.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/NestedCommonFormatColumnPartSerde.java @@ -121,6 +121,7 @@ public class NestedCommonFormatColumnPartSerde implements ColumnPartSerde builder.setStandardTypeColumnSupplier(supplier); builder.setIndexSupplier(supplier, true, false); builder.setColumnFormat(new NestedCommonFormatColumn.Format(logicalType, capabilitiesBuilder.hasNulls().isTrue())); + builder.setFilterable(true); }); } if (logicalType.is(ValueType.LONG)) { @@ -140,6 +141,7 @@ public class NestedCommonFormatColumnPartSerde implements ColumnPartSerde builder.setStandardTypeColumnSupplier(supplier); builder.setIndexSupplier(supplier, true, false); builder.setColumnFormat(new NestedCommonFormatColumn.Format(logicalType, capabilitiesBuilder.hasNulls().isTrue())); + builder.setFilterable(true); }); } if (logicalType.is(ValueType.DOUBLE)) { @@ -159,6 +161,7 @@ public class NestedCommonFormatColumnPartSerde implements ColumnPartSerde builder.setStandardTypeColumnSupplier(supplier); builder.setIndexSupplier(supplier, true, false); builder.setColumnFormat(new NestedCommonFormatColumn.Format(logicalType, capabilitiesBuilder.hasNulls().isTrue())); + builder.setFilterable(true); }); } if (logicalType.isArray()) { @@ -178,6 +181,7 @@ public class NestedCommonFormatColumnPartSerde implements ColumnPartSerde builder.setType(logicalType); builder.setStandardTypeColumnSupplier(supplier); builder.setColumnFormat(new NestedCommonFormatColumn.Format(logicalType, capabilitiesBuilder.hasNulls().isTrue())); + builder.setFilterable(true); }); } return (buffer, builder, columnConfig) -> { @@ -198,6 +202,7 @@ public class NestedCommonFormatColumnPartSerde implements ColumnPartSerde builder.setType(logicalType); builder.setStandardTypeColumnSupplier(supplier); builder.setColumnFormat(new NestedCommonFormatColumn.Format(logicalType, hasNulls)); + builder.setFilterable(true); }; } diff --git a/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java b/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java index d8c385232aa..b771d4411a1 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java @@ -48,7 +48,9 @@ import org.apache.druid.segment.ObjectColumnSelector; import org.apache.druid.segment.SimpleAscendingOffset; import org.apache.druid.segment.TestHelper; import org.apache.druid.segment.column.ColumnBuilder; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnConfig; +import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.DruidPredicateIndex; @@ -56,6 +58,8 @@ import org.apache.druid.segment.column.NullValueIndex; import org.apache.druid.segment.column.StringValueSetIndex; import org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; +import org.apache.druid.segment.serde.ColumnPartSerde; +import org.apache.druid.segment.serde.NestedCommonFormatColumnPartSerde; import org.apache.druid.segment.vector.BitmapVectorOffset; import org.apache.druid.segment.vector.NoFilterVectorOffset; import org.apache.druid.segment.vector.VectorObjectSelector; @@ -245,16 +249,21 @@ public class NestedDataColumnSupplierTest extends InitializedNullHandlingTest public void testBasicFunctionality() throws IOException { ColumnBuilder bob = new ColumnBuilder(); - bob.setFileMapper(fileMapper); - NestedDataColumnSupplier supplier = NestedDataColumnSupplier.read( + NestedCommonFormatColumnPartSerde partSerde = NestedCommonFormatColumnPartSerde.createDeserializer( + ColumnType.NESTED_DATA, false, - baseBuffer, - bob, - ALWAYS_USE_INDEXES, - bitmapSerdeFactory, - ByteOrder.nativeOrder() + ByteOrder.nativeOrder(), + RoaringBitmapSerdeFactory.getInstance() ); - try (NestedDataComplexColumn column = (NestedDataComplexColumn) supplier.get()) { + bob.setFileMapper(fileMapper); + ColumnPartSerde.Deserializer deserializer = partSerde.getDeserializer(); + deserializer.read(baseBuffer, bob, ALWAYS_USE_INDEXES); + final ColumnHolder holder = bob.build(); + final ColumnCapabilities capabilities = holder.getCapabilities(); + Assert.assertEquals(ColumnType.NESTED_DATA, capabilities.toColumnType()); + Assert.assertTrue(capabilities.isFilterable()); + Assert.assertTrue(holder.getColumnFormat() instanceof NestedCommonFormatColumn.Format); + try (NestedDataComplexColumn column = (NestedDataComplexColumn) holder.getColumn()) { smokeTest(column); } } @@ -263,16 +272,21 @@ public class NestedDataColumnSupplierTest extends InitializedNullHandlingTest public void testArrayFunctionality() throws IOException { ColumnBuilder bob = new ColumnBuilder(); - bob.setFileMapper(arrayFileMapper); - NestedDataColumnSupplier supplier = NestedDataColumnSupplier.read( + NestedCommonFormatColumnPartSerde partSerde = NestedCommonFormatColumnPartSerde.createDeserializer( + ColumnType.NESTED_DATA, false, - arrayBaseBuffer, - bob, - () -> 0, - bitmapSerdeFactory, - ByteOrder.nativeOrder() + ByteOrder.nativeOrder(), + RoaringBitmapSerdeFactory.getInstance() ); - try (NestedDataComplexColumn column = (NestedDataComplexColumn) supplier.get()) { + bob.setFileMapper(arrayFileMapper); + ColumnPartSerde.Deserializer deserializer = partSerde.getDeserializer(); + deserializer.read(arrayBaseBuffer, bob, ALWAYS_USE_INDEXES); + final ColumnHolder holder = bob.build(); + final ColumnCapabilities capabilities = holder.getCapabilities(); + Assert.assertEquals(ColumnType.NESTED_DATA, capabilities.toColumnType()); + Assert.assertTrue(capabilities.isFilterable()); + Assert.assertTrue(holder.getColumnFormat() instanceof NestedCommonFormatColumn.Format); + try (NestedDataComplexColumn column = (NestedDataComplexColumn) holder.getColumn()) { smokeTestArrays(column); } } diff --git a/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierV4Test.java b/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierV4Test.java index 4ac538f6612..f8d9dc4033d 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierV4Test.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierV4Test.java @@ -48,6 +48,7 @@ import org.apache.druid.segment.SimpleAscendingOffset; import org.apache.druid.segment.TestHelper; import org.apache.druid.segment.column.BitmapColumnIndex; import org.apache.druid.segment.column.ColumnBuilder; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.column.ColumnType; @@ -55,6 +56,8 @@ import org.apache.druid.segment.column.DruidPredicateIndex; import org.apache.druid.segment.column.NullValueIndex; import org.apache.druid.segment.column.StringValueSetIndex; import org.apache.druid.segment.column.TypeStrategy; +import org.apache.druid.segment.serde.ColumnPartSerde; +import org.apache.druid.segment.serde.ComplexColumnPartSerde; import org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory; import org.apache.druid.segment.writeout.TmpFileSegmentWriteOutMediumFactory; import org.apache.druid.testing.InitializedNullHandlingTest; @@ -214,15 +217,15 @@ public class NestedDataColumnSupplierV4Test extends InitializedNullHandlingTest { ColumnBuilder bob = new ColumnBuilder(); bob.setFileMapper(fileMapper); - NestedDataColumnSupplierV4 supplier = NestedDataColumnSupplierV4.read( - baseBuffer, - bob, - NestedFieldColumnIndexSupplierTest.ALWAYS_USE_INDEXES, - NestedDataComplexTypeSerde.OBJECT_MAPPER, - new OnlyPositionalReadsTypeStrategy<>(ColumnType.LONG.getStrategy()), - new OnlyPositionalReadsTypeStrategy<>(ColumnType.DOUBLE.getStrategy()) - ); - try (NestedDataComplexColumn column = (NestedDataComplexColumn) supplier.get()) { + ComplexColumnPartSerde partSerde = ComplexColumnPartSerde.createDeserializer(NestedDataComplexTypeSerde.TYPE_NAME); + ColumnPartSerde.Deserializer deserializer = partSerde.getDeserializer(); + deserializer.read(baseBuffer, bob, NestedFieldColumnIndexSupplierTest.ALWAYS_USE_INDEXES); + final ColumnHolder holder = bob.build(); + final ColumnCapabilities capabilities = holder.getCapabilities(); + Assert.assertEquals(ColumnType.NESTED_DATA, capabilities.toColumnType()); + Assert.assertTrue(capabilities.isFilterable()); + Assert.assertTrue(holder.getColumnFormat() instanceof NestedDataComplexTypeSerde.NestedColumnFormatV4); + try (NestedDataComplexColumn column = (NestedDataComplexColumn) holder.getColumn()) { smokeTest(column); } } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java index 99b15fdc304..5c7729eb60c 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java @@ -2177,6 +2177,50 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest ); } + @Test + public void testGroupByPathSelectorFilterCoalesce() + { + cannotVectorize(); + testQuery( + "SELECT " + + "JSON_VALUE(nest, '$.x'), " + + "SUM(cnt) " + + "FROM druid.nested WHERE COALESCE(JSON_VALUE(nest, '$.x'), '0') = '100' GROUP BY 1", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(DATA_SOURCE) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setVirtualColumns( + new NestedFieldVirtualColumn("nest", "$.x", "v0", ColumnType.STRING) + ) + .setDimensions( + dimensions( + new DefaultDimensionSpec("v0", "d0") + ) + ) + .setDimFilter( + expressionFilter( + "case_searched(notnull(json_value(\"nest\",'$.x', 'STRING')),(json_value(\"nest\",'$.x', 'STRING') == '100'),0)" + ) + ) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{ + "100", + 2L + } + ), + RowSignature.builder() + .add("EXPR$0", ColumnType.STRING) + .add("EXPR$1", ColumnType.LONG) + .build() + ); + } + @Test public void testJsonAndArrayAgg() {