From 02ae3e74ffcf99437bc6a608baf770d764f2c52f Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Tue, 31 May 2022 11:38:56 -0700 Subject: [PATCH] RowBasedColumnSelectorFactory: Add "useStringValueOfNullInLists" parameter. (#12578) RowBasedColumnSelectorFactory inherited strange behavior from Rows.objectToStrings for nulls that appear in lists: instead of being left as a null, it is replaced with the string "null". Some callers may need compatibility with this strange behavior, but it should be opt-in. Query-time call sites are changed to opt-out of this behavior, since it is not consistent with query-time expectations. The IncrementalIndex ingestion-time call site retains the old behavior, as this is traditionally when Rows.objectToStrings would be used. --- .../epinephelinae/RowBasedGrouperHelper.java | 1 + .../TimeseriesQueryQueryToolChest.java | 1 + .../RowBasedColumnSelectorFactory.java | 52 +++++++++++++------ .../apache/druid/segment/RowBasedCursor.java | 1 + .../segment/incremental/IncrementalIndex.java | 1 + .../druid/segment/transform/Transformer.java | 1 + .../druid/query/filter/InDimFilterTest.java | 3 +- .../druid/segment/filter/BaseFilterTest.java | 1 + .../virtual/ExpressionVirtualColumnTest.java | 4 ++ ...ListFilteredVirtualColumnSelectorTest.java | 1 + .../VirtualizedColumnSelectorFactoryTest.java | 1 + .../expression/ExpressionTestHelper.java | 1 + 12 files changed, 52 insertions(+), 16 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java index bdf930f3e29..4bc851b7e17 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java @@ -403,6 +403,7 @@ public class RowBasedGrouperHelper adapter, supplier::get, decoratedSignature, + false, false ); } diff --git a/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryQueryToolChest.java b/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryQueryToolChest.java index f127864b250..82f802bf988 100644 --- a/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryQueryToolChest.java +++ b/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryQueryToolChest.java @@ -233,6 +233,7 @@ public class TimeseriesQueryQueryToolChest extends QueryToolChest new MapBasedRow(null, null), aggregatorsSignature, + false, false ) ); diff --git a/processing/src/main/java/org/apache/druid/segment/RowBasedColumnSelectorFactory.java b/processing/src/main/java/org/apache/druid/segment/RowBasedColumnSelectorFactory.java index a55f18856c7..64a8f56147d 100644 --- a/processing/src/main/java/org/apache/druid/segment/RowBasedColumnSelectorFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/RowBasedColumnSelectorFactory.java @@ -55,6 +55,7 @@ public class RowBasedColumnSelectorFactory implements ColumnSelectorFactory private final RowAdapter adapter; private final ColumnInspector columnInspector; private final boolean throwParseExceptions; + private final boolean useStringValueOfNullInLists; /** * Package-private constructor for {@link RowBasedCursor}. Allows passing in a rowIdSupplier, which enables @@ -65,7 +66,8 @@ public class RowBasedColumnSelectorFactory implements ColumnSelectorFactory @Nullable final RowIdSupplier rowIdSupplier, final RowAdapter adapter, final ColumnInspector columnInspector, - final boolean throwParseExceptions + final boolean throwParseExceptions, + final boolean useStringValueOfNullInLists ) { this.rowSupplier = rowSupplier; @@ -74,29 +76,42 @@ public class RowBasedColumnSelectorFactory implements ColumnSelectorFactory this.columnInspector = Preconditions.checkNotNull(columnInspector, "columnInspector must be nonnull"); this.throwParseExceptions = throwParseExceptions; + this.useStringValueOfNullInLists = useStringValueOfNullInLists; } /** * Create an instance based on any object, along with a {@link RowAdapter} for that object. * - * @param adapter adapter for these row objects - * @param supplier supplier of row objects - * @param columnInspector will be used for reporting available columns and their capabilities. Note that this - * factory will still allow creation of selectors on any named field in the rows, even if - * it doesn't appear in "columnInspector". (It only needs to be accessible via - * {@link RowAdapter#columnFunction}.) As a result, you can achieve an untyped mode by - * passing in {@link org.apache.druid.segment.column.RowSignature#empty()}. - * @param throwParseExceptions whether numeric selectors should throw parse exceptions or use a default/null value - * when their inputs are not actually numeric + * @param adapter adapter for these row objects + * @param supplier supplier of row objects + * @param columnInspector will be used for reporting available columns and their capabilities. Note that + * this factory will still allow creation of selectors on any named field in the + * rows, even if it doesn't appear in "columnInspector". (It only needs to be + * accessible via {@link RowAdapter#columnFunction}.) As a result, you can achieve + * an untyped mode by passing in + * {@link org.apache.druid.segment.column.RowSignature#empty()}. + * @param throwParseExceptions whether numeric selectors should throw parse exceptions or use a default/null + * value when their inputs are not actually numeric + * @param useStringValueOfNullInLists whether nulls in multi-value strings should be replaced with the string "null". + * for example: the list ["a", null] would be converted to ["a", "null"]. Useful + * for callers that need compatibility with {@link Rows#objectToStrings}. */ public static RowBasedColumnSelectorFactory create( final RowAdapter adapter, final Supplier supplier, final ColumnInspector columnInspector, - final boolean throwParseExceptions + final boolean throwParseExceptions, + final boolean useStringValueOfNullInLists ) { - return new RowBasedColumnSelectorFactory<>(supplier, null, adapter, columnInspector, throwParseExceptions); + return new RowBasedColumnSelectorFactory<>( + supplier, + null, + adapter, + columnInspector, + throwParseExceptions, + useStringValueOfNullInLists + ); } @Nullable @@ -341,19 +356,26 @@ public class RowBasedColumnSelectorFactory implements ColumnSelectorFactory dimensionValues = Collections.singletonList(extractionFn.apply(s)); } } else if (rawValue instanceof List) { - // Consistent behavior with Rows.objectToStrings, but applies extractionFn too. //noinspection rawtypes final List values = new ArrayList<>(((List) rawValue).size()); //noinspection rawtypes for (final Object item : ((List) rawValue)) { + final String itemString; + + if (useStringValueOfNullInLists) { + itemString = String.valueOf(item); + } else { + itemString = item == null ? null : String.valueOf(item); + } + // Behavior with null item is to convert it to string "null". This is not what most other areas of Druid // would do when treating a null as a string, but it's consistent with Rows.objectToStrings, which is // commonly used when retrieving strings from input-row-like objects. if (extractionFn == null) { - values.add(String.valueOf(item)); + values.add(itemString); } else { - values.add(extractionFn.apply(String.valueOf(item))); + values.add(extractionFn.apply(itemString)); } } diff --git a/processing/src/main/java/org/apache/druid/segment/RowBasedCursor.java b/processing/src/main/java/org/apache/druid/segment/RowBasedCursor.java index cc2422e38a2..c0c95251177 100644 --- a/processing/src/main/java/org/apache/druid/segment/RowBasedCursor.java +++ b/processing/src/main/java/org/apache/druid/segment/RowBasedCursor.java @@ -70,6 +70,7 @@ public class RowBasedCursor implements Cursor () -> rowId, rowAdapter, rowSignature, + false, false ) ); diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java index c2e36cade3a..9bc1be2836f 100644 --- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java @@ -126,6 +126,7 @@ public abstract class IncrementalIndex extends AbstractIndex implements Iterable RowAdapters.standardRow(), in::get, RowSignature.empty(), + true, true ); diff --git a/processing/src/main/java/org/apache/druid/segment/transform/Transformer.java b/processing/src/main/java/org/apache/druid/segment/transform/Transformer.java index 7490d152c5a..fa13af9b09c 100644 --- a/processing/src/main/java/org/apache/druid/segment/transform/Transformer.java +++ b/processing/src/main/java/org/apache/druid/segment/transform/Transformer.java @@ -60,6 +60,7 @@ public class Transformer RowAdapters.standardRow(), rowSupplierForValueMatcher::get, RowSignature.empty(), // sad + false, false ) ); diff --git a/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java b/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java index ddcf9785289..cf47b2243a1 100644 --- a/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java +++ b/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java @@ -232,7 +232,8 @@ public class InDimFilterTest extends InitializedNullHandlingTest RowAdapters.standardRow(), () -> new MapBasedRow(0, row), RowSignature.builder().add("dim", ColumnType.STRING).build(), - true + true, + false ); final ValueMatcher matcher = filter.toFilter().makeMatcher(columnSelectorFactory); diff --git a/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java index 77b90ce88db..d7000115bb2 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java @@ -745,6 +745,7 @@ public abstract class BaseFilterTest extends InitializedNullHandlingTest RowAdapters.standardRow(), rowSupplier::get, rowSignatureBuilder.build(), + false, false ) ) diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java index a6a792f76cc..89ddc9bfe30 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java @@ -206,6 +206,7 @@ public class ExpressionVirtualColumnTest extends InitializedNullHandlingTest RowAdapters.standardRow(), CURRENT_ROW::get, RowSignature.empty(), + false, false ); @@ -746,6 +747,7 @@ public class ExpressionVirtualColumnTest extends InitializedNullHandlingTest RowAdapters.standardRow(), CURRENT_ROW::get, RowSignature.builder().add("x", ColumnType.LONG).build(), + false, false ), Parser.parse(SCALE_LONG.getExpression(), TestExprMacroTable.INSTANCE) @@ -769,6 +771,7 @@ public class ExpressionVirtualColumnTest extends InitializedNullHandlingTest RowAdapters.standardRow(), CURRENT_ROW::get, RowSignature.builder().add("x", ColumnType.DOUBLE).build(), + false, false ), Parser.parse(SCALE_FLOAT.getExpression(), TestExprMacroTable.INSTANCE) @@ -792,6 +795,7 @@ public class ExpressionVirtualColumnTest extends InitializedNullHandlingTest RowAdapters.standardRow(), CURRENT_ROW::get, RowSignature.builder().add("x", ColumnType.FLOAT).build(), + false, false ), Parser.parse(SCALE_FLOAT.getExpression(), TestExprMacroTable.INSTANCE) diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumnSelectorTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumnSelectorTest.java index 33b79e767cf..e614d067568 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumnSelectorTest.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumnSelectorTest.java @@ -294,6 +294,7 @@ public class ListFilteredVirtualColumnSelectorTest extends InitializedNullHandli RowAdapters.standardRow(), () -> new MapBasedRow(0L, ImmutableMap.of(COLUMN_NAME, ImmutableList.of("a", "b", "c", "d"))), rowSignature, + false, false ), VirtualColumns.create(ImmutableList.of(virtualColumn)) diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/VirtualizedColumnSelectorFactoryTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/VirtualizedColumnSelectorFactoryTest.java index 02bcb59fe77..118f2f27de1 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/VirtualizedColumnSelectorFactoryTest.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/VirtualizedColumnSelectorFactoryTest.java @@ -41,6 +41,7 @@ public class VirtualizedColumnSelectorFactoryTest extends InitializedNullHandlin RowAdapters.standardRow(), () -> new MapBasedRow(0L, ImmutableMap.of("x", 10L, "y", 20.0)), RowSignature.builder().add("x", ColumnType.LONG).add("y", ColumnType.DOUBLE).build(), + false, false ), VirtualColumns.create( diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/expression/ExpressionTestHelper.java b/sql/src/test/java/org/apache/druid/sql/calcite/expression/ExpressionTestHelper.java index 0d5485b29eb..df5002983df 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/expression/ExpressionTestHelper.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/expression/ExpressionTestHelper.java @@ -344,6 +344,7 @@ class ExpressionTestHelper RowAdapters.standardRow(), () -> new MapBasedRow(0L, bindings), rowSignature, + false, false ), VirtualColumns.create(virtualColumns)