diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java index 63b8598ef63..160415924a3 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java @@ -1170,10 +1170,15 @@ public class NestedFieldVirtualColumn implements VirtualColumn if (theColumn instanceof CompressedNestedDataComplexColumn) { final CompressedNestedDataComplexColumn nestedColumn = (CompressedNestedDataComplexColumn) theColumn; final ColumnIndexSupplier nestedColumnPathIndexSupplier = nestedColumn.getColumnIndexSupplier(parts); + if (nestedColumnPathIndexSupplier == null && processFromRaw) { + // if processing from raw, a non-existent path from parts doesn't mean the path doesn't really exist + // so fall back to no indexes + return NoIndexesColumnIndexSupplier.getInstance(); + } if (expectedType != null) { final Set types = nestedColumn.getColumnTypes(parts); // if the expected output type is numeric but not all of the input types are numeric, we might have additional - // null values than what the null value bitmap is tracking, wrap it + // null values than what the null value bitmap is tracking, fall back to not using indexes if (expectedType.isNumeric() && (types == null || types.stream().anyMatch(t -> !t.isNumeric()))) { return NoIndexesColumnIndexSupplier.getInstance(); } diff --git a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java index 8435ea42f5f..cedef264a53 100644 --- a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java @@ -37,6 +37,8 @@ import 
org.apache.druid.query.aggregation.AggregationTestHelper; import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.aggregation.CountAggregatorFactory; import org.apache.druid.query.filter.BoundDimFilter; +import org.apache.druid.query.filter.NotDimFilter; +import org.apache.druid.query.filter.NullFilter; import org.apache.druid.query.filter.SelectorDimFilter; import org.apache.druid.query.ordering.StringComparators; import org.apache.druid.query.spec.MultipleIntervalSegmentSpec; @@ -799,6 +801,122 @@ public class NestedDataScanQueryTest extends InitializedNullHandlingTest Assert.assertEquals(resultsSegments.get(0).getEvents().toString(), resultsRealtime.get(0).getEvents().toString()); } + @Test + public void testIngestAndScanSegmentsAndFilterPartialPathArrayIndex() throws Exception + { + Query scanQuery = Druids.newScanQueryBuilder() + .dataSource("test_datasource") + .intervals( + new MultipleIntervalSegmentSpec( + Collections.singletonList(Intervals.ETERNITY) + ) + ) + .filters( + NotDimFilter.of(NullFilter.forColumn("v0")) + ) + .virtualColumns( + new NestedFieldVirtualColumn( + "complexObj", + "v0", + ColumnType.NESTED_DATA, + null, + true, + "$.y[0]", + false + ) + ) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .limit(100) + .context(ImmutableMap.of()) + .build(); + List segs = NestedDataTestUtils.createSegmentsForJsonInput( + tempFolder, + closer, + NestedDataTestUtils.ALL_TYPES_TEST_DATA_FILE, + Granularities.HOUR, + true, + IndexSpec.DEFAULT + ); + + List realtimeSegs = ImmutableList.of( + NestedDataTestUtils.createIncrementalIndexForJsonInput( + tempFolder, + NestedDataTestUtils.ALL_TYPES_TEST_DATA_FILE, + Granularities.NONE, + true + ) + ); + + final Sequence seq = helper.runQueryOnSegmentsObjs(segs, scanQuery); + final Sequence seqRealtime = helper.runQueryOnSegmentsObjs(realtimeSegs, scanQuery); + List results = seq.toList(); + List resultsRealtime = seqRealtime.toList(); + logResults(results); 
+ logResults(resultsRealtime); + Assert.assertEquals(1, results.size()); + Assert.assertEquals(4, ((List) results.get(0).getEvents()).size()); + Assert.assertEquals(results.size(), resultsRealtime.size()); + Assert.assertEquals(results.get(0).getEvents().toString(), resultsRealtime.get(0).getEvents().toString()); + } + + @Test + public void testIngestAndScanSegmentsAndFilterPartialPath() throws Exception + { + Query scanQuery = Druids.newScanQueryBuilder() + .dataSource("test_datasource") + .intervals( + new MultipleIntervalSegmentSpec( + Collections.singletonList(Intervals.ETERNITY) + ) + ) + .filters( + NotDimFilter.of(NullFilter.forColumn("v0")) + ) + .virtualColumns( + new NestedFieldVirtualColumn( + "obj", + "v0", + ColumnType.NESTED_DATA, + null, + true, + "$.b", + false + ) + ) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .limit(100) + .context(ImmutableMap.of()) + .build(); + List segs = NestedDataTestUtils.createSegmentsForJsonInput( + tempFolder, + closer, + NestedDataTestUtils.ALL_TYPES_TEST_DATA_FILE, + Granularities.HOUR, + true, + IndexSpec.DEFAULT + ); + + List realtimeSegs = ImmutableList.of( + NestedDataTestUtils.createIncrementalIndexForJsonInput( + tempFolder, + NestedDataTestUtils.ALL_TYPES_TEST_DATA_FILE, + Granularities.NONE, + true + ) + ); + + final Sequence seq = helper.runQueryOnSegmentsObjs(segs, scanQuery); + final Sequence seqRealtime = helper.runQueryOnSegmentsObjs(realtimeSegs, scanQuery); + List results = seq.toList(); + List resultsRealtime = seqRealtime.toList(); + logResults(results); + logResults(resultsRealtime); + Assert.assertEquals(1, results.size()); + Assert.assertEquals(6, ((List) results.get(0).getEvents()).size()); + Assert.assertEquals(results.size(), resultsRealtime.size()); + Assert.assertEquals(results.get(0).getEvents().toString(), resultsRealtime.get(0).getEvents().toString()); + } + private static void logResults(List results) { StringBuilder bob = new StringBuilder();