fix issue with auto column grouping (#16489)

* fix issue with auto column grouping
changes:
* fixes bug where AutoTypeColumnIndexer reports an incorrect cardinality, which allowed realtime queries to incorrectly use the array grouper algorithm and produce incorrect results for strings
* fixes bug where auto LONG and DOUBLE type columns incorrectly report not having null values, resulting in incorrect null handling when grouping

* fix test
This commit is contained in:
Clint Wylie 2024-05-26 22:48:17 -07:00 committed by GitHub
parent 6bc29534a7
commit 4e1de50e30
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 315 additions and 33 deletions

View File

@ -281,7 +281,7 @@ public class AutoTypeColumnIndexer implements DimensionIndexer<StructuredData, S
@Override @Override
public int getCardinality() public int getCardinality()
{ {
return globalDictionary.getCardinality(); return DimensionDictionarySelector.CARDINALITY_UNKNOWN;
} }
@Override @Override

View File

@ -203,6 +203,7 @@ public class NestedCommonFormatColumnPartSerde implements ColumnPartSerde
capabilitiesBuilder.setDictionaryValuesSorted(true); capabilitiesBuilder.setDictionaryValuesSorted(true);
capabilitiesBuilder.setDictionaryValuesUnique(true); capabilitiesBuilder.setDictionaryValuesUnique(true);
builder.setType(logicalType); builder.setType(logicalType);
builder.setHasNulls(hasNulls);
builder.setNestedCommonFormatColumnSupplier(supplier); builder.setNestedCommonFormatColumnSupplier(supplier);
builder.setIndexSupplier(supplier, true, false); builder.setIndexSupplier(supplier, true, false);
builder.setColumnFormat(new NestedCommonFormatColumn.Format(logicalType, capabilitiesBuilder.hasNulls().isTrue(), enforceLogicalType)); builder.setColumnFormat(new NestedCommonFormatColumn.Format(logicalType, capabilitiesBuilder.hasNulls().isTrue(), enforceLogicalType));
@ -225,6 +226,7 @@ public class NestedCommonFormatColumnPartSerde implements ColumnPartSerde
// technically, these columns are dictionary encoded, however they do not implement the DictionaryEncodedColumn // technically, these columns are dictionary encoded, however they do not implement the DictionaryEncodedColumn
// interface, so do not make the claim in the ColumnCapabilities // interface, so do not make the claim in the ColumnCapabilities
builder.setType(logicalType); builder.setType(logicalType);
builder.setHasNulls(hasNulls);
builder.setNestedCommonFormatColumnSupplier(supplier); builder.setNestedCommonFormatColumnSupplier(supplier);
builder.setIndexSupplier(supplier, true, false); builder.setIndexSupplier(supplier, true, false);
builder.setColumnFormat(new NestedCommonFormatColumn.Format(logicalType, capabilitiesBuilder.hasNulls().isTrue(), enforceLogicalType)); builder.setColumnFormat(new NestedCommonFormatColumn.Format(logicalType, capabilitiesBuilder.hasNulls().isTrue(), enforceLogicalType));
@ -247,6 +249,7 @@ public class NestedCommonFormatColumnPartSerde implements ColumnPartSerde
// technically, these columns are dictionary encoded, however they do not implement the DictionaryEncodedColumn // technically, these columns are dictionary encoded, however they do not implement the DictionaryEncodedColumn
// interface, so do not make the claim in the ColumnCapabilities // interface, so do not make the claim in the ColumnCapabilities
builder.setType(logicalType); builder.setType(logicalType);
builder.setHasNulls(hasNulls);
builder.setNestedCommonFormatColumnSupplier(supplier); builder.setNestedCommonFormatColumnSupplier(supplier);
builder.setIndexSupplier(supplier, true, false); builder.setIndexSupplier(supplier, true, false);
builder.setColumnFormat(new NestedCommonFormatColumn.Format(logicalType, capabilitiesBuilder.hasNulls().isTrue(), enforceLogicalType)); builder.setColumnFormat(new NestedCommonFormatColumn.Format(logicalType, capabilitiesBuilder.hasNulls().isTrue(), enforceLogicalType));
@ -275,6 +278,7 @@ public class NestedCommonFormatColumnPartSerde implements ColumnPartSerde
capabilitiesBuilder.setDictionaryValuesUnique(true); capabilitiesBuilder.setDictionaryValuesUnique(true);
} }
builder.setType(logicalType); builder.setType(logicalType);
builder.setHasNulls(hasNulls);
builder.setNestedCommonFormatColumnSupplier(supplier); builder.setNestedCommonFormatColumnSupplier(supplier);
builder.setIndexSupplier(supplier, true, false); builder.setIndexSupplier(supplier, true, false);
builder.setColumnFormat(new NestedCommonFormatColumn.Format( builder.setColumnFormat(new NestedCommonFormatColumn.Format(
@ -306,6 +310,7 @@ public class NestedCommonFormatColumnPartSerde implements ColumnPartSerde
ColumnType simpleType = supplier.getLogicalType(); ColumnType simpleType = supplier.getLogicalType();
ColumnType logicalType = simpleType == null ? ColumnType.NESTED_DATA : simpleType; ColumnType logicalType = simpleType == null ? ColumnType.NESTED_DATA : simpleType;
builder.setType(logicalType); builder.setType(logicalType);
builder.setHasNulls(hasNulls);
builder.setNestedCommonFormatColumnSupplier(supplier); builder.setNestedCommonFormatColumnSupplier(supplier);
// in default value mode, SQL planning by default uses selector filters for things like 'is null', which does // in default value mode, SQL planning by default uses selector filters for things like 'is null', which does
// not work correctly for complex types (or arrays). so, only hook up this index in sql compatible mode so that // not work correctly for complex types (or arrays). so, only hook up this index in sql compatible mode so that

View File

@ -622,6 +622,30 @@ public class NestedDataGroupByQueryTest extends InitializedNullHandlingTest
); );
} }
/**
 * Groups {@code test_datasource} on the root-level {@code dim} column with a count aggregator and verifies the
 * three expected (value, count) rows via {@code runResults}.
 */
@Test
public void testGroupByRootAuto()
{
  final GroupByQuery query =
      GroupByQuery.builder()
                  .setDataSource("test_datasource")
                  .setGranularity(Granularities.ALL)
                  .setInterval(Intervals.ETERNITY)
                  .setDimensions(DefaultDimensionSpec.of("dim"))
                  .setAggregatorSpecs(new CountAggregatorFactory("count"))
                  .setContext(getContext())
                  .build();

  // every grouped value is expected back as a string, including the numeric-looking "100"
  final List<Object[]> expectedRows = ImmutableList.of(
      new Object[]{"100", 2L},
      new Object[]{"hello", 12L},
      new Object[]{"world", 2L}
  );

  runResults(query, expectedRows);
}
private void runResults( private void runResults(
GroupByQuery groupQuery, GroupByQuery groupQuery,
List<Object[]> expectedResults List<Object[]> expectedResults

View File

@ -70,69 +70,69 @@ public class AutoTypeColumnIndexerTest extends InitializedNullHandlingTest
{ {
AutoTypeColumnIndexer indexer = new AutoTypeColumnIndexer("test", null); AutoTypeColumnIndexer indexer = new AutoTypeColumnIndexer("test", null);
int baseCardinality = NullHandling.sqlCompatible() ? 0 : 2; int baseCardinality = NullHandling.sqlCompatible() ? 0 : 2;
Assert.assertEquals(baseCardinality, indexer.getCardinality()); Assert.assertEquals(baseCardinality, indexer.globalDictionary.getCardinality());
EncodedKeyComponent<StructuredData> key; EncodedKeyComponent<StructuredData> key;
// new raw value, new field, new dictionary entry // new raw value, new field, new dictionary entry
key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableMap.of("x", "foo"), false); key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableMap.of("x", "foo"), false);
Assert.assertEquals(228, key.getEffectiveSizeBytes()); Assert.assertEquals(228, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality + 1, indexer.getCardinality()); Assert.assertEquals(baseCardinality + 1, indexer.globalDictionary.getCardinality());
// adding same value only adds estimated size of value itself // adding same value only adds estimated size of value itself
key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableMap.of("x", "foo"), false); key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableMap.of("x", "foo"), false);
Assert.assertEquals(112, key.getEffectiveSizeBytes()); Assert.assertEquals(112, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality + 1, indexer.getCardinality()); Assert.assertEquals(baseCardinality + 1, indexer.globalDictionary.getCardinality());
// new raw value, new field, new dictionary entry // new raw value, new field, new dictionary entry
key = indexer.processRowValsToUnsortedEncodedKeyComponent(10L, false); key = indexer.processRowValsToUnsortedEncodedKeyComponent(10L, false);
Assert.assertEquals(94, key.getEffectiveSizeBytes()); Assert.assertEquals(94, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality + 2, indexer.getCardinality()); Assert.assertEquals(baseCardinality + 2, indexer.globalDictionary.getCardinality());
// adding same value only adds estimated size of value itself // adding same value only adds estimated size of value itself
key = indexer.processRowValsToUnsortedEncodedKeyComponent(10L, false); key = indexer.processRowValsToUnsortedEncodedKeyComponent(10L, false);
Assert.assertEquals(16, key.getEffectiveSizeBytes()); Assert.assertEquals(16, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality + 2, indexer.getCardinality()); Assert.assertEquals(baseCardinality + 2, indexer.globalDictionary.getCardinality());
// new raw value, new dictionary entry // new raw value, new dictionary entry
key = indexer.processRowValsToUnsortedEncodedKeyComponent(11L, false); key = indexer.processRowValsToUnsortedEncodedKeyComponent(11L, false);
Assert.assertEquals(48, key.getEffectiveSizeBytes()); Assert.assertEquals(48, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality + 3, indexer.getCardinality()); Assert.assertEquals(baseCardinality + 3, indexer.globalDictionary.getCardinality());
// new raw value, new fields // new raw value, new fields
key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableList.of(1L, 2L, 10L), false); key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableList.of(1L, 2L, 10L), false);
Assert.assertEquals(168, key.getEffectiveSizeBytes()); Assert.assertEquals(168, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality + 6, indexer.getCardinality()); Assert.assertEquals(baseCardinality + 6, indexer.globalDictionary.getCardinality());
// new raw value, re-use fields and dictionary // new raw value, re-use fields and dictionary
key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableList.of(1L, 2L, 10L), false); key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableList.of(1L, 2L, 10L), false);
Assert.assertEquals(104, key.getEffectiveSizeBytes()); Assert.assertEquals(104, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality + 6, indexer.getCardinality()); Assert.assertEquals(baseCardinality + 6, indexer.globalDictionary.getCardinality());
// new raw value, new fields // new raw value, new fields
key = indexer.processRowValsToUnsortedEncodedKeyComponent( key = indexer.processRowValsToUnsortedEncodedKeyComponent(
ImmutableMap.of("x", ImmutableList.of(1L, 2L, 10L)), ImmutableMap.of("x", ImmutableList.of(1L, 2L, 10L)),
false false
); );
Assert.assertEquals(166, key.getEffectiveSizeBytes()); Assert.assertEquals(166, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality + 6, indexer.getCardinality()); Assert.assertEquals(baseCardinality + 6, indexer.globalDictionary.getCardinality());
// new raw value // new raw value
key = indexer.processRowValsToUnsortedEncodedKeyComponent( key = indexer.processRowValsToUnsortedEncodedKeyComponent(
ImmutableMap.of("x", ImmutableList.of(1L, 2L, 10L)), ImmutableMap.of("x", ImmutableList.of(1L, 2L, 10L)),
false false
); );
Assert.assertEquals(166, key.getEffectiveSizeBytes()); Assert.assertEquals(166, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality + 6, indexer.getCardinality()); Assert.assertEquals(baseCardinality + 6, indexer.globalDictionary.getCardinality());
key = indexer.processRowValsToUnsortedEncodedKeyComponent("", false); key = indexer.processRowValsToUnsortedEncodedKeyComponent("", false);
if (NullHandling.replaceWithDefault()) { if (NullHandling.replaceWithDefault()) {
Assert.assertEquals(0, key.getEffectiveSizeBytes()); Assert.assertEquals(0, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality + 7, indexer.getCardinality()); Assert.assertEquals(baseCardinality + 7, indexer.globalDictionary.getCardinality());
} else { } else {
Assert.assertEquals(104, key.getEffectiveSizeBytes()); Assert.assertEquals(104, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality + 7, indexer.getCardinality()); Assert.assertEquals(baseCardinality + 7, indexer.globalDictionary.getCardinality());
} }
key = indexer.processRowValsToUnsortedEncodedKeyComponent(0L, false); key = indexer.processRowValsToUnsortedEncodedKeyComponent(0L, false);
if (NullHandling.replaceWithDefault()) { if (NullHandling.replaceWithDefault()) {
Assert.assertEquals(16, key.getEffectiveSizeBytes()); Assert.assertEquals(16, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality + 7, indexer.getCardinality()); Assert.assertEquals(baseCardinality + 7, indexer.globalDictionary.getCardinality());
} else { } else {
Assert.assertEquals(48, key.getEffectiveSizeBytes()); Assert.assertEquals(48, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality + 8, indexer.getCardinality()); Assert.assertEquals(baseCardinality + 8, indexer.globalDictionary.getCardinality());
} }
} }
@ -673,14 +673,14 @@ public class AutoTypeColumnIndexerTest extends InitializedNullHandlingTest
key = indexer.processRowValsToUnsortedEncodedKeyComponent(null, true); key = indexer.processRowValsToUnsortedEncodedKeyComponent(null, true);
Assert.assertEquals(0, key.getEffectiveSizeBytes()); Assert.assertEquals(0, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality, indexer.getCardinality()); Assert.assertEquals(baseCardinality, indexer.globalDictionary.getCardinality());
key = indexer.processRowValsToUnsortedEncodedKeyComponent(null, true); key = indexer.processRowValsToUnsortedEncodedKeyComponent(null, true);
Assert.assertEquals(0, key.getEffectiveSizeBytes()); Assert.assertEquals(0, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality, indexer.getCardinality()); Assert.assertEquals(baseCardinality, indexer.globalDictionary.getCardinality());
key = indexer.processRowValsToUnsortedEncodedKeyComponent(null, true); key = indexer.processRowValsToUnsortedEncodedKeyComponent(null, true);
Assert.assertEquals(0, key.getEffectiveSizeBytes()); Assert.assertEquals(0, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality, indexer.getCardinality()); Assert.assertEquals(baseCardinality, indexer.globalDictionary.getCardinality());
Assert.assertTrue(indexer.hasNulls); Assert.assertTrue(indexer.hasNulls);
@ -698,14 +698,14 @@ public class AutoTypeColumnIndexerTest extends InitializedNullHandlingTest
key = indexer.processRowValsToUnsortedEncodedKeyComponent("abcd", true); key = indexer.processRowValsToUnsortedEncodedKeyComponent("abcd", true);
Assert.assertEquals(166, key.getEffectiveSizeBytes()); Assert.assertEquals(166, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality + 1, indexer.getCardinality()); Assert.assertEquals(baseCardinality + 1, indexer.globalDictionary.getCardinality());
key = indexer.processRowValsToUnsortedEncodedKeyComponent("abcd", true); key = indexer.processRowValsToUnsortedEncodedKeyComponent("abcd", true);
Assert.assertEquals(52, key.getEffectiveSizeBytes()); Assert.assertEquals(52, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality + 1, indexer.getCardinality()); Assert.assertEquals(baseCardinality + 1, indexer.globalDictionary.getCardinality());
key = indexer.processRowValsToUnsortedEncodedKeyComponent("abcd", true); key = indexer.processRowValsToUnsortedEncodedKeyComponent("abcd", true);
Assert.assertEquals(52, key.getEffectiveSizeBytes()); Assert.assertEquals(52, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality + 1, indexer.getCardinality()); Assert.assertEquals(baseCardinality + 1, indexer.globalDictionary.getCardinality());
Assert.assertFalse(indexer.hasNulls); Assert.assertFalse(indexer.hasNulls);
Assert.assertFalse(indexer.hasNestedData); Assert.assertFalse(indexer.hasNestedData);
@ -722,14 +722,14 @@ public class AutoTypeColumnIndexerTest extends InitializedNullHandlingTest
key = indexer.processRowValsToUnsortedEncodedKeyComponent(1234L, true); key = indexer.processRowValsToUnsortedEncodedKeyComponent(1234L, true);
Assert.assertEquals(94, key.getEffectiveSizeBytes()); Assert.assertEquals(94, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality + 1, indexer.getCardinality()); Assert.assertEquals(baseCardinality + 1, indexer.globalDictionary.getCardinality());
key = indexer.processRowValsToUnsortedEncodedKeyComponent(1234L, true); key = indexer.processRowValsToUnsortedEncodedKeyComponent(1234L, true);
Assert.assertEquals(16, key.getEffectiveSizeBytes()); Assert.assertEquals(16, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality + 1, indexer.getCardinality()); Assert.assertEquals(baseCardinality + 1, indexer.globalDictionary.getCardinality());
key = indexer.processRowValsToUnsortedEncodedKeyComponent(1234L, true); key = indexer.processRowValsToUnsortedEncodedKeyComponent(1234L, true);
Assert.assertEquals(16, key.getEffectiveSizeBytes()); Assert.assertEquals(16, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality + 1, indexer.getCardinality()); Assert.assertEquals(baseCardinality + 1, indexer.globalDictionary.getCardinality());
Assert.assertFalse(indexer.hasNulls); Assert.assertFalse(indexer.hasNulls);
Assert.assertFalse(indexer.hasNestedData); Assert.assertFalse(indexer.hasNestedData);
@ -746,14 +746,14 @@ public class AutoTypeColumnIndexerTest extends InitializedNullHandlingTest
key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableList.of(), true); key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableList.of(), true);
Assert.assertEquals(54, key.getEffectiveSizeBytes()); Assert.assertEquals(54, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality + 1, indexer.getCardinality()); Assert.assertEquals(baseCardinality + 1, indexer.globalDictionary.getCardinality());
key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableList.of(), true); key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableList.of(), true);
Assert.assertEquals(8, key.getEffectiveSizeBytes()); Assert.assertEquals(8, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality + 1, indexer.getCardinality()); Assert.assertEquals(baseCardinality + 1, indexer.globalDictionary.getCardinality());
key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableList.of(), true); key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableList.of(), true);
Assert.assertEquals(8, key.getEffectiveSizeBytes()); Assert.assertEquals(8, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality + 1, indexer.getCardinality()); Assert.assertEquals(baseCardinality + 1, indexer.globalDictionary.getCardinality());
Assert.assertFalse(indexer.hasNulls); Assert.assertFalse(indexer.hasNulls);
Assert.assertFalse(indexer.hasNestedData); Assert.assertFalse(indexer.hasNestedData);
@ -770,14 +770,14 @@ public class AutoTypeColumnIndexerTest extends InitializedNullHandlingTest
key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableList.of(1L, 2L, 3L), true); key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableList.of(1L, 2L, 3L), true);
Assert.assertEquals(246, key.getEffectiveSizeBytes()); Assert.assertEquals(246, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality + 4, indexer.getCardinality()); Assert.assertEquals(baseCardinality + 4, indexer.globalDictionary.getCardinality());
key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableList.of(1L, 2L, 3L), true); key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableList.of(1L, 2L, 3L), true);
Assert.assertEquals(104, key.getEffectiveSizeBytes()); Assert.assertEquals(104, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality + 4, indexer.getCardinality()); Assert.assertEquals(baseCardinality + 4, indexer.globalDictionary.getCardinality());
key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableList.of(1L, 2L, 3L), true); key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableList.of(1L, 2L, 3L), true);
Assert.assertEquals(104, key.getEffectiveSizeBytes()); Assert.assertEquals(104, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality + 4, indexer.getCardinality()); Assert.assertEquals(baseCardinality + 4, indexer.globalDictionary.getCardinality());
Assert.assertFalse(indexer.hasNulls); Assert.assertFalse(indexer.hasNulls);
Assert.assertFalse(indexer.hasNestedData); Assert.assertFalse(indexer.hasNestedData);
@ -794,14 +794,14 @@ public class AutoTypeColumnIndexerTest extends InitializedNullHandlingTest
key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableMap.of(), true); key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableMap.of(), true);
Assert.assertEquals(16, key.getEffectiveSizeBytes()); Assert.assertEquals(16, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality, indexer.getCardinality()); Assert.assertEquals(baseCardinality, indexer.globalDictionary.getCardinality());
key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableMap.of(), true); key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableMap.of(), true);
Assert.assertEquals(16, key.getEffectiveSizeBytes()); Assert.assertEquals(16, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality, indexer.getCardinality()); Assert.assertEquals(baseCardinality, indexer.globalDictionary.getCardinality());
key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableMap.of(), true); key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableMap.of(), true);
Assert.assertEquals(16, key.getEffectiveSizeBytes()); Assert.assertEquals(16, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality, indexer.getCardinality()); Assert.assertEquals(baseCardinality, indexer.globalDictionary.getCardinality());
Assert.assertFalse(indexer.hasNulls); Assert.assertFalse(indexer.hasNulls);
Assert.assertTrue(indexer.hasNestedData); Assert.assertTrue(indexer.hasNestedData);

View File

@ -36,6 +36,7 @@ import org.apache.druid.query.groupby.GroupByQueryConfig;
import org.apache.druid.query.lookup.LookupExtractorFactoryContainer; import org.apache.druid.query.lookup.LookupExtractorFactoryContainer;
import org.apache.druid.query.lookup.LookupExtractorFactoryContainerProvider; import org.apache.druid.query.lookup.LookupExtractorFactoryContainerProvider;
import org.apache.druid.segment.FrameBasedInlineSegmentWrangler; import org.apache.druid.segment.FrameBasedInlineSegmentWrangler;
import org.apache.druid.segment.IncrementalIndexSegment;
import org.apache.druid.segment.InlineSegmentWrangler; import org.apache.druid.segment.InlineSegmentWrangler;
import org.apache.druid.segment.LookupSegmentWrangler; import org.apache.druid.segment.LookupSegmentWrangler;
import org.apache.druid.segment.MapSegmentWrangler; import org.apache.druid.segment.MapSegmentWrangler;
@ -44,6 +45,7 @@ import org.apache.druid.segment.QueryableIndexSegment;
import org.apache.druid.segment.ReferenceCountingSegment; import org.apache.druid.segment.ReferenceCountingSegment;
import org.apache.druid.segment.Segment; import org.apache.druid.segment.Segment;
import org.apache.druid.segment.SegmentWrangler; import org.apache.druid.segment.SegmentWrangler;
import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.join.JoinableFactory; import org.apache.druid.segment.join.JoinableFactory;
import org.apache.druid.segment.join.JoinableFactoryWrapper; import org.apache.druid.segment.join.JoinableFactoryWrapper;
import org.apache.druid.server.initialization.ServerConfig; import org.apache.druid.server.initialization.ServerConfig;
@ -196,6 +198,11 @@ public class SpecificSegmentsQuerySegmentWalker implements QuerySegmentWalker, C
return add(descriptor, new QueryableIndexSegment(index, descriptor.getId())); return add(descriptor, new QueryableIndexSegment(index, descriptor.getId()));
} }
/**
 * Registers an {@link IncrementalIndex} for the given descriptor by wrapping it in an
 * {@link IncrementalIndexSegment} identified by the descriptor's segment id, then delegating to the
 * segment-accepting {@code add} overload.
 *
 * @param descriptor segment metadata; its id is used for the wrapping segment
 * @param index      incremental index holding the rows to serve
 * @return whatever the delegated {@code add} call returns
 */
public SpecificSegmentsQuerySegmentWalker add(final DataSegment descriptor, final IncrementalIndex index)
{
  // keep the local typed as IncrementalIndexSegment so the same overload is selected as with the inline expression
  final IncrementalIndexSegment wrapped = new IncrementalIndexSegment(index, descriptor.getId());
  return add(descriptor, wrapped);
}
public List<DataSegment> getSegments() public List<DataSegment> getSegments()
{ {
return segments; return segments;

View File

@ -66,6 +66,7 @@ import org.apache.druid.segment.IndexBuilder;
import org.apache.druid.segment.QueryableIndex; import org.apache.druid.segment.QueryableIndex;
import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.RowSignature; import org.apache.druid.segment.column.RowSignature;
import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.incremental.IncrementalIndexSchema; import org.apache.druid.segment.incremental.IncrementalIndexSchema;
import org.apache.druid.segment.join.JoinableFactoryWrapper; import org.apache.druid.segment.join.JoinableFactoryWrapper;
import org.apache.druid.segment.virtual.ExpressionVirtualColumn; import org.apache.druid.segment.virtual.ExpressionVirtualColumn;
@ -96,6 +97,7 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest
public static final String DATA_SOURCE_MIXED_2 = "nested_mix_2"; public static final String DATA_SOURCE_MIXED_2 = "nested_mix_2";
public static final String DATA_SOURCE_ARRAYS = "arrays"; public static final String DATA_SOURCE_ARRAYS = "arrays";
public static final String DATA_SOURCE_ALL = "all_auto"; public static final String DATA_SOURCE_ALL = "all_auto";
public static final String DATA_SOURCE_ALL_REALTIME = "all_auto_realtime";
public static final List<ImmutableMap<String, Object>> RAW_ROWS = ImmutableList.of( public static final List<ImmutableMap<String, Object>> RAW_ROWS = ImmutableList.of(
ImmutableMap.<String, Object>builder() ImmutableMap.<String, Object>builder()
@ -334,6 +336,30 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest
.inputTmpDir(tempDirProducer.newTempFolder()) .inputTmpDir(tempDirProducer.newTempFolder())
.buildMMappedIndex(); .buildMMappedIndex();
final IncrementalIndex indexAllTypesAutoRealtime =
IndexBuilder.create()
.tmpDir(tempDirProducer.newTempFolder())
.segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance())
.schema(
new IncrementalIndexSchema.Builder()
.withTimestampSpec(NestedDataTestUtils.AUTO_SCHEMA.getTimestampSpec())
.withDimensionsSpec(NestedDataTestUtils.AUTO_SCHEMA.getDimensionsSpec())
.withMetrics(
new CountAggregatorFactory("cnt")
)
.withRollup(false)
.build()
)
.inputSource(
ResourceInputSource.of(
NestedDataTestUtils.class.getClassLoader(),
NestedDataTestUtils.ALL_TYPES_TEST_DATA_FILE
)
)
.inputFormat(TestDataBuilder.DEFAULT_JSON_INPUT_FORMAT)
.inputTmpDir(tempDirProducer.newTempFolder())
.buildIncrementalIndex();
SpecificSegmentsQuerySegmentWalker walker = SpecificSegmentsQuerySegmentWalker.createWalker(injector, conglomerate); SpecificSegmentsQuerySegmentWalker walker = SpecificSegmentsQuerySegmentWalker.createWalker(injector, conglomerate);
walker.add( walker.add(
@ -399,6 +425,15 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest
.size(0) .size(0)
.build(), .build(),
indexAllTypesAuto indexAllTypesAuto
).add(
DataSegment.builder()
.dataSource(DATA_SOURCE_ALL_REALTIME)
.version("1")
.interval(indexAllTypesAutoRealtime.getInterval())
.shardSpec(new LinearShardSpec(1))
.size(0)
.build(),
indexAllTypesAutoRealtime
); );
return walker; return walker;
@ -7322,4 +7357,215 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest
.build() .build()
); );
} }
/**
 * Runs {@code SELECT str, SUM(cnt) ... GROUP BY 1} against both {@code all_auto} and {@code all_auto_realtime}
 * and expects the same rows and signature from each datasource.
 */
@Test
public void testGroupByAutoString()
{
  // in default-value mode a single default-string group with count 2 is expected; in SQL-compatible mode
  // null and "" are expected as separate groups
  final List<Object[]> expectedRows;
  if (!NullHandling.sqlCompatible()) {
    expectedRows = ImmutableList.of(
        new Object[]{NullHandling.defaultStringValue(), 2L},
        new Object[]{"a", 1L},
        new Object[]{"b", 1L},
        new Object[]{"c", 1L},
        new Object[]{"d", 1L},
        new Object[]{"null", 1L}
    );
  } else {
    expectedRows = ImmutableList.of(
        new Object[]{null, 1L},
        new Object[]{"", 1L},
        new Object[]{"a", 1L},
        new Object[]{"b", 1L},
        new Object[]{"c", 1L},
        new Object[]{"d", 1L},
        new Object[]{"null", 1L}
    );
  }

  // both datasources must report the same output signature
  final RowSignature expectedSignature = RowSignature.builder()
                                                     .add("str", ColumnType.STRING)
                                                     .add("EXPR$1", ColumnType.LONG)
                                                     .build();

  final GroupByQuery segmentQuery =
      GroupByQuery.builder()
                  .setDataSource(DATA_SOURCE_ALL)
                  .setInterval(querySegmentSpec(Filtration.eternity()))
                  .setGranularity(Granularities.ALL)
                  .setDimensions(dimensions(new DefaultDimensionSpec("str", "d0")))
                  .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt")))
                  .setContext(QUERY_CONTEXT_DEFAULT)
                  .build();
  testQuery(
      "SELECT str, SUM(cnt) FROM druid.all_auto GROUP BY 1",
      ImmutableList.of(segmentQuery),
      expectedRows,
      expectedSignature
  );

  // NOTE(review): these flags are set only before the realtime run, presumably because that datasource
  // cannot be exercised by the vectorized engine or MSQ -- confirm
  cannotVectorize();
  msqIncompatible();

  final GroupByQuery realtimeQuery =
      GroupByQuery.builder()
                  .setDataSource(DATA_SOURCE_ALL_REALTIME)
                  .setInterval(querySegmentSpec(Filtration.eternity()))
                  .setGranularity(Granularities.ALL)
                  .setDimensions(dimensions(new DefaultDimensionSpec("str", "d0")))
                  .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt")))
                  .setContext(QUERY_CONTEXT_DEFAULT)
                  .build();
  testQuery(
      "SELECT str, SUM(cnt) FROM druid.all_auto_realtime GROUP BY 1",
      ImmutableList.of(realtimeQuery),
      expectedRows,
      expectedSignature
  );
}
/**
 * Runs {@code SELECT long, SUM(cnt) ... GROUP BY 1} against both {@code all_auto} and {@code all_auto_realtime}
 * and expects the same rows and signature from each datasource.
 */
@Test
public void testGroupByAutoLong()
{
  // the first group is keyed by NullHandling.defaultLongValue() and is expected to cover two rows
  final List<Object[]> expectedRows = ImmutableList.of(
      new Object[]{NullHandling.defaultLongValue(), 2L},
      new Object[]{1L, 1L},
      new Object[]{2L, 1L},
      new Object[]{3L, 1L},
      new Object[]{4L, 1L},
      new Object[]{5L, 1L}
  );

  // both datasources must report the same output signature
  final RowSignature expectedSignature = RowSignature.builder()
                                                     .add("long", ColumnType.LONG)
                                                     .add("EXPR$1", ColumnType.LONG)
                                                     .build();

  final GroupByQuery segmentQuery =
      GroupByQuery.builder()
                  .setDataSource(DATA_SOURCE_ALL)
                  .setInterval(querySegmentSpec(Filtration.eternity()))
                  .setGranularity(Granularities.ALL)
                  .setDimensions(dimensions(new DefaultDimensionSpec("long", "d0", ColumnType.LONG)))
                  .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt")))
                  .setContext(QUERY_CONTEXT_DEFAULT)
                  .build();
  testQuery(
      "SELECT long, SUM(cnt) FROM druid.all_auto GROUP BY 1",
      ImmutableList.of(segmentQuery),
      expectedRows,
      expectedSignature
  );

  // NOTE(review): these flags are set only before the realtime run, presumably because that datasource
  // cannot be exercised by the vectorized engine or MSQ -- confirm
  cannotVectorize();
  msqIncompatible();

  final GroupByQuery realtimeQuery =
      GroupByQuery.builder()
                  .setDataSource(DATA_SOURCE_ALL_REALTIME)
                  .setInterval(querySegmentSpec(Filtration.eternity()))
                  .setGranularity(Granularities.ALL)
                  .setDimensions(dimensions(new DefaultDimensionSpec("long", "d0", ColumnType.LONG)))
                  .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt")))
                  .setContext(QUERY_CONTEXT_DEFAULT)
                  .build();
  testQuery(
      "SELECT long, SUM(cnt) FROM druid.all_auto_realtime GROUP BY 1",
      ImmutableList.of(realtimeQuery),
      expectedRows,
      expectedSignature
  );
}
/**
 * Runs {@code SELECT "double", SUM(cnt) ... GROUP BY 1} against both {@code all_auto} and
 * {@code all_auto_realtime} and expects the same rows and signature from each datasource.
 */
@Test
public void testGroupByAutoDouble()
{
  // the first group is keyed by NullHandling.defaultDoubleValue() and is expected to cover two rows
  final List<Object[]> expectedRows = ImmutableList.of(
      new Object[]{NullHandling.defaultDoubleValue(), 2L},
      new Object[]{1.0D, 1L},
      new Object[]{2.0D, 1L},
      new Object[]{3.3D, 1L},
      new Object[]{4.4D, 1L},
      new Object[]{5.9D, 1L}
  );

  // both datasources must report the same output signature
  final RowSignature expectedSignature = RowSignature.builder()
                                                     .add("double", ColumnType.DOUBLE)
                                                     .add("EXPR$1", ColumnType.LONG)
                                                     .build();

  final GroupByQuery segmentQuery =
      GroupByQuery.builder()
                  .setDataSource(DATA_SOURCE_ALL)
                  .setInterval(querySegmentSpec(Filtration.eternity()))
                  .setGranularity(Granularities.ALL)
                  .setDimensions(dimensions(new DefaultDimensionSpec("double", "d0", ColumnType.DOUBLE)))
                  .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt")))
                  .setContext(QUERY_CONTEXT_DEFAULT)
                  .build();
  testQuery(
      "SELECT \"double\", SUM(cnt) FROM druid.all_auto GROUP BY 1",
      ImmutableList.of(segmentQuery),
      expectedRows,
      expectedSignature
  );

  // NOTE(review): these flags are set only before the realtime run, presumably because that datasource
  // cannot be exercised by the vectorized engine or MSQ -- confirm
  cannotVectorize();
  msqIncompatible();

  final GroupByQuery realtimeQuery =
      GroupByQuery.builder()
                  .setDataSource(DATA_SOURCE_ALL_REALTIME)
                  .setInterval(querySegmentSpec(Filtration.eternity()))
                  .setGranularity(Granularities.ALL)
                  .setDimensions(dimensions(new DefaultDimensionSpec("double", "d0", ColumnType.DOUBLE)))
                  .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt")))
                  .setContext(QUERY_CONTEXT_DEFAULT)
                  .build();
  testQuery(
      "SELECT \"double\", SUM(cnt) FROM druid.all_auto_realtime GROUP BY 1",
      ImmutableList.of(realtimeQuery),
      expectedRows,
      expectedSignature
  );
}
} }