fix NestedDataColumnIndexerV4 to not report cardinality (#16507)

* fix NestedDataColumnIndexerV4 to not report cardinality
changes:
* fix issue similar to #16489 but for NestedDataColumnIndexerV4, which can report STRING type if it only processes values of a single type. This should be less common than the auto indexer problem.
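* fix some issues with SQL benchmarks: pass the benchmark's own SqlEngine to the vectorization sanity check, log failures instead of silently swallowing them, and include the "nested" column and the transform spec when building the "auto" schema segment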
This commit is contained in:
Clint Wylie 2024-06-11 20:58:12 -07:00 committed by GitHub
parent 3f5f5921e0
commit fee509df2e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 56 additions and 43 deletions

View File

@ -360,13 +360,16 @@ public class SqlExpressionBenchmark
try { try {
SqlVectorizedExpressionSanityTest.sanityTestVectorizedSqlQueries( SqlVectorizedExpressionSanityTest.sanityTestVectorizedSqlQueries(
engine,
plannerFactory, plannerFactory,
QUERIES.get(Integer.parseInt(query)) QUERIES.get(Integer.parseInt(query))
); );
log.info("non-vectorized and vectorized results match");
} }
catch (Throwable ignored) { catch (Throwable ex) {
// the show must go on log.warn(ex, "non-vectorized and vectorized results do not match");
} }
final String sql = QUERIES.get(Integer.parseInt(query)); final String sql = QUERIES.get(Integer.parseInt(query));
try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, "EXPLAIN PLAN FOR " + sql, ImmutableMap.of("useNativeQueryExplain", true))) { try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, "EXPLAIN PLAN FOR " + sql, ImmutableMap.of("useNativeQueryExplain", true))) {
@ -378,8 +381,8 @@ public class SqlExpressionBenchmark
.writeValueAsString(jsonMapper.readValue((String) planResult[0], List.class)) .writeValueAsString(jsonMapper.readValue((String) planResult[0], List.class))
); );
} }
catch (JsonProcessingException ignored) { catch (JsonProcessingException ex) {
log.warn(ex, "explain failed");
} }
try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, sql, ImmutableMap.of())) { try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, sql, ImmutableMap.of())) {
@ -393,8 +396,8 @@ public class SqlExpressionBenchmark
} }
log.info("Total result row count:" + rowCounter); log.info("Total result row count:" + rowCounter);
} }
catch (Throwable ignored) { catch (Throwable ex) {
log.warn(ex, "failed to count rows");
} }
} }

View File

@ -29,6 +29,7 @@ import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.io.Closer; import org.apache.druid.java.util.common.io.Closer;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.math.expr.ExpressionProcessing; import org.apache.druid.math.expr.ExpressionProcessing;
import org.apache.druid.query.DruidProcessingConfig; import org.apache.druid.query.DruidProcessingConfig;
import org.apache.druid.query.QueryRunnerFactoryConglomerate; import org.apache.druid.query.QueryRunnerFactoryConglomerate;
@ -91,6 +92,8 @@ public class SqlGroupByBenchmark
NestedDataModule.registerHandlersAndSerde(); NestedDataModule.registerHandlersAndSerde();
} }
private static final Logger log = new Logger(SqlGroupByBenchmark.class);
private static final DruidProcessingConfig PROCESSING_CONFIG = new DruidProcessingConfig() private static final DruidProcessingConfig PROCESSING_CONFIG = new DruidProcessingConfig()
{ {
@Override @Override
@ -349,12 +352,14 @@ public class SqlGroupByBenchmark
try { try {
SqlVectorizedExpressionSanityTest.sanityTestVectorizedSqlQueries( SqlVectorizedExpressionSanityTest.sanityTestVectorizedSqlQueries(
engine,
plannerFactory, plannerFactory,
sqlQuery(groupingDimension) sqlQuery(groupingDimension)
); );
log.info("non-vectorized and vectorized results match");
} }
catch (Throwable ignored) { catch (Throwable ex) {
// the show must go on log.warn(ex, "non-vectorized and vectorized results do not match");
} }
} }

View File

@ -20,7 +20,6 @@
package org.apache.druid.benchmark.query; package org.apache.druid.benchmark.query;
import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonMappingException;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableMap;
@ -301,7 +300,7 @@ public class SqlNestedDataBenchmark
private SqlEngine engine; private SqlEngine engine;
@Nullable @Nullable
private PlannerFactory plannerFactory; private PlannerFactory plannerFactory;
private Closer closer = Closer.create(); private final Closer closer = Closer.create();
@Setup(Level.Trial) @Setup(Level.Trial)
public void setup() public void setup()
@ -345,16 +344,19 @@ public class SqlNestedDataBenchmark
} }
final QueryableIndex index; final QueryableIndex index;
if ("auto".equals(schema)) { if ("auto".equals(schema)) {
List<DimensionSchema> columnSchemas = schemaInfo.getDimensionsSpec() Iterable<DimensionSchema> columnSchemas = Iterables.concat(
schemaInfo.getDimensionsSpec()
.getDimensions() .getDimensions()
.stream() .stream()
.map(x -> new AutoTypeColumnSchema(x.getName(), null)) .map(x -> new AutoTypeColumnSchema(x.getName(), null))
.collect(Collectors.toList()); .collect(Collectors.toList()),
Collections.singletonList(new AutoTypeColumnSchema("nested", null))
);
index = segmentGenerator.generate( index = segmentGenerator.generate(
dataSegment, dataSegment,
schemaInfo, schemaInfo,
DimensionsSpec.builder().setDimensions(columnSchemas).build(), DimensionsSpec.builder().setDimensions(ImmutableList.copyOf(columnSchemas.iterator())).build(),
TransformSpec.NONE, transformSpec,
IndexSpec.builder().withStringDictionaryEncoding(encodingStrategy).build(), IndexSpec.builder().withStringDictionaryEncoding(encodingStrategy).build(),
Granularities.NONE, Granularities.NONE,
rowsPerSegment rowsPerSegment
@ -368,7 +370,7 @@ public class SqlNestedDataBenchmark
dataSegment, dataSegment,
schemaInfo, schemaInfo,
DimensionsSpec.builder().setDimensions(ImmutableList.copyOf(columnSchemas.iterator())).build(), DimensionsSpec.builder().setDimensions(ImmutableList.copyOf(columnSchemas.iterator())).build(),
TransformSpec.NONE, transformSpec,
IndexSpec.builder().withStringDictionaryEncoding(encodingStrategy).build(), IndexSpec.builder().withStringDictionaryEncoding(encodingStrategy).build(),
Granularities.NONE, Granularities.NONE,
rowsPerSegment rowsPerSegment
@ -405,12 +407,14 @@ public class SqlNestedDataBenchmark
try { try {
SqlVectorizedExpressionSanityTest.sanityTestVectorizedSqlQueries( SqlVectorizedExpressionSanityTest.sanityTestVectorizedSqlQueries(
engine,
plannerFactory, plannerFactory,
QUERIES.get(Integer.parseInt(query)) QUERIES.get(Integer.parseInt(query))
); );
log.info("non-vectorized and vectorized results match");
} }
catch (Throwable ex) { catch (Throwable ex) {
log.warn(ex, "failed to sanity check"); log.warn(ex, "non-vectorized and vectorized results do not match");
} }
final String sql = QUERIES.get(Integer.parseInt(query)); final String sql = QUERIES.get(Integer.parseInt(query));
@ -424,11 +428,8 @@ public class SqlNestedDataBenchmark
.writeValueAsString(jsonMapper.readValue((String) planResult[0], List.class)) .writeValueAsString(jsonMapper.readValue((String) planResult[0], List.class))
); );
} }
catch (JsonMappingException e) { catch (JsonProcessingException ex) {
throw new RuntimeException(e); log.warn(ex, "explain failed");
}
catch (JsonProcessingException e) {
throw new RuntimeException(e);
} }
try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, sql, ImmutableMap.of())) { try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, sql, ImmutableMap.of())) {

View File

@ -154,7 +154,7 @@ public class NestedDataColumnIndexerV4 implements DimensionIndexer<StructuredDat
@Override @Override
public int getCardinality() public int getCardinality()
{ {
return globalDictionary.getCardinality(); return DimensionDictionarySelector.CARDINALITY_UNKNOWN;
} }
@Override @Override

View File

@ -69,6 +69,7 @@ public class AutoTypeColumnIndexerTest extends InitializedNullHandlingTest
public void testKeySizeEstimation() public void testKeySizeEstimation()
{ {
AutoTypeColumnIndexer indexer = new AutoTypeColumnIndexer("test", null); AutoTypeColumnIndexer indexer = new AutoTypeColumnIndexer("test", null);
Assert.assertEquals(DimensionDictionarySelector.CARDINALITY_UNKNOWN, indexer.getCardinality());
int baseCardinality = NullHandling.sqlCompatible() ? 0 : 2; int baseCardinality = NullHandling.sqlCompatible() ? 0 : 2;
Assert.assertEquals(baseCardinality, indexer.globalDictionary.getCardinality()); Assert.assertEquals(baseCardinality, indexer.globalDictionary.getCardinality());
@ -134,6 +135,7 @@ public class AutoTypeColumnIndexerTest extends InitializedNullHandlingTest
Assert.assertEquals(48, key.getEffectiveSizeBytes()); Assert.assertEquals(48, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality + 8, indexer.globalDictionary.getCardinality()); Assert.assertEquals(baseCardinality + 8, indexer.globalDictionary.getCardinality());
} }
Assert.assertEquals(DimensionDictionarySelector.CARDINALITY_UNKNOWN, indexer.getCardinality());
} }
@Test @Test

View File

@ -68,71 +68,73 @@ public class NestedDataColumnIndexerV4Test extends InitializedNullHandlingTest
public void testKeySizeEstimation() public void testKeySizeEstimation()
{ {
NestedDataColumnIndexerV4 indexer = new NestedDataColumnIndexerV4(); NestedDataColumnIndexerV4 indexer = new NestedDataColumnIndexerV4();
Assert.assertEquals(DimensionDictionarySelector.CARDINALITY_UNKNOWN, indexer.getCardinality());
int baseCardinality = NullHandling.sqlCompatible() ? 0 : 2; int baseCardinality = NullHandling.sqlCompatible() ? 0 : 2;
Assert.assertEquals(baseCardinality, indexer.getCardinality()); Assert.assertEquals(baseCardinality, indexer.globalDictionary.getCardinality());
EncodedKeyComponent<StructuredData> key; EncodedKeyComponent<StructuredData> key;
// new raw value, new field, new dictionary entry // new raw value, new field, new dictionary entry
key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableMap.of("x", "foo"), false); key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableMap.of("x", "foo"), false);
Assert.assertEquals(228, key.getEffectiveSizeBytes()); Assert.assertEquals(228, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality + 1, indexer.getCardinality()); Assert.assertEquals(baseCardinality + 1, indexer.globalDictionary.getCardinality());
// adding same value only adds estimated size of value itself // adding same value only adds estimated size of value itself
key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableMap.of("x", "foo"), false); key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableMap.of("x", "foo"), false);
Assert.assertEquals(112, key.getEffectiveSizeBytes()); Assert.assertEquals(112, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality + 1, indexer.getCardinality()); Assert.assertEquals(baseCardinality + 1, indexer.globalDictionary.getCardinality());
// new raw value, new field, new dictionary entry // new raw value, new field, new dictionary entry
key = indexer.processRowValsToUnsortedEncodedKeyComponent(10L, false); key = indexer.processRowValsToUnsortedEncodedKeyComponent(10L, false);
Assert.assertEquals(94, key.getEffectiveSizeBytes()); Assert.assertEquals(94, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality + 2, indexer.getCardinality()); Assert.assertEquals(baseCardinality + 2, indexer.globalDictionary.getCardinality());
// adding same value only adds estimated size of value itself // adding same value only adds estimated size of value itself
key = indexer.processRowValsToUnsortedEncodedKeyComponent(10L, false); key = indexer.processRowValsToUnsortedEncodedKeyComponent(10L, false);
Assert.assertEquals(16, key.getEffectiveSizeBytes()); Assert.assertEquals(16, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality + 2, indexer.getCardinality()); Assert.assertEquals(baseCardinality + 2, indexer.globalDictionary.getCardinality());
// new raw value, new dictionary entry // new raw value, new dictionary entry
key = indexer.processRowValsToUnsortedEncodedKeyComponent(11L, false); key = indexer.processRowValsToUnsortedEncodedKeyComponent(11L, false);
Assert.assertEquals(48, key.getEffectiveSizeBytes()); Assert.assertEquals(48, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality + 3, indexer.getCardinality()); Assert.assertEquals(baseCardinality + 3, indexer.globalDictionary.getCardinality());
// new raw value, new fields // new raw value, new fields
key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableList.of(1L, 2L, 10L), false); key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableList.of(1L, 2L, 10L), false);
Assert.assertEquals(276, key.getEffectiveSizeBytes()); Assert.assertEquals(276, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality + 5, indexer.getCardinality()); Assert.assertEquals(baseCardinality + 5, indexer.globalDictionary.getCardinality());
// new raw value, re-use fields and dictionary // new raw value, re-use fields and dictionary
key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableList.of(1L, 2L, 10L), false); key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableList.of(1L, 2L, 10L), false);
Assert.assertEquals(56, key.getEffectiveSizeBytes()); Assert.assertEquals(56, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality + 5, indexer.getCardinality()); Assert.assertEquals(baseCardinality + 5, indexer.globalDictionary.getCardinality());
// new raw value, new fields // new raw value, new fields
key = indexer.processRowValsToUnsortedEncodedKeyComponent( key = indexer.processRowValsToUnsortedEncodedKeyComponent(
ImmutableMap.of("x", ImmutableList.of(1L, 2L, 10L)), ImmutableMap.of("x", ImmutableList.of(1L, 2L, 10L)),
false false
); );
Assert.assertEquals(286, key.getEffectiveSizeBytes()); Assert.assertEquals(286, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality + 5, indexer.getCardinality()); Assert.assertEquals(baseCardinality + 5, indexer.globalDictionary.getCardinality());
// new raw value // new raw value
key = indexer.processRowValsToUnsortedEncodedKeyComponent( key = indexer.processRowValsToUnsortedEncodedKeyComponent(
ImmutableMap.of("x", ImmutableList.of(1L, 2L, 10L)), ImmutableMap.of("x", ImmutableList.of(1L, 2L, 10L)),
false false
); );
Assert.assertEquals(118, key.getEffectiveSizeBytes()); Assert.assertEquals(118, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality + 5, indexer.getCardinality()); Assert.assertEquals(baseCardinality + 5, indexer.globalDictionary.getCardinality());
key = indexer.processRowValsToUnsortedEncodedKeyComponent("", false); key = indexer.processRowValsToUnsortedEncodedKeyComponent("", false);
if (NullHandling.replaceWithDefault()) { if (NullHandling.replaceWithDefault()) {
Assert.assertEquals(0, key.getEffectiveSizeBytes()); Assert.assertEquals(0, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality + 6, indexer.getCardinality()); Assert.assertEquals(baseCardinality + 6, indexer.globalDictionary.getCardinality());
} else { } else {
Assert.assertEquals(104, key.getEffectiveSizeBytes()); Assert.assertEquals(104, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality + 6, indexer.getCardinality()); Assert.assertEquals(baseCardinality + 6, indexer.globalDictionary.getCardinality());
} }
key = indexer.processRowValsToUnsortedEncodedKeyComponent(0, false); key = indexer.processRowValsToUnsortedEncodedKeyComponent(0, false);
if (NullHandling.replaceWithDefault()) { if (NullHandling.replaceWithDefault()) {
Assert.assertEquals(16, key.getEffectiveSizeBytes()); Assert.assertEquals(16, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality + 6, indexer.getCardinality()); Assert.assertEquals(baseCardinality + 6, indexer.globalDictionary.getCardinality());
} else { } else {
Assert.assertEquals(48, key.getEffectiveSizeBytes()); Assert.assertEquals(48, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality + 7, indexer.getCardinality()); Assert.assertEquals(baseCardinality + 7, indexer.globalDictionary.getCardinality());
} }
Assert.assertEquals(DimensionDictionarySelector.CARDINALITY_UNKNOWN, indexer.getCardinality());
} }
@Test @Test

View File

@ -178,10 +178,10 @@ public class SqlVectorizedExpressionSanityTest extends InitializedNullHandlingTe
@Test @Test
public void testQuery() public void testQuery()
{ {
sanityTestVectorizedSqlQueries(PLANNER_FACTORY, query); sanityTestVectorizedSqlQueries(ENGINE, PLANNER_FACTORY, query);
} }
public static void sanityTestVectorizedSqlQueries(PlannerFactory plannerFactory, String query) public static void sanityTestVectorizedSqlQueries(SqlEngine engine, PlannerFactory plannerFactory, String query)
{ {
final Map<String, Object> vector = ImmutableMap.of( final Map<String, Object> vector = ImmutableMap.of(
QueryContexts.VECTORIZE_KEY, "force", QueryContexts.VECTORIZE_KEY, "force",
@ -193,8 +193,8 @@ public class SqlVectorizedExpressionSanityTest extends InitializedNullHandlingTe
); );
try ( try (
final DruidPlanner vectorPlanner = plannerFactory.createPlannerForTesting(ENGINE, query, vector); final DruidPlanner vectorPlanner = plannerFactory.createPlannerForTesting(engine, query, vector);
final DruidPlanner nonVectorPlanner = plannerFactory.createPlannerForTesting(ENGINE, query, nonvector) final DruidPlanner nonVectorPlanner = plannerFactory.createPlannerForTesting(engine, query, nonvector)
) { ) {
final PlannerResult vectorPlan = vectorPlanner.plan(); final PlannerResult vectorPlan = vectorPlanner.plan();
final PlannerResult nonVectorPlan = nonVectorPlanner.plan(); final PlannerResult nonVectorPlan = nonVectorPlanner.plan();