mirror of https://github.com/apache/druid.git
fix NestedDataColumnIndexerV4 to not report cardinality (#16507)
* fix NestedDataColumnIndexerV4 to not report cardinality changes: * fix issue similar to #16489 but for NestedDataColumnIndexerV4, which can report STRING type if it only processes a single type of values. this should be less common than the auto indexer problem * fix some issues with sql benchmarks
This commit is contained in:
parent
3f5f5921e0
commit
fee509df2e
|
@ -360,13 +360,16 @@ public class SqlExpressionBenchmark
|
|||
|
||||
try {
|
||||
SqlVectorizedExpressionSanityTest.sanityTestVectorizedSqlQueries(
|
||||
engine,
|
||||
plannerFactory,
|
||||
QUERIES.get(Integer.parseInt(query))
|
||||
);
|
||||
log.info("non-vectorized and vectorized results match");
|
||||
}
|
||||
catch (Throwable ignored) {
|
||||
// the show must go on
|
||||
catch (Throwable ex) {
|
||||
log.warn(ex, "non-vectorized and vectorized results do not match");
|
||||
}
|
||||
|
||||
final String sql = QUERIES.get(Integer.parseInt(query));
|
||||
|
||||
try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, "EXPLAIN PLAN FOR " + sql, ImmutableMap.of("useNativeQueryExplain", true))) {
|
||||
|
@ -378,8 +381,8 @@ public class SqlExpressionBenchmark
|
|||
.writeValueAsString(jsonMapper.readValue((String) planResult[0], List.class))
|
||||
);
|
||||
}
|
||||
catch (JsonProcessingException ignored) {
|
||||
|
||||
catch (JsonProcessingException ex) {
|
||||
log.warn(ex, "explain failed");
|
||||
}
|
||||
|
||||
try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, sql, ImmutableMap.of())) {
|
||||
|
@ -393,8 +396,8 @@ public class SqlExpressionBenchmark
|
|||
}
|
||||
log.info("Total result row count:" + rowCounter);
|
||||
}
|
||||
catch (Throwable ignored) {
|
||||
|
||||
catch (Throwable ex) {
|
||||
log.warn(ex, "failed to count rows");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -29,6 +29,7 @@ import org.apache.druid.java.util.common.StringUtils;
|
|||
import org.apache.druid.java.util.common.granularity.Granularities;
|
||||
import org.apache.druid.java.util.common.guava.Sequence;
|
||||
import org.apache.druid.java.util.common.io.Closer;
|
||||
import org.apache.druid.java.util.common.logger.Logger;
|
||||
import org.apache.druid.math.expr.ExpressionProcessing;
|
||||
import org.apache.druid.query.DruidProcessingConfig;
|
||||
import org.apache.druid.query.QueryRunnerFactoryConglomerate;
|
||||
|
@ -91,6 +92,8 @@ public class SqlGroupByBenchmark
|
|||
NestedDataModule.registerHandlersAndSerde();
|
||||
}
|
||||
|
||||
private static final Logger log = new Logger(SqlGroupByBenchmark.class);
|
||||
|
||||
private static final DruidProcessingConfig PROCESSING_CONFIG = new DruidProcessingConfig()
|
||||
{
|
||||
@Override
|
||||
|
@ -349,12 +352,14 @@ public class SqlGroupByBenchmark
|
|||
|
||||
try {
|
||||
SqlVectorizedExpressionSanityTest.sanityTestVectorizedSqlQueries(
|
||||
engine,
|
||||
plannerFactory,
|
||||
sqlQuery(groupingDimension)
|
||||
);
|
||||
log.info("non-vectorized and vectorized results match");
|
||||
}
|
||||
catch (Throwable ignored) {
|
||||
// the show must go on
|
||||
catch (Throwable ex) {
|
||||
log.warn(ex, "non-vectorized and vectorized results do not match");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -20,7 +20,6 @@
|
|||
package org.apache.druid.benchmark.query;
|
||||
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.databind.JsonMappingException;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
|
@ -301,7 +300,7 @@ public class SqlNestedDataBenchmark
|
|||
private SqlEngine engine;
|
||||
@Nullable
|
||||
private PlannerFactory plannerFactory;
|
||||
private Closer closer = Closer.create();
|
||||
private final Closer closer = Closer.create();
|
||||
|
||||
@Setup(Level.Trial)
|
||||
public void setup()
|
||||
|
@ -345,16 +344,19 @@ public class SqlNestedDataBenchmark
|
|||
}
|
||||
final QueryableIndex index;
|
||||
if ("auto".equals(schema)) {
|
||||
List<DimensionSchema> columnSchemas = schemaInfo.getDimensionsSpec()
|
||||
.getDimensions()
|
||||
.stream()
|
||||
.map(x -> new AutoTypeColumnSchema(x.getName(), null))
|
||||
.collect(Collectors.toList());
|
||||
Iterable<DimensionSchema> columnSchemas = Iterables.concat(
|
||||
schemaInfo.getDimensionsSpec()
|
||||
.getDimensions()
|
||||
.stream()
|
||||
.map(x -> new AutoTypeColumnSchema(x.getName(), null))
|
||||
.collect(Collectors.toList()),
|
||||
Collections.singletonList(new AutoTypeColumnSchema("nested", null))
|
||||
);
|
||||
index = segmentGenerator.generate(
|
||||
dataSegment,
|
||||
schemaInfo,
|
||||
DimensionsSpec.builder().setDimensions(columnSchemas).build(),
|
||||
TransformSpec.NONE,
|
||||
DimensionsSpec.builder().setDimensions(ImmutableList.copyOf(columnSchemas.iterator())).build(),
|
||||
transformSpec,
|
||||
IndexSpec.builder().withStringDictionaryEncoding(encodingStrategy).build(),
|
||||
Granularities.NONE,
|
||||
rowsPerSegment
|
||||
|
@ -368,7 +370,7 @@ public class SqlNestedDataBenchmark
|
|||
dataSegment,
|
||||
schemaInfo,
|
||||
DimensionsSpec.builder().setDimensions(ImmutableList.copyOf(columnSchemas.iterator())).build(),
|
||||
TransformSpec.NONE,
|
||||
transformSpec,
|
||||
IndexSpec.builder().withStringDictionaryEncoding(encodingStrategy).build(),
|
||||
Granularities.NONE,
|
||||
rowsPerSegment
|
||||
|
@ -405,12 +407,14 @@ public class SqlNestedDataBenchmark
|
|||
|
||||
try {
|
||||
SqlVectorizedExpressionSanityTest.sanityTestVectorizedSqlQueries(
|
||||
engine,
|
||||
plannerFactory,
|
||||
QUERIES.get(Integer.parseInt(query))
|
||||
);
|
||||
log.info("non-vectorized and vectorized results match");
|
||||
}
|
||||
catch (Throwable ex) {
|
||||
log.warn(ex, "failed to sanity check");
|
||||
log.warn(ex, "non-vectorized and vectorized results do not match");
|
||||
}
|
||||
|
||||
final String sql = QUERIES.get(Integer.parseInt(query));
|
||||
|
@ -424,11 +428,8 @@ public class SqlNestedDataBenchmark
|
|||
.writeValueAsString(jsonMapper.readValue((String) planResult[0], List.class))
|
||||
);
|
||||
}
|
||||
catch (JsonMappingException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
catch (JsonProcessingException e) {
|
||||
throw new RuntimeException(e);
|
||||
catch (JsonProcessingException ex) {
|
||||
log.warn(ex, "explain failed");
|
||||
}
|
||||
|
||||
try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, sql, ImmutableMap.of())) {
|
||||
|
|
|
@ -154,7 +154,7 @@ public class NestedDataColumnIndexerV4 implements DimensionIndexer<StructuredDat
|
|||
@Override
|
||||
public int getCardinality()
|
||||
{
|
||||
return globalDictionary.getCardinality();
|
||||
return DimensionDictionarySelector.CARDINALITY_UNKNOWN;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -69,6 +69,7 @@ public class AutoTypeColumnIndexerTest extends InitializedNullHandlingTest
|
|||
public void testKeySizeEstimation()
|
||||
{
|
||||
AutoTypeColumnIndexer indexer = new AutoTypeColumnIndexer("test", null);
|
||||
Assert.assertEquals(DimensionDictionarySelector.CARDINALITY_UNKNOWN, indexer.getCardinality());
|
||||
int baseCardinality = NullHandling.sqlCompatible() ? 0 : 2;
|
||||
Assert.assertEquals(baseCardinality, indexer.globalDictionary.getCardinality());
|
||||
|
||||
|
@ -134,6 +135,7 @@ public class AutoTypeColumnIndexerTest extends InitializedNullHandlingTest
|
|||
Assert.assertEquals(48, key.getEffectiveSizeBytes());
|
||||
Assert.assertEquals(baseCardinality + 8, indexer.globalDictionary.getCardinality());
|
||||
}
|
||||
Assert.assertEquals(DimensionDictionarySelector.CARDINALITY_UNKNOWN, indexer.getCardinality());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
@ -68,71 +68,73 @@ public class NestedDataColumnIndexerV4Test extends InitializedNullHandlingTest
|
|||
public void testKeySizeEstimation()
|
||||
{
|
||||
NestedDataColumnIndexerV4 indexer = new NestedDataColumnIndexerV4();
|
||||
Assert.assertEquals(DimensionDictionarySelector.CARDINALITY_UNKNOWN, indexer.getCardinality());
|
||||
int baseCardinality = NullHandling.sqlCompatible() ? 0 : 2;
|
||||
Assert.assertEquals(baseCardinality, indexer.getCardinality());
|
||||
Assert.assertEquals(baseCardinality, indexer.globalDictionary.getCardinality());
|
||||
|
||||
EncodedKeyComponent<StructuredData> key;
|
||||
// new raw value, new field, new dictionary entry
|
||||
key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableMap.of("x", "foo"), false);
|
||||
Assert.assertEquals(228, key.getEffectiveSizeBytes());
|
||||
Assert.assertEquals(baseCardinality + 1, indexer.getCardinality());
|
||||
Assert.assertEquals(baseCardinality + 1, indexer.globalDictionary.getCardinality());
|
||||
// adding same value only adds estimated size of value itself
|
||||
key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableMap.of("x", "foo"), false);
|
||||
Assert.assertEquals(112, key.getEffectiveSizeBytes());
|
||||
Assert.assertEquals(baseCardinality + 1, indexer.getCardinality());
|
||||
Assert.assertEquals(baseCardinality + 1, indexer.globalDictionary.getCardinality());
|
||||
// new raw value, new field, new dictionary entry
|
||||
key = indexer.processRowValsToUnsortedEncodedKeyComponent(10L, false);
|
||||
Assert.assertEquals(94, key.getEffectiveSizeBytes());
|
||||
Assert.assertEquals(baseCardinality + 2, indexer.getCardinality());
|
||||
Assert.assertEquals(baseCardinality + 2, indexer.globalDictionary.getCardinality());
|
||||
// adding same value only adds estimated size of value itself
|
||||
key = indexer.processRowValsToUnsortedEncodedKeyComponent(10L, false);
|
||||
Assert.assertEquals(16, key.getEffectiveSizeBytes());
|
||||
Assert.assertEquals(baseCardinality + 2, indexer.getCardinality());
|
||||
Assert.assertEquals(baseCardinality + 2, indexer.globalDictionary.getCardinality());
|
||||
// new raw value, new dictionary entry
|
||||
key = indexer.processRowValsToUnsortedEncodedKeyComponent(11L, false);
|
||||
Assert.assertEquals(48, key.getEffectiveSizeBytes());
|
||||
Assert.assertEquals(baseCardinality + 3, indexer.getCardinality());
|
||||
Assert.assertEquals(baseCardinality + 3, indexer.globalDictionary.getCardinality());
|
||||
|
||||
// new raw value, new fields
|
||||
key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableList.of(1L, 2L, 10L), false);
|
||||
Assert.assertEquals(276, key.getEffectiveSizeBytes());
|
||||
Assert.assertEquals(baseCardinality + 5, indexer.getCardinality());
|
||||
Assert.assertEquals(baseCardinality + 5, indexer.globalDictionary.getCardinality());
|
||||
// new raw value, re-use fields and dictionary
|
||||
key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableList.of(1L, 2L, 10L), false);
|
||||
Assert.assertEquals(56, key.getEffectiveSizeBytes());
|
||||
Assert.assertEquals(baseCardinality + 5, indexer.getCardinality());
|
||||
Assert.assertEquals(baseCardinality + 5, indexer.globalDictionary.getCardinality());
|
||||
// new raw value, new fields
|
||||
key = indexer.processRowValsToUnsortedEncodedKeyComponent(
|
||||
ImmutableMap.of("x", ImmutableList.of(1L, 2L, 10L)),
|
||||
false
|
||||
);
|
||||
Assert.assertEquals(286, key.getEffectiveSizeBytes());
|
||||
Assert.assertEquals(baseCardinality + 5, indexer.getCardinality());
|
||||
Assert.assertEquals(baseCardinality + 5, indexer.globalDictionary.getCardinality());
|
||||
// new raw value
|
||||
key = indexer.processRowValsToUnsortedEncodedKeyComponent(
|
||||
ImmutableMap.of("x", ImmutableList.of(1L, 2L, 10L)),
|
||||
false
|
||||
);
|
||||
Assert.assertEquals(118, key.getEffectiveSizeBytes());
|
||||
Assert.assertEquals(baseCardinality + 5, indexer.getCardinality());
|
||||
Assert.assertEquals(baseCardinality + 5, indexer.globalDictionary.getCardinality());
|
||||
|
||||
key = indexer.processRowValsToUnsortedEncodedKeyComponent("", false);
|
||||
if (NullHandling.replaceWithDefault()) {
|
||||
Assert.assertEquals(0, key.getEffectiveSizeBytes());
|
||||
Assert.assertEquals(baseCardinality + 6, indexer.getCardinality());
|
||||
Assert.assertEquals(baseCardinality + 6, indexer.globalDictionary.getCardinality());
|
||||
} else {
|
||||
Assert.assertEquals(104, key.getEffectiveSizeBytes());
|
||||
Assert.assertEquals(baseCardinality + 6, indexer.getCardinality());
|
||||
Assert.assertEquals(baseCardinality + 6, indexer.globalDictionary.getCardinality());
|
||||
}
|
||||
|
||||
key = indexer.processRowValsToUnsortedEncodedKeyComponent(0, false);
|
||||
if (NullHandling.replaceWithDefault()) {
|
||||
Assert.assertEquals(16, key.getEffectiveSizeBytes());
|
||||
Assert.assertEquals(baseCardinality + 6, indexer.getCardinality());
|
||||
Assert.assertEquals(baseCardinality + 6, indexer.globalDictionary.getCardinality());
|
||||
} else {
|
||||
Assert.assertEquals(48, key.getEffectiveSizeBytes());
|
||||
Assert.assertEquals(baseCardinality + 7, indexer.getCardinality());
|
||||
Assert.assertEquals(baseCardinality + 7, indexer.globalDictionary.getCardinality());
|
||||
}
|
||||
Assert.assertEquals(DimensionDictionarySelector.CARDINALITY_UNKNOWN, indexer.getCardinality());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
@ -178,10 +178,10 @@ public class SqlVectorizedExpressionSanityTest extends InitializedNullHandlingTe
|
|||
@Test
|
||||
public void testQuery()
|
||||
{
|
||||
sanityTestVectorizedSqlQueries(PLANNER_FACTORY, query);
|
||||
sanityTestVectorizedSqlQueries(ENGINE, PLANNER_FACTORY, query);
|
||||
}
|
||||
|
||||
public static void sanityTestVectorizedSqlQueries(PlannerFactory plannerFactory, String query)
|
||||
public static void sanityTestVectorizedSqlQueries(SqlEngine engine, PlannerFactory plannerFactory, String query)
|
||||
{
|
||||
final Map<String, Object> vector = ImmutableMap.of(
|
||||
QueryContexts.VECTORIZE_KEY, "force",
|
||||
|
@ -193,8 +193,8 @@ public class SqlVectorizedExpressionSanityTest extends InitializedNullHandlingTe
|
|||
);
|
||||
|
||||
try (
|
||||
final DruidPlanner vectorPlanner = plannerFactory.createPlannerForTesting(ENGINE, query, vector);
|
||||
final DruidPlanner nonVectorPlanner = plannerFactory.createPlannerForTesting(ENGINE, query, nonvector)
|
||||
final DruidPlanner vectorPlanner = plannerFactory.createPlannerForTesting(engine, query, vector);
|
||||
final DruidPlanner nonVectorPlanner = plannerFactory.createPlannerForTesting(engine, query, nonvector)
|
||||
) {
|
||||
final PlannerResult vectorPlan = vectorPlanner.plan();
|
||||
final PlannerResult nonVectorPlan = nonVectorPlanner.plan();
|
||||
|
|
Loading…
Reference in New Issue