mirror of https://github.com/apache/druid.git
benchmark schema with numeric dimensions and null column values (#9036)
* benchmark schema with null column values * oops * adjustments * rename again, different null percentage so rows more variety * more schema
This commit is contained in:
parent
3c31493772
commit
c2e9ab8100
|
@ -19,6 +19,12 @@
|
|||
|
||||
package org.apache.druid.benchmark.datagen;
|
||||
|
||||
import org.apache.druid.data.input.impl.DimensionSchema;
|
||||
import org.apache.druid.data.input.impl.DoubleDimensionSchema;
|
||||
import org.apache.druid.data.input.impl.FloatDimensionSchema;
|
||||
import org.apache.druid.data.input.impl.LongDimensionSchema;
|
||||
import org.apache.druid.data.input.impl.StringDimensionSchema;
|
||||
import org.apache.druid.java.util.common.IAE;
|
||||
import org.apache.druid.segment.column.ValueType;
|
||||
|
||||
import java.util.List;
|
||||
|
@ -144,6 +150,22 @@ public class BenchmarkColumnSchema
|
|||
return new BenchmarkColumnValueGenerator(this, seed);
|
||||
}
|
||||
|
||||
public DimensionSchema getDimensionSchema()
|
||||
{
|
||||
switch (type) {
|
||||
case LONG:
|
||||
return new LongDimensionSchema(name);
|
||||
case FLOAT:
|
||||
return new FloatDimensionSchema(name);
|
||||
case DOUBLE:
|
||||
return new DoubleDimensionSchema(name);
|
||||
case STRING:
|
||||
return new StringDimensionSchema(name);
|
||||
default:
|
||||
throw new IAE("unable to make dimension schema for %s", type);
|
||||
}
|
||||
}
|
||||
|
||||
public String getName()
|
||||
{
|
||||
return name;
|
||||
|
|
|
@ -116,6 +116,13 @@ public class BenchmarkColumnValueGenerator
|
|||
ret = Long.parseLong(input.toString());
|
||||
}
|
||||
break;
|
||||
case DOUBLE:
|
||||
if (input instanceof Number) {
|
||||
ret = ((Number) input).doubleValue();
|
||||
} else {
|
||||
ret = Double.parseDouble(input.toString());
|
||||
}
|
||||
break;
|
||||
case FLOAT:
|
||||
if (input instanceof Number) {
|
||||
ret = ((Number) input).floatValue();
|
||||
|
|
|
@ -19,10 +19,13 @@
|
|||
|
||||
package org.apache.druid.benchmark.datagen;
|
||||
|
||||
import org.apache.druid.data.input.impl.DimensionSchema;
|
||||
import org.apache.druid.data.input.impl.DimensionsSpec;
|
||||
import org.apache.druid.query.aggregation.AggregatorFactory;
|
||||
import org.joda.time.Interval;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class BenchmarkSchemaInfo
|
||||
{
|
||||
|
@ -49,6 +52,16 @@ public class BenchmarkSchemaInfo
|
|||
return columnSchemas;
|
||||
}
|
||||
|
||||
public DimensionsSpec getDimensionsSpec()
|
||||
{
|
||||
List<DimensionSchema> specs = getColumnSchemas().stream()
|
||||
.filter(x -> !x.isMetric())
|
||||
.map(BenchmarkColumnSchema::getDimensionSchema)
|
||||
.collect(Collectors.toList());
|
||||
|
||||
return new DimensionsSpec(specs);
|
||||
}
|
||||
|
||||
public List<AggregatorFactory> getAggs()
|
||||
{
|
||||
return aggs;
|
||||
|
|
|
@ -225,4 +225,73 @@ public class BenchmarkSchemas
|
|||
);
|
||||
SCHEMA_MAP.put("rollo", rolloSchema);
|
||||
}
|
||||
|
||||
static { // simple schema with null valued rows, no aggs on numeric columns
|
||||
List<BenchmarkColumnSchema> nullsSchemaColumns = ImmutableList.of(
|
||||
// string dims with nulls
|
||||
BenchmarkColumnSchema.makeZipf("stringZipf", ValueType.STRING, false, 1, 0.8, 1, 101, 1.5),
|
||||
BenchmarkColumnSchema.makeDiscreteUniform("stringUniform", ValueType.STRING, false, 1, 0.3, 1, 100000),
|
||||
BenchmarkColumnSchema.makeSequential("stringSequential", ValueType.STRING, false, 1, 0.5, 0, 1000),
|
||||
|
||||
// numeric dims with nulls
|
||||
BenchmarkColumnSchema.makeSequential("longSequential", ValueType.LONG, false, 1, 0.45, 0, 10000),
|
||||
BenchmarkColumnSchema.makeDiscreteUniform("longUniform", ValueType.LONG, false, 1, 0.25, 0, 500),
|
||||
BenchmarkColumnSchema.makeZipf("doubleZipf", ValueType.DOUBLE, false, 1, 0.1, 0, 1000, 2.0),
|
||||
BenchmarkColumnSchema.makeZipf("floatZipf", ValueType.FLOAT, false, 1, 0.1, 0, 1000, 2.0)
|
||||
);
|
||||
|
||||
List<AggregatorFactory> simpleNullsSchemaIngestAggs = new ArrayList<>();
|
||||
simpleNullsSchemaIngestAggs.add(new CountAggregatorFactory("rows"));
|
||||
|
||||
Interval nullsSchemaDataInterval = Intervals.of("2000-01-01/P1D");
|
||||
|
||||
BenchmarkSchemaInfo nullsSchema = new BenchmarkSchemaInfo(
|
||||
nullsSchemaColumns,
|
||||
simpleNullsSchemaIngestAggs,
|
||||
nullsSchemaDataInterval,
|
||||
false
|
||||
);
|
||||
|
||||
SCHEMA_MAP.put("nulls", nullsSchema);
|
||||
}
|
||||
|
||||
static { // simple schema with null valued rows, no aggs on numeric columns
|
||||
List<BenchmarkColumnSchema> nullsSchemaColumns = ImmutableList.of(
|
||||
// string dims
|
||||
BenchmarkColumnSchema.makeZipf("stringZipf", ValueType.STRING, false, 1, null, 1, 101, 1.5),
|
||||
BenchmarkColumnSchema.makeDiscreteUniform("stringUniform", ValueType.STRING, false, 1, null, 1, 100000),
|
||||
BenchmarkColumnSchema.makeSequential("stringSequential", ValueType.STRING, false, 1, null, 0, 1000),
|
||||
|
||||
// numeric dims
|
||||
BenchmarkColumnSchema.makeSequential("longSequential", ValueType.LONG, false, 1, null, 0, 10000),
|
||||
BenchmarkColumnSchema.makeDiscreteUniform("longUniform", ValueType.LONG, false, 1, null, 0, 500),
|
||||
BenchmarkColumnSchema.makeZipf("doubleZipf", ValueType.DOUBLE, false, 1, null, 0, 1000, 2.0),
|
||||
BenchmarkColumnSchema.makeZipf("floatZipf", ValueType.FLOAT, false, 1, null, 0, 1000, 2.0),
|
||||
|
||||
// string dims with nulls
|
||||
BenchmarkColumnSchema.makeZipf("stringZipfWithNulls", ValueType.STRING, false, 1, 0.8, 1, 101, 1.5),
|
||||
BenchmarkColumnSchema.makeDiscreteUniform("stringUniformWithNulls", ValueType.STRING, false, 1, 0.3, 1, 100000),
|
||||
BenchmarkColumnSchema.makeSequential("stringSequentialWithNulls", ValueType.STRING, false, 1, 0.5, 0, 1000),
|
||||
|
||||
// numeric dims with nulls
|
||||
BenchmarkColumnSchema.makeSequential("longSequentialWithNulls", ValueType.LONG, false, 1, 0.45, 0, 10000),
|
||||
BenchmarkColumnSchema.makeDiscreteUniform("longUniformWithNulls", ValueType.LONG, false, 1, 0.25, 0, 500),
|
||||
BenchmarkColumnSchema.makeZipf("doubleZipfWithNulls", ValueType.DOUBLE, false, 1, 0.1, 0, 1000, 2.0),
|
||||
BenchmarkColumnSchema.makeZipf("floatZipfWithNulls", ValueType.FLOAT, false, 1, 0.1, 0, 1000, 2.0)
|
||||
);
|
||||
|
||||
List<AggregatorFactory> simpleNullsSchemaIngestAggs = new ArrayList<>();
|
||||
simpleNullsSchemaIngestAggs.add(new CountAggregatorFactory("rows"));
|
||||
|
||||
Interval nullsSchemaDataInterval = Intervals.of("2000-01-01/P1D");
|
||||
|
||||
BenchmarkSchemaInfo nullsSchema = new BenchmarkSchemaInfo(
|
||||
nullsSchemaColumns,
|
||||
simpleNullsSchemaIngestAggs,
|
||||
nullsSchemaDataInterval,
|
||||
false
|
||||
);
|
||||
|
||||
SCHEMA_MAP.put("nulls-and-non-nulls", nullsSchema);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,16 +19,9 @@
|
|||
|
||||
package org.apache.druid.benchmark.datagen;
|
||||
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.hash.Hashing;
|
||||
import org.apache.druid.common.config.NullHandling;
|
||||
import org.apache.druid.data.input.InputRow;
|
||||
import org.apache.druid.data.input.impl.DimensionSchema;
|
||||
import org.apache.druid.data.input.impl.DimensionsSpec;
|
||||
import org.apache.druid.data.input.impl.DoubleDimensionSchema;
|
||||
import org.apache.druid.data.input.impl.FloatDimensionSchema;
|
||||
import org.apache.druid.data.input.impl.LongDimensionSchema;
|
||||
import org.apache.druid.data.input.impl.StringDimensionSchema;
|
||||
import org.apache.druid.java.util.common.FileUtils;
|
||||
import org.apache.druid.java.util.common.ISE;
|
||||
import org.apache.druid.java.util.common.StringUtils;
|
||||
|
@ -71,7 +64,7 @@ public class SegmentGenerator implements Closeable
|
|||
static {
|
||||
NullHandling.initializeForTests();
|
||||
}
|
||||
|
||||
|
||||
private final File cacheDir;
|
||||
private final boolean cleanupCacheDir;
|
||||
|
||||
|
@ -145,30 +138,8 @@ public class SegmentGenerator implements Closeable
|
|||
numRows
|
||||
);
|
||||
|
||||
final List<DimensionSchema> dimensions = new ArrayList<>();
|
||||
for (BenchmarkColumnSchema columnSchema : schemaInfo.getColumnSchemas()) {
|
||||
if (schemaInfo.getAggs().stream().noneMatch(agg -> agg.getName().equals(columnSchema.getName()))) {
|
||||
switch (columnSchema.getType()) {
|
||||
case STRING:
|
||||
dimensions.add(new StringDimensionSchema(columnSchema.getName()));
|
||||
break;
|
||||
case LONG:
|
||||
dimensions.add(new LongDimensionSchema(columnSchema.getName()));
|
||||
break;
|
||||
case DOUBLE:
|
||||
dimensions.add(new DoubleDimensionSchema(columnSchema.getName()));
|
||||
break;
|
||||
case FLOAT:
|
||||
dimensions.add(new FloatDimensionSchema(columnSchema.getName()));
|
||||
break;
|
||||
default:
|
||||
throw new ISE("Unhandleable type[%s]", columnSchema.getType());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
|
||||
.withDimensionsSpec(new DimensionsSpec(dimensions, ImmutableList.of(), ImmutableList.of()))
|
||||
.withDimensionsSpec(schemaInfo.getDimensionsSpec())
|
||||
.withMetrics(schemaInfo.getAggsArray())
|
||||
.withRollup(schemaInfo.isWithRollup())
|
||||
.withQueryGranularity(granularity)
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
|
||||
package org.apache.druid.benchmark.query;
|
||||
|
||||
import com.fasterxml.jackson.databind.InjectableValues;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.fasterxml.jackson.dataformat.smile.SmileFactory;
|
||||
import com.google.common.base.Supplier;
|
||||
|
@ -41,6 +42,7 @@ import org.apache.druid.java.util.common.granularity.Granularities;
|
|||
import org.apache.druid.java.util.common.granularity.Granularity;
|
||||
import org.apache.druid.java.util.common.guava.Sequence;
|
||||
import org.apache.druid.java.util.common.logger.Logger;
|
||||
import org.apache.druid.math.expr.ExprMacroTable;
|
||||
import org.apache.druid.offheap.OffheapBufferGenerator;
|
||||
import org.apache.druid.query.DruidProcessingConfig;
|
||||
import org.apache.druid.query.FinalizeResultsQueryRunner;
|
||||
|
@ -58,6 +60,7 @@ import org.apache.druid.query.aggregation.LongSumAggregatorFactory;
|
|||
import org.apache.druid.query.aggregation.hyperloglog.HyperUniquesSerde;
|
||||
import org.apache.druid.query.context.ResponseContext;
|
||||
import org.apache.druid.query.dimension.DefaultDimensionSpec;
|
||||
import org.apache.druid.query.expression.TestExprMacroTable;
|
||||
import org.apache.druid.query.filter.BoundDimFilter;
|
||||
import org.apache.druid.query.groupby.GroupByQuery;
|
||||
import org.apache.druid.query.groupby.GroupByQueryConfig;
|
||||
|
@ -166,7 +169,11 @@ public class GroupByBenchmark
|
|||
static {
|
||||
JSON_MAPPER = new DefaultObjectMapper();
|
||||
INDEX_IO = new IndexIO(
|
||||
JSON_MAPPER,
|
||||
JSON_MAPPER.setInjectableValues(
|
||||
new InjectableValues.Std()
|
||||
.addValue(ExprMacroTable.class.getName(), TestExprMacroTable.INSTANCE)
|
||||
.addValue(ObjectMapper.class.getName(), JSON_MAPPER)
|
||||
),
|
||||
new ColumnConfig()
|
||||
{
|
||||
@Override
|
||||
|
@ -391,6 +398,34 @@ public class GroupByBenchmark
|
|||
simpleFloatQueries.put("A", queryA);
|
||||
}
|
||||
SCHEMA_QUERY_MAP.put("simpleFloat", simpleFloatQueries);
|
||||
|
||||
// simple one column schema, for testing performance difference between querying on numeric values as Strings and
|
||||
// directly as longs
|
||||
Map<String, GroupByQuery> nullQueries = new LinkedHashMap<>();
|
||||
BenchmarkSchemaInfo nullSchema = BenchmarkSchemas.SCHEMA_MAP.get("nulls");
|
||||
|
||||
{ // simple-null
|
||||
QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(nullSchema.getDataInterval()));
|
||||
List<AggregatorFactory> queryAggs = new ArrayList<>();
|
||||
queryAggs.add(new DoubleSumAggregatorFactory(
|
||||
"doubleSum",
|
||||
"doubleZipf"
|
||||
));
|
||||
GroupByQuery queryA = GroupByQuery
|
||||
.builder()
|
||||
.setDataSource("blah")
|
||||
.setQuerySegmentSpec(intervalSpec)
|
||||
.setDimensions(new DefaultDimensionSpec("stringZipf", "stringZipf", ValueType.STRING))
|
||||
.setAggregatorSpecs(
|
||||
queryAggs
|
||||
)
|
||||
.setGranularity(Granularity.fromString(queryGranularity))
|
||||
.setContext(ImmutableMap.of("vectorize", vectorize))
|
||||
.build();
|
||||
|
||||
nullQueries.put("A", queryA);
|
||||
}
|
||||
SCHEMA_QUERY_MAP.put("nulls", nullQueries);
|
||||
}
|
||||
|
||||
@Setup(Level.Trial)
|
||||
|
@ -549,6 +584,7 @@ public class GroupByBenchmark
|
|||
return new IncrementalIndex.Builder()
|
||||
.setIndexSchema(
|
||||
new IncrementalIndexSchema.Builder()
|
||||
.withDimensionsSpec(schemaInfo.getDimensionsSpec())
|
||||
.withMetrics(schemaInfo.getAggsArray())
|
||||
.withRollup(withRollup)
|
||||
.build()
|
||||
|
|
Loading…
Reference in New Issue