diff --git a/.travis.yml b/.travis.yml
index 0b8c577730d..a8aca05e9f9 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -60,7 +60,7 @@ matrix:
       before_script: unset _JAVA_OPTIONS
       script:
         # Set MAVEN_OPTS for Surefire launcher
-        - MAVEN_OPTS='-Xmx512m' mvn test -B -pl processing
+        - MAVEN_OPTS='-Xmx800m' mvn test -B -pl processing
         - sh -c "dmesg | egrep -i '(oom|out of memory|kill process|killed).*' -C 1 || exit 0"
         - free -m
@@ -71,7 +71,7 @@ matrix:
       before_script: unset _JAVA_OPTIONS
       script:
         # Set MAVEN_OPTS for Surefire launcher
-        - MAVEN_OPTS='-Xmx512m' mvn test -B -Ddruid.generic.useDefaultValueForNull=false -pl processing
+        - MAVEN_OPTS='-Xmx800m' mvn test -B -Ddruid.generic.useDefaultValueForNull=false -pl processing
         - sh -c "dmesg | egrep -i '(oom|out of memory|kill process|killed).*' -C 1 || exit 0"
         - free -m
@@ -82,7 +82,7 @@ matrix:
       before_script: unset _JAVA_OPTIONS
       script:
         # Set MAVEN_OPTS for Surefire launcher
-        - MAVEN_OPTS='-Xmx512m' mvn test -B -pl server
+        - MAVEN_OPTS='-Xmx800m' mvn test -B -pl server

     # server module test with SQL Compatibility enabled
     - env:
@@ -91,7 +91,7 @@ matrix:
       before_script: unset _JAVA_OPTIONS
       script:
         # Set MAVEN_OPTS for Surefire launcher
-        - MAVEN_OPTS='-Xmx512m' mvn test -B -pl server -Ddruid.generic.useDefaultValueForNull=false
+        - MAVEN_OPTS='-Xmx800m' mvn test -B -pl server -Ddruid.generic.useDefaultValueForNull=false

     # other modules test
@@ -101,7 +101,7 @@ matrix:
       before_script: unset _JAVA_OPTIONS
       script:
         # Set MAVEN_OPTS for Surefire launcher
-        - MAVEN_OPTS='-Xmx512m' mvn test -B -pl '!processing,!server'
+        - MAVEN_OPTS='-Xmx800m' mvn test -B -pl '!processing,!server'
         - sh -c "dmesg | egrep -i '(oom|out of memory|kill process|killed).*' -C 1 || exit 0"
         - free -m
@@ -112,7 +112,7 @@ matrix:
       before_script: unset _JAVA_OPTIONS
       script:
         # Set MAVEN_OPTS for Surefire launcher
-        - MAVEN_OPTS='-Xmx512m' mvn test -B -Ddruid.generic.useDefaultValueForNull=false -pl '!processing,!server'
+        - MAVEN_OPTS='-Xmx800m' mvn test -B -Ddruid.generic.useDefaultValueForNull=false -pl '!processing,!server'
         - sh -c "dmesg | egrep -i '(oom|out of memory|kill process|killed).*' -C 1 || exit 0"
         - free -m
diff --git a/benchmarks/pom.xml b/benchmarks/pom.xml
index ffc89334d32..37f657b0a96 100644
--- a/benchmarks/pom.xml
+++ b/benchmarks/pom.xml
@@ -83,12 +83,24 @@
             <version>${project.parent.version}</version>
             <type>test-jar</type>
         </dependency>
+        <dependency>
+            <groupId>org.apache.druid</groupId>
+            <artifactId>druid-core</artifactId>
+            <version>${project.parent.version}</version>
+            <type>test-jar</type>
+        </dependency>
         <dependency>
             <groupId>org.apache.druid</groupId>
             <artifactId>druid-processing</artifactId>
             <version>${project.parent.version}</version>
             <type>test-jar</type>
         </dependency>
+        <dependency>
+            <groupId>org.apache.druid</groupId>
+            <artifactId>druid-server</artifactId>
+            <version>${project.parent.version}</version>
+            <type>test-jar</type>
+        </dependency>
         <dependency>
             <groupId>org.apache.druid</groupId>
             <artifactId>druid-sql</artifactId>
diff --git a/benchmarks/src/main/java/org/apache/druid/benchmark/ExpressionAggregationBenchmark.java b/benchmarks/src/main/java/org/apache/druid/benchmark/ExpressionAggregationBenchmark.java
index 69b8f80bccf..bde4da9f131 100644
--- a/benchmarks/src/main/java/org/apache/druid/benchmark/ExpressionAggregationBenchmark.java
+++ b/benchmarks/src/main/java/org/apache/druid/benchmark/ExpressionAggregationBenchmark.java
@@ -27,6 +27,7 @@ import org.apache.druid.benchmark.datagen.SegmentGenerator;
 import org.apache.druid.java.util.common.Intervals;
 import org.apache.druid.java.util.common.granularity.Granularities;
 import org.apache.druid.java.util.common.guava.Sequence;
+import org.apache.druid.java.util.common.io.Closer;
 import org.apache.druid.js.JavaScriptConfig;
 import org.apache.druid.query.aggregation.BufferAggregator;
 import org.apache.druid.query.aggregation.DoubleSumAggregatorFactory;
@@ -73,15 +74,17 @@ public class
ExpressionAggregationBenchmark @Param({"1000000"}) private int rowsPerSegment; - private SegmentGenerator segmentGenerator; private QueryableIndex index; private JavaScriptAggregatorFactory javaScriptAggregatorFactory; private DoubleSumAggregatorFactory expressionAggregatorFactory; private ByteBuffer aggregationBuffer = ByteBuffer.allocate(Double.BYTES); + private Closer closer; @Setup(Level.Trial) public void setup() { + this.closer = Closer.create(); + final BenchmarkSchemaInfo schemaInfo = new BenchmarkSchemaInfo( ImmutableList.of( BenchmarkColumnSchema.makeNormal("x", ValueType.FLOAT, false, 1, 0d, 0d, 10000d, false), @@ -99,8 +102,10 @@ public class ExpressionAggregationBenchmark .shardSpec(new LinearShardSpec(0)) .build(); - this.segmentGenerator = new SegmentGenerator(); - this.index = segmentGenerator.generate(dataSegment, schemaInfo, Granularities.NONE, rowsPerSegment); + final SegmentGenerator segmentGenerator = closer.register(new SegmentGenerator()); + this.index = closer.register( + segmentGenerator.generate(dataSegment, schemaInfo, Granularities.NONE, rowsPerSegment) + ); this.javaScriptAggregatorFactory = new JavaScriptAggregatorFactory( "name", ImmutableList.of("x", "y"), @@ -120,15 +125,7 @@ public class ExpressionAggregationBenchmark @TearDown(Level.Trial) public void tearDown() throws Exception { - if (index != null) { - index.close(); - index = null; - } - - if (segmentGenerator != null) { - segmentGenerator.close(); - segmentGenerator = null; - } + closer.close(); } @Benchmark diff --git a/benchmarks/src/main/java/org/apache/druid/benchmark/ExpressionSelectorBenchmark.java b/benchmarks/src/main/java/org/apache/druid/benchmark/ExpressionSelectorBenchmark.java index 9953c0e3a40..2f92f0b85ba 100644 --- a/benchmarks/src/main/java/org/apache/druid/benchmark/ExpressionSelectorBenchmark.java +++ b/benchmarks/src/main/java/org/apache/druid/benchmark/ExpressionSelectorBenchmark.java @@ -26,6 +26,7 @@ import org.apache.druid.benchmark.datagen.SegmentGenerator; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.guava.Sequence; +import org.apache.druid.java.util.common.io.Closer; import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.dimension.ExtractionDimensionSpec; import org.apache.druid.query.expression.TestExprMacroTable; @@ -72,12 +73,14 @@ public class ExpressionSelectorBenchmark @Param({"1000000"}) private int rowsPerSegment; - private SegmentGenerator segmentGenerator; private QueryableIndex index; + private Closer closer; @Setup(Level.Trial) public void setup() { + this.closer = Closer.create(); + final BenchmarkSchemaInfo schemaInfo = new BenchmarkSchemaInfo( ImmutableList.of( BenchmarkColumnSchema.makeZipf( @@ -113,22 +116,16 @@ public class ExpressionSelectorBenchmark .shardSpec(new LinearShardSpec(0)) .build(); - this.segmentGenerator = new SegmentGenerator(); - this.index = segmentGenerator.generate(dataSegment, schemaInfo, Granularities.HOUR, rowsPerSegment); + final SegmentGenerator segmentGenerator = closer.register(new SegmentGenerator()); + this.index = closer.register( + segmentGenerator.generate(dataSegment, schemaInfo, Granularities.HOUR, rowsPerSegment) + ); } @TearDown(Level.Trial) public void tearDown() throws Exception { - if (index != null) { - index.close(); - index = null; - } - - if (segmentGenerator != null) { - segmentGenerator.close(); - segmentGenerator = null; - } + closer.close(); } @Benchmark diff 
--git a/benchmarks/src/main/java/org/apache/druid/benchmark/FilteredAggregatorBenchmark.java b/benchmarks/src/main/java/org/apache/druid/benchmark/FilteredAggregatorBenchmark.java index 448cc006114..d7a98e81c31 100644 --- a/benchmarks/src/main/java/org/apache/druid/benchmark/FilteredAggregatorBenchmark.java +++ b/benchmarks/src/main/java/org/apache/druid/benchmark/FilteredAggregatorBenchmark.java @@ -20,6 +20,7 @@ package org.apache.druid.benchmark; import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.ImmutableMap; import com.google.common.io.Files; import org.apache.commons.io.FileUtils; import org.apache.druid.benchmark.datagen.BenchmarkDataGenerator; @@ -31,7 +32,6 @@ import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.java.util.common.logger.Logger; -import org.apache.druid.js.JavaScriptConfig; import org.apache.druid.query.Druids; import org.apache.druid.query.FinalizeResultsQueryRunner; import org.apache.druid.query.Query; @@ -47,7 +47,6 @@ import org.apache.druid.query.aggregation.hyperloglog.HyperUniquesSerde; import org.apache.druid.query.filter.BoundDimFilter; import org.apache.druid.query.filter.DimFilter; import org.apache.druid.query.filter.InDimFilter; -import org.apache.druid.query.filter.JavaScriptDimFilter; import org.apache.druid.query.filter.OrDimFilter; import org.apache.druid.query.filter.RegexDimFilter; import org.apache.druid.query.filter.SearchQueryDimFilter; @@ -106,6 +105,9 @@ public class FilteredAggregatorBenchmark @Param({"basic"}) private String schema; + @Param({"false", "true"}) + private String vectorize; + private static final Logger log = new Logger(FilteredAggregatorBenchmark.class); private static final int RNG_SEED = 9999; private static final IndexMergerV9 INDEX_MERGER_V9; @@ -160,12 +162,6 @@ public class FilteredAggregatorBenchmark filter = new OrDimFilter( Arrays.asList( new BoundDimFilter("dimSequential", "-1", "-1", true, true, null, null, StringComparators.ALPHANUMERIC), - new JavaScriptDimFilter( - "dimSequential", - "function(x) { return false }", - null, - JavaScriptConfig.getEnabledInstance() - ), new RegexDimFilter("dimSequential", "X", null), new SearchQueryDimFilter("dimSequential", new ContainsSearchQuerySpec("X", false), null), new InDimFilter("dimSequential", Collections.singletonList("X"), null) @@ -233,7 +229,7 @@ public class FilteredAggregatorBenchmark .buildOnheap(); } - private static List runQuery(QueryRunnerFactory factory, QueryRunner runner, Query query) + private static List runQuery(QueryRunnerFactory factory, QueryRunner runner, Query query, String vectorize) { QueryToolChest toolChest = factory.getToolchest(); QueryRunner theRunner = new FinalizeResultsQueryRunner<>( @@ -241,7 +237,10 @@ public class FilteredAggregatorBenchmark toolChest ); - Sequence queryResult = theRunner.run(QueryPlus.wrap(query), new HashMap<>()); + final QueryPlus queryToRun = QueryPlus.wrap( + query.withOverriddenContext(ImmutableMap.of("vectorize", vectorize)) + ); + Sequence queryResult = theRunner.run(queryToRun, new HashMap<>()); return queryResult.toList(); } @@ -268,7 +267,12 @@ public class FilteredAggregatorBenchmark new IncrementalIndexSegment(incIndex, SegmentId.dummy("incIndex")) ); - List> results = FilteredAggregatorBenchmark.runQuery(factory, runner, query); + List> results = FilteredAggregatorBenchmark.runQuery( + factory, + runner, + query, + vectorize 
+ ); for (Result result : results) { blackhole.consume(result); } @@ -285,7 +289,12 @@ public class FilteredAggregatorBenchmark new QueryableIndexSegment(qIndex, SegmentId.dummy("qIndex")) ); - List> results = FilteredAggregatorBenchmark.runQuery(factory, runner, query); + List> results = FilteredAggregatorBenchmark.runQuery( + factory, + runner, + query, + vectorize + ); for (Result result : results) { blackhole.consume(result); } diff --git a/benchmarks/src/main/java/org/apache/druid/benchmark/datagen/BenchmarkColumnSchema.java b/benchmarks/src/main/java/org/apache/druid/benchmark/datagen/BenchmarkColumnSchema.java index 0cf6d7c473f..549140e62bd 100644 --- a/benchmarks/src/main/java/org/apache/druid/benchmark/datagen/BenchmarkColumnSchema.java +++ b/benchmarks/src/main/java/org/apache/druid/benchmark/datagen/BenchmarkColumnSchema.java @@ -424,4 +424,26 @@ public class BenchmarkColumnSchema schema.enumeratedProbabilities = enumeratedProbabilities; return schema; } + + @Override + public String toString() + { + return "BenchmarkColumnSchema{" + + "distributionType=" + distributionType + + ", name='" + name + '\'' + + ", type=" + type + + ", isMetric=" + isMetric + + ", rowSize=" + rowSize + + ", nullProbability=" + nullProbability + + ", enumeratedValues=" + enumeratedValues + + ", enumeratedProbabilities=" + enumeratedProbabilities + + ", startInt=" + startInt + + ", endInt=" + endInt + + ", startDouble=" + startDouble + + ", endDouble=" + endDouble + + ", zipfExponent=" + zipfExponent + + ", mean=" + mean + + ", standardDeviation=" + standardDeviation + + '}'; + } } diff --git a/benchmarks/src/main/java/org/apache/druid/benchmark/datagen/BenchmarkSchemaInfo.java b/benchmarks/src/main/java/org/apache/druid/benchmark/datagen/BenchmarkSchemaInfo.java index 1f43ce22d33..7a2720931e8 100644 --- a/benchmarks/src/main/java/org/apache/druid/benchmark/datagen/BenchmarkSchemaInfo.java +++ b/benchmarks/src/main/java/org/apache/druid/benchmark/datagen/BenchmarkSchemaInfo.java @@ -68,4 +68,15 @@ public class BenchmarkSchemaInfo { return withRollup; } + + @Override + public String toString() + { + return "BenchmarkSchemaInfo{" + + "columnSchemas=" + columnSchemas + + ", aggs=" + aggs + + ", dataInterval=" + dataInterval + + ", withRollup=" + withRollup + + '}'; + } } diff --git a/benchmarks/src/main/java/org/apache/druid/benchmark/datagen/BenchmarkSchemas.java b/benchmarks/src/main/java/org/apache/druid/benchmark/datagen/BenchmarkSchemas.java index 3e35a66bad4..69a9d901a1e 100644 --- a/benchmarks/src/main/java/org/apache/druid/benchmark/datagen/BenchmarkSchemas.java +++ b/benchmarks/src/main/java/org/apache/druid/benchmark/datagen/BenchmarkSchemas.java @@ -94,7 +94,7 @@ public class BenchmarkSchemas basicSchemaIngestAggsExpression.add(new DoubleMinAggregatorFactory("minFloatZipf", "metFloatZipf")); basicSchemaIngestAggsExpression.add(new HyperUniquesAggregatorFactory("hyper", "dimHyperUnique")); - Interval basicSchemaDataInterval = Intervals.utc(0, 1000000); + Interval basicSchemaDataInterval = Intervals.of("2000-01-01/P1D"); BenchmarkSchemaInfo basicSchema = new BenchmarkSchemaInfo( basicSchemaColumns, diff --git a/benchmarks/src/main/java/org/apache/druid/benchmark/datagen/SegmentGenerator.java b/benchmarks/src/main/java/org/apache/druid/benchmark/datagen/SegmentGenerator.java index c716d476de4..74466d07de9 100644 --- a/benchmarks/src/main/java/org/apache/druid/benchmark/datagen/SegmentGenerator.java +++ b/benchmarks/src/main/java/org/apache/druid/benchmark/datagen/SegmentGenerator.java @@ -20,7 
+20,7 @@ package org.apache.druid.benchmark.datagen; import com.google.common.collect.ImmutableList; -import com.google.common.collect.Iterables; +import com.google.common.hash.Hashing; import com.google.common.io.Files; import org.apache.commons.io.FileUtils; import org.apache.druid.data.input.InputRow; @@ -31,6 +31,7 @@ import org.apache.druid.data.input.impl.FloatDimensionSchema; import org.apache.druid.data.input.impl.LongDimensionSchema; import org.apache.druid.data.input.impl.StringDimensionSchema; import org.apache.druid.java.util.common.ISE; +import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.query.aggregation.AggregatorFactory; @@ -40,18 +41,20 @@ import org.apache.druid.segment.IndexSpec; import org.apache.druid.segment.QueryableIndex; import org.apache.druid.segment.QueryableIndexIndexableAdapter; import org.apache.druid.segment.TestHelper; +import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; import org.apache.druid.segment.incremental.IncrementalIndexSchema; import org.apache.druid.segment.serde.ComplexMetrics; import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.SegmentId; +import javax.annotation.Nullable; import java.io.Closeable; import java.io.File; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; -import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; public class SegmentGenerator implements Closeable @@ -59,15 +62,43 @@ public class SegmentGenerator implements Closeable private static final Logger log = new Logger(SegmentGenerator.class); private static final int MAX_ROWS_IN_MEMORY = 200000; - private static final int STARTING_SEED = 9999; // Consistent seed for reproducibility - private final File tempDir; - private final AtomicInteger seed; + // Setup can take a long time due to the need to generate large segments. + // Allow users to specify a cache directory via a JVM property or an environment variable. + private static final String CACHE_DIR_PROPERTY = "druid.benchmark.cacheDir"; + private static final String CACHE_DIR_ENV_VAR = "DRUID_BENCHMARK_CACHE_DIR"; + + private final File cacheDir; + private final boolean cleanupCacheDir; public SegmentGenerator() { - this.tempDir = Files.createTempDir(); - this.seed = new AtomicInteger(STARTING_SEED); + this(null); + } + + public SegmentGenerator(@Nullable final File cacheDir) + { + if (cacheDir != null) { + this.cacheDir = cacheDir; + this.cleanupCacheDir = false; + } else { + final String userConfiguredCacheDir = System.getProperty(CACHE_DIR_PROPERTY, System.getenv(CACHE_DIR_ENV_VAR)); + if (userConfiguredCacheDir != null) { + this.cacheDir = new File(userConfiguredCacheDir); + this.cleanupCacheDir = false; + } else { + log.warn("No cache directory provided; benchmark data caching is disabled. " + + "Set the 'druid.benchmark.cacheDir' property or 'DRUID_BENCHMARK_CACHE_DIR' environment variable " + + "to use caching."); + this.cacheDir = Files.createTempDir(); + this.cleanupCacheDir = true; + } + } + } + + public File getCacheDir() + { + return cacheDir; } public QueryableIndex generate( @@ -80,9 +111,32 @@ public class SegmentGenerator implements Closeable // In case we need to generate hyperUniques. 
ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde()); + final String dataHash = Hashing.sha256() + .newHasher() + .putString(dataSegment.getId().toString(), StandardCharsets.UTF_8) + .putString(schemaInfo.toString(), StandardCharsets.UTF_8) + .putString(granularity.toString(), StandardCharsets.UTF_8) + .putInt(numRows) + .hash() + .toString(); + + final File outDir = new File(getSegmentDir(dataSegment.getId(), dataHash), "merged"); + + if (outDir.exists()) { + try { + log.info("Found segment with hash[%s] cached in directory[%s].", dataHash, outDir); + return TestHelper.getTestIndexIO().loadIndex(outDir); + } + catch (IOException e) { + throw new RuntimeException(e); + } + } + + log.info("Writing segment with hash[%s] to directory[%s].", dataHash, outDir); + final BenchmarkDataGenerator dataGenerator = new BenchmarkDataGenerator( schemaInfo.getColumnSchemas(), - seed.getAndIncrement(), + dataSegment.getId().hashCode(), /* Use segment identifier hashCode as seed */ schemaInfo.getDataInterval(), numRows ); @@ -124,61 +178,69 @@ public class SegmentGenerator implements Closeable rows.add(row); if ((i + 1) % 20000 == 0) { - log.info("%,d/%,d rows generated.", i + 1, numRows); + log.info("%,d/%,d rows generated for[%s].", i + 1, numRows, dataSegment); } if (rows.size() % MAX_ROWS_IN_MEMORY == 0) { - indexes.add(makeIndex(dataSegment.getId(), indexes.size(), rows, indexSchema)); + indexes.add(makeIndex(dataSegment.getId(), dataHash, indexes.size(), rows, indexSchema)); rows.clear(); } } - log.info("%,d/%,d rows generated.", numRows, numRows); + log.info("%,d/%,d rows generated for[%s].", numRows, numRows, dataSegment); if (rows.size() > 0) { - indexes.add(makeIndex(dataSegment.getId(), indexes.size(), rows, indexSchema)); + indexes.add(makeIndex(dataSegment.getId(), dataHash, indexes.size(), rows, indexSchema)); rows.clear(); } + final QueryableIndex retVal; + if (indexes.isEmpty()) { throw new ISE("No rows to index?"); - } else if (indexes.size() == 1) { - return Iterables.getOnlyElement(indexes); } else { try { - final QueryableIndex merged = TestHelper.getTestIndexIO().loadIndex( - TestHelper.getTestIndexMergerV9(OffHeapMemorySegmentWriteOutMediumFactory.instance()).merge( - indexes.stream().map(QueryableIndexIndexableAdapter::new).collect(Collectors.toList()), - false, - schemaInfo.getAggs() - .stream() - .map(AggregatorFactory::getCombiningFactory) - .toArray(AggregatorFactory[]::new), - new File(tempDir, "merged"), - new IndexSpec() - ) - ); + retVal = TestHelper + .getTestIndexIO() + .loadIndex( + TestHelper.getTestIndexMergerV9(OffHeapMemorySegmentWriteOutMediumFactory.instance()) + .merge( + indexes.stream().map(QueryableIndexIndexableAdapter::new).collect(Collectors.toList()), + false, + schemaInfo.getAggs() + .stream() + .map(AggregatorFactory::getCombiningFactory) + .toArray(AggregatorFactory[]::new), + outDir, + new IndexSpec(new RoaringBitmapSerdeFactory(true), null, null, null) + ) + ); for (QueryableIndex index : indexes) { index.close(); } - - return merged; } catch (IOException e) { throw new RuntimeException(e); } } + + log.info("Finished writing segment[%s] to[%s]", dataSegment, outDir); + + return retVal; } @Override public void close() throws IOException { - FileUtils.deleteDirectory(tempDir); + if (cleanupCacheDir) { + FileUtils.deleteDirectory(cacheDir); + } } private QueryableIndex makeIndex( final SegmentId identifier, + final String dataHash, final int indexNumber, final List rows, final IncrementalIndexSchema indexSchema @@ -187,9 +249,14 @@ public 
class SegmentGenerator implements Closeable return IndexBuilder .create() .schema(indexSchema) - .tmpDir(new File(new File(tempDir, identifier.toString()), String.valueOf(indexNumber))) + .tmpDir(new File(getSegmentDir(identifier, dataHash), String.valueOf(indexNumber))) .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) .rows(rows) .buildMMappedIndex(); } + + private File getSegmentDir(final SegmentId identifier, final String dataHash) + { + return new File(cacheDir, StringUtils.format("%s_%s", identifier, dataHash)); + } } diff --git a/benchmarks/src/main/java/org/apache/druid/benchmark/query/GroupByBenchmark.java b/benchmarks/src/main/java/org/apache/druid/benchmark/query/GroupByBenchmark.java index 2a61341ca9e..2eb74266136 100644 --- a/benchmarks/src/main/java/org/apache/druid/benchmark/query/GroupByBenchmark.java +++ b/benchmarks/src/main/java/org/apache/druid/benchmark/query/GroupByBenchmark.java @@ -51,6 +51,7 @@ import org.apache.druid.query.QueryRunner; import org.apache.druid.query.QueryRunnerFactory; import org.apache.druid.query.QueryToolChest; import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.CountAggregatorFactory; import org.apache.druid.query.aggregation.DoubleMinAggregatorFactory; import org.apache.druid.query.aggregation.DoubleSumAggregatorFactory; import org.apache.druid.query.aggregation.LongSumAggregatorFactory; @@ -134,6 +135,9 @@ public class GroupByBenchmark @Param({"all", "day"}) private String queryGranularity; + @Param({"force", "false"}) + private String vectorize; + private static final Logger log = new Logger(GroupByBenchmark.class); private static final int RNG_SEED = 9999; private static final IndexMergerV9 INDEX_MERGER_V9; @@ -178,10 +182,8 @@ public class GroupByBenchmark { // basic.A QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval())); List queryAggs = new ArrayList<>(); - queryAggs.add(new LongSumAggregatorFactory( - "sumLongSequential", - "sumLongSequential" - )); + queryAggs.add(new CountAggregatorFactory("cnt")); + queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential")); GroupByQuery queryA = GroupByQuery .builder() .setDataSource("blah") @@ -189,6 +191,7 @@ public class GroupByBenchmark .setDimensions(new DefaultDimensionSpec("dimSequential", null), new DefaultDimensionSpec("dimZipf", null)) .setAggregatorSpecs(queryAggs) .setGranularity(Granularity.fromString(queryGranularity)) + .setContext(ImmutableMap.of("vectorize", vectorize)) .build(); basicQueries.put("A", queryA); @@ -209,6 +212,7 @@ public class GroupByBenchmark .setDimensions(new DefaultDimensionSpec("dimSequential", null), new DefaultDimensionSpec("dimZipf", null)) .setAggregatorSpecs(queryAggs) .setGranularity(Granularities.DAY) + .setContext(ImmutableMap.of("vectorize", vectorize)) .build(); GroupByQuery queryA = GroupByQuery @@ -218,6 +222,7 @@ public class GroupByBenchmark .setDimensions(new DefaultDimensionSpec("dimSequential", null)) .setAggregatorSpecs(queryAggs) .setGranularity(Granularities.WEEK) + .setContext(ImmutableMap.of("vectorize", vectorize)) .build(); basicQueries.put("nested", queryA); @@ -242,6 +247,7 @@ public class GroupByBenchmark .setAggregatorSpecs(queryAggs) .setGranularity(Granularity.fromString(queryGranularity)) .setDimFilter(new BoundDimFilter("dimUniform", "0", "100", true, true, null, null, null)) + .setContext(ImmutableMap.of("vectorize", vectorize)) .build(); 
basicQueries.put("filter", queryA); @@ -265,6 +271,7 @@ public class GroupByBenchmark .setDimensions(new DefaultDimensionSpec("dimZipf", null)) .setAggregatorSpecs(queryAggs) .setGranularity(Granularity.fromString(queryGranularity)) + .setContext(ImmutableMap.of("vectorize", vectorize)) .build(); basicQueries.put("singleZipf", queryA); @@ -292,6 +299,7 @@ public class GroupByBenchmark queryAggs ) .setGranularity(Granularity.fromString(queryGranularity)) + .setContext(ImmutableMap.of("vectorize", vectorize)) .build(); simpleQueries.put("A", queryA); @@ -317,6 +325,7 @@ public class GroupByBenchmark queryAggs ) .setGranularity(Granularity.fromString(queryGranularity)) + .setContext(ImmutableMap.of("vectorize", vectorize)) .build(); simpleLongQueries.put("A", queryA); @@ -340,6 +349,7 @@ public class GroupByBenchmark .setDimensions(new DefaultDimensionSpec("dimSequential", "dimSequential", ValueType.FLOAT)) .setAggregatorSpecs(queryAggs) .setGranularity(Granularity.fromString(queryGranularity)) + .setContext(ImmutableMap.of("vectorize", vectorize)) .build(); simpleFloatQueries.put("A", queryA); @@ -502,9 +512,9 @@ public class GroupByBenchmark return new IncrementalIndex.Builder() .setIndexSchema( new IncrementalIndexSchema.Builder() - .withMetrics(schemaInfo.getAggsArray()) - .withRollup(withRollup) - .build() + .withMetrics(schemaInfo.getAggsArray()) + .withRollup(withRollup) + .build() ) .setReportParseExceptions(false) .setConcurrentEventAdd(true) @@ -536,7 +546,7 @@ public class GroupByBenchmark } } - private static List runQuery(QueryRunnerFactory factory, QueryRunner runner, Query query) + private static Sequence runQuery(QueryRunnerFactory factory, QueryRunner runner, Query query) { QueryToolChest toolChest = factory.getToolchest(); QueryRunner theRunner = new FinalizeResultsQueryRunner<>( @@ -544,8 +554,7 @@ public class GroupByBenchmark toolChest ); - Sequence queryResult = theRunner.run(QueryPlus.wrap(query), new HashMap<>()); - return queryResult.toList(); + return theRunner.run(QueryPlus.wrap(query), new HashMap<>()); } @Benchmark @@ -559,8 +568,13 @@ public class GroupByBenchmark new IncrementalIndexSegment(anIncrementalIndex, SegmentId.dummy("incIndex")) ); - List results = GroupByBenchmark.runQuery(factory, runner, query); - blackhole.consume(results); + final Sequence results = GroupByBenchmark.runQuery(factory, runner, query); + final Row lastRow = results.accumulate( + null, + (accumulated, in) -> in + ); + + blackhole.consume(lastRow); } @Benchmark @@ -574,8 +588,13 @@ public class GroupByBenchmark new QueryableIndexSegment(queryableIndexes.get(0), SegmentId.dummy("qIndex")) ); - List results = GroupByBenchmark.runQuery(factory, runner, query); - blackhole.consume(results); + final Sequence results = GroupByBenchmark.runQuery(factory, runner, query); + final Row lastRow = results.accumulate( + null, + (accumulated, in) -> in + ); + + blackhole.consume(lastRow); } @Benchmark diff --git a/benchmarks/src/main/java/org/apache/druid/benchmark/query/SqlBenchmark.java b/benchmarks/src/main/java/org/apache/druid/benchmark/query/SqlBenchmark.java index 529886a96f6..667210c2887 100644 --- a/benchmarks/src/main/java/org/apache/druid/benchmark/query/SqlBenchmark.java +++ b/benchmarks/src/main/java/org/apache/druid/benchmark/query/SqlBenchmark.java @@ -19,30 +19,26 @@ package org.apache.druid.benchmark.query; -import com.google.common.io.Files; -import org.apache.commons.io.FileUtils; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; 
import org.apache.druid.benchmark.datagen.BenchmarkSchemaInfo; import org.apache.druid.benchmark.datagen.BenchmarkSchemas; import org.apache.druid.benchmark.datagen.SegmentGenerator; -import org.apache.druid.data.input.Row; -import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.java.util.common.io.Closer; import org.apache.druid.java.util.common.logger.Logger; -import org.apache.druid.query.QueryPlus; import org.apache.druid.query.QueryRunnerFactoryConglomerate; -import org.apache.druid.query.aggregation.CountAggregatorFactory; -import org.apache.druid.query.dimension.DefaultDimensionSpec; -import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.segment.QueryableIndex; import org.apache.druid.server.security.AuthTestUtils; +import org.apache.druid.server.security.AuthenticationResult; import org.apache.druid.server.security.NoopEscalator; -import org.apache.druid.sql.SqlLifecycle; -import org.apache.druid.sql.SqlLifecycleFactory; +import org.apache.druid.sql.calcite.planner.Calcites; +import org.apache.druid.sql.calcite.planner.DruidPlanner; import org.apache.druid.sql.calcite.planner.PlannerConfig; import org.apache.druid.sql.calcite.planner.PlannerFactory; +import org.apache.druid.sql.calcite.planner.PlannerResult; import org.apache.druid.sql.calcite.schema.DruidSchema; import org.apache.druid.sql.calcite.schema.SystemSchema; import org.apache.druid.sql.calcite.util.CalciteTests; @@ -64,39 +60,112 @@ import org.openjdk.jmh.annotations.TearDown; import org.openjdk.jmh.annotations.Warmup; import org.openjdk.jmh.infra.Blackhole; -import java.io.File; -import java.util.HashMap; +import javax.annotation.Nullable; import java.util.List; +import java.util.Map; import java.util.concurrent.TimeUnit; /** - * Benchmark that compares the same groupBy query through the native query layer and through the SQL layer. + * Benchmark that tests various SQL queries. 
*/ @State(Scope.Benchmark) @Fork(value = 1) @Warmup(iterations = 15) -@Measurement(iterations = 30) +@Measurement(iterations = 25) public class SqlBenchmark { - @Param({"200000", "1000000"}) - private int rowsPerSegment; + static { + Calcites.setSystemProperties(); + } private static final Logger log = new Logger(SqlBenchmark.class); - private File tmpDir; - private SegmentGenerator segmentGenerator; - private SpecificSegmentsQuerySegmentWalker walker; - private SqlLifecycleFactory sqlLifecycleFactory; - private GroupByQuery groupByQuery; - private String sqlQuery; - private Closer resourceCloser; + private static final List QUERIES = ImmutableList.of( + // 0, 1, 2, 3: Timeseries, unfiltered + "SELECT COUNT(*) FROM foo", + "SELECT COUNT(DISTINCT hyper) FROM foo", + "SELECT SUM(sumLongSequential), SUM(sumFloatNormal) FROM foo", + "SELECT FLOOR(__time TO MINUTE), SUM(sumLongSequential), SUM(sumFloatNormal) FROM foo GROUP BY 1", + + // 4: Timeseries, low selectivity filter (90% of rows match) + "SELECT SUM(sumLongSequential), SUM(sumFloatNormal) FROM foo WHERE dimSequential NOT LIKE '%3'", + + // 5: Timeseries, high selectivity filter (0.1% of rows match) + "SELECT SUM(sumLongSequential), SUM(sumFloatNormal) FROM foo WHERE dimSequential = '311'", + + // 6: Timeseries, mixing low selectivity index-capable filter (90% of rows match) + cursor filter + "SELECT SUM(sumLongSequential), SUM(sumFloatNormal) FROM foo\n" + + "WHERE dimSequential NOT LIKE '%3' AND maxLongUniform > 10", + + // 7: Timeseries, low selectivity toplevel filter (90%), high selectivity filtered aggregator (0.1%) + "SELECT\n" + + " SUM(sumLongSequential) FILTER(WHERE dimSequential = '311'),\n" + + " SUM(sumFloatNormal)\n" + + "FROM foo\n" + + "WHERE dimSequential NOT LIKE '%3'", + + // 8: Timeseries, no toplevel filter, various filtered aggregators with clauses repeated. 
+ "SELECT\n" + + " SUM(sumLongSequential) FILTER(WHERE dimSequential = '311'),\n" + + " SUM(sumLongSequential) FILTER(WHERE dimSequential <> '311'),\n" + + " SUM(sumLongSequential) FILTER(WHERE dimSequential LIKE '%3'),\n" + + " SUM(sumLongSequential) FILTER(WHERE dimSequential NOT LIKE '%3'),\n" + + " SUM(sumLongSequential),\n" + + " SUM(sumFloatNormal) FILTER(WHERE dimSequential = '311'),\n" + + " SUM(sumFloatNormal) FILTER(WHERE dimSequential <> '311'),\n" + + " SUM(sumFloatNormal) FILTER(WHERE dimSequential LIKE '%3'),\n" + + " SUM(sumFloatNormal) FILTER(WHERE dimSequential NOT LIKE '%3'),\n" + + " SUM(sumFloatNormal),\n" + + " COUNT(*) FILTER(WHERE dimSequential = '311'),\n" + + " COUNT(*) FILTER(WHERE dimSequential <> '311'),\n" + + " COUNT(*) FILTER(WHERE dimSequential LIKE '%3'),\n" + + " COUNT(*) FILTER(WHERE dimSequential NOT LIKE '%3'),\n" + + " COUNT(*)\n" + + "FROM foo", + + // 9: Timeseries, toplevel time filter, time-comparison filtered aggregators + "SELECT\n" + + " SUM(sumLongSequential)\n" + + " FILTER(WHERE __time >= TIMESTAMP '2000-01-01 00:00:00' AND __time < TIMESTAMP '2000-01-01 12:00:00'),\n" + + " SUM(sumLongSequential)\n" + + " FILTER(WHERE __time >= TIMESTAMP '2000-01-01 12:00:00' AND __time < TIMESTAMP '2000-01-02 00:00:00')\n" + + "FROM foo\n" + + "WHERE __time >= TIMESTAMP '2000-01-01 00:00:00' AND __time < TIMESTAMP '2000-01-02 00:00:00'", + + // 10, 11: GroupBy two strings, unfiltered, unordered + "SELECT dimSequential, dimZipf, SUM(sumLongSequential) FROM foo GROUP BY 1, 2", + "SELECT dimSequential, dimZipf, SUM(sumLongSequential), COUNT(*) FROM foo GROUP BY 1, 2", + + // 12, 13, 14: GroupBy one string, unfiltered, various aggregator configurations + "SELECT dimZipf FROM foo GROUP BY 1", + "SELECT dimZipf, COUNT(*) FROM foo GROUP BY 1 ORDER BY COUNT(*) DESC", + "SELECT dimZipf, SUM(sumLongSequential), COUNT(*) FROM foo GROUP BY 1 ORDER BY COUNT(*) DESC", + + // 15, 16: GroupBy long, unfiltered, unordered; with and without aggregators + "SELECT maxLongUniform FROM foo GROUP BY 1", + "SELECT maxLongUniform, SUM(sumLongSequential), COUNT(*) FROM foo GROUP BY 1", + + // 17, 18: GroupBy long, filter by long, unordered; with and without aggregators + "SELECT maxLongUniform FROM foo WHERE maxLongUniform > 10 GROUP BY 1", + "SELECT maxLongUniform, SUM(sumLongSequential), COUNT(*) FROM foo WHERE maxLongUniform > 10 GROUP BY 1" + ); + + @Param({"5000000"}) + private int rowsPerSegment; + + @Param({"false", "force"}) + private String vectorize; + + @Param({"10", "15"}) + private String query; + + @Nullable + private PlannerFactory plannerFactory; + private Closer closer = Closer.create(); @Setup(Level.Trial) public void setup() { - tmpDir = Files.createTempDir(); - log.info("Starting benchmark setup using tmpDir[%s], rows[%,d].", tmpDir, rowsPerSegment); - final BenchmarkSchemaInfo schemaInfo = BenchmarkSchemas.SCHEMA_MAP.get("basic"); final DataSegment dataSegment = DataSegment.builder() @@ -106,87 +175,55 @@ public class SqlBenchmark .shardSpec(new LinearShardSpec(0)) .build(); - this.segmentGenerator = new SegmentGenerator(); - - final QueryableIndex index = segmentGenerator.generate(dataSegment, schemaInfo, Granularities.NONE, rowsPerSegment); - final Pair conglomerateCloserPair = CalciteTests - .createQueryRunnerFactoryConglomerate(); - final QueryRunnerFactoryConglomerate conglomerate = conglomerateCloserPair.lhs; final PlannerConfig plannerConfig = new PlannerConfig(); - final DruidSchema druidSchema = CalciteTests.createMockSchema(conglomerate, walker, 
plannerConfig); + + final SegmentGenerator segmentGenerator = closer.register(new SegmentGenerator()); + log.info("Starting benchmark setup using cacheDir[%s], rows[%,d].", segmentGenerator.getCacheDir(), rowsPerSegment); + final QueryableIndex index = segmentGenerator.generate(dataSegment, schemaInfo, Granularities.NONE, rowsPerSegment); + + final Pair conglomerate = CalciteTests.createQueryRunnerFactoryConglomerate(); + closer.register(conglomerate.rhs); + + final SpecificSegmentsQuerySegmentWalker walker = new SpecificSegmentsQuerySegmentWalker(conglomerate.lhs).add( + dataSegment, + index + ); + closer.register(walker); + + final DruidSchema druidSchema = CalciteTests.createMockSchema(conglomerate.lhs, walker, plannerConfig); final SystemSchema systemSchema = CalciteTests.createMockSystemSchema(druidSchema, walker, plannerConfig); - this.walker = new SpecificSegmentsQuerySegmentWalker(conglomerate).add(dataSegment, index); - final PlannerFactory plannerFactory = new PlannerFactory( + + plannerFactory = new PlannerFactory( druidSchema, systemSchema, - CalciteTests.createMockQueryLifecycleFactory(walker, conglomerate), + CalciteTests.createMockQueryLifecycleFactory(walker, conglomerate.lhs), CalciteTests.createOperatorTable(), CalciteTests.createExprMacroTable(), plannerConfig, AuthTestUtils.TEST_AUTHORIZER_MAPPER, CalciteTests.getJsonMapper() ); - this.sqlLifecycleFactory = CalciteTests.createSqlLifecycleFactory(plannerFactory); - groupByQuery = GroupByQuery - .builder() - .setDataSource("foo") - .setInterval(Intervals.ETERNITY) - .setDimensions(new DefaultDimensionSpec("dimZipf", "d0"), new DefaultDimensionSpec("dimSequential", "d1")) - .setAggregatorSpecs(new CountAggregatorFactory("c")) - .setGranularity(Granularities.ALL) - .build(); - - sqlQuery = "SELECT\n" - + " dimZipf AS d0," - + " dimSequential AS d1,\n" - + " COUNT(*) AS c\n" - + "FROM druid.foo\n" - + "GROUP BY dimZipf, dimSequential"; } @TearDown(Level.Trial) public void tearDown() throws Exception { - if (walker != null) { - walker.close(); - walker = null; - } - - if (segmentGenerator != null) { - segmentGenerator.close(); - segmentGenerator = null; - } - - if (resourceCloser != null) { - resourceCloser.close(); - } - - if (tmpDir != null) { - FileUtils.deleteDirectory(tmpDir); - } + closer.close(); } @Benchmark @BenchmarkMode(Mode.AverageTime) @OutputTimeUnit(TimeUnit.MILLISECONDS) - public void queryNative(Blackhole blackhole) + public void querySql(Blackhole blackhole) throws Exception { - final Sequence resultSequence = QueryPlus.wrap(groupByQuery).run(walker, new HashMap<>()); - final List resultList = resultSequence.toList(); - blackhole.consume(resultList); - } - - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.MILLISECONDS) - public void queryPlanner(Blackhole blackhole) throws Exception - { - SqlLifecycle sqlLifecycle = sqlLifecycleFactory.factorize(); - final List results = sqlLifecycle.runSimple( - sqlQuery, - null, - NoopEscalator.getInstance().createEscalatedAuthenticationResult() - ).toList(); - blackhole.consume(results); + final Map context = ImmutableMap.of("vectorize", vectorize); + final AuthenticationResult authenticationResult = NoopEscalator.getInstance() + .createEscalatedAuthenticationResult(); + try (final DruidPlanner planner = plannerFactory.createPlanner(context, authenticationResult)) { + final PlannerResult plannerResult = planner.plan(QUERIES.get(Integer.parseInt(query))); + final Sequence resultSequence = plannerResult.run(); + final Object[] lastRow = 
resultSequence.accumulate(null, (accumulated, in) -> in); + blackhole.consume(lastRow); + } } } diff --git a/benchmarks/src/main/java/org/apache/druid/benchmark/query/SqlVsNativeBenchmark.java b/benchmarks/src/main/java/org/apache/druid/benchmark/query/SqlVsNativeBenchmark.java new file mode 100644 index 00000000000..fdac5ffcdee --- /dev/null +++ b/benchmarks/src/main/java/org/apache/druid/benchmark/query/SqlVsNativeBenchmark.java @@ -0,0 +1,173 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.benchmark.query; + +import org.apache.druid.benchmark.datagen.BenchmarkSchemaInfo; +import org.apache.druid.benchmark.datagen.BenchmarkSchemas; +import org.apache.druid.benchmark.datagen.SegmentGenerator; +import org.apache.druid.data.input.Row; +import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.java.util.common.Pair; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.java.util.common.guava.Sequence; +import org.apache.druid.java.util.common.io.Closer; +import org.apache.druid.java.util.common.logger.Logger; +import org.apache.druid.query.QueryPlus; +import org.apache.druid.query.QueryRunnerFactoryConglomerate; +import org.apache.druid.query.aggregation.CountAggregatorFactory; +import org.apache.druid.query.dimension.DefaultDimensionSpec; +import org.apache.druid.query.groupby.GroupByQuery; +import org.apache.druid.segment.QueryableIndex; +import org.apache.druid.server.security.AuthTestUtils; +import org.apache.druid.server.security.AuthenticationResult; +import org.apache.druid.server.security.NoopEscalator; +import org.apache.druid.sql.calcite.planner.DruidPlanner; +import org.apache.druid.sql.calcite.planner.PlannerConfig; +import org.apache.druid.sql.calcite.planner.PlannerFactory; +import org.apache.druid.sql.calcite.planner.PlannerResult; +import org.apache.druid.sql.calcite.schema.DruidSchema; +import org.apache.druid.sql.calcite.schema.SystemSchema; +import org.apache.druid.sql.calcite.util.CalciteTests; +import org.apache.druid.sql.calcite.util.SpecificSegmentsQuerySegmentWalker; +import org.apache.druid.timeline.DataSegment; +import org.apache.druid.timeline.partition.LinearShardSpec; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import 
org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +import java.util.HashMap; +import java.util.concurrent.TimeUnit; + +/** + * Benchmark that compares the same groupBy query through the native query layer and through the SQL layer. + */ +@State(Scope.Benchmark) +@Fork(value = 1) +@Warmup(iterations = 15) +@Measurement(iterations = 30) +public class SqlVsNativeBenchmark +{ + @Param({"200000", "1000000"}) + private int rowsPerSegment; + + private static final Logger log = new Logger(SqlVsNativeBenchmark.class); + + private SpecificSegmentsQuerySegmentWalker walker; + private PlannerFactory plannerFactory; + private GroupByQuery groupByQuery; + private String sqlQuery; + private Closer closer; + + @Setup(Level.Trial) + public void setup() + { + this.closer = Closer.create(); + + final BenchmarkSchemaInfo schemaInfo = BenchmarkSchemas.SCHEMA_MAP.get("basic"); + + final DataSegment dataSegment = DataSegment.builder() + .dataSource("foo") + .interval(schemaInfo.getDataInterval()) + .version("1") + .shardSpec(new LinearShardSpec(0)) + .build(); + + final SegmentGenerator segmentGenerator = closer.register(new SegmentGenerator()); + log.info("Starting benchmark setup using tmpDir[%s], rows[%,d].", segmentGenerator.getCacheDir(), rowsPerSegment); + + final QueryableIndex index = segmentGenerator.generate(dataSegment, schemaInfo, Granularities.NONE, rowsPerSegment); + final Pair conglomerateCloserPair = CalciteTests + .createQueryRunnerFactoryConglomerate(); + final QueryRunnerFactoryConglomerate conglomerate = conglomerateCloserPair.lhs; + final PlannerConfig plannerConfig = new PlannerConfig(); + + this.walker = closer.register(new SpecificSegmentsQuerySegmentWalker(conglomerate).add(dataSegment, index)); + final DruidSchema druidSchema = CalciteTests.createMockSchema(conglomerate, walker, plannerConfig); + final SystemSchema systemSchema = CalciteTests.createMockSystemSchema(druidSchema, walker, plannerConfig); + + plannerFactory = new PlannerFactory( + druidSchema, + systemSchema, + CalciteTests.createMockQueryLifecycleFactory(walker, conglomerate), + CalciteTests.createOperatorTable(), + CalciteTests.createExprMacroTable(), + plannerConfig, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, + CalciteTests.getJsonMapper() + ); + groupByQuery = GroupByQuery + .builder() + .setDataSource("foo") + .setInterval(Intervals.ETERNITY) + .setDimensions(new DefaultDimensionSpec("dimZipf", "d0"), new DefaultDimensionSpec("dimSequential", "d1")) + .setAggregatorSpecs(new CountAggregatorFactory("c")) + .setGranularity(Granularities.ALL) + .build(); + + sqlQuery = "SELECT\n" + + " dimZipf AS d0," + + " dimSequential AS d1,\n" + + " COUNT(*) AS c\n" + + "FROM druid.foo\n" + + "GROUP BY dimZipf, dimSequential"; + } + + @TearDown(Level.Trial) + public void tearDown() throws Exception + { + closer.close(); + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MILLISECONDS) + public void queryNative(Blackhole blackhole) + { + final Sequence resultSequence = QueryPlus.wrap(groupByQuery).run(walker, new HashMap<>()); + final Row lastRow = resultSequence.accumulate(null, (accumulated, in) -> in); + blackhole.consume(lastRow); + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MILLISECONDS) + public void queryPlanner(Blackhole blackhole) throws Exception + { + final AuthenticationResult authenticationResult = NoopEscalator.getInstance() + .createEscalatedAuthenticationResult(); + try (final DruidPlanner planner = 
plannerFactory.createPlanner(null, authenticationResult)) {
+      final PlannerResult plannerResult = planner.plan(sqlQuery);
+      final Sequence<Object[]> resultSequence = plannerResult.run();
+      final Object[] lastRow = resultSequence.accumulate(null, (accumulated, in) -> in);
+      blackhole.consume(lastRow);
+    }
+  }
+}
diff --git a/benchmarks/src/main/resources/log4j2.xml b/benchmarks/src/main/resources/log4j2.xml
new file mode 100644
index 00000000000..dbce142e7f6
--- /dev/null
+++ b/benchmarks/src/main/resources/log4j2.xml
@@ -0,0 +1,32 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one
+  ~ or more contributor license agreements.  See the NOTICE file
+  ~ distributed with this work for additional information
+  ~ regarding copyright ownership.  The ASF licenses this file
+  ~ to you under the Apache License, Version 2.0 (the
+  ~ "License"); you may not use this file except in compliance
+  ~ with the License.  You may obtain a copy of the License at
+  ~
+  ~   http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing,
+  ~ software distributed under the License is distributed on an
+  ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  ~ KIND, either express or implied.  See the License for the
+  ~ specific language governing permissions and limitations
+  ~ under the License.
+  -->
+
+<Configuration status="WARN">
+  <Appenders>
+    <Console name="Console" target="SYSTEM_OUT">
+      <PatternLayout pattern="%d{ISO8601} %p [%t] %c - %m%n"/>
+    </Console>
+  </Appenders>
+  <Loggers>
+    <Root level="info">
+      <AppenderRef ref="Console"/>
+    </Root>
+  </Loggers>
+</Configuration>
diff --git a/core/src/main/java/org/apache/druid/java/util/common/guava/FilteredSequence.java b/core/src/main/java/org/apache/druid/java/util/common/guava/FilteredSequence.java
index d3851ae1a69..81210d76c26 100644
--- a/core/src/main/java/org/apache/druid/java/util/common/guava/FilteredSequence.java
+++ b/core/src/main/java/org/apache/druid/java/util/common/guava/FilteredSequence.java
@@ -28,11 +28,11 @@ import java.io.IOException;
 public class FilteredSequence<T> implements Sequence<T>
 {
   private final Sequence<T> baseSequence;
-  private final Predicate<T> pred;
+  private final Predicate<? super T> pred;

   public FilteredSequence(
       Sequence<T> baseSequence,
-      Predicate<T> pred
+      Predicate<? super T> pred
   )
   {
     this.baseSequence = baseSequence;
diff --git a/core/src/main/java/org/apache/druid/java/util/common/guava/FilteringAccumulator.java b/core/src/main/java/org/apache/druid/java/util/common/guava/FilteringAccumulator.java
index 87b5b274510..36d36bcef04 100644
--- a/core/src/main/java/org/apache/druid/java/util/common/guava/FilteringAccumulator.java
+++ b/core/src/main/java/org/apache/druid/java/util/common/guava/FilteringAccumulator.java
@@ -25,10 +25,10 @@ import com.google.common.base.Predicate;
  */
 public class FilteringAccumulator<OutType, T> implements Accumulator<OutType, T>
 {
-  private final Predicate<T> pred;
+  private final Predicate<? super T> pred;
   private final Accumulator<OutType, T> accumulator;

-  public FilteringAccumulator(Predicate<T> pred, Accumulator<OutType, T> accumulator)
+  public FilteringAccumulator(Predicate<? super T> pred, Accumulator<OutType, T> accumulator)
   {
     this.pred = pred;
     this.accumulator = accumulator;
diff --git a/core/src/main/java/org/apache/druid/java/util/common/guava/FilteringYieldingAccumulator.java b/core/src/main/java/org/apache/druid/java/util/common/guava/FilteringYieldingAccumulator.java
index 5faee68f7cd..014d54efc8f 100644
--- a/core/src/main/java/org/apache/druid/java/util/common/guava/FilteringYieldingAccumulator.java
+++ b/core/src/main/java/org/apache/druid/java/util/common/guava/FilteringYieldingAccumulator.java
@@ -25,12 +25,12 @@ import com.google.common.base.Predicate;
  */
 public class FilteringYieldingAccumulator<OutType, T> extends YieldingAccumulator<OutType, T>
 {
-  private final Predicate<T> pred;
+  private final Predicate<? super T> pred;
   private final YieldingAccumulator<OutType, T> accumulator;

   private volatile boolean didSomething = false;

-  public FilteringYieldingAccumulator(Predicate<T> pred, YieldingAccumulator<OutType, T> accumulator)
+  public FilteringYieldingAccumulator(Predicate<? super T> pred, YieldingAccumulator<OutType, T> accumulator)
   {
     this.pred = pred;
     this.accumulator = accumulator;
diff --git a/core/src/main/java/org/apache/druid/java/util/common/guava/Sequence.java b/core/src/main/java/org/apache/druid/java/util/common/guava/Sequence.java
index 555c768df68..c17a638b183 100644
--- a/core/src/main/java/org/apache/druid/java/util/common/guava/Sequence.java
+++ b/core/src/main/java/org/apache/druid/java/util/common/guava/Sequence.java
@@ -19,6 +19,7 @@
 package org.apache.druid.java.util.common.guava;

+import com.google.common.base.Predicate;
 import com.google.common.collect.Ordering;

 import java.io.Closeable;
@@ -53,7 +54,7 @@ public interface Sequence<T>
    */
   <OutType> OutType accumulate(OutType initValue, Accumulator<OutType, T> accumulator);

-  /**
+  /**
   * Return a Yielder for accumulated sequence.
   *
   * @param initValue the initial value to pass along to start the accumulation.
@@ -71,11 +72,13 @@ public interface Sequence<T>
     return new MappedSequence<>(this, mapper);
   }

+  default Sequence<T> filter(Predicate<? super T> predicate)
+  {
+    return Sequences.filter(this, predicate);
+  }
+
   /**
    * This will materialize the entire sequence. Use at your own risk.
-   *
-   * Several benchmarks rely on this method to eagerly accumulate Sequences to ArrayLists. e.g.
-   * GroupByBenchmark.
    */
   default List<T> toList()
   {
diff --git a/core/src/main/java/org/apache/druid/java/util/common/guava/Sequences.java b/core/src/main/java/org/apache/druid/java/util/common/guava/Sequences.java
index 2bab97141d5..df6fbe5cbd9 100644
--- a/core/src/main/java/org/apache/druid/java/util/common/guava/Sequences.java
+++ b/core/src/main/java/org/apache/druid/java/util/common/guava/Sequences.java
@@ -84,7 +84,7 @@ public class Sequences
     return new MappedSequence<>(sequence, fn::apply);
   }

-  public static <T> Sequence<T> filter(Sequence<T> sequence, Predicate<T> pred)
+  public static <T> Sequence<T> filter(Sequence<T> sequence, Predicate<? super T> pred)
   {
     return new FilteredSequence<>(sequence, pred);
   }
diff --git a/docs/content/querying/query-context.md b/docs/content/querying/query-context.md
index d9d82182f58..b668cd732d7 100644
--- a/docs/content/querying/query-context.md
+++ b/docs/content/querying/query-context.md
@@ -60,3 +60,31 @@ In addition, some query types offer context parameters specific to that query type.

 ### GroupBy queries

 See [GroupBy query context](groupbyquery.html#query-context).
+
+### Vectorizable queries
+
+The GroupBy and Timeseries query types can run in _vectorized_ mode, which speeds up query execution by processing
+batches of rows at a time. Not all queries can be vectorized. In particular, vectorization currently has the following
+requirements:
+
+- All query-level filters must either be able to run on bitmap indexes or must offer vectorized row-matchers. These
+include "selector", "bound", "in", "like", "regex", "search", "and", "or", and "not".
+- All filters in filtered aggregators must offer vectorized row-matchers.
+- All aggregators must offer vectorized implementations. These include "count", "doubleSum", "floatSum", "longSum",
+"hyperUnique", and "filtered".
+- No virtual columns.
+- For GroupBy: All dimension specs must be "default" (no extraction functions or filtered dimension specs).
+- For GroupBy: No multi-value dimensions.
+- For Timeseries: No "descending" order.
+- Only immutable segments (not real-time).
+
+Other query types (like TopN, Scan, Select, and Search) ignore the "vectorize" parameter and always execute without
+vectorization, even when it is set to `"force"`.
+
+Vectorization is an alpha-quality feature as of Druid #{DRUIDVERSION}. We heartily welcome any feedback and testing
+from the community as we work to battle-test it.
+
+|property|default|description|
+|--------|-------|------------|
+|vectorize|`false`|Enables or disables vectorized query execution. Possible values are `false` (disabled), `true` (enabled if possible, disabled otherwise, on a per-segment basis), and `force` (enabled, and groupBy or timeseries queries that cannot be vectorized will fail). The `"force"` setting is meant to aid in testing, and is not generally useful in production (since real-time segments can never be processed with vectorized execution, any queries on real-time data will fail).|
+|vectorSize|`512`|Sets the row batching size for a particular query.|
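To make the two context parameters above concrete, here is a minimal sketch of a native groupBy query that opts into vectorized execution, mirroring the `setContext(ImmutableMap.of("vectorize", vectorize))` calls this patch threads through the benchmarks. The datasource and column names are illustrative (borrowed from the benchmark schema in this patch), not part of the change itself:

```java
import com.google.common.collect.ImmutableMap;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.query.aggregation.LongSumAggregatorFactory;
import org.apache.druid.query.dimension.DefaultDimensionSpec;
import org.apache.druid.query.groupby.GroupByQuery;
import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;

import java.util.Collections;

public class VectorizeContextExample
{
  public static GroupByQuery vectorizedGroupBy()
  {
    return GroupByQuery
        .builder()
        .setDataSource("foo")
        .setQuerySegmentSpec(
            new MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.of("2000-01-01/P1D")))
        )
        .setDimensions(new DefaultDimensionSpec("dimZipf", null))
        .setAggregatorSpecs(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"))
        .setGranularity(Granularities.ALL)
        // "force" fails fast if any segment cannot be vectorized; "true" falls back per segment.
        .setContext(ImmutableMap.of("vectorize", "force", "vectorSize", 512))
        .build();
  }
}
```

The `vectorSize` entry is optional and is shown only to illustrate the second row of the table; 512 is simply its default repeated for illustration.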
diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/BufferHashGrouperUsingSketchMergeAggregatorFactoryTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/BufferHashGrouperUsingSketchMergeAggregatorFactoryTest.java
index f10ae2cac1b..db7c377ae48 100644
--- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/BufferHashGrouperUsingSketchMergeAggregatorFactoryTest.java
+++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/BufferHashGrouperUsingSketchMergeAggregatorFactoryTest.java
@@ -20,12 +20,13 @@ package org.apache.druid.query.aggregation.datasketches.theta;

 import com.google.common.base.Suppliers;
+import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Lists;
 import com.yahoo.sketches.theta.Sketches;
 import com.yahoo.sketches.theta.UpdateSketch;
 import org.apache.druid.data.input.MapBasedRow;
-import org.apache.druid.query.aggregation.AggregatorFactory;
+import org.apache.druid.query.aggregation.AggregatorAdapters;
 import org.apache.druid.query.aggregation.CountAggregatorFactory;
 import org.apache.druid.query.groupby.epinephelinae.BufferHashGrouper;
 import org.apache.druid.query.groupby.epinephelinae.Grouper;
@@ -47,11 +48,13 @@ public class BufferHashGrouperUsingSketchMergeAggregatorFactoryTest
     final BufferHashGrouper<Integer> grouper = new BufferHashGrouper<>(
         Suppliers.ofInstance(ByteBuffer.allocate(bufferSize)),
         GrouperTestUtil.intKeySerde(),
-        columnSelectorFactory,
-        new AggregatorFactory[]{
-            new SketchMergeAggregatorFactory("sketch", "sketch", 16, false, true, 2),
-            new CountAggregatorFactory("count")
-        },
+        AggregatorAdapters.factorizeBuffered(
+            columnSelectorFactory,
+            ImmutableList.of(
+                new SketchMergeAggregatorFactory("sketch", "sketch", 16, false, true, 2),
+                new CountAggregatorFactory("count")
+            )
+        ),
         Integer.MAX_VALUE,
         0.75f,
         initialBuckets,
diff --git a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/BloomDimFilterTest.java b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/BloomDimFilterTest.java
index fb8d31aa4a7..d01b4c536e0 100644
--- a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/BloomDimFilterTest.java
+++ b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/BloomDimFilterTest.java
@@ -246,21 +246,24 @@ public class BloomDimFilterTest extends BaseFilterTest
   @Test
   public void testExpressionVirtualColumn() throws IOException
   {
-    assertFilterMatches(
+    assertFilterMatchesSkipVectorize(
         new BloomDimFilter("expr", bloomKFilter(1000, 1.1F), null),
         ImmutableList.of("0", "1", "2", "3", "4", "5")
     );
-    assertFilterMatches(new BloomDimFilter("expr", bloomKFilter(1000, 1.2F), null), ImmutableList.of());
-    assertFilterMatches(
+    assertFilterMatchesSkipVectorize(new BloomDimFilter("expr", bloomKFilter(1000, 1.2F), null), ImmutableList.of());
+    assertFilterMatchesSkipVectorize(
         new BloomDimFilter("exprDouble", bloomKFilter(1000, 2.1D), null),
         ImmutableList.of("0", "1", "2", "3", "4", "5")
     );
-    assertFilterMatches(new BloomDimFilter("exprDouble", bloomKFilter(1000, 2.2D), null), ImmutableList.of());
assertFilterMatches(new BloomDimFilter("exprDouble", bloomKFilter(1000, 2.2D), null), ImmutableList.of()); - assertFilterMatches( + assertFilterMatchesSkipVectorize( + new BloomDimFilter("exprDouble", bloomKFilter(1000, 2.2D), null), + ImmutableList.of() + ); + assertFilterMatchesSkipVectorize( new BloomDimFilter("exprLong", bloomKFilter(1000, 3L), null), ImmutableList.of("0", "1", "2", "3", "4", "5") ); - assertFilterMatches(new BloomDimFilter("exprLong", bloomKFilter(1000, 4L), null), ImmutableList.of()); + assertFilterMatchesSkipVectorize(new BloomDimFilter("exprLong", bloomKFilter(1000, 4L), null), ImmutableList.of()); } @Test diff --git a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/sql/BloomDimFilterSqlTest.java b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/sql/BloomDimFilterSqlTest.java index be32f01b78f..fd9f5f5b667 100644 --- a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/sql/BloomDimFilterSqlTest.java +++ b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/sql/BloomDimFilterSqlTest.java @@ -169,6 +169,9 @@ public class BloomDimFilterSqlTest extends BaseCalciteQueryTest @Test public void testBloomFilterVirtualColumn() throws Exception { + // Cannot vectorize due to expression virtual columns. + cannotVectorize(); + BloomKFilter filter = new BloomKFilter(1500); filter.addString("def-foo"); byte[] bytes = BloomFilterSerializersModule.bloomKFilterToBytes(filter); @@ -199,6 +202,9 @@ public class BloomDimFilterSqlTest extends BaseCalciteQueryTest @Test public void testBloomFilterVirtualColumnNumber() throws Exception { + // Cannot vectorize due to expression virtual columns. + cannotVectorize(); + BloomKFilter filter = new BloomKFilter(1500); filter.addFloat(20.2f); byte[] bytes = BloomFilterSerializersModule.bloomKFilterToBytes(filter); diff --git a/extensions-core/stats/src/test/java/org/apache/druid/query/aggregation/variance/VarianceGroupByQueryTest.java b/extensions-core/stats/src/test/java/org/apache/druid/query/aggregation/variance/VarianceGroupByQueryTest.java index 1b0ef259552..b302aef03f3 100644 --- a/extensions-core/stats/src/test/java/org/apache/druid/query/aggregation/variance/VarianceGroupByQueryTest.java +++ b/extensions-core/stats/src/test/java/org/apache/druid/query/aggregation/variance/VarianceGroupByQueryTest.java @@ -45,6 +45,7 @@ import org.junit.runners.Parameterized; import java.util.Collection; import java.util.Collections; import java.util.List; +import java.util.stream.Collectors; /** * @@ -58,9 +59,22 @@ public class VarianceGroupByQueryTest private final String testName; @Parameterized.Parameters(name = "{0}") - public static Collection constructorFeeder() + public static Collection constructorFeeder() { - return GroupByQueryRunnerTest.constructorFeeder(); + // Use GroupByQueryRunnerTest's constructorFeeder, but remove vectorized tests, since this aggregator + // can't vectorize yet. 
+ return GroupByQueryRunnerTest.constructorFeeder().stream() + .filter(constructor -> !((boolean) constructor[4]) /* !vectorize */) + .map( + constructor -> + new Object[]{ + constructor[0], + constructor[1], + constructor[2], + constructor[3] + } + ) + .collect(Collectors.toList()); } public VarianceGroupByQueryTest( diff --git a/extensions-core/stats/src/test/java/org/apache/druid/query/aggregation/variance/VarianceTimeseriesQueryTest.java b/extensions-core/stats/src/test/java/org/apache/druid/query/aggregation/variance/VarianceTimeseriesQueryTest.java index c5dcecea9bf..7e0e059178c 100644 --- a/extensions-core/stats/src/test/java/org/apache/druid/query/aggregation/variance/VarianceTimeseriesQueryTest.java +++ b/extensions-core/stats/src/test/java/org/apache/druid/query/aggregation/variance/VarianceTimeseriesQueryTest.java @@ -37,6 +37,8 @@ import org.junit.runners.Parameterized; import java.util.Arrays; import java.util.HashMap; import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; @RunWith(Parameterized.class) public class VarianceTimeseriesQueryTest @@ -44,13 +46,22 @@ public class VarianceTimeseriesQueryTest @Parameterized.Parameters(name = "{0}:descending={1}") public static Iterable constructorFeeder() { - return TimeseriesQueryRunnerTest.constructorFeeder(); + // Use TimeseriesQueryRunnerTest's constructorFeeder, but remove vectorized tests, since this aggregator + // can't vectorize yet. + return StreamSupport.stream(TimeseriesQueryRunnerTest.constructorFeeder().spliterator(), false) + .filter(constructor -> !((boolean) constructor[2]) /* !vectorize */) + .map(constructor -> new Object[]{constructor[0], constructor[1], constructor[3]}) + .collect(Collectors.toList()); } private final QueryRunner runner; private final boolean descending; - public VarianceTimeseriesQueryTest(QueryRunner runner, boolean descending, List aggregatorFactories) + public VarianceTimeseriesQueryTest( + QueryRunner runner, + boolean descending, + List aggregatorFactories + ) { this.runner = runner; this.descending = descending; diff --git a/pom.xml b/pom.xml index 836307aa802..1a802c9f5e1 100644 --- a/pom.xml +++ b/pom.xml @@ -1227,7 +1227,7 @@ org.apache.maven.plugins maven-surefire-plugin - 2.19.1 + 2.22.2 diff --git a/processing/src/main/java/org/apache/druid/collections/bitmap/BatchIteratorAdapter.java b/processing/src/main/java/org/apache/druid/collections/bitmap/BatchIteratorAdapter.java new file mode 100644 index 00000000000..225999f02c0 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/collections/bitmap/BatchIteratorAdapter.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.collections.bitmap; + +import com.google.common.base.Preconditions; +import org.roaringbitmap.BatchIterator; +import org.roaringbitmap.IntIterator; + +public final class BatchIteratorAdapter implements BatchIterator +{ + private final IntIterator iterator; + + public BatchIteratorAdapter(IntIterator iterator) + { + this.iterator = Preconditions.checkNotNull(iterator, "iterator"); + } + + @Override + public int nextBatch(int[] buffer) + { + int i; + for (i = 0; i < buffer.length && iterator.hasNext(); i++) { + buffer[i] = iterator.next(); + } + + return i; + } + + @Override + public boolean hasNext() + { + return iterator.hasNext(); + } + + @Override + public BatchIterator clone() + { + // It's okay to make a "new BatchIteratorAdapter" instead of calling super.clone(), since this class is final. + return new BatchIteratorAdapter(iterator.clone()); + } +} diff --git a/processing/src/main/java/org/apache/druid/collections/bitmap/ImmutableBitmap.java b/processing/src/main/java/org/apache/druid/collections/bitmap/ImmutableBitmap.java index 6e8247cfb22..d02e2371d50 100644 --- a/processing/src/main/java/org/apache/druid/collections/bitmap/ImmutableBitmap.java +++ b/processing/src/main/java/org/apache/druid/collections/bitmap/ImmutableBitmap.java @@ -19,6 +19,7 @@ package org.apache.druid.collections.bitmap; +import org.roaringbitmap.BatchIterator; import org.roaringbitmap.IntIterator; /** @@ -32,6 +33,14 @@ public interface ImmutableBitmap */ IntIterator iterator(); + /** + * @return a batched iterator over the set bits of this bitmap + */ + default BatchIterator batchIterator() + { + return new BatchIteratorAdapter(iterator()); + } + /** * @return The number of bits set to true in this bitmap */ diff --git a/processing/src/main/java/org/apache/druid/collections/bitmap/WrappedImmutableRoaringBitmap.java b/processing/src/main/java/org/apache/druid/collections/bitmap/WrappedImmutableRoaringBitmap.java index 98c45b6c62c..763445a9612 100644 --- a/processing/src/main/java/org/apache/druid/collections/bitmap/WrappedImmutableRoaringBitmap.java +++ b/processing/src/main/java/org/apache/druid/collections/bitmap/WrappedImmutableRoaringBitmap.java @@ -19,6 +19,7 @@ package org.apache.druid.collections.bitmap; +import org.roaringbitmap.BatchIterator; import org.roaringbitmap.IntIterator; import org.roaringbitmap.buffer.ImmutableRoaringBitmap; @@ -76,6 +77,12 @@ public class WrappedImmutableRoaringBitmap implements ImmutableBitmap return bitmap.getIntIterator(); } + @Override + public BatchIterator batchIterator() + { + return bitmap.getBatchIterator(); + } + @Override public int size() { diff --git a/processing/src/main/java/org/apache/druid/query/DefaultQueryMetrics.java b/processing/src/main/java/org/apache/druid/query/DefaultQueryMetrics.java index 1a029b40ac1..4d5e3c5a71a 100644 --- a/processing/src/main/java/org/apache/druid/query/DefaultQueryMetrics.java +++ b/processing/src/main/java/org/apache/druid/query/DefaultQueryMetrics.java @@ -196,6 +196,12 @@ public class DefaultQueryMetrics> implements QueryMet // Emit nothing by default. } + @Override + public void vectorized(final boolean vectorized) + { + // Emit nothing by default. 
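The `BatchIteratorAdapter` above implements the RoaringBitmap `BatchIterator` contract: `nextBatch` fills as much of the caller's array as it can and returns how many slots were written. A hedged usage sketch follows; the helper itself is hypothetical, but it uses only the two methods shown above:

```java
import org.roaringbitmap.BatchIterator;

public class BatchDrainSketch
{
  // Hypothetical helper: visit all set bit indexes of a bitmap in fixed-size
  // batches instead of one int at a time.
  static long sumSetBits(BatchIterator iterator, int batchSize)
  {
    final int[] buffer = new int[batchSize];
    long sum = 0;
    while (iterator.hasNext()) {
      final int written = iterator.nextBatch(buffer); // fills buffer[0..written)
      for (int i = 0; i < written; i++) {
        sum += buffer[i];
      }
    }
    return sum;
  }
}
```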
+ } + @Override + public BitmapResultFactory makeBitmapResultFactory(BitmapFactory factory) { diff --git a/processing/src/main/java/org/apache/druid/query/QueryContexts.java b/processing/src/main/java/org/apache/druid/query/QueryContexts.java index a1922180709..d5003e26cca 100644 --- a/processing/src/main/java/org/apache/druid/query/QueryContexts.java +++ b/processing/src/main/java/org/apache/druid/query/QueryContexts.java @@ -23,7 +23,10 @@ import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableMap; import org.apache.druid.guice.annotations.PublicApi; import org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.Numbers; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.segment.QueryableIndexStorageAdapter; import java.util.concurrent.TimeUnit; @@ -43,11 +46,44 @@ public class QueryContexts public static final boolean DEFAULT_USE_CACHE = true; public static final boolean DEFAULT_POPULATE_RESULTLEVEL_CACHE = true; public static final boolean DEFAULT_USE_RESULTLEVEL_CACHE = true; + public static final Vectorize DEFAULT_VECTORIZE = Vectorize.FALSE; public static final int DEFAULT_PRIORITY = 0; public static final int DEFAULT_UNCOVERED_INTERVALS_LIMIT = 0; public static final long DEFAULT_TIMEOUT_MILLIS = TimeUnit.MINUTES.toMillis(5); public static final long NO_TIMEOUT = 0; + @SuppressWarnings("unused") // Used by Jackson serialization + public enum Vectorize + { + FALSE { + @Override + public boolean shouldVectorize(final boolean canVectorize) + { + return false; + } + }, + TRUE { + @Override + public boolean shouldVectorize(final boolean canVectorize) + { + return canVectorize; + } + }, + FORCE { + @Override + public boolean shouldVectorize(final boolean canVectorize) + { + if (!canVectorize) { + throw new ISE("Cannot vectorize!"); + } + + return true; + } + }; + + public abstract boolean shouldVectorize(boolean canVectorize); + } + public static boolean isBySegment(Query<?> query) { return isBySegment(query, DEFAULT_BY_SEGMENT); @@ -113,6 +149,16 @@ public class QueryContexts return parseBoolean(query, "serializeDateTimeAsLongInner", defaultValue); } + public static Vectorize getVectorize(Query<?> query) + { + return parseEnum(query, "vectorize", Vectorize.class, DEFAULT_VECTORIZE); + } + + public static int getVectorSize(Query<?> query) + { + return parseInt(query, "vectorSize", QueryableIndexStorageAdapter.DEFAULT_VECTOR_SIZE); + } + public static int getUncoveredIntervalsLimit(Query<?> query) { return getUncoveredIntervalsLimit(query, DEFAULT_UNCOVERED_INTERVALS_LIMIT); @@ -239,4 +285,19 @@ public class QueryContexts private QueryContexts() { } + + static <E extends Enum<E>> E parseEnum(Query<?> query, String key, Class<E> clazz, E defaultValue) + { + Object val = query.getContextValue(key); + if (val == null) { + return defaultValue; + } + if (val instanceof String) { + return Enum.valueOf(clazz, StringUtils.toUpperCase((String) val)); + } else if (val instanceof Boolean) { + return Enum.valueOf(clazz, StringUtils.toUpperCase(String.valueOf(val))); + } else { + throw new ISE("Unknown type [%s]. Cannot parse!", val.getClass()); + } } diff --git a/processing/src/main/java/org/apache/druid/query/QueryMetrics.java b/processing/src/main/java/org/apache/druid/query/QueryMetrics.java index e7976630faa..e34d6619687 100644 --- a/processing/src/main/java/org/apache/druid/query/QueryMetrics.java +++ b/processing/src/main/java/org/apache/druid/query/QueryMetrics.java @@ -236,6 +236,12 @@ public interface QueryMetrics<QueryType extends Query<?>> */ void identity(String identity); + /** + * Sets whether or not a segment scan has been vectorized. Generally expected to only be attached to segment-level + * metrics, since at whole-query level we might have a mix of vectorized and non-vectorized segment scans. + */ + void vectorized(boolean vectorized); + /** * Creates a {@link BitmapResultFactory} which may record some information along bitmap construction from {@link * #preFilters(List)}. The returned BitmapResultFactory may add some dimensions to this QueryMetrics from its {@link diff --git a/processing/src/main/java/org/apache/druid/query/QueryRunnerHelper.java b/processing/src/main/java/org/apache/druid/query/QueryRunnerHelper.java index 53f281dbd86..c99b6aec574 100644 --- a/processing/src/main/java/org/apache/druid/query/QueryRunnerHelper.java +++ b/processing/src/main/java/org/apache/druid/query/QueryRunnerHelper.java @@ -21,7 +21,6 @@ package org.apache.druid.query; import com.google.common.base.Function; import com.google.common.base.Preconditions; -import com.google.common.base.Predicates; import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.java.util.common.guava.Sequences; @@ -34,19 +33,19 @@ import org.joda.time.Interval; import java.io.Closeable; import java.util.List; import java.util.Map; +import java.util.Objects; /** */ public class QueryRunnerHelper { - public static <T> Sequence<Result<T>> makeCursorBasedQuery( final StorageAdapter adapter, - List<Interval> queryIntervals, - Filter filter, - VirtualColumns virtualColumns, - boolean descending, - Granularity granularity, + final List<Interval> queryIntervals, + final Filter filter, + final VirtualColumns virtualColumns, + final boolean descending, + final Granularity granularity, + final Function<Cursor, Result<T>> mapFn ) { @@ -57,16 +56,9 @@ public class QueryRunnerHelper return Sequences.filter( Sequences.map( adapter.makeCursors(filter, queryIntervals.get(0), virtualColumns, granularity, descending, null), - new Function<Cursor, Result<T>>() - { - @Override - public Result<T> apply(Cursor input) - { - return mapFn.apply(input); - } - } + mapFn ), - Predicates.notNull() + Objects::nonNull ); } diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorAdapters.java b/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorAdapters.java new file mode 100644 index 00000000000..8ae7a33b08d --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorAdapters.java @@ -0,0 +1,389 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
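The `parseEnum` helper above accepts either a String (e.g. `"force"`) or a Boolean (e.g. `true`) context value. A standalone sketch of the same parsing logic, runnable outside Druid (names here are illustrative):

```java
import java.util.Locale;

public class ParseEnumSketch
{
  enum Vectorize { FALSE, TRUE, FORCE }

  // Lenient parsing: a String or Boolean context value maps to an enum
  // constant by upper-casing its string form; anything else is an error.
  static <E extends Enum<E>> E parseEnum(Object val, Class<E> clazz, E defaultValue)
  {
    if (val == null) {
      return defaultValue;
    }
    if (val instanceof String || val instanceof Boolean) {
      return Enum.valueOf(clazz, String.valueOf(val).toUpperCase(Locale.ENGLISH));
    }
    throw new IllegalStateException("Unknown type [" + val.getClass() + "]. Cannot parse!");
  }

  public static void main(String[] args)
  {
    System.out.println(parseEnum("force", Vectorize.class, Vectorize.FALSE)); // FORCE
    System.out.println(parseEnum(true, Vectorize.class, Vectorize.FALSE));    // TRUE
    System.out.println(parseEnum(null, Vectorize.class, Vectorize.FALSE));    // FALSE
  }
}
```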
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation; + +import com.google.common.primitives.Ints; +import org.apache.druid.java.util.common.ISE; +import org.apache.druid.java.util.common.logger.Logger; +import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; + +import javax.annotation.Nullable; +import java.io.Closeable; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + +/** + * A class that helps query engines use Buffer- or VectorAggregators in a consistent way. + * + * The two main benefits this class provides are: + * + * (1) Query engines can treat BufferAggregators and VectorAggregators the same for operations that are equivalent + * across them, like "init", "get", "relocate", and "close". + * (2) Query engines are freed from the need to manage how much space each individual aggregator needs. They only + * need to allocate a block of size "spaceNeeded". + */ +public class AggregatorAdapters implements Closeable +{ + private static final Logger log = new Logger(AggregatorAdapters.class); + + private final List adapters; + private final List factories; + private final int[] aggregatorPositions; + private final int spaceNeeded; + + private AggregatorAdapters(final List adapters) + { + this.adapters = adapters; + this.factories = adapters.stream().map(Adapter::getFactory).collect(Collectors.toList()); + this.aggregatorPositions = new int[adapters.size()]; + + long nextPosition = 0; + for (int i = 0; i < adapters.size(); i++) { + final AggregatorFactory aggregatorFactory = adapters.get(i).getFactory(); + aggregatorPositions[i] = Ints.checkedCast(nextPosition); + nextPosition += aggregatorFactory.getMaxIntermediateSizeWithNulls(); + } + + this.spaceNeeded = Ints.checkedCast(nextPosition); + } + + /** + * Create an adapters object based on {@link VectorAggregator}. + */ + public static AggregatorAdapters factorizeVector( + final VectorColumnSelectorFactory columnSelectorFactory, + final List aggregatorFactories + ) + { + final Adapter[] adapters = new Adapter[aggregatorFactories.size()]; + for (int i = 0; i < aggregatorFactories.size(); i++) { + final AggregatorFactory aggregatorFactory = aggregatorFactories.get(i); + adapters[i] = new VectorAggregatorAdapter( + aggregatorFactory, + aggregatorFactory.factorizeVector(columnSelectorFactory) + ); + } + + return new AggregatorAdapters(Arrays.asList(adapters)); + } + + /** + * Create an adapters object based on {@link BufferAggregator}. 
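The constructor above lays aggregators out back to back: each `aggregatorPositions[i]` is a running prefix sum of the preceding intermediate sizes, and the final total plays the role of `spaceNeeded`. A standalone sketch of that arithmetic (illustrative names, not the patch's code):

```java
public class AggregatorLayoutSketch
{
  // Given each aggregator's intermediate size in bytes, compute each
  // aggregator's start offset within one contiguous block; the running
  // total after the loop is the spaceNeeded() equivalent.
  static int[] computePositions(int[] sizes)
  {
    final int[] positions = new int[sizes.length];
    int next = 0;
    for (int i = 0; i < sizes.length; i++) {
      positions[i] = next;
      next += sizes[i];
    }
    return positions;
  }

  public static void main(String[] args)
  {
    // e.g. a longSum (8 bytes) followed by a count (8 bytes):
    final int[] positions = computePositions(new int[]{8, 8});
    System.out.println(positions[0] + ", " + positions[1]); // prints "0, 8"
  }
}
```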
+ */ + public static AggregatorAdapters factorizeBuffered( + final ColumnSelectorFactory columnSelectorFactory, + final List aggregatorFactories + ) + { + final Adapter[] adapters = new Adapter[aggregatorFactories.size()]; + for (int i = 0; i < aggregatorFactories.size(); i++) { + final AggregatorFactory aggregatorFactory = aggregatorFactories.get(i); + adapters[i] = new BufferAggregatorAdapter( + aggregatorFactory, + aggregatorFactory.factorizeBuffered(columnSelectorFactory) + ); + } + + return new AggregatorAdapters(Arrays.asList(adapters)); + } + + /** + * Return the amount of buffer bytes needed by all aggregators wrapped up in this object. + */ + public int spaceNeeded() + { + return spaceNeeded; + } + + /** + * Return the {@link AggregatorFactory} objects that were used to create this object. + */ + public List factories() + { + return factories; + } + + /** + * Return the individual positions of each aggregator within a hypothetical buffer of size {@link #spaceNeeded()}. + */ + public int[] aggregatorPositions() + { + return aggregatorPositions; + } + + /** + * Return the number of aggregators in this object. + */ + public int size() + { + return adapters.size(); + } + + /** + * Initialize all aggregators. + * + * @param buf aggregation buffer + * @param position position in buffer where our block of size {@link #spaceNeeded()} starts + */ + public void init(final ByteBuffer buf, final int position) + { + for (int i = 0; i < adapters.size(); i++) { + adapters.get(i).init(buf, position + aggregatorPositions[i]); + } + } + + /** + * Call {@link BufferAggregator#aggregate(ByteBuffer, int)} on all of our aggregators. + * + * This method is only valid if the underlying aggregators are {@link BufferAggregator}. + */ + public void aggregateBuffered(final ByteBuffer buf, final int position) + { + for (int i = 0; i < adapters.size(); i++) { + final Adapter adapter = adapters.get(i); + adapter.asBufferAggregator().aggregate(buf, position + aggregatorPositions[i]); + } + } + + /** + * Call {@link VectorAggregator#aggregate(ByteBuffer, int, int, int)} on all of our aggregators. + * + * This method is only valid if the underlying aggregators are {@link VectorAggregator}. + */ + public void aggregateVector( + final ByteBuffer buf, + final int position, + final int start, + final int end + ) + { + for (int i = 0; i < adapters.size(); i++) { + final Adapter adapter = adapters.get(i); + adapter.asVectorAggregator().aggregate(buf, position + aggregatorPositions[i], start, end); + } + } + + /** + * Call {@link VectorAggregator#aggregate(ByteBuffer, int, int[], int[], int)} on all of our aggregators. + * + * This method is only valid if the underlying aggregators are {@link VectorAggregator}. + */ + public void aggregateVector( + final ByteBuffer buf, + final int numRows, + final int[] positions, + @Nullable final int[] rows + ) + { + for (int i = 0; i < adapters.size(); i++) { + final Adapter adapter = adapters.get(i); + adapter.asVectorAggregator().aggregate(buf, numRows, positions, rows, aggregatorPositions[i]); + } + } + + /** + * Retrieve aggregation state from one of our aggregators. 
+ * + * @param buf aggregation buffer + * @param position position in buffer where our block of size {@link #spaceNeeded()} starts + * @param aggregatorNumber which aggregator to retrieve state, from 0 to {@link #size()} - 1 + */ + @Nullable + public Object get(final ByteBuffer buf, final int position, final int aggregatorNumber) + { + return adapters.get(aggregatorNumber).get(buf, position + aggregatorPositions[aggregatorNumber]); + } + + /** + * Inform all of our aggregators that they are being relocated. + */ + public void relocate(int oldPosition, int newPosition, ByteBuffer oldBuffer, ByteBuffer newBuffer) + { + for (int i = 0; i < adapters.size(); i++) { + adapters.get(i).relocate( + oldPosition + aggregatorPositions[i], + newPosition + aggregatorPositions[i], + oldBuffer, + newBuffer + ); + } + } + + /** + * Close all of our aggregators. + */ + @Override + public void close() + { + for (Adapter adapter : adapters) { + try { + adapter.close(); + } + catch (Exception e) { + log.warn(e, "Could not close aggregator [%s], skipping.", adapter.getFactory().getName()); + } + } + } + + /** + * The interface that allows this class to achieve its goals of partially unifying handling of + * BufferAggregator and VectorAggregator. Private, since it doesn't escape this class and the + * only two implementations are private static classes below. + */ + private interface Adapter extends Closeable + { + void init(ByteBuffer buf, int position); + + @Nullable + Object get(ByteBuffer buf, int position); + + void relocate(int oldPosition, int newPosition, ByteBuffer oldBuffer, ByteBuffer newBuffer); + + @Override + void close(); + + AggregatorFactory getFactory(); + + BufferAggregator asBufferAggregator(); + + VectorAggregator asVectorAggregator(); + } + + private static class VectorAggregatorAdapter implements Adapter + { + private final AggregatorFactory factory; + private final VectorAggregator aggregator; + + VectorAggregatorAdapter(final AggregatorFactory factory, final VectorAggregator aggregator) + { + this.factory = factory; + this.aggregator = aggregator; + } + + @Override + public void init(final ByteBuffer buf, final int position) + { + aggregator.init(buf, position); + } + + @Override + public Object get(final ByteBuffer buf, final int position) + { + return aggregator.get(buf, position); + } + + @Override + public void close() + { + aggregator.close(); + } + + @Override + public void relocate( + final int oldPosition, + final int newPosition, + final ByteBuffer oldBuffer, + final ByteBuffer newBuffer + ) + { + aggregator.relocate(oldPosition, newPosition, oldBuffer, newBuffer); + } + + @Override + public AggregatorFactory getFactory() + { + return factory; + } + + @Override + public BufferAggregator asBufferAggregator() + { + throw new ISE("Not a BufferAggregator!"); + } + + @Override + public VectorAggregator asVectorAggregator() + { + return aggregator; + } + } + + private static class BufferAggregatorAdapter implements Adapter + { + private final AggregatorFactory factory; + private final BufferAggregator aggregator; + + BufferAggregatorAdapter(final AggregatorFactory factory, final BufferAggregator aggregator) + { + this.factory = factory; + this.aggregator = aggregator; + } + + @Override + public void init(final ByteBuffer buf, final int position) + { + aggregator.init(buf, position); + } + + @Override + public Object get(final ByteBuffer buf, final int position) + { + return aggregator.get(buf, position); + } + + @Override + public void close() + { + aggregator.close(); + } + + 
@Override + public void relocate( + final int oldPosition, + final int newPosition, + final ByteBuffer oldBuffer, + final ByteBuffer newBuffer + ) + { + aggregator.relocate(oldPosition, newPosition, oldBuffer, newBuffer); + } + + @Override + public AggregatorFactory getFactory() + { + return factory; + } + + @Override + public BufferAggregator asBufferAggregator() + { + return aggregator; + } + + @Override + public VectorAggregator asVectorAggregator() + { + throw new ISE("Not a VectorAggregator!"); + } + } +} diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorFactory.java index 6b0f4a11c94..ced087bd7de 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorFactory.java @@ -25,6 +25,7 @@ import org.apache.druid.java.util.common.UOE; import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.query.PerSegmentQueryOptimizationContext; import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; import java.util.Arrays; @@ -49,6 +50,23 @@ public abstract class AggregatorFactory implements Cacheable public abstract BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory); + /** + * Create a VectorAggregator based on the provided column selector factory. Will throw an exception if + * this aggregation class does not support vectorization: check "canVectorize" first. + */ + public VectorAggregator factorizeVector(VectorColumnSelectorFactory selectorFactory) + { + throw new UOE("Aggregator[%s] cannot vectorize", getClass().getName()); + } + + /** + * Returns whether or not this aggregation class supports vectorization. The default implementation returns false. + */ + public boolean canVectorize() + { + return false; + } + public abstract Comparator getComparator(); /** diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/BufferAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/BufferAggregator.java index ed77c912c73..98608546ccc 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/BufferAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/BufferAggregator.java @@ -24,6 +24,7 @@ import org.apache.druid.query.monomorphicprocessing.CalledFromHotLoop; import org.apache.druid.query.monomorphicprocessing.HotLoopCallee; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import javax.annotation.Nullable; import java.nio.ByteBuffer; /** @@ -33,6 +34,8 @@ import java.nio.ByteBuffer; * * Thus, an Aggregator can be thought of as a closure over some other thing that is stateful and changes between calls * to aggregate(...). 
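Because the default `factorizeVector` throws, callers are expected to gate on `canVectorize()` before asking for a vector aggregator. A hedged sketch of that dispatch, using the types introduced in this patch (the helper method itself is hypothetical):

```java
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.segment.ColumnSelectorFactory;
import org.apache.druid.segment.vector.VectorColumnSelectorFactory;

public class FactorizeDispatchSketch
{
  // Hypothetical helper: prefer the vectorized aggregator when available,
  // falling back to the row-at-a-time buffer aggregator otherwise.
  static Object factorizeBest(
      AggregatorFactory factory,
      ColumnSelectorFactory rowSelectors,
      VectorColumnSelectorFactory vectorSelectors
  )
  {
    if (factory.canVectorize()) {
      return factory.factorizeVector(vectorSelectors); // safe only after the check
    }
    return factory.factorizeBuffered(rowSelectors);
  }
}
```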
+ * + * @see VectorAggregator, the vectorized version */ @ExtensionPoint public interface BufferAggregator extends HotLoopCallee @@ -87,6 +90,7 @@ public interface BufferAggregator extends HotLoopCallee * @param position offset within the byte buffer at which the aggregate value is stored * @return the Object representation of the aggregate */ + @Nullable Object get(ByteBuffer buf, int position); /** diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/CountAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/CountAggregatorFactory.java index f4329fef082..599a2c49202 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/CountAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/CountAggregatorFactory.java @@ -24,6 +24,7 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; import java.util.Collections; @@ -58,12 +59,24 @@ public class CountAggregatorFactory extends AggregatorFactory return new CountBufferAggregator(); } + @Override + public VectorAggregator factorizeVector(final VectorColumnSelectorFactory selectorFactory) + { + return new CountVectorAggregator(); + } + @Override public Comparator getComparator() { return CountAggregator.COMPARATOR; } + @Override + public boolean canVectorize() + { + return true; + } + @Override public Object combine(Object lhs, Object rhs) { diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/CountVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/CountVectorAggregator.java new file mode 100644 index 00000000000..fec4793274e --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/aggregation/CountVectorAggregator.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; + +public class CountVectorAggregator implements VectorAggregator +{ + @Override + public void init(final ByteBuffer buf, final int position) + { + buf.putLong(position, 0); + } + + @Override + public void aggregate(final ByteBuffer buf, final int position, final int startRow, final int endRow) + { + final int delta = endRow - startRow; + buf.putLong(position, buf.getLong(position) + delta); + } + + @Override + public void aggregate( + final ByteBuffer buf, + final int numRows, + final int[] positions, + @Nullable final int[] rows, + final int positionOffset + ) + { + for (int i = 0; i < numRows; i++) { + final int position = positions[i] + positionOffset; + buf.putLong(position, buf.getLong(position) + 1); + } + } + + @Override + public Object get(final ByteBuffer buf, final int position) + { + return buf.getLong(position); + } + + @Override + public void close() + { + // Nothing to close. + } +} diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/DoubleSumAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/DoubleSumAggregatorFactory.java index e5c8bf6c7f8..0292823056a 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/DoubleSumAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/DoubleSumAggregatorFactory.java @@ -26,6 +26,8 @@ import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.math.expr.ExprMacroTable; import org.apache.druid.segment.BaseDoubleColumnValueSelector; import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorValueSelector; import javax.annotation.Nullable; import java.nio.ByteBuffer; @@ -61,12 +63,24 @@ public class DoubleSumAggregatorFactory extends SimpleDoubleAggregatorFactory ); } + @Override + protected VectorValueSelector vectorSelector(VectorColumnSelectorFactory columnSelectorFactory) + { + return columnSelectorFactory.makeValueSelector(fieldName); + } + @Override protected Aggregator factorize(ColumnSelectorFactory metricFactory, BaseDoubleColumnValueSelector selector) { return new DoubleSumAggregator(selector); } + @Override + public boolean canVectorize() + { + return expression == null; + } + @Override protected BufferAggregator factorizeBuffered( ColumnSelectorFactory metricFactory, @@ -76,6 +90,15 @@ public class DoubleSumAggregatorFactory extends SimpleDoubleAggregatorFactory return new DoubleSumBufferAggregator(selector); } + @Override + protected VectorAggregator factorizeVector( + VectorColumnSelectorFactory columnSelectorFactory, + VectorValueSelector selector + ) + { + return new DoubleSumVectorAggregator(selector); + } + @Override @Nullable public Object combine(@Nullable Object lhs, @Nullable Object rhs) diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/DoubleSumVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/DoubleSumVectorAggregator.java new file mode 100644 index 00000000000..f66a4f406be --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/aggregation/DoubleSumVectorAggregator.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation; + +import org.apache.druid.segment.vector.VectorValueSelector; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; + +public class DoubleSumVectorAggregator implements VectorAggregator +{ + private final VectorValueSelector selector; + + public DoubleSumVectorAggregator(final VectorValueSelector selector) + { + this.selector = selector; + } + + @Override + public void init(final ByteBuffer buf, final int position) + { + buf.putDouble(position, 0); + } + + @Override + public void aggregate(final ByteBuffer buf, final int position, final int startRow, final int endRow) + { + final double[] vector = selector.getDoubleVector(); + + double sum = 0; + for (int i = startRow; i < endRow; i++) { + sum += vector[i]; + } + + buf.putDouble(position, buf.getDouble(position) + sum); + } + + @Override + public void aggregate( + final ByteBuffer buf, + final int numRows, + final int[] positions, + @Nullable final int[] rows, + final int positionOffset + ) + { + final double[] vector = selector.getDoubleVector(); + + for (int i = 0; i < numRows; i++) { + final int position = positions[i] + positionOffset; + buf.putDouble(position, buf.getDouble(position) + vector[rows != null ? rows[i] : i]); + } + } + + @Override + public Object get(final ByteBuffer buf, final int position) + { + return buf.getDouble(position); + } + + @Override + public void close() + { + // Nothing to close. 
+ } +} diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/FilteredAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/FilteredAggregatorFactory.java index 52f072be085..47d17556928 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/FilteredAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/FilteredAggregatorFactory.java @@ -25,11 +25,13 @@ import com.google.common.base.Preconditions; import com.google.common.base.Strings; import org.apache.druid.query.PerSegmentQueryOptimizationContext; import org.apache.druid.query.filter.DimFilter; +import org.apache.druid.query.filter.Filter; import org.apache.druid.query.filter.IntervalDimFilter; import org.apache.druid.query.filter.ValueMatcher; +import org.apache.druid.query.filter.vector.VectorValueMatcher; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.column.ColumnHolder; -import org.apache.druid.segment.filter.Filters; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import org.joda.time.Interval; import javax.annotation.Nullable; @@ -42,7 +44,10 @@ import java.util.Objects; public class FilteredAggregatorFactory extends AggregatorFactory { private final AggregatorFactory delegate; - private final DimFilter filter; + private final DimFilter dimFilter; + private final Filter filter; + + @Nullable private final String name; // Constructor for backwards compat only @@ -57,22 +62,23 @@ public class FilteredAggregatorFactory extends AggregatorFactory @JsonCreator public FilteredAggregatorFactory( @JsonProperty("aggregator") AggregatorFactory delegate, - @JsonProperty("filter") DimFilter filter, - @JsonProperty("name") String name + @JsonProperty("filter") DimFilter dimFilter, + @Nullable @JsonProperty("name") String name ) { - Preconditions.checkNotNull(delegate); - Preconditions.checkNotNull(filter); + Preconditions.checkNotNull(delegate, "aggregator"); + Preconditions.checkNotNull(dimFilter, "filter"); this.delegate = delegate; - this.filter = filter; + this.dimFilter = dimFilter; + this.filter = dimFilter.toFilter(); this.name = name; } @Override public Aggregator factorize(ColumnSelectorFactory columnSelectorFactory) { - final ValueMatcher valueMatcher = Filters.toFilter(filter).makeMatcher(columnSelectorFactory); + final ValueMatcher valueMatcher = filter.makeMatcher(columnSelectorFactory); return new FilteredAggregator( valueMatcher, delegate.factorize(columnSelectorFactory) @@ -82,13 +88,30 @@ public class FilteredAggregatorFactory extends AggregatorFactory @Override public BufferAggregator factorizeBuffered(ColumnSelectorFactory columnSelectorFactory) { - final ValueMatcher valueMatcher = Filters.toFilter(filter).makeMatcher(columnSelectorFactory); + final ValueMatcher valueMatcher = filter.makeMatcher(columnSelectorFactory); return new FilteredBufferAggregator( valueMatcher, delegate.factorizeBuffered(columnSelectorFactory) ); } + @Override + public VectorAggregator factorizeVector(VectorColumnSelectorFactory columnSelectorFactory) + { + Preconditions.checkState(canVectorize(), "Cannot vectorize"); + final VectorValueMatcher valueMatcher = filter.makeVectorMatcher(columnSelectorFactory); + return new FilteredVectorAggregator( + valueMatcher, + delegate.factorizeVector(columnSelectorFactory) + ); + } + + @Override + public boolean canVectorize() + { + return delegate.canVectorize() && filter.canVectorizeMatcher(); + } + @Override public Comparator getComparator() { @@ 
-147,7 +170,7 @@ public class FilteredAggregatorFactory extends AggregatorFactory @Override public byte[] getCacheKey() { - byte[] filterCacheKey = filter.getCacheKey(); + byte[] filterCacheKey = dimFilter.getCacheKey(); byte[] aggregatorCacheKey = delegate.getCacheKey(); return ByteBuffer.allocate(1 + filterCacheKey.length + aggregatorCacheKey.length) .put(AggregatorUtil.FILTERED_AGG_CACHE_TYPE_ID) @@ -171,8 +194,8 @@ public class FilteredAggregatorFactory extends AggregatorFactory @Override public AggregatorFactory optimizeForSegment(PerSegmentQueryOptimizationContext optimizationContext) { - if (filter instanceof IntervalDimFilter) { - IntervalDimFilter intervalDimFilter = ((IntervalDimFilter) filter); + if (dimFilter instanceof IntervalDimFilter) { + IntervalDimFilter intervalDimFilter = ((IntervalDimFilter) dimFilter); if (intervalDimFilter.getExtractionFn() != null) { // no support for extraction functions right now return this; @@ -238,7 +261,7 @@ public class FilteredAggregatorFactory extends AggregatorFactory @JsonProperty public DimFilter getFilter() { - return filter; + return dimFilter; } @Override @@ -248,7 +271,7 @@ public class FilteredAggregatorFactory extends AggregatorFactory } @Override - public boolean equals(Object o) + public boolean equals(final Object o) { if (this == o) { return true; @@ -256,16 +279,17 @@ public class FilteredAggregatorFactory extends AggregatorFactory if (o == null || getClass() != o.getClass()) { return false; } - FilteredAggregatorFactory that = (FilteredAggregatorFactory) o; + final FilteredAggregatorFactory that = (FilteredAggregatorFactory) o; return Objects.equals(delegate, that.delegate) && - Objects.equals(filter, that.filter) && + Objects.equals(dimFilter, that.dimFilter) && Objects.equals(name, that.name); } @Override public int hashCode() { - return Objects.hash(delegate, filter, name); + + return Objects.hash(delegate, dimFilter, name); } @Override @@ -273,7 +297,7 @@ public class FilteredAggregatorFactory extends AggregatorFactory { return "FilteredAggregatorFactory{" + "delegate=" + delegate + - ", filter=" + filter + + ", dimFilter=" + dimFilter + ", name='" + name + '\'' + '}'; } diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/FilteredVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/FilteredVectorAggregator.java new file mode 100644 index 00000000000..40c0490dbb7 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/aggregation/FilteredVectorAggregator.java @@ -0,0 +1,153 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation; + +import org.apache.druid.java.util.common.ISE; +import org.apache.druid.query.filter.vector.ReadableVectorMatch; +import org.apache.druid.query.filter.vector.VectorMatch; +import org.apache.druid.query.filter.vector.VectorValueMatcher; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; +import java.util.Arrays; + +public class FilteredVectorAggregator implements VectorAggregator +{ + private final VectorValueMatcher matcher; + private final VectorAggregator delegate; + private final int[] delegatePositions; + + @Nullable + private VectorMatch maskScratch = null; + + public FilteredVectorAggregator( + final VectorValueMatcher matcher, + final VectorAggregator delegate + ) + { + this.matcher = matcher; + this.delegate = delegate; + this.delegatePositions = new int[matcher.getMaxVectorSize()]; + } + + @Override + public void init(final ByteBuffer buf, final int position) + { + delegate.init(buf, position); + } + + @Override + public void aggregate(final ByteBuffer buf, final int position, final int startRow, final int endRow) + { + final ReadableVectorMatch mask; + + if (startRow == 0) { + mask = VectorMatch.allTrue(endRow); + } else { + if (maskScratch == null) { + maskScratch = VectorMatch.wrap(new int[matcher.getMaxVectorSize()]); + } + + final int maskSize = endRow - startRow; + final int[] maskArray = maskScratch.getSelection(); + for (int i = 0; i < maskSize; i++) { + maskArray[i] = startRow + i; + } + + maskScratch.setSelectionSize(maskSize); + mask = maskScratch; + } + + final ReadableVectorMatch match = matcher.match(mask); + + if (match.isAllTrue(matcher.getCurrentVectorSize())) { + delegate.aggregate(buf, position, startRow, endRow); + } else if (!match.isAllFalse()) { + Arrays.fill(delegatePositions, 0, match.getSelectionSize(), position); + delegate.aggregate(buf, match.getSelectionSize(), delegatePositions, match.getSelection(), 0); + } + } + + @Override + public void aggregate( + final ByteBuffer buf, + final int numRows, + final int[] positions, + @Nullable final int[] rows, + final int positionOffset + ) + { + final ReadableVectorMatch match0; + + if (rows == null) { + match0 = VectorMatch.allTrue(numRows); + } else { + match0 = VectorMatch.wrap(rows).setSelectionSize(numRows); + } + + final ReadableVectorMatch match = matcher.match(match0); + final int[] selection = match.getSelection(); + + if (rows == null) { + for (int i = 0; i < match.getSelectionSize(); i++) { + delegatePositions[i] = positions[selection[i]]; + } + } else { + // i iterates over the match; j iterates over the "rows" array + for (int i = 0, j = 0; i < match.getSelectionSize(); i++) { + for (; rows[j] < selection[i]; j++) { + // Do nothing; the for loop is doing the work of incrementing j. 
+ } + + if (rows[j] != selection[i]) { + throw new ISE("Selection contained phantom row[%d]", selection[i]); + } + + delegatePositions[i] = positions[j]; + } + } + + delegate.aggregate(buf, match.getSelectionSize(), delegatePositions, selection, positionOffset); + } + + @Override + public Object get(final ByteBuffer buf, final int position) + { + return delegate.get(buf, position); + } + + @Override + public void close() + { + delegate.close(); + maskScratch = null; + } + + @Override + public void relocate( + final int oldPosition, + final int newPosition, + final ByteBuffer oldBuffer, + final ByteBuffer newBuffer + ) + { + delegate.relocate(oldPosition, newPosition, oldBuffer, newBuffer); + } +} diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/FloatSumAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/FloatSumAggregatorFactory.java index 17a9f9df74c..debe44d518f 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/FloatSumAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/FloatSumAggregatorFactory.java @@ -26,6 +26,8 @@ import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.math.expr.ExprMacroTable; import org.apache.druid.segment.BaseFloatColumnValueSelector; import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorValueSelector; import javax.annotation.Nullable; import java.nio.ByteBuffer; @@ -61,12 +63,24 @@ public class FloatSumAggregatorFactory extends SimpleFloatAggregatorFactory ); } + @Override + protected VectorValueSelector vectorSelector(VectorColumnSelectorFactory columnSelectorFactory) + { + return columnSelectorFactory.makeValueSelector(fieldName); + } + @Override protected Aggregator factorize(ColumnSelectorFactory metricFactory, BaseFloatColumnValueSelector selector) { return new FloatSumAggregator(selector); } + @Override + public boolean canVectorize() + { + return expression == null; + } + @Override protected BufferAggregator factorizeBuffered( ColumnSelectorFactory metricFactory, @@ -76,6 +90,15 @@ public class FloatSumAggregatorFactory extends SimpleFloatAggregatorFactory return new FloatSumBufferAggregator(selector); } + @Override + protected VectorAggregator factorizeVector( + VectorColumnSelectorFactory columnSelectorFactory, + VectorValueSelector selector + ) + { + return new FloatSumVectorAggregator(selector); + } + @Override @Nullable public Object combine(@Nullable Object lhs, @Nullable Object rhs) diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/FloatSumVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/FloatSumVectorAggregator.java new file mode 100644 index 00000000000..d5bd54e8a86 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/aggregation/FloatSumVectorAggregator.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
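The two-pointer loop in `FilteredVectorAggregator.aggregate` above aligns the matcher's selection with the caller's `rows`/`positions` arrays: both are sorted row lists, and for each selected row we need its index in `rows` to pick up the matching position. A worked, self-contained sketch of that alignment (illustrative names):

```java
public class AlignSelectionSketch
{
  // rows:      sorted row ids the caller asked to aggregate
  // positions: the aggregation slot for each entry of "rows"
  // selection: sorted subset of "rows" that passed the filter
  // Returns the slot for each selected row.
  static int[] alignPositions(int[] rows, int[] positions, int[] selection, int selectionSize)
  {
    final int[] delegatePositions = new int[selectionSize];
    for (int i = 0, j = 0; i < selectionSize; i++) {
      while (rows[j] < selection[i]) {
        j++; // advance through "rows" until it catches up with the selection
      }
      if (rows[j] != selection[i]) {
        throw new IllegalStateException("Selection contained phantom row " + selection[i]);
      }
      delegatePositions[i] = positions[j];
    }
    return delegatePositions;
  }

  public static void main(String[] args)
  {
    final int[] rows = {1, 4, 7, 9};
    final int[] positions = {0, 16, 32, 48};
    final int[] selection = {4, 9}; // rows 4 and 9 passed the filter
    final int[] aligned = alignPositions(rows, positions, selection, 2);
    System.out.println(aligned[0] + ", " + aligned[1]); // prints "16, 48"
  }
}
```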
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation; + +import org.apache.druid.segment.vector.VectorValueSelector; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; + +public class FloatSumVectorAggregator implements VectorAggregator +{ + private final VectorValueSelector selector; + + public FloatSumVectorAggregator(final VectorValueSelector selector) + { + this.selector = selector; + } + + @Override + public void init(final ByteBuffer buf, final int position) + { + buf.putFloat(position, 0); + } + + @Override + public void aggregate(final ByteBuffer buf, final int position, final int startRow, final int endRow) + { + final float[] vector = selector.getFloatVector(); + + float sum = 0; + for (int i = startRow; i < endRow; i++) { + sum += vector[i]; + } + + buf.putFloat(position, buf.getFloat(position) + sum); + } + + + @Override + public void aggregate( + final ByteBuffer buf, + final int numPositions, + final int[] positions, + @Nullable final int[] rows, + final int positionOffset + ) + { + final float[] vector = selector.getFloatVector(); + + for (int i = 0; i < numPositions; i++) { + final int position = positions[i] + positionOffset; + buf.putFloat(position, buf.getFloat(position) + vector[rows != null ? rows[i] : i]); + } + } + + @Override + public Object get(final ByteBuffer buf, final int position) + { + return buf.getFloat(position); + } + + @Override + public void close() + { + // Nothing to close. 
+ } +} diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/LongSumAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/LongSumAggregatorFactory.java index 0fc2c694137..accfc787f7a 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/LongSumAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/LongSumAggregatorFactory.java @@ -26,6 +26,8 @@ import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.math.expr.ExprMacroTable; import org.apache.druid.segment.BaseLongColumnValueSelector; import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorValueSelector; import javax.annotation.Nullable; import java.nio.ByteBuffer; @@ -62,9 +64,9 @@ public class LongSumAggregatorFactory extends SimpleLongAggregatorFactory } @Override - protected Aggregator factorize(ColumnSelectorFactory metricFactory, BaseLongColumnValueSelector selector) + protected VectorValueSelector vectorSelector(VectorColumnSelectorFactory columnSelectorFactory) { - return new LongSumAggregator(selector); + return columnSelectorFactory.makeValueSelector(fieldName); } @Override @@ -76,6 +78,27 @@ public class LongSumAggregatorFactory extends SimpleLongAggregatorFactory return new LongSumBufferAggregator(selector); } + @Override + protected VectorAggregator factorizeVector( + VectorColumnSelectorFactory columnSelectorFactory, + VectorValueSelector selector + ) + { + return new LongSumVectorAggregator(selector); + } + + @Override + protected Aggregator factorize(ColumnSelectorFactory metricFactory, BaseLongColumnValueSelector selector) + { + return new LongSumAggregator(selector); + } + + @Override + public boolean canVectorize() + { + return expression == null; + } + @Override @Nullable public Object combine(@Nullable Object lhs, @Nullable Object rhs) diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/LongSumVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/LongSumVectorAggregator.java new file mode 100644 index 00000000000..24496c7c480 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/aggregation/LongSumVectorAggregator.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation; + +import org.apache.druid.segment.vector.VectorValueSelector; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; + +public class LongSumVectorAggregator implements VectorAggregator +{ + private final VectorValueSelector selector; + + public LongSumVectorAggregator(final VectorValueSelector selector) + { + this.selector = selector; + } + + @Override + public void init(final ByteBuffer buf, final int position) + { + buf.putLong(position, 0); + } + + @Override + public void aggregate(final ByteBuffer buf, final int position, final int startRow, final int endRow) + { + final long[] vector = selector.getLongVector(); + + long sum = 0; + for (int i = startRow; i < endRow; i++) { + sum += vector[i]; + } + + buf.putLong(position, buf.getLong(position) + sum); + } + + @Override + public void aggregate( + final ByteBuffer buf, + final int numRows, + final int[] positions, + @Nullable final int[] rows, + final int positionOffset + ) + { + final long[] vector = selector.getLongVector(); + + for (int i = 0; i < numRows; i++) { + final int position = positions[i] + positionOffset; + buf.putLong(position, buf.getLong(position) + vector[rows != null ? rows[i] : i]); + } + } + + @Override + public Object get(final ByteBuffer buf, final int position) + { + return buf.getLong(position); + } + + @Override + public void close() + { + // Nothing to close. + } +} diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/NoopVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/NoopVectorAggregator.java new file mode 100644 index 00000000000..a57f7a99124 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/aggregation/NoopVectorAggregator.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; + +public class NoopVectorAggregator implements VectorAggregator +{ + private static final NoopVectorAggregator INSTANCE = new NoopVectorAggregator(); + + public static NoopVectorAggregator instance() + { + return INSTANCE; + } + + private NoopVectorAggregator() + { + // Singleton. + } + + @Override + public void init(ByteBuffer buf, int position) + { + // Do nothing. + } + + @Override + public void aggregate(final ByteBuffer buf, final int position, final int startRow, final int endRow) + { + // Do nothing. + } + + @Override + public void aggregate( + final ByteBuffer buf, + final int numRows, + final int[] positions, + @Nullable final int[] rows, + final int positionOffset + ) + { + // Do nothing. 
+  }
+
+  @Override
+  public Object get(ByteBuffer buf, int position)
+  {
+    return null;
+  }
+
+  @Override
+  public void close()
+  {
+    // Do nothing.
+  }
+}
diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/NullableAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/NullableAggregatorFactory.java
index 1c7d4b231fe..e88eac3428e 100644
--- a/processing/src/main/java/org/apache/druid/query/aggregation/NullableAggregatorFactory.java
+++ b/processing/src/main/java/org/apache/druid/query/aggregation/NullableAggregatorFactory.java
@@ -20,11 +20,14 @@
 package org.apache.druid.query.aggregation;

+import com.google.common.base.Preconditions;
 import org.apache.druid.common.config.NullHandling;
 import org.apache.druid.guice.annotations.ExtensionPoint;
 import org.apache.druid.segment.BaseNullableColumnValueSelector;
 import org.apache.druid.segment.ColumnSelectorFactory;
 import org.apache.druid.segment.ColumnValueSelector;
+import org.apache.druid.segment.vector.VectorColumnSelectorFactory;
+import org.apache.druid.segment.vector.VectorValueSelector;

 /**
  * Abstract class with functionality to wrap {@link Aggregator}, {@link BufferAggregator} and {@link AggregateCombiner}
  *
@@ -35,21 +38,30 @@ import org.apache.druid.segment.ColumnValueSelector;
 public abstract class NullableAggregatorFactory<T extends BaseNullableColumnValueSelector> extends AggregatorFactory
 {
   @Override
-  public final Aggregator factorize(ColumnSelectorFactory metricFactory)
+  public final Aggregator factorize(ColumnSelectorFactory columnSelectorFactory)
   {
-    T selector = selector(metricFactory);
-    Aggregator aggregator = factorize(metricFactory, selector);
+    T selector = selector(columnSelectorFactory);
+    Aggregator aggregator = factorize(columnSelectorFactory, selector);
     return NullHandling.replaceWithDefault() ? aggregator : new NullableAggregator(aggregator, selector);
   }

   @Override
-  public final BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory)
+  public final BufferAggregator factorizeBuffered(ColumnSelectorFactory columnSelectorFactory)
   {
-    T selector = selector(metricFactory);
-    BufferAggregator aggregator = factorizeBuffered(metricFactory, selector);
+    T selector = selector(columnSelectorFactory);
+    BufferAggregator aggregator = factorizeBuffered(columnSelectorFactory, selector);
     return NullHandling.replaceWithDefault() ? aggregator : new NullableBufferAggregator(aggregator, selector);
   }

+  @Override
+  public final VectorAggregator factorizeVector(VectorColumnSelectorFactory columnSelectorFactory)
+  {
+    Preconditions.checkState(canVectorize(), "Cannot vectorize");
+    VectorValueSelector selector = vectorSelector(columnSelectorFactory);
+    VectorAggregator aggregator = factorizeVector(columnSelectorFactory, selector);
+    return NullHandling.replaceWithDefault() ?
aggregator : new NullableVectorAggregator(aggregator, selector);
+  }
+
   @Override
   public final AggregateCombiner makeNullableAggregateCombiner()
   {
@@ -70,26 +82,59 @@ public abstract class NullableAggregatorFactory<T extends BaseNullableColumnValueSelector>
diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/VectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/VectorAggregator.java
new file mode 100644
--- /dev/null
+++ b/processing/src/main/java/org/apache/druid/query/aggregation/VectorAggregator.java
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.aggregation;
+
+import javax.annotation.Nullable;
+import java.nio.ByteBuffer;
+
+public interface VectorAggregator
+{
+  /**
+   * Same as {@link BufferAggregator#init}.
+   */
+  void init(ByteBuffer buf, int position);
+
+  /**
+   * Aggregate a range of rows into a single aggregation slot.
+   *
+   * Implementations must not change the position, limit or mark of the given buffer
+   *
+   * @param buf            byte buffer storing the byte array representation of the aggregate
+   * @param position       offset within the byte buffer at which the current aggregate value is stored
+   * @param startRow       first row of the range within the current batch to aggregate (inclusive)
+   * @param endRow         end row of the range (exclusive)
+   */
+  void aggregate(ByteBuffer buf, int position, int startRow, int endRow);
+
+  /**
+   * Aggregate a list of rows ("rows") into a list of aggregation slots ("positions").
+   *
+   * Implementations must not change the position, limit or mark of the given buffer
+   *
+   * @param buf            byte buffer storing the byte array representation of the aggregate
+   * @param numRows        number of rows to aggregate
+   * @param positions      array of aggregate value positions within the buffer; must be at least as long as "numRows"
+   * @param rows           array of row numbers within the current row batch; must be at least as long as "numRows". If
+   *                       null, the aggregator will aggregate rows from 0 (inclusive) to numRows (exclusive).
+   * @param positionOffset an offset to apply to each value from "positions"
+   */
+  void aggregate(ByteBuffer buf, int numRows, int[] positions, @Nullable int[] rows, int positionOffset);
+
+  /**
+   * Same as {@link BufferAggregator#get}.
+   */
+  @Nullable
+  Object get(ByteBuffer buf, int position);
+
+  /**
+   * Same as {@link BufferAggregator#relocate}.
+   */
+  default void relocate(int oldPosition, int newPosition, ByteBuffer oldBuffer, ByteBuffer newBuffer)
+  {
+  }
+
+  /**
+   * Release any resources used by the aggregator.
+ */ + void close(); +} diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/cardinality/CardinalityBufferAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/cardinality/CardinalityBufferAggregator.java index 875cd3d8be8..64d70d47459 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/cardinality/CardinalityBufferAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/cardinality/CardinalityBufferAggregator.java @@ -24,6 +24,7 @@ import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.query.ColumnSelectorPlus; import org.apache.druid.query.aggregation.BufferAggregator; import org.apache.druid.query.aggregation.cardinality.types.CardinalityAggregatorColumnSelectorStrategy; +import org.apache.druid.query.aggregation.hyperloglog.HyperUniquesBufferAggregator; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import java.nio.ByteBuffer; @@ -33,8 +34,6 @@ public class CardinalityBufferAggregator implements BufferAggregator private final ColumnSelectorPlus[] selectorPluses; private final boolean byRow; - private static final byte[] EMPTY_BYTES = HyperLogLogCollector.makeEmptyVersionedByteArray(); - CardinalityBufferAggregator( ColumnSelectorPlus[] selectorPluses, boolean byRow @@ -47,9 +46,7 @@ public class CardinalityBufferAggregator implements BufferAggregator @Override public void init(ByteBuffer buf, int position) { - final ByteBuffer mutationBuffer = buf.duplicate(); - mutationBuffer.position(position); - mutationBuffer.put(EMPTY_BYTES); + HyperUniquesBufferAggregator.doInit(buf, position); } @Override @@ -78,11 +75,7 @@ public class CardinalityBufferAggregator implements BufferAggregator @Override public Object get(ByteBuffer buf, int position) { - ByteBuffer dataCopyBuffer = ByteBuffer.allocate(HyperLogLogCollector.getLatestNumBytesForDenseStorage()); - ByteBuffer mutationBuffer = buf.duplicate(); - mutationBuffer.position(position); - mutationBuffer.get(dataCopyBuffer.array()); - return HyperLogLogCollector.makeCollector(dataCopyBuffer); + return HyperUniquesBufferAggregator.doGet(buf, position); } @Override diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactory.java index 27f1650688a..b3ab8c3b4ef 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactory.java @@ -33,11 +33,16 @@ import org.apache.druid.query.aggregation.AggregatorUtil; import org.apache.druid.query.aggregation.BufferAggregator; import org.apache.druid.query.aggregation.NoopAggregator; import org.apache.druid.query.aggregation.NoopBufferAggregator; +import org.apache.druid.query.aggregation.NoopVectorAggregator; +import org.apache.druid.query.aggregation.VectorAggregator; import org.apache.druid.query.aggregation.cardinality.HyperLogLogCollectorAggregateCombiner; import org.apache.druid.query.cache.CacheKeyBuilder; import org.apache.druid.segment.BaseObjectColumnValueSelector; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.NilColumnValueSelector; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ValueType; +import 
org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; import java.nio.ByteBuffer; @@ -123,6 +128,23 @@ public class HyperUniquesAggregatorFactory extends AggregatorFactory throw new IAE("Incompatible type for metric[%s], expected a HyperUnique, got a %s", fieldName, classOfObject); } + @Override + public VectorAggregator factorizeVector(final VectorColumnSelectorFactory selectorFactory) + { + final ColumnCapabilities capabilities = selectorFactory.getColumnCapabilities(fieldName); + if (capabilities == null || capabilities.getType() != ValueType.COMPLEX) { + return NoopVectorAggregator.instance(); + } else { + return new HyperUniquesVectorAggregator(selectorFactory.makeObjectSelector(fieldName)); + } + } + + @Override + public boolean canVectorize() + { + return true; + } + @Override public Comparator getComparator() { diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesBufferAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesBufferAggregator.java index c7f31f4fc9d..3eba440aee6 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesBufferAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesBufferAggregator.java @@ -38,14 +38,31 @@ public class HyperUniquesBufferAggregator implements BufferAggregator this.selector = selector; } - @Override - public void init(ByteBuffer buf, int position) + public static void doInit(ByteBuffer buf, int position) { final ByteBuffer mutationBuffer = buf.duplicate(); mutationBuffer.position(position); mutationBuffer.put(EMPTY_BYTES); } + public static HyperLogLogCollector doGet(ByteBuffer buf, int position) + { + final int size = HyperLogLogCollector.getLatestNumBytesForDenseStorage(); + ByteBuffer dataCopyBuffer = ByteBuffer.allocate(size); + ByteBuffer mutationBuffer = buf.duplicate(); + mutationBuffer.position(position); + mutationBuffer.limit(position + size); + dataCopyBuffer.put(mutationBuffer); + dataCopyBuffer.rewind(); + return HyperLogLogCollector.makeCollector(dataCopyBuffer); + } + + @Override + public void init(ByteBuffer buf, int position) + { + doInit(buf, position); + } + @Override public void aggregate(ByteBuffer buf, int position) { @@ -73,14 +90,7 @@ public class HyperUniquesBufferAggregator implements BufferAggregator @Override public Object get(ByteBuffer buf, int position) { - final int size = HyperLogLogCollector.getLatestNumBytesForDenseStorage(); - ByteBuffer dataCopyBuffer = ByteBuffer.allocate(size); - ByteBuffer mutationBuffer = buf.duplicate(); - mutationBuffer.position(position); - mutationBuffer.limit(position + size); - dataCopyBuffer.put(mutationBuffer); - dataCopyBuffer.rewind(); - return HyperLogLogCollector.makeCollector(dataCopyBuffer); + return doGet(buf, position); } @Override diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesVectorAggregator.java new file mode 100644 index 00000000000..616b9de6d6a --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesVectorAggregator.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.hyperloglog; + +import com.google.common.base.Preconditions; +import org.apache.druid.hll.HyperLogLogCollector; +import org.apache.druid.query.aggregation.VectorAggregator; +import org.apache.druid.segment.vector.VectorObjectSelector; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; + +public class HyperUniquesVectorAggregator implements VectorAggregator +{ + private final VectorObjectSelector selector; + + public HyperUniquesVectorAggregator(final VectorObjectSelector selector) + { + this.selector = Preconditions.checkNotNull(selector, "selector"); + } + + @Override + public void init(final ByteBuffer buf, final int position) + { + HyperUniquesBufferAggregator.doInit(buf, position); + } + + @Override + public void aggregate(final ByteBuffer buf, final int position, final int startRow, final int endRow) + { + // Save position, limit and restore later instead of allocating a new ByteBuffer object + final int oldPosition = buf.position(); + final int oldLimit = buf.limit(); + buf.limit(position + HyperLogLogCollector.getLatestNumBytesForDenseStorage()); + buf.position(position); + + try { + final HyperLogLogCollector collector = HyperLogLogCollector.makeCollector(buf); + final Object[] vector = selector.getObjectVector(); + for (int i = startRow; i < endRow; i++) { + final HyperLogLogCollector otherCollector = (HyperLogLogCollector) vector[i]; + if (otherCollector != null) { + collector.fold(otherCollector); + } + } + } + finally { + buf.limit(oldLimit); + buf.position(oldPosition); + } + } + + @Override + public void aggregate( + final ByteBuffer buf, + final int numRows, + final int[] positions, + @Nullable final int[] rows, + final int positionOffset + ) + { + final Object[] vector = selector.getObjectVector(); + + for (int i = 0; i < numRows; i++) { + final HyperLogLogCollector otherCollector = (HyperLogLogCollector) vector[rows != null ? rows[i] : i]; + if (otherCollector == null) { + continue; + } + + final int position = positions[i] + positionOffset; + + // Save position, limit and restore later instead of allocating a new ByteBuffer object + final int oldPosition = buf.position(); + final int oldLimit = buf.limit(); + buf.limit(position + HyperLogLogCollector.getLatestNumBytesForDenseStorage()); + buf.position(position); + + try { + HyperLogLogCollector.makeCollector(buf).fold(otherCollector); + } + finally { + buf.limit(oldLimit); + buf.position(oldPosition); + } + } + } + + @Override + public Object get(final ByteBuffer buf, final int position) + { + return HyperUniquesBufferAggregator.doGet(buf, position); + } + + @Override + public void close() + { + // Nothing to close. 
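The position/limit save-and-restore used throughout HyperUniquesVectorAggregator avoids allocating a duplicate buffer view per batch. A minimal standalone sketch of that pattern (hypothetical helper, not part of the patch; the limit is set before the position, mirroring the code above, since position may never exceed limit):

import java.nio.ByteBuffer;

public class BufferRegionSketch
{
  // Temporarily re-point an existing buffer at [position, position + length), run some work
  // against that window, and restore the original position and limit in a finally block.
  static void withRegion(ByteBuffer buf, int position, int length, Runnable work)
  {
    final int oldPosition = buf.position();
    final int oldLimit = buf.limit();
    buf.limit(position + length);
    buf.position(position);
    try {
      work.run();
    }
    finally {
      buf.limit(oldLimit);
      buf.position(oldPosition);
    }
  }

  public static void main(String[] args)
  {
    final ByteBuffer buf = ByteBuffer.allocate(32);
    withRegion(buf, 8, 16, () -> System.out.println(buf.remaining())); // 16
    System.out.println(buf.position() + " " + buf.limit()); // 0 32
  }
}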
+ } +} diff --git a/processing/src/main/java/org/apache/druid/query/dimension/DefaultDimensionSpec.java b/processing/src/main/java/org/apache/druid/query/dimension/DefaultDimensionSpec.java index 921402c8da9..ddc499c95e4 100644 --- a/processing/src/main/java/org/apache/druid/query/dimension/DefaultDimensionSpec.java +++ b/processing/src/main/java/org/apache/druid/query/dimension/DefaultDimensionSpec.java @@ -25,6 +25,8 @@ import org.apache.druid.query.cache.CacheKeyBuilder; import org.apache.druid.query.extraction.ExtractionFn; import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.column.ValueType; +import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; +import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; import javax.annotation.Nullable; import java.util.Arrays; @@ -110,6 +112,24 @@ public class DefaultDimensionSpec implements DimensionSpec return selector; } + @Override + public SingleValueDimensionVectorSelector decorate(final SingleValueDimensionVectorSelector selector) + { + return selector; + } + + @Override + public MultiValueDimensionVectorSelector decorate(final MultiValueDimensionVectorSelector selector) + { + return selector; + } + + @Override + public boolean canVectorize() + { + return true; + } + @Override public boolean mustDecorate() { diff --git a/processing/src/main/java/org/apache/druid/query/dimension/DimensionSpec.java b/processing/src/main/java/org/apache/druid/query/dimension/DimensionSpec.java index bbf970ed2bc..9f3eb2ae031 100644 --- a/processing/src/main/java/org/apache/druid/query/dimension/DimensionSpec.java +++ b/processing/src/main/java/org/apache/druid/query/dimension/DimensionSpec.java @@ -22,9 +22,12 @@ package org.apache.druid.query.dimension; import com.fasterxml.jackson.annotation.JsonSubTypes; import com.fasterxml.jackson.annotation.JsonTypeInfo; import org.apache.druid.java.util.common.Cacheable; +import org.apache.druid.java.util.common.UOE; import org.apache.druid.query.extraction.ExtractionFn; import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.column.ValueType; +import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; +import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; import javax.annotation.Nullable; @@ -55,10 +58,29 @@ public interface DimensionSpec extends Cacheable DimensionSelector decorate(DimensionSelector selector); + default SingleValueDimensionVectorSelector decorate(SingleValueDimensionVectorSelector selector) + { + throw new UOE("DimensionSpec[%s] cannot vectorize", getClass().getName()); + } + + default MultiValueDimensionVectorSelector decorate(MultiValueDimensionVectorSelector selector) + { + throw new UOE("DimensionSpec[%s] cannot vectorize", getClass().getName()); + } + /** * Does this DimensionSpec require that decorate() be called to produce correct results? */ boolean mustDecorate(); + /** + * Does this DimensionSpec have working {@link #decorate(SingleValueDimensionVectorSelector)} and + * {@link #decorate(MultiValueDimensionVectorSelector)} methods? 
+   */
+  default boolean canVectorize()
+  {
+    return false;
+  }
+
   boolean preservesOrdering();
 }
diff --git a/processing/src/main/java/org/apache/druid/query/dimension/VectorColumnStrategizer.java b/processing/src/main/java/org/apache/druid/query/dimension/VectorColumnStrategizer.java
new file mode 100644
index 00000000000..06338d4b93d
--- /dev/null
+++ b/processing/src/main/java/org/apache/druid/query/dimension/VectorColumnStrategizer.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.dimension;
+
+import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector;
+import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector;
+import org.apache.druid.segment.vector.VectorValueSelector;
+
+/**
+ * Class that encapsulates knowledge about how to create vector column processors. Used by
+ * {@link org.apache.druid.segment.DimensionHandlerUtils#makeVectorProcessor}.
+ */
+public interface VectorColumnStrategizer<T>
+{
+  T makeSingleValueDimensionStrategy(SingleValueDimensionVectorSelector selector);
+
+  T makeMultiValueDimensionStrategy(MultiValueDimensionVectorSelector selector);
+
+  T makeFloatStrategy(VectorValueSelector selector);
+
+  T makeDoubleStrategy(VectorValueSelector selector);
+
+  T makeLongStrategy(VectorValueSelector selector);
+}
diff --git a/processing/src/main/java/org/apache/druid/query/filter/Filter.java b/processing/src/main/java/org/apache/druid/query/filter/Filter.java
index a8b7c7f627d..120358f3044 100644
--- a/processing/src/main/java/org/apache/druid/query/filter/Filter.java
+++ b/processing/src/main/java/org/apache/druid/query/filter/Filter.java
@@ -20,10 +20,13 @@
 package org.apache.druid.query.filter;

 import org.apache.druid.collections.bitmap.ImmutableBitmap;
+import org.apache.druid.java.util.common.UOE;
 import org.apache.druid.query.BitmapResultFactory;
 import org.apache.druid.query.DefaultBitmapResultFactory;
+import org.apache.druid.query.filter.vector.VectorValueMatcher;
 import org.apache.druid.segment.ColumnSelector;
 import org.apache.druid.segment.ColumnSelectorFactory;
+import org.apache.druid.segment.vector.VectorColumnSelectorFactory;

 public interface Filter
 {
@@ -84,6 +87,17 @@ public interface Filter
    */
   ValueMatcher makeMatcher(ColumnSelectorFactory factory);

+  /**
+   * Get a VectorValueMatcher that applies this filter to row vectors.
+   *
+   * @param factory Object used to create VectorValueMatchers
+   *
+   * @return VectorValueMatcher that applies this filter to row vectors.
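For orientation, a hedged sketch of how a caller such as DimensionHandlerUtils#makeVectorProcessor might dispatch on column type. That helper's body is not part of this diff, so the switch below is an illustrative assumption; only makeValueSelector appears in the patch itself, which is why the sketch covers the numeric cases only:

import org.apache.druid.query.dimension.VectorColumnStrategizer;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.vector.VectorColumnSelectorFactory;

public class StrategizerDispatchSketch
{
  // Illustrative only: pick the strategizer method matching the column's value type.
  static <T> T makeNumericProcessor(
      String column,
      ValueType type,
      VectorColumnSelectorFactory factory,
      VectorColumnStrategizer<T> strategizer
  )
  {
    switch (type) {
      case LONG:
        return strategizer.makeLongStrategy(factory.makeValueSelector(column));
      case FLOAT:
        return strategizer.makeFloatStrategy(factory.makeValueSelector(column));
      case DOUBLE:
        return strategizer.makeDoubleStrategy(factory.makeValueSelector(column));
      default:
        throw new UnsupportedOperationException("no vector processor sketched for type: " + type);
    }
  }
}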
+   */
+  default VectorValueMatcher makeVectorMatcher(VectorColumnSelectorFactory factory)
+  {
+    throw new UOE("Filter[%s] cannot vectorize", getClass().getName());
+  }

   /**
    * Indicates whether this filter can return a bitmap index for filtering, based on
@@ -107,4 +121,12 @@ public interface Filter
    * @return true if this Filter supports selectivity estimation, false otherwise.
    */
   boolean supportsSelectivityEstimation(ColumnSelector columnSelector, BitmapIndexSelector indexSelector);
+
+  /**
+   * Returns true if this filter can produce a vectorized matcher from its "makeVectorMatcher" method.
+   */
+  default boolean canVectorizeMatcher()
+  {
+    return false;
+  }
 }
diff --git a/processing/src/main/java/org/apache/druid/query/filter/IntervalDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/IntervalDimFilter.java
index c2d8511ba1f..eec5ccaa2a4 100644
--- a/processing/src/main/java/org/apache/druid/query/filter/IntervalDimFilter.java
+++ b/processing/src/main/java/org/apache/druid/query/filter/IntervalDimFilter.java
@@ -163,6 +163,12 @@ public class IntervalDimFilter implements DimFilter
     return result;
   }

+  @Override
+  public String toString()
+  {
+    return convertedFilter.toString();
+  }
+
   private List<Pair<Long, Long>> makeIntervalLongs()
   {
     List<Pair<Long, Long>> intervalLongs = new ArrayList<>();
diff --git a/processing/src/main/java/org/apache/druid/query/filter/StringValueMatcherColumnSelectorStrategy.java b/processing/src/main/java/org/apache/druid/query/filter/StringValueMatcherColumnSelectorStrategy.java
index e5092753478..73100c611e7 100644
--- a/processing/src/main/java/org/apache/druid/query/filter/StringValueMatcherColumnSelectorStrategy.java
+++ b/processing/src/main/java/org/apache/druid/query/filter/StringValueMatcherColumnSelectorStrategy.java
@@ -20,20 +20,72 @@
 package org.apache.druid.query.filter;

 import com.google.common.base.Predicate;
+import org.apache.druid.common.config.NullHandling;
+import org.apache.druid.segment.DimensionDictionarySelector;
 import org.apache.druid.segment.DimensionSelector;
 import org.apache.druid.segment.data.IndexedInts;
 import org.apache.druid.segment.filter.BooleanValueMatcher;

+import javax.annotation.Nullable;
+import java.util.Objects;
+
 public class StringValueMatcherColumnSelectorStrategy implements ValueMatcherColumnSelectorStrategy<DimensionSelector>
 {
   private static final String[] NULL_VALUE = new String[]{null};
   private static final ValueGetter NULL_VALUE_GETTER = () -> NULL_VALUE;

-  @Override
-  public ValueMatcher makeValueMatcher(final DimensionSelector selector, String value)
+  private final boolean hasMultipleValues;
+
+  public StringValueMatcherColumnSelectorStrategy(final boolean hasMultipleValues)
+  {
+    this.hasMultipleValues = hasMultipleValues;
+  }
+
+  @Nullable
+  public static Boolean toBooleanIfPossible(
+      final DimensionDictionarySelector selector,
+      final boolean hasMultipleValues,
+      final Predicate<String> predicate
+  )
   {
     if (selector.getValueCardinality() == 0) {
-      return BooleanValueMatcher.of(value == null);
+      // Column has no values (it doesn't exist, or it's all empty arrays).
+      // Match if and only if "predicate" matches null.
+      return predicate.apply(null);
+    } else if (!hasMultipleValues && selector.getValueCardinality() == 1 && selector.nameLookupPossibleInAdvance()) {
+      // Every row has the same value. Match if and only if "predicate" matches the possible value.
+      return predicate.apply(selector.lookupName(0));
+    } else {
+      return null;
+    }
+  }
+
+  @Nullable
+  private static ValueMatcher toBooleanMatcherIfPossible(
+      final DimensionSelector selector,
+      final boolean hasMultipleValues,
+      final Predicate<String> predicate
+  )
+  {
+    final Boolean booleanValue = StringValueMatcherColumnSelectorStrategy.toBooleanIfPossible(
+        selector,
+        hasMultipleValues,
+        predicate
+    );
+    return booleanValue == null ? null : BooleanValueMatcher.of(booleanValue);
+  }
+
+  @Override
+  public ValueMatcher makeValueMatcher(final DimensionSelector selector, final String value)
+  {
+    final ValueMatcher booleanMatcher = toBooleanMatcherIfPossible(
+        selector,
+        hasMultipleValues,
+        s -> Objects.equals(s, NullHandling.emptyToNullIfNeeded(value))
+    );
+
+    if (booleanMatcher != null) {
+      return booleanMatcher;
     } else {
       return selector.makeValueMatcher(value);
     }
@@ -46,8 +98,10 @@ public class StringValueMatcherColumnSelectorStrategy implements ValueMatcherCol
   )
   {
     final Predicate<String> predicate = predicateFactory.makeStringPredicate();
-    if (selector.getValueCardinality() == 0) {
-      return BooleanValueMatcher.of(predicate.apply(null));
+    final ValueMatcher booleanMatcher = toBooleanMatcherIfPossible(selector, hasMultipleValues, predicate);
+
+    if (booleanMatcher != null) {
+      return booleanMatcher;
     } else {
       return selector.makeValueMatcher(predicate);
     }
diff --git a/processing/src/main/java/org/apache/druid/query/filter/ValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/ValueMatcher.java
index be36c4d760b..a800ec7e39e 100644
--- a/processing/src/main/java/org/apache/druid/query/filter/ValueMatcher.java
+++ b/processing/src/main/java/org/apache/druid/query/filter/ValueMatcher.java
@@ -25,6 +25,11 @@ import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
 import org.apache.druid.segment.BaseNullableColumnValueSelector;

 /**
+ * An object that returns a boolean indicating if the "current" row should be selected or not. The most prominent use
+ * of this interface is that it is returned by the {@link Filter} "makeMatcher" method, where it is used to identify
+ * selected rows for filtered cursors and filtered aggregators.
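To make the row-versus-vector distinction concrete, here is a self-contained sketch of the two calling conventions (hypothetical types, not the Druid interfaces):

public class MatchStylesSketch
{
  interface RowMatcher
  {
    boolean matches(int rowNum); // consulted once per row
  }

  // Row-at-a-time style: one virtual call per row.
  static int countMatches(RowMatcher matcher, int numRows)
  {
    int count = 0;
    for (int i = 0; i < numRows; i++) {
      if (matcher.matches(i)) {
        count++;
      }
    }
    return count;
  }

  // Vectorized style: one call per batch. The matcher narrows a sorted "mask" of candidate
  // row numbers down to the rows that pass, writing into "selection" and returning its size.
  static int matchBatch(long[] vector, long matchVal, int[] mask, int maskSize, int[] selection)
  {
    int numRows = 0;
    for (int i = 0; i < maskSize; i++) {
      final int rowNum = mask[i];
      if (vector[rowNum] == matchVal) {
        selection[numRows++] = rowNum;
      }
    }
    return numRows;
  }

  public static void main(String[] args)
  {
    final long[] vector = {5, 7, 5, 9};
    System.out.println(countMatches(row -> vector[row] == 5, 4)); // 2
    final int[] selection = new int[4];
    System.out.println(matchBatch(vector, 5, new int[]{0, 1, 2, 3}, 4, selection)); // 2; selection starts [0, 2]
  }
}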
+ * + * @see org.apache.druid.query.filter.vector.VectorValueMatcher, the vectorized version */ public interface ValueMatcher extends HotLoopCallee { diff --git a/processing/src/main/java/org/apache/druid/query/filter/ValueMatcherColumnSelectorStrategyFactory.java b/processing/src/main/java/org/apache/druid/query/filter/ValueMatcherColumnSelectorStrategyFactory.java index edecbbe0a21..2797f082f74 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/ValueMatcherColumnSelectorStrategyFactory.java +++ b/processing/src/main/java/org/apache/druid/query/filter/ValueMatcherColumnSelectorStrategyFactory.java @@ -49,7 +49,7 @@ public class ValueMatcherColumnSelectorStrategyFactory ValueType type = capabilities.getType(); switch (type) { case STRING: - return new StringValueMatcherColumnSelectorStrategy(); + return new StringValueMatcherColumnSelectorStrategy(capabilities.hasMultipleValues()); case LONG: return new LongValueMatcherColumnSelectorStrategy(); case FLOAT: diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/BaseVectorValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/BaseVectorValueMatcher.java new file mode 100644 index 00000000000..aea2f143332 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/BaseVectorValueMatcher.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.filter.vector; + +import org.apache.druid.segment.vector.VectorSizeInspector; + +public abstract class BaseVectorValueMatcher implements VectorValueMatcher +{ + private final VectorSizeInspector selector; + + public BaseVectorValueMatcher(final VectorSizeInspector selector) + { + this.selector = selector; + } + + @Override + public int getCurrentVectorSize() + { + return selector.getCurrentVectorSize(); + } + + @Override + public int getMaxVectorSize() + { + return selector.getMaxVectorSize(); + } +} diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/BooleanVectorValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/BooleanVectorValueMatcher.java new file mode 100644 index 00000000000..65af27b83fc --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/BooleanVectorValueMatcher.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.filter.vector;
+
+import org.apache.druid.segment.vector.VectorSizeInspector;
+
+public class BooleanVectorValueMatcher extends BaseVectorValueMatcher
+{
+  private final VectorSizeInspector selector;
+  private final boolean matches;
+
+  private BooleanVectorValueMatcher(final VectorSizeInspector selector, final boolean matches)
+  {
+    super(selector);
+    this.selector = selector;
+    this.matches = matches;
+  }
+
+  public static BooleanVectorValueMatcher of(final VectorSizeInspector selector, final boolean matches)
+  {
+    return new BooleanVectorValueMatcher(selector, matches);
+  }
+
+  @Override
+  public int getCurrentVectorSize()
+  {
+    return selector.getCurrentVectorSize();
+  }
+
+  @Override
+  public int getMaxVectorSize()
+  {
+    return selector.getMaxVectorSize();
+  }
+
+  @Override
+  public ReadableVectorMatch match(final ReadableVectorMatch mask)
+  {
+    if (matches) {
+      assert mask.isValid(mask);
+      return mask;
+    } else {
+      return VectorMatch.allFalse();
+    }
+  }
+}
diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/DoubleVectorValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/DoubleVectorValueMatcher.java
new file mode 100644
index 00000000000..c304a117551
--- /dev/null
+++ b/processing/src/main/java/org/apache/druid/query/filter/vector/DoubleVectorValueMatcher.java
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
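A design note on the constant matchers above: a filter value that cannot possibly occur (absent from the dictionary, or unparseable as the column's numeric type) becomes a constant-false matcher, so the per-batch cost collapses to a single call that returns an empty match. Hypothetical driver code, using only the match signature defined in this patch:

import org.apache.druid.query.filter.vector.ReadableVectorMatch;
import org.apache.druid.query.filter.vector.VectorValueMatcher;

public class ConstantMatcherNote
{
  static int countSelected(VectorValueMatcher matcher, ReadableVectorMatch mask)
  {
    // One call per batch; a BooleanVectorValueMatcher returns the mask or allFalse() immediately.
    return matcher.match(mask).getSelectionSize();
  }
}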
+ */ + +package org.apache.druid.query.filter.vector; + +import org.apache.druid.query.filter.DruidDoublePredicate; +import org.apache.druid.query.filter.DruidPredicateFactory; +import org.apache.druid.segment.DimensionHandlerUtils; +import org.apache.druid.segment.vector.VectorValueSelector; + +import javax.annotation.Nullable; + +public class DoubleVectorValueMatcher implements VectorValueMatcherFactory +{ + private final VectorValueSelector selector; + + public DoubleVectorValueMatcher(final VectorValueSelector selector) + { + this.selector = selector; + } + + @Override + public VectorValueMatcher makeMatcher(@Nullable final String value) + { + final Double matchVal = DimensionHandlerUtils.convertObjectToDouble(value); + + if (matchVal == null) { + return BooleanVectorValueMatcher.of(selector, false); + } + + final double matchValDouble = matchVal; + + return new BaseVectorValueMatcher(selector) + { + final VectorMatch match = VectorMatch.wrap(new int[selector.getMaxVectorSize()]); + + @Override + public ReadableVectorMatch match(final ReadableVectorMatch mask) + { + final double[] vector = selector.getDoubleVector(); + final int[] selection = match.getSelection(); + + int numRows = 0; + + for (int i = 0; i < mask.getSelectionSize(); i++) { + final int rowNum = mask.getSelection()[i]; + if (vector[rowNum] == matchValDouble) { + selection[numRows++] = rowNum; + } + } + + match.setSelectionSize(numRows); + assert match.isValid(mask); + return match; + } + }; + } + + @Override + public VectorValueMatcher makeMatcher(final DruidPredicateFactory predicateFactory) + { + final DruidDoublePredicate predicate = predicateFactory.makeDoublePredicate(); + + return new BaseVectorValueMatcher(selector) + { + final VectorMatch match = VectorMatch.wrap(new int[selector.getMaxVectorSize()]); + + @Override + public ReadableVectorMatch match(final ReadableVectorMatch mask) + { + final double[] vector = selector.getDoubleVector(); + final int[] selection = match.getSelection(); + + int numRows = 0; + + for (int i = 0; i < mask.getSelectionSize(); i++) { + final int rowNum = mask.getSelection()[i]; + if (predicate.applyDouble(vector[rowNum])) { + selection[numRows++] = rowNum; + } + } + + match.setSelectionSize(numRows); + assert match.isValid(mask); + return match; + } + }; + } +} diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/FloatVectorValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/FloatVectorValueMatcher.java new file mode 100644 index 00000000000..4ea33aecb5f --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/FloatVectorValueMatcher.java @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.filter.vector; + +import org.apache.druid.query.filter.DruidFloatPredicate; +import org.apache.druid.query.filter.DruidPredicateFactory; +import org.apache.druid.segment.DimensionHandlerUtils; +import org.apache.druid.segment.vector.VectorValueSelector; + +import javax.annotation.Nullable; + +public class FloatVectorValueMatcher implements VectorValueMatcherFactory +{ + private final VectorValueSelector selector; + + public FloatVectorValueMatcher(final VectorValueSelector selector) + { + this.selector = selector; + } + + @Override + public VectorValueMatcher makeMatcher(@Nullable final String value) + { + final Float matchVal = DimensionHandlerUtils.convertObjectToFloat(value); + + if (matchVal == null) { + return BooleanVectorValueMatcher.of(selector, false); + } + + final float matchValFloat = matchVal; + + return new BaseVectorValueMatcher(selector) + { + final VectorMatch match = VectorMatch.wrap(new int[selector.getMaxVectorSize()]); + + @Override + public ReadableVectorMatch match(final ReadableVectorMatch mask) + { + final float[] vector = selector.getFloatVector(); + final int[] selection = match.getSelection(); + + int numRows = 0; + + for (int i = 0; i < mask.getSelectionSize(); i++) { + final int rowNum = mask.getSelection()[i]; + if (vector[rowNum] == matchValFloat) { + selection[numRows++] = rowNum; + } + } + + match.setSelectionSize(numRows); + assert match.isValid(mask); + return match; + } + }; + } + + @Override + public VectorValueMatcher makeMatcher(final DruidPredicateFactory predicateFactory) + { + final DruidFloatPredicate predicate = predicateFactory.makeFloatPredicate(); + + return new BaseVectorValueMatcher(selector) + { + final VectorMatch match = VectorMatch.wrap(new int[selector.getMaxVectorSize()]); + + @Override + public ReadableVectorMatch match(final ReadableVectorMatch mask) + { + final float[] vector = selector.getFloatVector(); + final int[] selection = match.getSelection(); + + int numRows = 0; + + for (int i = 0; i < mask.getSelectionSize(); i++) { + final int rowNum = mask.getSelection()[i]; + if (predicate.applyFloat(vector[rowNum])) { + selection[numRows++] = rowNum; + } + } + + match.setSelectionSize(numRows); + assert match.isValid(mask); + return match; + } + }; + } +} diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/LongVectorValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/LongVectorValueMatcher.java new file mode 100644 index 00000000000..a07f9ba32f0 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/LongVectorValueMatcher.java @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.filter.vector; + +import org.apache.druid.query.filter.DruidLongPredicate; +import org.apache.druid.query.filter.DruidPredicateFactory; +import org.apache.druid.segment.DimensionHandlerUtils; +import org.apache.druid.segment.vector.VectorValueSelector; + +import javax.annotation.Nullable; + +public class LongVectorValueMatcher implements VectorValueMatcherFactory +{ + private final VectorValueSelector selector; + + public LongVectorValueMatcher(final VectorValueSelector selector) + { + this.selector = selector; + } + + @Override + public VectorValueMatcher makeMatcher(@Nullable final String value) + { + final Long matchVal = DimensionHandlerUtils.convertObjectToLong(value); + + if (matchVal == null) { + return BooleanVectorValueMatcher.of(selector, false); + } + + final long matchValLong = matchVal; + + return new BaseVectorValueMatcher(selector) + { + final VectorMatch match = VectorMatch.wrap(new int[selector.getMaxVectorSize()]); + + @Override + public ReadableVectorMatch match(final ReadableVectorMatch mask) + { + final long[] vector = selector.getLongVector(); + final int[] selection = match.getSelection(); + + int numRows = 0; + + for (int i = 0; i < mask.getSelectionSize(); i++) { + final int rowNum = mask.getSelection()[i]; + if (vector[rowNum] == matchValLong) { + selection[numRows++] = rowNum; + } + } + + match.setSelectionSize(numRows); + assert match.isValid(mask); + return match; + } + }; + } + + @Override + public VectorValueMatcher makeMatcher(final DruidPredicateFactory predicateFactory) + { + final DruidLongPredicate predicate = predicateFactory.makeLongPredicate(); + + return new BaseVectorValueMatcher(selector) + { + final VectorMatch match = VectorMatch.wrap(new int[selector.getMaxVectorSize()]); + + @Override + public ReadableVectorMatch match(final ReadableVectorMatch mask) + { + final long[] vector = selector.getLongVector(); + final int[] selection = match.getSelection(); + + int numRows = 0; + + for (int i = 0; i < mask.getSelectionSize(); i++) { + final int rowNum = mask.getSelection()[i]; + if (predicate.applyLong(vector[rowNum])) { + selection[numRows++] = rowNum; + } + } + + match.setSelectionSize(numRows); + assert match.isValid(mask); + return match; + } + }; + } +} diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/MultiValueStringVectorValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/MultiValueStringVectorValueMatcher.java new file mode 100644 index 00000000000..d192673784d --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/MultiValueStringVectorValueMatcher.java @@ -0,0 +1,208 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+package org.apache.druid.query.filter.vector;
+
+import com.google.common.base.Predicate;
+import org.apache.druid.common.config.NullHandling;
+import org.apache.druid.query.filter.DruidPredicateFactory;
+import org.apache.druid.segment.IdLookup;
+import org.apache.druid.segment.data.IndexedInts;
+import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector;
+
+import javax.annotation.Nullable;
+import java.util.BitSet;
+import java.util.Objects;
+
+public class MultiValueStringVectorValueMatcher implements VectorValueMatcherFactory
+{
+  private final MultiValueDimensionVectorSelector selector;
+
+  public MultiValueStringVectorValueMatcher(final MultiValueDimensionVectorSelector selector)
+  {
+    this.selector = selector;
+  }
+
+  @Override
+  public VectorValueMatcher makeMatcher(@Nullable final String value)
+  {
+    final String etnValue = NullHandling.emptyToNullIfNeeded(value);
+    final IdLookup idLookup = selector.idLookup();
+    final int id;
+
+    if (idLookup != null) {
+      // Optimization when names can be looked up to IDs ahead of time.
+      id = idLookup.lookupId(etnValue);
+
+      if (id < 0) {
+        // Value doesn't exist in this column.
+        return BooleanVectorValueMatcher.of(selector, false);
+      }
+
+      // Check for "id".
+      return new BaseVectorValueMatcher(selector)
+      {
+        final VectorMatch match = VectorMatch.wrap(new int[selector.getMaxVectorSize()]);
+
+        @Override
+        public ReadableVectorMatch match(final ReadableVectorMatch mask)
+        {
+          final IndexedInts[] vector = selector.getRowVector();
+          final int[] selection = match.getSelection();
+
+          int numRows = 0;
+
+          for (int i = 0; i < mask.getSelectionSize(); i++) {
+            final int rowNum = mask.getSelection()[i];
+            final IndexedInts ints = vector[rowNum];
+            final int n = ints.size();
+
+            if (n == 0) {
+              // null should match empty rows in multi-value columns
+              if (etnValue == null) {
+                selection[numRows++] = rowNum;
+              }
+            } else {
+              for (int j = 0; j < n; j++) {
+                if (ints.get(j) == id) {
+                  selection[numRows++] = rowNum;
+                  break;
+                }
+              }
+            }
+          }
+
+          match.setSelectionSize(numRows);
+          assert match.isValid(mask);
+          return match;
+        }
+      };
+    } else {
+      return makeMatcher(s -> Objects.equals(s, etnValue));
+    }
+  }
+
+  @Override
+  public VectorValueMatcher makeMatcher(final DruidPredicateFactory predicateFactory)
+  {
+    return makeMatcher(predicateFactory.makeStringPredicate());
+  }
+
+  private VectorValueMatcher makeMatcher(final Predicate<String> predicate)
+  {
+    final boolean matchNull = predicate.apply(null);
+
+    if (selector.getValueCardinality() > 0) {
+      final BitSet checkedIds = new BitSet(selector.getValueCardinality());
+      final BitSet matchingIds = new BitSet(selector.getValueCardinality());
+
+      // Lazy matcher; only check an id if matches() is called.
+ return new BaseVectorValueMatcher(selector) + { + private final VectorMatch match = VectorMatch.wrap(new int[selector.getMaxVectorSize()]); + + @Override + public ReadableVectorMatch match(final ReadableVectorMatch mask) + { + final IndexedInts[] vector = selector.getRowVector(); + final int[] selection = match.getSelection(); + + int numRows = 0; + + for (int i = 0; i < mask.getSelectionSize(); i++) { + final int rowNum = mask.getSelection()[i]; + final IndexedInts ints = vector[rowNum]; + final int n = ints.size(); + + if (n == 0) { + // null should match empty rows in multi-value columns + if (matchNull) { + selection[numRows++] = rowNum; + } + } else { + for (int j = 0; j < n; j++) { + final int id = ints.get(j); + final boolean matches; + + if (checkedIds.get(id)) { + matches = matchingIds.get(id); + } else { + matches = predicate.apply(selector.lookupName(id)); + checkedIds.set(id); + if (matches) { + matchingIds.set(id); + } + } + + if (matches) { + selection[numRows++] = rowNum; + break; + } + } + } + } + + match.setSelectionSize(numRows); + assert match.isValid(mask); + return match; + } + }; + } else { + // Evaluate "lookupName" and "predicate" on every row. + return new BaseVectorValueMatcher(selector) + { + final VectorMatch match = VectorMatch.wrap(new int[selector.getMaxVectorSize()]); + + @Override + public ReadableVectorMatch match(final ReadableVectorMatch mask) + { + final IndexedInts[] vector = selector.getRowVector(); + final int[] selection = match.getSelection(); + + int numRows = 0; + + for (int i = 0; i < mask.getSelectionSize(); i++) { + final int rowNum = mask.getSelection()[i]; + final IndexedInts ints = vector[rowNum]; + final int n = ints.size(); + + if (n == 0) { + // null should match empty rows in multi-value columns + if (matchNull) { + selection[numRows++] = rowNum; + } + } else { + for (int j = 0; j < n; j++) { + final int id = ints.get(j); + if (predicate.apply(selector.lookupName(id))) { + selection[numRows++] = rowNum; + break; + } + } + } + } + + match.setSelectionSize(numRows); + assert match.isValid(mask); + return match; + } + }; + } + } +} diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/ReadableVectorMatch.java b/processing/src/main/java/org/apache/druid/query/filter/vector/ReadableVectorMatch.java new file mode 100644 index 00000000000..88cbbf3aaa0 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/ReadableVectorMatch.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.filter.vector; + +import javax.annotation.Nullable; + +/** + * The result of calling {@link VectorValueMatcher#match}. + * + * @see VectorMatch, the implementation, which also adds some extra mutation methods. 
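The checkedIds/matchingIds pair used by the lazy matchers above is a per-dictionary-id memo table: dictionary-encoded columns have a small id space, so each predicate result is computed at most once. Distilled into a standalone sketch (hypothetical class; java.util.function.IntPredicate stands in for predicate.apply(selector.lookupName(id))):

import java.util.BitSet;
import java.util.function.IntPredicate;

public class MemoizedPredicateSketch
{
  private final BitSet checkedIds;   // ids that have been evaluated at least once
  private final BitSet matchingIds;  // ids that evaluated to true
  private final IntPredicate predicate;

  MemoizedPredicateSketch(int cardinality, IntPredicate predicate)
  {
    this.checkedIds = new BitSet(cardinality);
    this.matchingIds = new BitSet(cardinality);
    this.predicate = predicate;
  }

  boolean test(int id)
  {
    if (checkedIds.get(id)) {
      return matchingIds.get(id);
    }
    final boolean matches = predicate.test(id);
    checkedIds.set(id);
    if (matches) {
      matchingIds.set(id);
    }
    return matches;
  }

  public static void main(String[] args)
  {
    final MemoizedPredicateSketch memo = new MemoizedPredicateSketch(10, id -> {
      System.out.println("evaluating id " + id); // printed once per distinct id
      return id % 2 == 0;
    });
    System.out.println(memo.test(4));
    System.out.println(memo.test(4)); // cached; no re-evaluation
  }
}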
+ */ +public interface ReadableVectorMatch +{ + /** + * Returns an array of indexes into the current batch. Only the first "getSelectionSize" are valid. + * + * Even though this array is technically mutable, it is very poor form to mutate it if you are not the owner of the + * VectorMatch object. The reason we use a mutable array here instead of positional getter methods, by the way, is in + * the hopes of keeping access to the selection vector as low-level and optimizable as possible. Potential + * optimizations could include making it easier for the JVM to use CPU-level vectorization, avoid method calls, etc. + */ + int[] getSelection(); + + /** + * Returns the number of valid values in the array from "getSelection". + */ + int getSelectionSize(); + + /** + * Checks if this match has accepted every row in the vector. + * + * @param vectorSize the current vector size; must be passed in since VectorMatch objects do not "know" the size + * of the vector they came from. + */ + boolean isAllTrue(int vectorSize); + + /** + * Checks if this match has accepted *nothing*. + */ + boolean isAllFalse(); + + /** + * Checks if this match is valid (increasing row numbers, no out-of-range row numbers). Can additionally verify + * that the match is a subset of a provided "mask". + * + * Used by assertions and tests. + * + * @param mask if provided, checks if this match is a subset of the mask. + */ + boolean isValid(@Nullable ReadableVectorMatch mask); +} diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/SingleValueStringVectorValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/SingleValueStringVectorValueMatcher.java new file mode 100644 index 00000000000..6ed7c16c368 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/SingleValueStringVectorValueMatcher.java @@ -0,0 +1,192 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.filter.vector; + +import com.google.common.base.Predicate; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.query.filter.DruidPredicateFactory; +import org.apache.druid.query.filter.StringValueMatcherColumnSelectorStrategy; +import org.apache.druid.segment.IdLookup; +import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; + +import javax.annotation.Nullable; +import java.util.BitSet; +import java.util.Objects; + +public class SingleValueStringVectorValueMatcher implements VectorValueMatcherFactory +{ + private final SingleValueDimensionVectorSelector selector; + + public SingleValueStringVectorValueMatcher(final SingleValueDimensionVectorSelector selector) + { + this.selector = selector; + } + + @Nullable + private static BooleanVectorValueMatcher toBooleanMatcherIfPossible( + final SingleValueDimensionVectorSelector selector, + final Predicate predicate + ) + { + final Boolean booleanValue = StringValueMatcherColumnSelectorStrategy.toBooleanIfPossible( + selector, + false, + predicate + ); + + return booleanValue == null ? null : BooleanVectorValueMatcher.of(selector, booleanValue); + } + + @Override + public VectorValueMatcher makeMatcher(@Nullable final String value) + { + final String etnValue = NullHandling.emptyToNullIfNeeded(value); + + final VectorValueMatcher booleanMatcher = toBooleanMatcherIfPossible(selector, s -> Objects.equals(s, etnValue)); + if (booleanMatcher != null) { + return booleanMatcher; + } + + final IdLookup idLookup = selector.idLookup(); + final int id; + + if (idLookup != null) { + // Optimization when names can be looked up to IDs ahead of time. + id = idLookup.lookupId(etnValue); + + if (id < 0) { + // Value doesn't exist in this column. + return BooleanVectorValueMatcher.of(selector, false); + } + + // Check for "id". + return new BaseVectorValueMatcher(selector) + { + final VectorMatch match = VectorMatch.wrap(new int[selector.getMaxVectorSize()]); + + @Override + public ReadableVectorMatch match(final ReadableVectorMatch mask) + { + final int[] vector = selector.getRowVector(); + final int[] selection = match.getSelection(); + + int numRows = 0; + + for (int i = 0; i < mask.getSelectionSize(); i++) { + final int rowNum = mask.getSelection()[i]; + if (vector[rowNum] == id) { + selection[numRows++] = rowNum; + } + } + + match.setSelectionSize(numRows); + assert match.isValid(mask); + return match; + } + }; + } else { + return makeMatcher(s -> Objects.equals(s, etnValue)); + } + } + + @Override + public VectorValueMatcher makeMatcher(final DruidPredicateFactory predicateFactory) + { + return makeMatcher(predicateFactory.makeStringPredicate()); + } + + private VectorValueMatcher makeMatcher(final Predicate predicate) + { + final VectorValueMatcher booleanMatcher = toBooleanMatcherIfPossible(selector, predicate); + if (booleanMatcher != null) { + return booleanMatcher; + } + + if (selector.getValueCardinality() > 0) { + final BitSet checkedIds = new BitSet(selector.getValueCardinality()); + final BitSet matchingIds = new BitSet(selector.getValueCardinality()); + + // Lazy matcher; only check an id if matches() is called. 
+ return new BaseVectorValueMatcher(selector) + { + private final VectorMatch match = VectorMatch.wrap(new int[selector.getMaxVectorSize()]); + + @Override + public ReadableVectorMatch match(final ReadableVectorMatch mask) + { + final int[] vector = selector.getRowVector(); + final int[] selection = match.getSelection(); + + int numRows = 0; + + for (int i = 0; i < mask.getSelectionSize(); i++) { + final int rowNum = mask.getSelection()[i]; + final int id = vector[rowNum]; + final boolean matches; + + if (checkedIds.get(id)) { + matches = matchingIds.get(id); + } else { + matches = predicate.apply(selector.lookupName(id)); + checkedIds.set(id); + if (matches) { + matchingIds.set(id); + } + } + + if (matches) { + selection[numRows++] = rowNum; + } + } + + match.setSelectionSize(numRows); + assert match.isValid(mask); + return match; + } + }; + } else { + // Evaluate "lookupName" and "predicate" on every row. + return new BaseVectorValueMatcher(selector) + { + final VectorMatch match = VectorMatch.wrap(new int[selector.getMaxVectorSize()]); + + @Override + public ReadableVectorMatch match(final ReadableVectorMatch mask) + { + final int[] vector = selector.getRowVector(); + final int[] selection = match.getSelection(); + + int numRows = 0; + + for (int i = 0; i < mask.getSelectionSize(); i++) { + final int rowNum = mask.getSelection()[i]; + if (predicate.apply(selector.lookupName(vector[rowNum]))) { + selection[numRows++] = rowNum; + } + } + + match.setSelectionSize(numRows); + assert match.isValid(mask); + return match; + } + }; + } + } +} diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/VectorMatch.java b/processing/src/main/java/org/apache/druid/query/filter/vector/VectorMatch.java new file mode 100644 index 00000000000..54a69476654 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/VectorMatch.java @@ -0,0 +1,267 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.filter.vector; + +import com.google.common.base.Preconditions; +import org.apache.druid.segment.QueryableIndexStorageAdapter; + +import javax.annotation.Nullable; + +/** + * Implementation class for ReadableVectorMatch. + * + * Also adds some useful methods, like "addAll", "removeAll", and "copyFrom". 
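The addAll and removeAll mentioned in that javadoc are single-pass merges over sorted selection vectors; addAll needs scratch space because a sorted union cannot be built in place without overwriting unread entries. A standalone sketch of the union (hypothetical names; the addAll further below follows the same three-pointer shape):

public class SelectionUnionSketch
{
  // Merge the sorted row numbers of "other" into "selection" via "scratch"; returns the new size.
  static int union(int[] selection, int selectionSize, int[] other, int otherSize, int[] scratch)
  {
    int i = 0; // read position in selection
    int j = 0; // read position in other
    int k = 0; // write position in scratch
    for (; i < selectionSize; i++) {
      while (j < otherSize && other[j] < selection[i]) {
        scratch[k++] = other[j++];
      }
      scratch[k++] = selection[i];
      if (j < otherSize && other[j] == selection[i]) {
        j++; // skip duplicate row number
      }
    }
    while (j < otherSize) {
      scratch[k++] = other[j++];
    }
    System.arraycopy(scratch, 0, selection, 0, k);
    return k;
  }

  public static void main(String[] args)
  {
    final int[] selection = new int[8];
    selection[0] = 1;
    selection[1] = 4;
    final int size = union(selection, 2, new int[]{2, 4, 6}, 3, new int[8]);
    System.out.println(size); // 4; selection now starts with [1, 2, 4, 6]
  }
}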
+ */ +public class VectorMatch implements ReadableVectorMatch +{ + private static final int[] DEFAULT_ALL_TRUE_VECTOR = new int[QueryableIndexStorageAdapter.DEFAULT_VECTOR_SIZE]; + + private static final VectorMatch ALL_FALSE = new VectorMatch(new int[0], 0); + + static { + for (int i = 0; i < DEFAULT_ALL_TRUE_VECTOR.length; i++) { + DEFAULT_ALL_TRUE_VECTOR[i] = i; + } + } + + private final int[] selection; + private int selectionSize; + + private VectorMatch(final int[] selection, final int selectionSize) + { + this.selection = selection; + this.selectionSize = selectionSize; + } + + /** + * Creates a match that matches everything up to "numRows". This will often be the current vector size, but + * does not necessarily have to be. + */ + public static ReadableVectorMatch allTrue(final int numRows) + { + if (numRows <= DEFAULT_ALL_TRUE_VECTOR.length) { + return new VectorMatch(DEFAULT_ALL_TRUE_VECTOR, numRows); + } else { + final int[] selection = new int[numRows]; + + for (int i = 0; i < numRows; i++) { + selection[i] = i; + } + + return new VectorMatch(selection, numRows); + } + } + + /** + * Creates a match that matches nothing. + */ + public static ReadableVectorMatch allFalse() + { + return ALL_FALSE; + } + + /** + * Creates a new match object with selectionSize = 0, and the provided array as a backing array. + */ + public static VectorMatch wrap(final int[] selection) + { + return new VectorMatch(selection, 0); + } + + @Override + public boolean isAllTrue(final int vectorSize) + { + return selectionSize == vectorSize; + } + + @Override + public boolean isAllFalse() + { + return selectionSize == 0; + } + + @Override + public boolean isValid(@Nullable final ReadableVectorMatch mask) + { + if (mask != null && !mask.isValid(null)) { + // Invalid mask. + return false; + } + + // row numbers must be increasing. + int rowNum = -1; + for (int i = 0; i < selectionSize; i++) { + if (selection[i] > rowNum) { + rowNum = selection[i]; + } else { + return false; + } + } + + // row number cannot be larger than the max length of the selection vector. + if (rowNum > selection.length) { + return false; + } + + // row numbers must all be present in the mask, if it exists. + if (mask != null) { + final int[] maskArray = mask.getSelection(); + for (int i = 0, j = 0; i < selectionSize; i++) { + while (j < mask.getSelectionSize() && selection[i] > maskArray[j]) { + j++; + } + + if (j >= mask.getSelectionSize() || selection[i] != maskArray[j]) { + return false; + } + } + } + + return true; + } + + /** + * Removes all rows from this object that occur in "other", in place, and returns a reference to this object. Does + * not modify "other". + */ + public VectorMatch removeAll(final ReadableVectorMatch other) + { + //noinspection ObjectEquality + Preconditions.checkState(this != other, "'other' must be a different instance from 'this'"); + + int i = 0; // reading position in this.selection + int j = 0; // writing position in this.selection + int p = 0; // position in otherSelection + final int[] otherSelection = other.getSelection(); + for (; i < selectionSize; i++) { + while (p < other.getSelectionSize() && otherSelection[p] < selection[i]) { + // Other value < selection[i], keep reading in other so we can see if selection[i] should be preserved or not. + p++; + } + + if (!(p < other.getSelectionSize() && otherSelection[p] == selection[i])) { + // Preserve selection[i]. 
+ selection[j++] = selection[i]; + } + } + selectionSize = j; + assert isValid(null); + return this; + } + + /** + * Adds all rows from "other" to this object, using "scratch" as scratch space if needed. Does not modify "other". + * Returns a reference to this object. + */ + public VectorMatch addAll(final ReadableVectorMatch other, final VectorMatch scratch) + { + //noinspection ObjectEquality + Preconditions.checkState(this != scratch, "'scratch' must be a different instance from 'this'"); + //noinspection ObjectEquality + Preconditions.checkState(other != scratch, "'scratch' must be a different instance from 'other'"); + + final int[] scratchSelection = scratch.getSelection(); + final int[] otherSelection = other.getSelection(); + + int i = 0; // this.selection pointer + int j = 0; // otherSelection pointer + int k = 0; // scratchSelection pointer + + for (; i < selectionSize; i++) { + while (j < other.getSelectionSize() && otherSelection[j] < selection[i]) { + scratchSelection[k++] = otherSelection[j++]; + } + + scratchSelection[k++] = selection[i]; + + if (j < other.getSelectionSize() && otherSelection[j] == selection[i]) { + j++; + } + } + + while (j < other.getSelectionSize()) { + scratchSelection[k++] = otherSelection[j++]; + } + + scratch.setSelectionSize(k); + copyFrom(scratch); + assert isValid(null); + return this; + } + + /** + * Copies "other" into this object, and returns a reference to this object. Does not modify "other". + */ + public VectorMatch copyFrom(final ReadableVectorMatch other) + { + Preconditions.checkState( + selection.length >= other.getSelectionSize(), + "Capacity[%s] cannot fit other match's selectionSize[%s]", + selection.length, + other.getSelectionSize() + ); + System.arraycopy(other.getSelection(), 0, selection, 0, other.getSelectionSize()); + selectionSize = other.getSelectionSize(); + assert isValid(null); + return this; + } + + @Override + public int[] getSelection() + { + return selection; + } + + @Override + public int getSelectionSize() + { + return selectionSize; + } + + /** + * Sets the valid selectionSize, and returns a reference to this object. + */ + public VectorMatch setSelectionSize(final int newSelectionSize) + { + Preconditions.checkArgument( + newSelectionSize <= selection.length, + "Oops! Cannot setSelectionSize[%s] > selection.length[%s].", + newSelectionSize, + selection.length + ); + this.selectionSize = newSelectionSize; + assert isValid(null); + return this; + } + + @Override + public String toString() + { + final StringBuilder retVal = new StringBuilder("["); + for (int i = 0; i < selectionSize; i++) { + if (i > 0) { + retVal.append(", "); + } + retVal.append(selection[i]); + } + retVal.append("]"); + return retVal.toString(); + } +} diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcher.java new file mode 100644 index 00000000000..242166115b9 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcher.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.filter.vector;
+
+import org.apache.druid.query.filter.Filter;
+import org.apache.druid.segment.vector.VectorSizeInspector;
+
+/**
+ * An object that decides, for an entire vector of rows at once, which rows should be selected. The most
+ * prominent use of this interface is that it is returned by the {@link Filter} "makeVectorMatcher" method,
+ * where it is used to identify selected rows for filtered cursors and filtered aggregators.
+ *
+ * @see org.apache.druid.query.filter.ValueMatcher, the non-vectorized version
+ */
+public interface VectorValueMatcher extends VectorSizeInspector
+{
+  /**
+   * Examine the current vector and return a match indicating what is accepted.
+   *
+   * @param mask must not be null; use {@link VectorMatch#allTrue} if you don't need a mask.
+   *
+   * @return the subset of "mask" that this value matcher accepts
+   */
+  ReadableVectorMatch match(ReadableVectorMatch mask);
+}
diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcherColumnStrategizer.java b/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcherColumnStrategizer.java
new file mode 100644
index 00000000000..7b970e20ce4
--- /dev/null
+++ b/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcherColumnStrategizer.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.filter.vector;
+
+import org.apache.druid.query.dimension.VectorColumnStrategizer;
+import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector;
+import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector;
+import org.apache.druid.segment.vector.VectorValueSelector;
+
+public class VectorValueMatcherColumnStrategizer implements VectorColumnStrategizer<VectorValueMatcherFactory>
+{
+  private static final VectorValueMatcherColumnStrategizer INSTANCE = new VectorValueMatcherColumnStrategizer();
+
+  private VectorValueMatcherColumnStrategizer()
+  {
+    // Singleton.
+ } + + public static VectorValueMatcherColumnStrategizer instance() + { + return INSTANCE; + } + + @Override + public VectorValueMatcherFactory makeSingleValueDimensionStrategy( + final SingleValueDimensionVectorSelector selector + ) + { + return new SingleValueStringVectorValueMatcher(selector); + } + + @Override + public VectorValueMatcherFactory makeMultiValueDimensionStrategy( + final MultiValueDimensionVectorSelector selector + ) + { + return new MultiValueStringVectorValueMatcher(selector); + } + + @Override + public VectorValueMatcherFactory makeFloatStrategy(final VectorValueSelector selector) + { + return new FloatVectorValueMatcher(selector); + } + + @Override + public VectorValueMatcherFactory makeDoubleStrategy(final VectorValueSelector selector) + { + return new DoubleVectorValueMatcher(selector); + } + + @Override + public VectorValueMatcherFactory makeLongStrategy(final VectorValueSelector selector) + { + return new LongVectorValueMatcher(selector); + } +} diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcherFactory.java b/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcherFactory.java new file mode 100644 index 00000000000..a7971eb7430 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/VectorValueMatcherFactory.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.filter.vector; + +import org.apache.druid.query.filter.DruidPredicateFactory; + +import javax.annotation.Nullable; + +public interface VectorValueMatcherFactory +{ + VectorValueMatcher makeMatcher(@Nullable String value); + + VectorValueMatcher makeMatcher(DruidPredicateFactory predicateFactory); +} diff --git a/processing/src/main/java/org/apache/druid/query/groupby/GroupByQuery.java b/processing/src/main/java/org/apache/druid/query/groupby/GroupByQuery.java index 8ab4601b09a..29ec5ec332c 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/GroupByQuery.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/GroupByQuery.java @@ -1077,6 +1077,7 @@ public class GroupByQuery extends BaseQuery ", aggregatorSpecs=" + aggregatorSpecs + ", postAggregatorSpecs=" + postAggregatorSpecs + ", havingSpec=" + havingSpec + + ", context=" + getContext() + '}'; } diff --git a/processing/src/main/java/org/apache/druid/query/groupby/GroupByQueryConfig.java b/processing/src/main/java/org/apache/druid/query/groupby/GroupByQueryConfig.java index 2ec00256095..13107fba68e 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/GroupByQueryConfig.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/GroupByQueryConfig.java @@ -42,6 +42,7 @@ public class GroupByQueryConfig private static final String CTX_KEY_FORCE_HASH_AGGREGATION = "forceHashAggregation"; private static final String CTX_KEY_INTERMEDIATE_COMBINE_DEGREE = "intermediateCombineDegree"; private static final String CTX_KEY_NUM_PARALLEL_COMBINE_THREADS = "numParallelCombineThreads"; + private static final String CTX_KEY_VECTORIZE = "vectorize"; @JsonProperty private String defaultStrategy = GroupByStrategySelector.STRATEGY_V2; @@ -88,6 +89,9 @@ public class GroupByQueryConfig @JsonProperty private int numParallelCombineThreads = 1; + @JsonProperty + private boolean vectorize = false; + public String getDefaultStrategy() { return defaultStrategy; @@ -168,6 +172,11 @@ public class GroupByQueryConfig return numParallelCombineThreads; } + public boolean isVectorize() + { + return vectorize; + } + public boolean isForcePushDownNestedQuery() { return forcePushDownNestedQuery; @@ -203,7 +212,10 @@ public class GroupByQueryConfig getMaxOnDiskStorage() ); newConfig.maxMergingDictionarySize = Math.min( - ((Number) query.getContextValue(CTX_KEY_MAX_MERGING_DICTIONARY_SIZE, getMaxMergingDictionarySize())).longValue(), + ((Number) query.getContextValue( + CTX_KEY_MAX_MERGING_DICTIONARY_SIZE, + getMaxMergingDictionarySize() + )).longValue(), getMaxMergingDictionarySize() ); newConfig.forcePushDownLimit = query.getContextBoolean(CTX_KEY_FORCE_LIMIT_PUSH_DOWN, isForcePushDownLimit()); @@ -217,6 +229,7 @@ public class GroupByQueryConfig CTX_KEY_NUM_PARALLEL_COMBINE_THREADS, getNumParallelCombineThreads() ); + newConfig.vectorize = query.getContextBoolean(CTX_KEY_VECTORIZE, isVectorize()); return newConfig; } @@ -237,6 +250,7 @@ public class GroupByQueryConfig ", forceHashAggregation=" + forceHashAggregation + ", intermediateCombineDegree=" + intermediateCombineDegree + ", numParallelCombineThreads=" + numParallelCombineThreads + + ", vectorize=" + vectorize + ", forcePushDownNestedQuery=" + forcePushDownNestedQuery + '}'; } diff --git a/processing/src/main/java/org/apache/druid/query/groupby/GroupByQueryEngine.java b/processing/src/main/java/org/apache/druid/query/groupby/GroupByQueryEngine.java index c8dcef3857c..1383b202d35 100644 --- 
a/processing/src/main/java/org/apache/druid/query/groupby/GroupByQueryEngine.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/GroupByQueryEngine.java @@ -44,6 +44,7 @@ import org.apache.druid.query.aggregation.PostAggregator; import org.apache.druid.query.dimension.DimensionSpec; import org.apache.druid.query.filter.Filter; import org.apache.druid.segment.Cursor; +import org.apache.druid.segment.DimensionDictionarySelector; import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.StorageAdapter; import org.apache.druid.segment.column.ValueType; @@ -331,7 +332,7 @@ public class GroupByQueryEngine } final DimensionSelector selector = cursor.getColumnSelectorFactory().makeDimensionSelector(dimSpec); - if (selector.getValueCardinality() == DimensionSelector.CARDINALITY_UNKNOWN) { + if (selector.getValueCardinality() == DimensionDictionarySelector.CARDINALITY_UNKNOWN) { throw new UnsupportedOperationException( "GroupBy v1 does not support dimension selectors with unknown cardinality."); } diff --git a/processing/src/main/java/org/apache/druid/query/groupby/RowBasedColumnSelectorFactory.java b/processing/src/main/java/org/apache/druid/query/groupby/RowBasedColumnSelectorFactory.java index 081b48191de..34e5596b1e8 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/RowBasedColumnSelectorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/RowBasedColumnSelectorFactory.java @@ -31,6 +31,7 @@ import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.BaseSingleValueDimensionSelector; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.DimensionDictionarySelector; import org.apache.druid.segment.DimensionHandlerUtils; import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.IdLookup; @@ -242,7 +243,7 @@ public class RowBasedColumnSelectorFactory implements ColumnSelectorFactory @Override public int getValueCardinality() { - return DimensionSelector.CARDINALITY_UNKNOWN; + return DimensionDictionarySelector.CARDINALITY_UNKNOWN; } @Override diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/AbstractBufferHashGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/AbstractBufferHashGrouper.java index 8ca48ebb949..aae2e4b3a3c 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/AbstractBufferHashGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/AbstractBufferHashGrouper.java @@ -22,8 +22,7 @@ package org.apache.druid.query.groupby.epinephelinae; import com.google.common.base.Supplier; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.logger.Logger; -import org.apache.druid.query.aggregation.AggregatorFactory; -import org.apache.druid.query.aggregation.BufferAggregator; +import org.apache.druid.query.aggregation.AggregatorAdapters; import java.nio.ByteBuffer; @@ -35,8 +34,8 @@ public abstract class AbstractBufferHashGrouper implements Grouper bufferSupplier; protected final KeySerde keySerde; protected final int keySize; - protected final BufferAggregator[] aggregators; - protected final int[] aggregatorOffsets; + protected final AggregatorAdapters aggregators; + protected final int baseAggregatorOffset; protected final int bufferGrouperMaxSize; // Integer.MAX_VALUE in production, only used for unit 
tests // The load factor and bucket configurations are not final, to allow subclasses to set their own values @@ -53,15 +52,16 @@ public abstract class AbstractBufferHashGrouper implements Grouper bufferSupplier, final KeySerde keySerde, - final AggregatorFactory[] aggregatorFactories, + final AggregatorAdapters aggregators, + final int baseAggregatorOffset, final int bufferGrouperMaxSize ) { this.bufferSupplier = bufferSupplier; this.keySerde = keySerde; this.keySize = keySerde.keySize(); - this.aggregators = new BufferAggregator[aggregatorFactories.length]; - this.aggregatorOffsets = new int[aggregatorFactories.length]; + this.aggregators = aggregators; + this.baseAggregatorOffset = baseAggregatorOffset; this.bufferGrouperMaxSize = bufferGrouperMaxSize; } @@ -77,8 +77,9 @@ public abstract class AbstractBufferHashGrouper implements Grouper implements Grouper implements Grouper {}); if (bucket < 0) { // This may just trigger a spill and get ignored, which is ok. If it bubbles up to the user, the message will // be correct. - return Groupers.HASH_TABLE_FULL; + return Groupers.hashTableFull(0); } final int bucketStartOffset = hashTable.getOffsetForBucket(bucket); @@ -149,10 +150,7 @@ public abstract class AbstractBufferHashGrouper implements Grouper implements Grouper implements Grouper bucketEntryForOffset(final int bucketOffset) { final ByteBuffer tableBuffer = hashTable.getTableBuffer(); final KeyType key = keySerde.fromByteBuffer(tableBuffer, bucketOffset + HASH_SIZE); - final Object[] values = new Object[aggregators.length]; - for (int i = 0; i < aggregators.length; i++) { - values[i] = aggregators[i].get(tableBuffer, bucketOffset + aggregatorOffsets[i]); + final Object[] values = new Object[aggregators.size()]; + for (int i = 0; i < aggregators.size(); i++) { + values[i] = aggregators.get(tableBuffer, bucketOffset + baseAggregatorOffset, i); } return new Entry<>(key, values); diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/AggregateResult.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/AggregateResult.java index f1bd4931bda..7ddd5528af9 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/AggregateResult.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/AggregateResult.java @@ -19,13 +19,19 @@ package org.apache.druid.query.groupby.epinephelinae; +import com.google.common.base.Preconditions; +import org.apache.druid.java.util.common.ISE; + +import javax.annotation.Nullable; import java.util.Objects; public class AggregateResult { - private static final AggregateResult OK = new AggregateResult(true, null); + private static final AggregateResult OK = new AggregateResult(0, null); - private final boolean ok; + private final int count; + + @Nullable private final String reason; public static AggregateResult ok() @@ -33,29 +39,47 @@ public class AggregateResult return OK; } - public static AggregateResult failure(final String reason) + public static AggregateResult partial(final int count, final String reason) { - return new AggregateResult(false, reason); + return new AggregateResult(count, Preconditions.checkNotNull(reason, "reason")); } - private AggregateResult(final boolean ok, final String reason) + private AggregateResult(final int count, @Nullable final String reason) { - this.ok = ok; + Preconditions.checkArgument(count >= 0, "count >= 0"); + this.count = count; this.reason = reason; } + /** + * True if all rows have been processed. 
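 * (Editor's note: false means a partial result — only the first getCount() rows of the call were
 * aggregated, possibly zero of them, and the caller is expected to aggregate the remaining rows
 * again, typically after spilling frees up table or dictionary space.)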
+ */ public boolean isOk() { - return ok; + return reason == null; } + public int getCount() + { + if (isOk()) { + throw new ISE("Cannot call getCount when isOk = true"); + } + + return count; + } + + @Nullable public String getReason() { + if (isOk()) { + throw new ISE("Cannot call getReason when isOk = true"); + } + return reason; } @Override - public boolean equals(final Object o) + public boolean equals(Object o) { if (this == o) { return true; @@ -63,22 +87,22 @@ public class AggregateResult if (o == null || getClass() != o.getClass()) { return false; } - final AggregateResult that = (AggregateResult) o; - return ok == that.ok && + AggregateResult that = (AggregateResult) o; + return count == that.count && Objects.equals(reason, that.reason); } @Override public int hashCode() { - return Objects.hash(ok, reason); + return Objects.hash(count, reason); } @Override public String toString() { return "AggregateResult{" + - "ok=" + ok + + "count=" + count + ", reason='" + reason + '\'' + '}'; } diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferArrayGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferArrayGrouper.java index 7994996bcf6..7fd34bf2288 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferArrayGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferArrayGrouper.java @@ -21,14 +21,14 @@ package org.apache.druid.query.groupby.epinephelinae; import com.google.common.base.Preconditions; import com.google.common.base.Supplier; +import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.ISE; -import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.java.util.common.parsers.CloseableIterator; +import org.apache.druid.query.aggregation.AggregatorAdapters; import org.apache.druid.query.aggregation.AggregatorFactory; -import org.apache.druid.query.aggregation.BufferAggregator; import org.apache.druid.query.groupby.epinephelinae.column.GroupByColumnSelectorStrategy; -import org.apache.druid.segment.ColumnSelectorFactory; +import java.io.IOException; import java.nio.ByteBuffer; import java.util.Arrays; import java.util.NoSuchElementException; @@ -47,13 +47,10 @@ import java.util.NoSuchElementException; * different segments cannot be currently retrieved, this grouper can be used only when performing per-segment query * execution. */ -public class BufferArrayGrouper implements IntGrouper +public class BufferArrayGrouper implements VectorGrouper, IntGrouper { - private static final Logger LOG = new Logger(BufferArrayGrouper.class); - private final Supplier bufferSupplier; - private final BufferAggregator[] aggregators; - private final int[] aggregatorOffsets; + private final AggregatorAdapters aggregators; private final int cardinalityWithMissingValue; private final int recordSize; // size of all aggregated values @@ -61,6 +58,10 @@ public class BufferArrayGrouper implements IntGrouper private ByteBuffer usedFlagBuffer; private ByteBuffer valBuffer; + // Scratch objects used by aggregateVector(). Only set if initVectorized() is called. 
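// (Editor's note, not part of the patch: per aggregateVector() call, vAggregationPositions[i] holds
// the byte offset that row (startRow + i) aggregates into, and vAggregationRows is scratch handed to
// Groupers.writeAggregationRows() when the batch does not start at row 0. Sizing both to the maximum
// vector size up front keeps the hot loop allocation-free.)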
+  private int[] vAggregationPositions = null;
+  private int[] vAggregationRows = null;
+
  static long requiredBufferCapacity(
      int cardinality,
      AggregatorFactory[] aggregatorFactories
  )
  {
@@ -72,7 +73,7 @@ public class BufferArrayGrouper implements IntGrouper
        .sum();

    return getUsedFlagBufferCapacity(cardinalityWithMissingValue) + // total used flags size
-        (long) cardinalityWithMissingValue * recordSize; // total values size
+           (long) cardinalityWithMissingValue * recordSize; // total values size
  }

  /**
@@ -86,26 +87,17 @@ public class BufferArrayGrouper implements IntGrouper
  public BufferArrayGrouper(
      // the buffer returned from the below supplier can have dirty bits and should be cleared during initialization
      final Supplier<ByteBuffer> bufferSupplier,
-      final ColumnSelectorFactory columnSelectorFactory,
-      final AggregatorFactory[] aggregatorFactories,
+      final AggregatorAdapters aggregators,
      final int cardinality
  )
  {
-    Preconditions.checkNotNull(aggregatorFactories, "aggregatorFactories");
+    Preconditions.checkNotNull(aggregators, "aggregators");
    Preconditions.checkArgument(cardinality > 0, "Cardinality must be a non-zero positive number");
    this.bufferSupplier = Preconditions.checkNotNull(bufferSupplier, "bufferSupplier");
-    this.aggregators = new BufferAggregator[aggregatorFactories.length];
-    this.aggregatorOffsets = new int[aggregatorFactories.length];
+    this.aggregators = aggregators;
    this.cardinalityWithMissingValue = cardinality + 1;
-
-    int offset = 0;
-    for (int i = 0; i < aggregatorFactories.length; i++) {
-      aggregators[i] = aggregatorFactories[i].factorizeBuffered(columnSelectorFactory);
-      aggregatorOffsets[i] = offset;
-      offset += aggregatorFactories[i].getMaxIntermediateSizeWithNulls();
-    }
-    recordSize = offset;
+    this.recordSize = aggregators.spaceNeeded();
  }

  @Override
@@ -115,6 +107,20 @@ public class BufferArrayGrouper implements IntGrouper
      final ByteBuffer buffer = bufferSupplier.get();

      final int usedFlagBufferEnd = getUsedFlagBufferCapacity(cardinalityWithMissingValue);
+
+      // Sanity check on buffer capacity.
+      if (usedFlagBufferEnd + (long) cardinalityWithMissingValue * recordSize > buffer.capacity()) {
+        // Should not happen in production, since we should only select array-based aggregation if we have
+        // enough scratch space.
+        throw new ISE(
+            "Records of size[%,d] and possible cardinality[%,d] exceed the buffer capacity[%,d].",
+            recordSize,
+            cardinalityWithMissingValue,
+            buffer.capacity()
+        );
+      }
+
+      // Slice up the buffer.
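// (Editor's note, not part of the patch: the resulting layout, assuming getUsedFlagBufferCapacity()
// rounds cardinalityWithMissingValue bits up to whole bytes:
//
//   offset 0                          usedFlagBufferEnd
//   | usedFlagBuffer: 1 bit per slot | valBuffer: cardinalityWithMissingValue * recordSize bytes |
// )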
buffer.position(0); buffer.limit(usedFlagBufferEnd); usedFlagBuffer = buffer.slice(); @@ -129,6 +135,15 @@ public class BufferArrayGrouper implements IntGrouper } } + @Override + public void initVectorized(final int maxVectorSize) + { + init(); + + this.vAggregationPositions = new int[maxVectorSize]; + this.vAggregationRows = new int[maxVectorSize]; + } + @Override public boolean isInitialized() { @@ -136,7 +151,7 @@ public class BufferArrayGrouper implements IntGrouper } @Override - public AggregateResult aggregateKeyHash(int dimIndex) + public AggregateResult aggregateKeyHash(final int dimIndex) { Preconditions.checkArgument( dimIndex >= 0 && dimIndex < cardinalityWithMissingValue, @@ -144,39 +159,62 @@ public class BufferArrayGrouper implements IntGrouper dimIndex ); - final int recordOffset = dimIndex * recordSize; + initializeSlotIfNeeded(dimIndex); + aggregators.aggregateBuffered(valBuffer, dimIndex * recordSize); + return AggregateResult.ok(); + } - if (recordOffset + recordSize > valBuffer.capacity()) { - // This error cannot be recoverd, and the query must fail - throw new ISE( - "A record of size [%d] cannot be written to the array buffer at offset[%d] " - + "because it exceeds the buffer capacity[%d]. Try increasing druid.processing.buffer.sizeBytes", - recordSize, - recordOffset, - valBuffer.capacity() + @Override + public AggregateResult aggregateVector(int[] keySpace, int startRow, int endRow) + { + if (keySpace.length == 0) { + // Empty key space, assume keys are all zeroes. + final int dimIndex = 1; + + initializeSlotIfNeeded(dimIndex); + + aggregators.aggregateVector( + valBuffer, + dimIndex * recordSize, + startRow, + endRow ); - } + } else { + final int numRows = endRow - startRow; - if (!isUsedSlot(dimIndex)) { - initializeSlot(dimIndex); - } + for (int i = 0; i < numRows; i++) { + // +1 matches what hashFunction() would do. 
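// (Editor's note, not part of the patch: the shift reserves slot 0 for
// GroupByColumnSelectorStrategy.GROUP_BY_MISSING_VALUE (-1, i.e. the null key), so dictionary id k
// lands in slot k + 1 and every slot index is nonnegative. The iterator later undoes this by
// shifting keys back by -1 when emitting entries, as noted near the end of this class. The next
// line applies that shift.)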
+ final int dimIndex = keySpace[i] + 1; - for (int i = 0; i < aggregators.length; i++) { - aggregators[i].aggregate(valBuffer, recordOffset + aggregatorOffsets[i]); + if (dimIndex < 0 || dimIndex >= cardinalityWithMissingValue) { + throw new IAE("Invalid dimIndex[%s]", dimIndex); + } + + vAggregationPositions[i] = dimIndex * recordSize; + + initializeSlotIfNeeded(dimIndex); + } + + aggregators.aggregateVector( + valBuffer, + numRows, + vAggregationPositions, + Groupers.writeAggregationRows(vAggregationRows, startRow, endRow) + ); } return AggregateResult.ok(); } - private void initializeSlot(int dimIndex) + private void initializeSlotIfNeeded(int dimIndex) { final int index = dimIndex / Byte.SIZE; final int extraIndex = dimIndex % Byte.SIZE; - usedFlagBuffer.put(index, (byte) (usedFlagBuffer.get(index) | (1 << extraIndex))); + final int usedFlagByte = 1 << extraIndex; - final int recordOffset = dimIndex * recordSize; - for (int i = 0; i < aggregators.length; i++) { - aggregators[i].init(valBuffer, recordOffset + aggregatorOffsets[i]); + if ((usedFlagBuffer.get(index) & usedFlagByte) == 0) { + usedFlagBuffer.put(index, (byte) (usedFlagBuffer.get(index) | (1 << extraIndex))); + aggregators.init(valBuffer, dimIndex * recordSize); } } @@ -185,6 +223,7 @@ public class BufferArrayGrouper implements IntGrouper final int index = dimIndex / Byte.SIZE; final int extraIndex = dimIndex % Byte.SIZE; final int usedFlagByte = 1 << extraIndex; + return (usedFlagBuffer.get(index) & usedFlagByte) != 0; } @@ -214,14 +253,36 @@ public class BufferArrayGrouper implements IntGrouper @Override public void close() { - for (BufferAggregator aggregator : aggregators) { - try { - aggregator.close(); + aggregators.close(); + } + + @Override + public CloseableIterator> iterator() + { + final CloseableIterator> iterator = iterator(false); + final ByteBuffer keyBuffer = ByteBuffer.allocate(Integer.BYTES); + return new CloseableIterator>() + { + @Override + public boolean hasNext() + { + return iterator.hasNext(); } - catch (Exception e) { - LOG.warn(e, "Could not close aggregator [%s], skipping.", aggregator); + + @Override + public Entry next() + { + final Entry integerEntry = iterator.next(); + keyBuffer.putInt(0, integerEntry.getKey()); + return new Entry<>(keyBuffer, integerEntry.getValues()); } - } + + @Override + public void close() throws IOException + { + iterator.close(); + } + }; } @Override @@ -252,10 +313,10 @@ public class BufferArrayGrouper implements IntGrouper final int current = next; next = findNext(current); - final Object[] values = new Object[aggregators.length]; + final Object[] values = new Object[aggregators.size()]; final int recordOffset = current * recordSize; - for (int i = 0; i < aggregators.length; i++) { - values[i] = aggregators[i].get(valBuffer, recordOffset + aggregatorOffsets[i]); + for (int i = 0; i < aggregators.size(); i++) { + values[i] = aggregators.get(valBuffer, recordOffset, i); } // shift by -1 since values are initially shifted by +1 so they are all positive and // GroupByColumnSelectorStrategy.GROUP_BY_MISSING_VALUE is -1 diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouper.java index 5f54fad9133..17995798f1a 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouper.java @@ -20,11 +20,13 @@ 
package org.apache.druid.query.groupby.epinephelinae; import com.google.common.base.Supplier; +import org.apache.commons.lang.mutable.MutableInt; import org.apache.druid.java.util.common.CloseableIterators; import org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.parsers.CloseableIterator; +import org.apache.druid.query.aggregation.AggregatorAdapters; import org.apache.druid.query.aggregation.AggregatorFactory; -import org.apache.druid.segment.ColumnSelectorFactory; import java.nio.ByteBuffer; import java.util.AbstractList; @@ -32,14 +34,14 @@ import java.util.Collections; import java.util.Comparator; import java.util.List; import java.util.NoSuchElementException; +import java.util.function.ToIntFunction; -public class BufferHashGrouper extends AbstractBufferHashGrouper +public class BufferHashGrouper extends AbstractBufferHashGrouper implements VectorGrouper { private static final int MIN_INITIAL_BUCKETS = 4; private static final int DEFAULT_INITIAL_BUCKETS = 1024; private static final float DEFAULT_MAX_LOAD_FACTOR = 0.7f; - private final AggregatorFactory[] aggregatorFactories; private ByteBuffer buffer; private boolean initialized = false; @@ -58,19 +60,23 @@ public class BufferHashGrouper extends AbstractBufferHashGrouper bufferSupplier, final KeySerde keySerde, - final ColumnSelectorFactory columnSelectorFactory, - final AggregatorFactory[] aggregatorFactories, + final AggregatorAdapters aggregators, final int bufferGrouperMaxSize, final float maxLoadFactor, final int initialBuckets, final boolean useDefaultSorting ) { - super(bufferSupplier, keySerde, aggregatorFactories, bufferGrouperMaxSize); - this.aggregatorFactories = aggregatorFactories; + super(bufferSupplier, keySerde, aggregators, HASH_SIZE + keySerde.keySize(), bufferGrouperMaxSize); this.maxLoadFactor = maxLoadFactor > 0 ? maxLoadFactor : DEFAULT_MAX_LOAD_FACTOR; this.initialBuckets = initialBuckets > 0 ? Math.max(MIN_INITIAL_BUCKETS, initialBuckets) : DEFAULT_INITIAL_BUCKETS; @@ -79,14 +85,7 @@ public class BufferHashGrouper extends AbstractBufferHashGrouper extends AbstractBufferHashGrouper { + if (aggregationNumRows.intValue() > 0) { + doAggregateVector(aggregationStartRow.intValue(), aggregationNumRows.intValue()); + aggregationStartRow.setValue(aggregationStartRow.intValue() + aggregationNumRows.intValue()); + aggregationNumRows.setValue(0); + } + } + ); + + if (bucket < 0) { + // This may just trigger a spill and get ignored, which is ok. If it bubbles up to the user, the message will + // be correct. + + // Aggregate any remaining rows. + if (aggregationNumRows.intValue() > 0) { + doAggregateVector(aggregationStartRow.intValue(), aggregationNumRows.intValue()); + } + + return Groupers.hashTableFull(rowNum); + } + + final int bucketStartOffset = hashTable.getOffsetForBucket(bucket); + final boolean bucketWasUsed = hashTable.isBucketUsed(bucket); + + // Set up key and initialize the aggs if this is a new bucket. + if (!bucketWasUsed) { + hashTable.initializeNewBucketKey(bucket, vKeyBuffer, vKeyHashCodes[rowNum]); + aggregators.init(hashTable.getTableBuffer(), bucketStartOffset + baseAggregatorOffset); + } + + // Schedule the current row for aggregation. + vAggregationPositions[aggregationNumRows.intValue()] = bucketStartOffset + Integer.BYTES + keySize; + aggregationNumRows.increment(); + } + + // Aggregate any remaining rows. 
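// (Editor's note, not part of the patch: the batching pattern in aggregateVector() above queues the
// bucket offset of each consecutive row in vAggregationPositions, then flushes the queue with one
// aggregators.aggregateVector() call in three places: from the growth callback, because growing the
// table relocates buckets and would invalidate the queued offsets; when the table fills up, before
// returning a partial result; and once at the end, just below, for the final batch.)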
+ if (aggregationNumRows.intValue() > 0) { + doAggregateVector(aggregationStartRow.intValue(), aggregationNumRows.intValue()); + } + + return AggregateResult.ok(); + } + @Override public boolean isInitialized() { return initialized; } + @Override + public ToIntFunction hashFunction() + { + return Groupers::hashObject; + } + @Override public void newBucketHook(int bucketOffset) { + // Nothing needed. } @Override @@ -152,7 +248,7 @@ public class BufferHashGrouper extends AbstractBufferHashGrouper extends AbstractBufferHashGrouper> iterator() + { + // Unchecked cast, since this method is only called through the VectorGrouper interface, which uses + // ByteBuffer keys (and this is verified in initVectorized). + return (CloseableIterator) iterator(false); + } + @Override public CloseableIterator> iterator(boolean sorted) { @@ -201,7 +306,10 @@ public class BufferHashGrouper extends AbstractBufferHashGrouper extends AbstractBufferHashGrouper extends AbstractBufferHashGrouper +{ + private final int keySize; + + public ByteBufferKeySerde(final int keySize) + { + this.keySize = keySize; + } + + @Override + public int keySize() + { + return keySize; + } + + @Override + public Class keyClazz() + { + return ByteBuffer.class; + } + + @Override + public List getDictionary() + { + return ImmutableList.of(); + } + + @Override + public ByteBuffer toByteBuffer(ByteBuffer key) + { + return key; + } + + @Override + public ByteBuffer fromByteBuffer(ByteBuffer buffer, int position) + { + final ByteBuffer dup = buffer.duplicate(); + dup.position(position).limit(position + keySize); + return dup.slice(); + } + + @Override + public Grouper.BufferComparator bufferComparator() + { + // This class is used by segment processing engines, where bufferComparator will not be called. + throw new UnsupportedOperationException(); + } + + @Override + public Grouper.BufferComparator bufferComparatorWithAggregators( + AggregatorFactory[] aggregatorFactories, + int[] aggregatorOffsets + ) + { + // This class is used by segment processing engines, where bufferComparatorWithAggregators will not be called. 
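// (Editor's note, not part of the patch: plausibly, no comparator is needed because the vectorized
// per-segment path only ever iterates this grouper unsorted — see iterator() above — and any
// sorting needed for merging happens later, in the row-based merging layer, after keys leave this
// serde.)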
+ throw new UnsupportedOperationException(); + } + + @Override + public void reset() + { + // No state, nothing to reset + } +} diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/CloseableGrouperIterator.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/CloseableGrouperIterator.java index 43f3bcb037a..aabb1676778 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/CloseableGrouperIterator.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/CloseableGrouperIterator.java @@ -34,14 +34,13 @@ public class CloseableGrouperIterator implements CloseableIterator grouper, - final boolean sorted, + final CloseableIterator> iterator, final Function, T> transformer, final Closeable closeable ) { this.transformer = transformer; - this.iterator = grouper.iterator(sorted); + this.iterator = iterator; this.closer = Closer.create(); closer.register(iterator); diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ConcurrentGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ConcurrentGrouper.java index 4bf116e26bf..f5ff7ba627c 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ConcurrentGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ConcurrentGrouper.java @@ -264,9 +264,12 @@ public class ConcurrentGrouper implements Grouper synchronized (hashBasedGrouper) { if (!spilling) { - if (hashBasedGrouper.aggregate(key, keyHash).isOk()) { + final AggregateResult aggregateResult = hashBasedGrouper.aggregate(key, keyHash); + if (aggregateResult.isOk()) { return AggregateResult.ok(); } else { + // Expecting all-or-nothing behavior. 
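// (Editor's note, not part of the patch: for contrast, a vectorized caller would handle nonzero
// partial counts roughly like the following sketch; "flushToDiskAndReset" is a hypothetical helper:
//
//   int start = 0;
//   while (true) {
//     final AggregateResult result = grouper.aggregateVector(keySpace, start, endRow);
//     if (result.isOk()) {
//       break;
//     }
//     start += result.getCount();    // rows before this point were aggregated successfully
//     flushToDiskAndReset(grouper);  // make room, then retry the remainder
//   }
// )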
+ assert aggregateResult.getCount() == 0; spilling = true; } } diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java index 7594b9048c4..a39441d5745 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java @@ -22,6 +22,7 @@ package org.apache.druid.query.groupby.epinephelinae; import com.google.common.base.Preconditions; import com.google.common.base.Suppliers; import com.google.common.collect.ImmutableList; +import com.google.common.collect.Iterables; import com.google.common.collect.Maps; import org.apache.druid.collections.NonBlockingPool; import org.apache.druid.collections.ResourceHolder; @@ -34,9 +35,12 @@ import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.guava.BaseSequence; import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.query.ColumnSelectorPlus; +import org.apache.druid.query.QueryContexts; +import org.apache.druid.query.aggregation.AggregatorAdapters; import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.dimension.ColumnSelectorStrategyFactory; import org.apache.druid.query.dimension.DimensionSpec; +import org.apache.druid.query.filter.Filter; import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.groupby.GroupByQueryConfig; import org.apache.druid.query.groupby.epinephelinae.column.DictionaryBuildingStringGroupByColumnSelectorStrategy; @@ -47,6 +51,7 @@ import org.apache.druid.query.groupby.epinephelinae.column.GroupByColumnSelector import org.apache.druid.query.groupby.epinephelinae.column.LongGroupByColumnSelectorStrategy; import org.apache.druid.query.groupby.epinephelinae.column.NullableValueGroupByColumnSelectorStrategy; import org.apache.druid.query.groupby.epinephelinae.column.StringGroupByColumnSelectorStrategy; +import org.apache.druid.query.groupby.epinephelinae.vector.VectorGroupByEngine; import org.apache.druid.query.groupby.strategy.GroupByStrategyV2; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.ColumnValueSelector; @@ -54,7 +59,6 @@ import org.apache.druid.segment.Cursor; import org.apache.druid.segment.DimensionHandlerUtils; import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.StorageAdapter; -import org.apache.druid.segment.VirtualColumns; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.IndexedInts; @@ -69,6 +73,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.NoSuchElementException; +import java.util.function.Function; public class GroupByQueryEngineV2 { @@ -108,15 +113,6 @@ public class GroupByQueryEngineV2 throw new IAE("Should only have one interval, got[%s]", intervals); } - final Sequence cursors = storageAdapter.makeCursors( - Filters.toFilter(query.getDimFilter()), - intervals.get(0), - query.getVirtualColumns(), - query.getGranularity(), - false, - null - ); - final ResourceHolder bufferHolder = intermediateResultsBufferPool.take(); final String fudgeTimestampString = NullHandling.emptyToNullIfNeeded( @@ -127,6 +123,59 @@ public class GroupByQueryEngineV2 ? 
null : DateTimes.utc(Long.parseLong(fudgeTimestampString)); + final Filter filter = Filters.convertToCNFFromQueryContext(query, Filters.toFilter(query.getFilter())); + final Interval interval = Iterables.getOnlyElement(query.getIntervals()); + + final boolean doVectorize = QueryContexts.getVectorize(query).shouldVectorize( + VectorGroupByEngine.canVectorize(query, storageAdapter, filter) + ); + + final Sequence result; + + if (doVectorize) { + result = VectorGroupByEngine.process( + query, + storageAdapter, + bufferHolder.get(), + fudgeTimestamp, + filter, + interval, + querySpecificConfig + ); + } else { + result = processNonVectorized( + query, + storageAdapter, + bufferHolder.get(), + fudgeTimestamp, + querySpecificConfig, + filter, + interval + ); + } + + return result.withBaggage(bufferHolder); + } + + private static Sequence processNonVectorized( + final GroupByQuery query, + final StorageAdapter storageAdapter, + final ByteBuffer processingBuffer, + @Nullable final DateTime fudgeTimestamp, + final GroupByQueryConfig querySpecificConfig, + @Nullable final Filter filter, + final Interval interval + ) + { + final Sequence cursors = storageAdapter.makeCursors( + filter, + interval, + query.getVirtualColumns(), + query.getGranularity(), + false, + null + ); + return cursors.flatMap( cursor -> new BaseSequence<>( new BaseSequence.IteratorMaker>() @@ -135,57 +184,42 @@ public class GroupByQueryEngineV2 public GroupByEngineIterator make() { final ColumnSelectorFactory columnSelectorFactory = cursor.getColumnSelectorFactory(); - final boolean allSingleValueDims = query - .getDimensions() - .stream() - .allMatch(dimension -> { - final ColumnCapabilities columnCapabilities = columnSelectorFactory.getColumnCapabilities( - dimension.getDimension() - ); - return columnCapabilities != null && !columnCapabilities.hasMultipleValues(); - }); - - ColumnSelectorPlus[] selectorPlus = DimensionHandlerUtils + final ColumnSelectorPlus[] selectorPlus = DimensionHandlerUtils .createColumnSelectorPluses( STRATEGY_FACTORY, query.getDimensions(), columnSelectorFactory ); - GroupByColumnSelectorPlus[] dims = createGroupBySelectorPlus(selectorPlus); - final ByteBuffer buffer = bufferHolder.get(); + final GroupByColumnSelectorPlus[] dims = createGroupBySelectorPlus(selectorPlus); - // Check if array-based aggregation is applicable - final boolean useArrayAggregation = isArrayAggregateApplicable( + final int cardinalityForArrayAggregation = getCardinalityForArrayAggregation( querySpecificConfig, query, - dims, storageAdapter, - query.getVirtualColumns(), - buffer + processingBuffer ); - if (useArrayAggregation) { + if (cardinalityForArrayAggregation >= 0) { return new ArrayAggregateIterator( query, querySpecificConfig, cursor, - buffer, + processingBuffer, fudgeTimestamp, dims, - allSingleValueDims, - // There must be 0 or 1 dimension if isArrayAggregateApplicable() is true - dims.length == 0 ? 
1 : storageAdapter.getDimensionCardinality(dims[0].getName()) + isAllSingleValueDims(columnSelectorFactory::getColumnCapabilities, query.getDimensions()), + cardinalityForArrayAggregation ); } else { return new HashAggregateIterator( query, querySpecificConfig, cursor, - buffer, + processingBuffer, fudgeTimestamp, dims, - allSingleValueDims + isAllSingleValueDims(columnSelectorFactory::getColumnCapabilities, query.getDimensions()) ); } } @@ -197,65 +231,91 @@ public class GroupByQueryEngineV2 } } ) - ).withBaggage(bufferHolder); + ); } - private static boolean isArrayAggregateApplicable( + /** + * Returns the cardinality of array needed to do array-based aggregation, or -1 if array-based aggregation + * is impossible. + */ + public static int getCardinalityForArrayAggregation( GroupByQueryConfig querySpecificConfig, GroupByQuery query, - GroupByColumnSelectorPlus[] dims, StorageAdapter storageAdapter, - VirtualColumns virtualColumns, ByteBuffer buffer ) { if (querySpecificConfig.isForceHashAggregation()) { - return false; + return -1; } + final List dimensions = query.getDimensions(); final ColumnCapabilities columnCapabilities; final int cardinality; // Find cardinality - if (dims.length == 0) { + if (dimensions.isEmpty()) { columnCapabilities = null; cardinality = 1; - } else if (dims.length == 1) { + } else if (dimensions.size() == 1) { // Only real columns can use array-based aggregation, since virtual columns cannot currently report their // cardinality. We need to check if a virtual column exists with the same name, since virtual columns can shadow // real columns, and we might miss that since we're going directly to the StorageAdapter (which only knows about // real columns). - if (virtualColumns.exists(dims[0].getName())) { - return false; + if (query.getVirtualColumns().exists(Iterables.getOnlyElement(dimensions).getDimension())) { + return -1; } - columnCapabilities = storageAdapter.getColumnCapabilities(dims[0].getName()); - cardinality = storageAdapter.getDimensionCardinality(dims[0].getName()); + final String columnName = Iterables.getOnlyElement(dimensions).getDimension(); + columnCapabilities = storageAdapter.getColumnCapabilities(columnName); + cardinality = storageAdapter.getDimensionCardinality(columnName); } else { // Cannot use array-based aggregation with more than one dimension. - return false; + return -1; } - // Choose array-based aggregation if the grouping key is a single string dimension of a - // known cardinality - if ((columnCapabilities == null || columnCapabilities.getType().equals(ValueType.STRING)) - && cardinality > 0) { - final AggregatorFactory[] aggregatorFactories = query - .getAggregatorSpecs() - .toArray(new AggregatorFactory[0]); + // Choose array-based aggregation if the grouping key is a single string dimension of a known cardinality + if (columnCapabilities != null && columnCapabilities.getType().equals(ValueType.STRING) && cardinality > 0) { + final AggregatorFactory[] aggregatorFactories = query.getAggregatorSpecs().toArray(new AggregatorFactory[0]); final long requiredBufferCapacity = BufferArrayGrouper.requiredBufferCapacity( cardinality, aggregatorFactories ); // Check that all keys and aggregated values can be contained in the buffer - return requiredBufferCapacity <= buffer.capacity(); + return requiredBufferCapacity <= buffer.capacity() ? 
cardinality : -1; } else { - return false; + return -1; } } - private static class GroupByStrategyFactory implements ColumnSelectorStrategyFactory + /** + * Checks whether all "dimensions" are either single-valued or nonexistent (which is just as good as single-valued, + * since their selectors will show up as full of nulls). + */ + public static boolean isAllSingleValueDims( + final Function capabilitiesFunction, + final List dimensions + ) + { + return dimensions + .stream() + .allMatch( + dimension -> { + if (dimension.mustDecorate()) { + // DimensionSpecs that decorate may turn singly-valued columns into multi-valued selectors. + // To be safe, we must return false here. + return false; + } + + // Now check column capabilities. + final ColumnCapabilities columnCapabilities = capabilitiesFunction.apply(dimension.getDimension()); + return columnCapabilities == null || !columnCapabilities.hasMultipleValues(); + }); + } + + private static class GroupByStrategyFactory + implements ColumnSelectorStrategyFactory { @Override public GroupByColumnSelectorStrategy makeColumnSelectorStrategy( @@ -311,7 +371,7 @@ public class GroupByQueryEngineV2 final GroupByQueryConfig querySpecificConfig, final Cursor cursor, final ByteBuffer buffer, - final DateTime fudgeTimestamp, + @Nullable final DateTime fudgeTimestamp, final GroupByColumnSelectorPlus[] dims, final boolean allSingleValueDims ) @@ -340,8 +400,7 @@ public class GroupByQueryEngineV2 } return new CloseableGrouperIterator<>( - grouper, - false, + grouper.iterator(false), entry -> { Map theMap = Maps.newLinkedHashMap(); @@ -448,7 +507,7 @@ public class GroupByQueryEngineV2 GroupByQueryConfig querySpecificConfig, Cursor cursor, ByteBuffer buffer, - DateTime fudgeTimestamp, + @Nullable DateTime fudgeTimestamp, GroupByColumnSelectorPlus[] dims, boolean allSingleValueDims ) @@ -467,9 +526,10 @@ public class GroupByQueryEngineV2 return new BufferHashGrouper<>( Suppliers.ofInstance(buffer), keySerde, - cursor.getColumnSelectorFactory(), - query.getAggregatorSpecs() - .toArray(new AggregatorFactory[0]), + AggregatorAdapters.factorizeBuffered( + cursor.getColumnSelectorFactory(), + query.getAggregatorSpecs() + ), querySpecificConfig.getBufferGrouperMaxSize(), querySpecificConfig.getBufferGrouperMaxLoadFactor(), querySpecificConfig.getBufferGrouperInitialBuckets(), @@ -600,7 +660,7 @@ public class GroupByQueryEngineV2 GroupByQueryConfig querySpecificConfig, Cursor cursor, ByteBuffer buffer, - DateTime fudgeTimestamp, + @Nullable DateTime fudgeTimestamp, GroupByColumnSelectorPlus[] dims, boolean allSingleValueDims, int cardinality @@ -622,9 +682,7 @@ public class GroupByQueryEngineV2 { return new BufferArrayGrouper( Suppliers.ofInstance(buffer), - cursor.getColumnSelectorFactory(), - query.getAggregatorSpecs() - .toArray(new AggregatorFactory[0]), + AggregatorAdapters.factorizeBuffered(cursor.getColumnSelectorFactory(), query.getAggregatorSpecs()), cardinality ); } @@ -698,7 +756,7 @@ public class GroupByQueryEngineV2 protected void putToMap(Integer key, Map map) { if (dim != null) { - if (key != -1) { + if (key != GroupByColumnSelectorStrategy.GROUP_BY_MISSING_VALUE) { map.put( dim.getOutputName(), ((DimensionSelector) dim.getSelector()).lookupName(key) @@ -710,7 +768,7 @@ public class GroupByQueryEngineV2 } } - private static void convertRowTypesToOutputTypes(List dimensionSpecs, Map rowMap) + public static void convertRowTypesToOutputTypes(List dimensionSpecs, Map rowMap) { for (DimensionSpec dimSpec : dimensionSpecs) { final ValueType outputType = 
dimSpec.getOutputType(); diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/Grouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/Grouper.java index 05664f516d1..596254ecd23 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/Grouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/Grouper.java @@ -25,6 +25,7 @@ import com.google.common.base.Preconditions; import org.apache.druid.java.util.common.parsers.CloseableIterator; import org.apache.druid.query.aggregation.AggregatorFactory; +import javax.annotation.Nullable; import java.io.Closeable; import java.nio.ByteBuffer; import java.util.Arrays; @@ -36,10 +37,12 @@ import java.util.function.ToIntFunction; * Groupers aggregate metrics from rows that they typically get from a ColumnSelectorFactory, under * grouping keys that some outside driver is passing in. They can also iterate over the grouped * rows after the aggregation is done. - *
* <p>
+ * * They work sort of like a map of KeyType to aggregated values, except they don't support * random lookups. * + * See {@link VectorGrouper} for a vectorized version. + * * @param type of the key that will be passed in */ public interface Grouper extends Closeable @@ -89,7 +92,7 @@ public interface Grouper extends Closeable default ToIntFunction hashFunction() { - return Groupers::hash; + return Groupers::hashObject; } /** @@ -247,6 +250,7 @@ public interface Grouper extends Closeable * * @return serialized key, or null if we are unable to serialize more keys due to resource limits */ + @Nullable ByteBuffer toByteBuffer(T key); /** diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/Groupers.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/Groupers.java index d173b04f518..a1d8dbf816e 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/Groupers.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/Groupers.java @@ -19,6 +19,7 @@ package org.apache.druid.query.groupby.epinephelinae; +import javax.annotation.Nullable; import java.nio.ByteBuffer; public class Groupers @@ -28,17 +29,22 @@ public class Groupers // No instantiation } - static final AggregateResult DICTIONARY_FULL = AggregateResult.failure( + private static final AggregateResult DICTIONARY_FULL_ZERO_COUNT = AggregateResult.partial( + 0, "Not enough dictionary space to execute this query. Try increasing " + "druid.query.groupBy.maxMergingDictionarySize or enable disk spilling by setting " + "druid.query.groupBy.maxOnDiskStorage to a positive number." ); - static final AggregateResult HASH_TABLE_FULL = AggregateResult.failure( + + private static final AggregateResult HASH_TABLE_FULL_ZERO_COUNT = AggregateResult.partial( + 0, "Not enough aggregation buffer space to execute this query. Try increasing " + "druid.processing.buffer.sizeBytes or enable disk spilling by setting " + "druid.query.groupBy.maxOnDiskStorage to a positive number." ); + private static final int USED_FLAG_MASK = 0x7fffffff; + private static final int C1 = 0xcc9e2d51; private static final int C2 = 0x1b873593; @@ -50,18 +56,46 @@ public class Groupers * MurmurHash3 was written by Austin Appleby, and is placed in the public domain. The author * hereby disclaims copyright to this source code. */ - static int smear(int hashCode) + private static int smear(int hashCode) { return C2 * Integer.rotateLeft(hashCode * C1, 15); } - public static int hash(final Object obj) + public static AggregateResult dictionaryFull(final int count) + { + if (count == 0) { + return DICTIONARY_FULL_ZERO_COUNT; + } else { + return AggregateResult.partial(count, DICTIONARY_FULL_ZERO_COUNT.getReason()); + } + } + + public static AggregateResult hashTableFull(final int count) + { + if (count == 0) { + return HASH_TABLE_FULL_ZERO_COUNT; + } else { + return AggregateResult.partial(count, HASH_TABLE_FULL_ZERO_COUNT.getReason()); + } + } + + public static int hashObject(final Object obj) { // Mask off the high bit so we can use that to determine if a bucket is used or not. - // Also apply the smear function, to improve distribution. - final int code = obj.hashCode(); - return smear(code) & 0x7fffffff; + // Also apply the "smear" function, to improve distribution. + return smear(obj.hashCode()) & USED_FLAG_MASK; + } + public static int hashIntArray(final int[] ints, final int start, final int length) + { + // Similar to what Arrays.hashCode would do. 
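// (Editor's note, not part of the patch: for start == 0 and length == ints.length, the polynomial
// below reproduces java.util.Arrays.hashCode(int[]) exactly, so
// hashIntArray(ints, 0, ints.length) == (smear(Arrays.hashCode(ints)) & USED_FLAG_MASK).)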
+ // Also apply the "smear" function, to improve distribution. + int hashCode = 1; + for (int i = 0; i < length; i++) { + hashCode = 31 * hashCode + ints[start + i]; + } + + return smear(hashCode) & USED_FLAG_MASK; } static int getUsedFlag(int keyHash) @@ -76,4 +110,22 @@ public class Groupers slice.limit(slice.position() + sliceSize); return slice.slice(); } + + /** + * Write ints from "start" to "end" into "scratch", if start != 0. Otherwise, return null. + */ + @Nullable + public static int[] writeAggregationRows(final int[] scratch, final int start, final int end) + { + if (start == 0) { + return null; + } else { + final int numRows = end - start; + for (int i = 0; i < numRows; i++) { + scratch[i] = start + i; + } + + return scratch; + } + } } diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java index c8d97eafc1b..4c71c31167a 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java @@ -24,8 +24,8 @@ import org.apache.druid.java.util.common.CloseableIterators; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.parsers.CloseableIterator; +import org.apache.druid.query.aggregation.AggregatorAdapters; import org.apache.druid.query.aggregation.AggregatorFactory; -import org.apache.druid.segment.ColumnSelectorFactory; import java.nio.ByteBuffer; import java.util.AbstractList; @@ -40,8 +40,6 @@ public class LimitedBufferHashGrouper extends AbstractBufferHashGrouper private static final int DEFAULT_INITIAL_BUCKETS = 1024; private static final float DEFAULT_MAX_LOAD_FACTOR = 0.7f; - private final AggregatorFactory[] aggregatorFactories; - // Limit to apply to results. private int limit; @@ -66,8 +64,7 @@ public class LimitedBufferHashGrouper extends AbstractBufferHashGrouper public LimitedBufferHashGrouper( final Supplier bufferSupplier, final Grouper.KeySerde keySerde, - final ColumnSelectorFactory columnSelectorFactory, - final AggregatorFactory[] aggregatorFactories, + final AggregatorAdapters aggregators, final int bufferGrouperMaxSize, final float maxLoadFactor, final int initialBuckets, @@ -75,7 +72,7 @@ public class LimitedBufferHashGrouper extends AbstractBufferHashGrouper final boolean sortHasNonGroupingFields ) { - super(bufferSupplier, keySerde, aggregatorFactories, bufferGrouperMaxSize); + super(bufferSupplier, keySerde, aggregators, HASH_SIZE + keySerde.keySize(), bufferGrouperMaxSize); this.maxLoadFactor = maxLoadFactor > 0 ? maxLoadFactor : DEFAULT_MAX_LOAD_FACTOR; this.initialBuckets = initialBuckets > 0 ? 
Math.max(MIN_INITIAL_BUCKETS, initialBuckets) : DEFAULT_INITIAL_BUCKETS; this.limit = limit; @@ -85,18 +82,9 @@ public class LimitedBufferHashGrouper extends AbstractBufferHashGrouper throw new IAE("Invalid maxLoadFactor[%f], must be < 1.0", maxLoadFactor); } - int offset = HASH_SIZE + keySize; - this.aggregatorFactories = aggregatorFactories; - for (int i = 0; i < aggregatorFactories.length; i++) { - aggregators[i] = aggregatorFactories[i].factorizeBuffered(columnSelectorFactory); - aggregatorOffsets[i] = offset; - offset += aggregatorFactories[i].getMaxIntermediateSizeWithNulls(); - } - // For each bucket, store an extra field indicating the bucket's current index within the heap when - // pushing down limits - offset += Integer.BYTES; - this.bucketSize = offset; + // pushing down limits (size Integer.BYTES). + this.bucketSize = HASH_SIZE + keySerde.keySize() + Integer.BYTES + aggregators.spaceNeeded(); } @Override @@ -374,8 +362,8 @@ public class LimitedBufferHashGrouper extends AbstractBufferHashGrouper return new Comparator() { final BufferComparator bufferComparator = keySerde.bufferComparatorWithAggregators( - aggregatorFactories, - aggregatorOffsets + aggregators.factories().toArray(new AggregatorFactory[0]), + aggregators.aggregatorPositions() ); @Override @@ -511,14 +499,12 @@ public class LimitedBufferHashGrouper extends AbstractBufferHashGrouper offsetHeap.setAt(i, newBucketOffset); // relocate aggregators (see https://github.com/apache/incubator-druid/pull/4071) - for (int j = 0; j < aggregators.length; j++) { - aggregators[j].relocate( - oldBucketOffset + aggregatorOffsets[j], - newBucketOffset + aggregatorOffsets[j], - tableBuffer, - newTableBuffer - ); - } + aggregators.relocate( + oldBucketOffset + baseAggregatorOffset, + newBucketOffset + baseAggregatorOffset, + tableBuffer, + newTableBuffer + ); } } diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java index bd41d08eedb..de5da6222fb 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java @@ -436,8 +436,7 @@ public class RowBasedGrouperHelper final boolean includeTimestamp = GroupByStrategyV2.getUniversalTimestamp(query) == null; return new CloseableGrouperIterator<>( - grouper, - true, + grouper.iterator(true), new Function, Row>() { @Override @@ -833,7 +832,10 @@ public class RowBasedGrouperHelper @Override public int compare(Grouper.Entry entry1, Grouper.Entry entry2) { - final int timeCompare = Longs.compare((long) entry1.getKey().getKey()[0], (long) entry2.getKey().getKey()[0]); + final int timeCompare = Longs.compare( + (long) entry1.getKey().getKey()[0], + (long) entry2.getKey().getKey()[0] + ); if (timeCompare != 0) { return timeCompare; @@ -930,8 +932,10 @@ public class RowBasedGrouperHelper // use natural comparison cmp = Comparators.naturalNullsFirst().compare(lhs, rhs); } else { - cmp = comparator.compare(DimensionHandlerUtils.convertObjectToString(lhs), - DimensionHandlerUtils.convertObjectToString(rhs)); + cmp = comparator.compare( + DimensionHandlerUtils.convertObjectToString(lhs), + DimensionHandlerUtils.convertObjectToString(rhs) + ); } if (cmp != 0) { @@ -1637,7 +1641,8 @@ public class RowBasedGrouperHelper FloatRowBasedKeySerdeHelper( int keyBufferPosition, boolean pushLimitDown, - @Nullable 
StringComparator stringComparator) + @Nullable StringComparator stringComparator + ) { this.keyBufferPosition = keyBufferPosition; if (isPrimitiveComparable(pushLimitDown, stringComparator)) { diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/SpillingGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/SpillingGrouper.java index 2ef2eb9cfc5..249060a5b8c 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/SpillingGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/SpillingGrouper.java @@ -32,6 +32,7 @@ import org.apache.druid.java.util.common.io.Closer; import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.java.util.common.parsers.CloseableIterator; import org.apache.druid.query.BaseQuery; +import org.apache.druid.query.aggregation.AggregatorAdapters; import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.groupby.orderby.DefaultLimitSpec; import org.apache.druid.segment.ColumnSelectorFactory; @@ -41,6 +42,7 @@ import java.io.FileInputStream; import java.io.IOException; import java.nio.ByteBuffer; import java.util.ArrayList; +import java.util.Arrays; import java.util.Comparator; import java.util.HashSet; import java.util.Iterator; @@ -55,11 +57,12 @@ import java.util.Set; public class SpillingGrouper implements Grouper { private static final Logger log = new Logger(SpillingGrouper.class); - - private final Grouper grouper; - private static final AggregateResult DISK_FULL = AggregateResult.failure( + private static final AggregateResult DISK_FULL = AggregateResult.partial( + 0, "Not enough disk space to execute this query. Try raising druid.query.groupBy.maxOnDiskStorage." ); + + private final Grouper grouper; private final KeySerde keySerde; private final LimitedTemporaryStorage temporaryStorage; private final ObjectMapper spillMapper; @@ -96,8 +99,7 @@ public class SpillingGrouper implements Grouper LimitedBufferHashGrouper limitGrouper = new LimitedBufferHashGrouper<>( bufferSupplier, keySerde, - columnSelectorFactory, - aggregatorFactories, + AggregatorAdapters.factorizeBuffered(columnSelectorFactory, Arrays.asList(aggregatorFactories)), bufferGrouperMaxSize, bufferGrouperMaxLoadFactor, bufferGrouperInitialBuckets, @@ -119,8 +121,7 @@ public class SpillingGrouper implements Grouper this.grouper = new BufferHashGrouper<>( bufferSupplier, keySerde, - columnSelectorFactory, - aggregatorFactories, + AggregatorAdapters.factorizeBuffered(columnSelectorFactory, Arrays.asList(aggregatorFactories)), bufferGrouperMaxSize, bufferGrouperMaxLoadFactor, bufferGrouperInitialBuckets, @@ -133,8 +134,7 @@ public class SpillingGrouper implements Grouper this.grouper = new BufferHashGrouper<>( bufferSupplier, keySerde, - columnSelectorFactory, - aggregatorFactories, + AggregatorAdapters.factorizeBuffered(columnSelectorFactory, Arrays.asList(aggregatorFactories)), bufferGrouperMaxSize, bufferGrouperMaxLoadFactor, bufferGrouperInitialBuckets, @@ -168,6 +168,9 @@ public class SpillingGrouper implements Grouper if (result.isOk() || !spillingAllowed || temporaryStorage.maxSize() <= 0) { return result; } else { + // Expecting all-or-nothing behavior. + assert result.getCount() == 0; + // Warning: this can potentially block up a processing thread for a while. 
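+ // A partial(0, ...) result means no rows were aggregated for this key, so once spill()
+ // frees up space, the caller can simply offer the very same key to the grouper again.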
try { spill(); diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/VectorGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/VectorGrouper.java new file mode 100644 index 00000000000..1da43f8a99a --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/VectorGrouper.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.groupby.epinephelinae; + +import org.apache.druid.java.util.common.parsers.CloseableIterator; + +import java.io.Closeable; +import java.nio.ByteBuffer; + +/** + * Like a {@link Grouper}, but vectorized. Keys are always int arrays, so there is no generic type parameter KeyType. + *
+ * This interface is designed such that an implementation can implement both Grouper and VectorGrouper. Of course, + * it would generally only make sense for a particular instance to be called with one set of functionality or the + * other. + */ +public interface VectorGrouper extends Closeable +{ + /** + * Initialize the grouper. This method needs to be called before calling {@link #aggregateVector}. + */ + void initVectorized(int maxVectorSize); + + /** + * Aggregate the current vector of rows from "startRow" to "endRow" using the provided keys. + * + * @param keySpace array holding keys, chunked into ints. First (endRow - startRow) keys + * must be valid. + * @param startRow row to start at (inclusive). + * @param endRow row to end at (exclusive). + * + * @return result that indicates how many keys were aggregated (may be partial due to resource limits) + */ + AggregateResult aggregateVector(int[] keySpace, int startRow, int endRow); + + /** + * Reset the grouper to its initial state. + */ + void reset(); + + /** + * Close the grouper and release associated resources. + */ + @Override + void close(); + + /** + * Iterate through entries. + *
+ * Some implementations allow writes even after this method is called. After you are done with the iterator + * returned by this method, you should either call {@link #close()} (if you are done with the VectorGrouper) or + * {@link #reset()} (if you want to reuse it). + *
+ * Callers must process and discard the returned {@link Grouper.Entry}s immediately, because the keys may + * be reused. + * + * @return entry iterator + */ + CloseableIterator> iterator(); +} diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DoubleGroupByVectorColumnSelector.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DoubleGroupByVectorColumnSelector.java new file mode 100644 index 00000000000..2802e3a8aec --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DoubleGroupByVectorColumnSelector.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.groupby.epinephelinae.vector; + +import org.apache.druid.segment.vector.VectorValueSelector; + +import java.nio.ByteBuffer; +import java.util.Map; + +public class DoubleGroupByVectorColumnSelector implements GroupByVectorColumnSelector +{ + private final VectorValueSelector selector; + + DoubleGroupByVectorColumnSelector(final VectorValueSelector selector) + { + this.selector = selector; + } + + @Override + public int getGroupingKeySize() + { + return 2; + } + + @Override + public void writeKeys( + final int[] keySpace, + final int keySize, + final int keyOffset, + final int startRow, + final int endRow + ) + { + final double[] vector = selector.getDoubleVector(); + + for (int i = startRow, j = keyOffset; i < endRow; i++, j += keySize) { + final long longValue = Double.doubleToLongBits(vector[i]); + keySpace[j] = (int) (longValue >>> 32); + keySpace[j + 1] = (int) (longValue & 0xffffffffL); + } + } + + @Override + public void writeKeyToResultRow( + final String outputName, + final ByteBuffer keyBuffer, + final int keyOffset, + final Map resultMap + ) + { + final double value = keyBuffer.getDouble(keyOffset * Integer.BYTES); + resultMap.put(outputName, value); + } +} diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/FloatGroupByVectorColumnSelector.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/FloatGroupByVectorColumnSelector.java new file mode 100644 index 00000000000..5adbdb1f14f --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/FloatGroupByVectorColumnSelector.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.groupby.epinephelinae.vector; + +import org.apache.druid.segment.vector.VectorValueSelector; + +import java.nio.ByteBuffer; +import java.util.Map; + +public class FloatGroupByVectorColumnSelector implements GroupByVectorColumnSelector +{ + private final VectorValueSelector selector; + + FloatGroupByVectorColumnSelector(final VectorValueSelector selector) + { + this.selector = selector; + } + + @Override + public int getGroupingKeySize() + { + return 1; + } + + @Override + public void writeKeys( + final int[] keySpace, + final int keySize, + final int keyOffset, + final int startRow, + final int endRow + ) + { + final float[] vector = selector.getFloatVector(); + + for (int i = startRow, j = keyOffset; i < endRow; i++, j += keySize) { + keySpace[j] = Float.floatToIntBits(vector[i]); + } + } + + @Override + public void writeKeyToResultRow( + final String outputName, + final ByteBuffer keyBuffer, + final int keyOffset, + final Map resultMap + ) + { + final float value = Float.intBitsToFloat(keyBuffer.getInt(keyOffset * Integer.BYTES)); + resultMap.put(outputName, value); + } +} diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnSelector.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnSelector.java new file mode 100644 index 00000000000..3cc415368eb --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnSelector.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.groupby.epinephelinae.vector; + +import java.nio.ByteBuffer; +import java.util.Map; + +public interface GroupByVectorColumnSelector +{ + int getGroupingKeySize(); + + void writeKeys(int[] keySpace, int keySize, int keyOffset, int startRow, int endRow); + + void writeKeyToResultRow( + String outputName, + ByteBuffer keyBuffer, + int keyOffset, + Map resultMap + ); +} diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnStrategizer.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnStrategizer.java new file mode 100644 index 00000000000..c14041cdeae --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnStrategizer.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.groupby.epinephelinae.vector; + +import org.apache.druid.query.dimension.VectorColumnStrategizer; +import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; +import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; +import org.apache.druid.segment.vector.VectorValueSelector; + +public class GroupByVectorColumnStrategizer implements VectorColumnStrategizer +{ + private static final GroupByVectorColumnStrategizer INSTANCE = new GroupByVectorColumnStrategizer(); + + private GroupByVectorColumnStrategizer() + { + // Singleton. 
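The GroupByVectorColumnSelector contract above packs each grouping key into getGroupingKeySize() ints per row: one int for a float or a string dictionary id, two ints (high word first, then low word) for a long or a double. The engine later hands the key back as a big-endian ByteBuffer (see ByteBufferKeySerde in the VectorGroupByEngine hunk below), which is why writeKeyToResultRow can read it straight back with getLong/getDouble at keyOffset * Integer.BYTES. A self-contained round-trip sketch (plain Java, no Druid classes):

  import java.nio.ByteBuffer;

  class KeyPackingSketch
  {
    public static void main(String[] args)
    {
      // Split a double into two ints, as DoubleGroupByVectorColumnSelector.writeKeys does.
      final long bits = Double.doubleToLongBits(3.25);
      final int hi = (int) (bits >>> 32);
      final int lo = (int) (bits & 0xffffffffL);

      // The int[] key row ends up in a ByteBuffer key; ByteBuffer is big-endian by default,
      // so writing the high word first preserves the original byte order.
      final ByteBuffer keyBuffer = ByteBuffer.allocate(2 * Integer.BYTES);
      keyBuffer.putInt(0, hi);
      keyBuffer.putInt(Integer.BYTES, lo);

      // The same read that writeKeyToResultRow performs with keyOffset = 0.
      System.out.println(keyBuffer.getDouble(0)); // prints 3.25
    }
  }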
+ } + + public static GroupByVectorColumnStrategizer instance() + { + return INSTANCE; + } + + @Override + public GroupByVectorColumnSelector makeSingleValueDimensionStrategy(final SingleValueDimensionVectorSelector selector) + { + return new SingleValueStringGroupByVectorColumnSelector(selector); + } + + @Override + public GroupByVectorColumnSelector makeMultiValueDimensionStrategy(final MultiValueDimensionVectorSelector selector) + { + throw new UnsupportedOperationException("Multi-value dimensions not yet implemented for vectorized groupBys"); + } + + @Override + public GroupByVectorColumnSelector makeFloatStrategy(final VectorValueSelector selector) + { + return new FloatGroupByVectorColumnSelector(selector); + } + + @Override + public GroupByVectorColumnSelector makeDoubleStrategy(final VectorValueSelector selector) + { + return new DoubleGroupByVectorColumnSelector(selector); + } + + @Override + public GroupByVectorColumnSelector makeLongStrategy(final VectorValueSelector selector) + { + return new LongGroupByVectorColumnSelector(selector); + } +} diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/LongGroupByVectorColumnSelector.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/LongGroupByVectorColumnSelector.java new file mode 100644 index 00000000000..6ddbd99b4e8 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/LongGroupByVectorColumnSelector.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.groupby.epinephelinae.vector; + +import org.apache.druid.segment.vector.VectorValueSelector; + +import java.nio.ByteBuffer; +import java.util.Map; + +public class LongGroupByVectorColumnSelector implements GroupByVectorColumnSelector +{ + private final VectorValueSelector selector; + + LongGroupByVectorColumnSelector(final VectorValueSelector selector) + { + this.selector = selector; + } + + @Override + public int getGroupingKeySize() + { + return 2; + } + + @Override + public void writeKeys( + final int[] keySpace, + final int keySize, + final int keyOffset, + final int startRow, + final int endRow + ) + { + final long[] vector = selector.getLongVector(); + + for (int i = startRow, j = keyOffset; i < endRow; i++, j += keySize) { + keySpace[j] = (int) (vector[i] >>> 32); + keySpace[j + 1] = (int) (vector[i] & 0xffffffffL); + } + } + + @Override + public void writeKeyToResultRow( + final String outputName, + final ByteBuffer keyBuffer, + final int keyOffset, + final Map resultMap + ) + { + final long value = keyBuffer.getLong(keyOffset * Integer.BYTES); + resultMap.put(outputName, value); + } +} diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/SingleValueStringGroupByVectorColumnSelector.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/SingleValueStringGroupByVectorColumnSelector.java new file mode 100644 index 00000000000..6a9b4289821 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/SingleValueStringGroupByVectorColumnSelector.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.groupby.epinephelinae.vector; + +import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; + +import java.nio.ByteBuffer; +import java.util.Map; + +public class SingleValueStringGroupByVectorColumnSelector implements GroupByVectorColumnSelector +{ + private final SingleValueDimensionVectorSelector selector; + + SingleValueStringGroupByVectorColumnSelector(final SingleValueDimensionVectorSelector selector) + { + this.selector = selector; + } + + @Override + public int getGroupingKeySize() + { + return 1; + } + + @Override + public void writeKeys( + final int[] keySpace, + final int keySize, + final int keyOffset, + final int startRow, + final int endRow + ) + { + final int[] rowVector = selector.getRowVector(); + + for (int i = startRow, j = keyOffset; i < endRow; i++, j += keySize) { + keySpace[j] = rowVector[i]; + } + } + + @Override + public void writeKeyToResultRow( + final String outputName, + final ByteBuffer keyBuffer, + final int keyOffset, + final Map resultMap + ) + { + final int id = keyBuffer.getInt(keyOffset * Integer.BYTES); + resultMap.put(outputName, selector.lookupName(id)); + } +} diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java new file mode 100644 index 00000000000..96b9988e5ec --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java @@ -0,0 +1,436 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.groupby.epinephelinae.vector; + +import com.google.common.base.Suppliers; +import org.apache.druid.data.input.MapBasedRow; +import org.apache.druid.data.input.Row; +import org.apache.druid.java.util.common.ISE; +import org.apache.druid.java.util.common.guava.BaseSequence; +import org.apache.druid.java.util.common.guava.Sequence; +import org.apache.druid.java.util.common.parsers.CloseableIterator; +import org.apache.druid.query.QueryContexts; +import org.apache.druid.query.aggregation.AggregatorAdapters; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.dimension.DimensionSpec; +import org.apache.druid.query.filter.Filter; +import org.apache.druid.query.groupby.GroupByQuery; +import org.apache.druid.query.groupby.GroupByQueryConfig; +import org.apache.druid.query.groupby.epinephelinae.AggregateResult; +import org.apache.druid.query.groupby.epinephelinae.BufferArrayGrouper; +import org.apache.druid.query.groupby.epinephelinae.BufferHashGrouper; +import org.apache.druid.query.groupby.epinephelinae.ByteBufferKeySerde; +import org.apache.druid.query.groupby.epinephelinae.CloseableGrouperIterator; +import org.apache.druid.query.groupby.epinephelinae.GroupByQueryEngineV2; +import org.apache.druid.query.groupby.epinephelinae.Grouper; +import org.apache.druid.query.groupby.epinephelinae.VectorGrouper; +import org.apache.druid.query.vector.VectorCursorGranularizer; +import org.apache.druid.segment.DimensionHandlerUtils; +import org.apache.druid.segment.StorageAdapter; +import org.apache.druid.segment.filter.Filters; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorCursor; +import org.joda.time.DateTime; +import org.joda.time.Interval; + +import javax.annotation.Nullable; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Collections; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.NoSuchElementException; +import java.util.stream.Collectors; + +public class VectorGroupByEngine +{ + private VectorGroupByEngine() + { + // No instantiation. + } + + public static boolean canVectorize( + final GroupByQuery query, + final StorageAdapter adapter, + @Nullable final Filter filter + ) + { + // Multi-value dimensions are not yet supported. + // + // Two notes here about how we're handling this check: + // 1) After multi-value dimensions are supported, we could alter "GroupByQueryEngineV2.isAllSingleValueDims" + // to accept a ColumnSelectorFactory, which makes more sense than using a StorageAdapter (see #8013). + // 2) Technically using StorageAdapter here is bad since it only looks at real columns, but they might + // be shadowed by virtual columns (again, see #8013). But it's fine for now since adapter.canVectorize + // always returns false if there are any virtual columns. + // + // This situation should sort itself out pretty well once this engine supports multi-valued columns. Then we + // won't have to worry about having this all-single-value-dims check here. 
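+ // (In the conjunction below, the "false" passed to adapter.canVectorize is the "descending"
+ // flag; groupBy never uses descending cursors.)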
+ + return GroupByQueryEngineV2.isAllSingleValueDims(adapter::getColumnCapabilities, query.getDimensions()) + && query.getDimensions().stream().allMatch(DimensionSpec::canVectorize) + && query.getAggregatorSpecs().stream().allMatch(AggregatorFactory::canVectorize) + && adapter.canVectorize(filter, query.getVirtualColumns(), false); + } + + public static Sequence process( + final GroupByQuery query, + final StorageAdapter storageAdapter, + final ByteBuffer processingBuffer, + @Nullable final DateTime fudgeTimestamp, + @Nullable final Filter filter, + final Interval interval, + final GroupByQueryConfig config + ) + { + if (!canVectorize(query, storageAdapter, filter)) { + throw new ISE("Cannot vectorize"); + } + + return new BaseSequence<>( + new BaseSequence.IteratorMaker>() + { + @Override + public CloseableIterator make() + { + final VectorCursor cursor = storageAdapter.makeVectorCursor( + Filters.toFilter(query.getDimFilter()), + interval, + query.getVirtualColumns(), + false, + QueryContexts.getVectorSize(query), + null + ); + + if (cursor == null) { + // Return empty iterator. + return new CloseableIterator() + { + @Override + public boolean hasNext() + { + return false; + } + + @Override + public Row next() + { + throw new NoSuchElementException(); + } + + @Override + public void close() + { + // Nothing to do. + } + }; + } + + try { + final VectorColumnSelectorFactory columnSelectorFactory = cursor.getColumnSelectorFactory(); + final List dimensions = query.getDimensions().stream().map( + dimensionSpec -> + DimensionHandlerUtils.makeVectorProcessor( + dimensionSpec, + GroupByVectorColumnStrategizer.instance(), + columnSelectorFactory + ) + ).collect(Collectors.toList()); + + return new VectorGroupByEngineIterator( + query, + config, + storageAdapter, + cursor, + interval, + dimensions, + processingBuffer, + fudgeTimestamp + ); + } + catch (Throwable e) { + try { + cursor.close(); + } + catch (Throwable e2) { + e.addSuppressed(e2); + } + throw e; + } + } + + @Override + public void cleanup(CloseableIterator iterFromMake) + { + try { + iterFromMake.close(); + } + catch (IOException e) { + throw new RuntimeException(e); + } + } + } + ); + } + + private static class VectorGroupByEngineIterator implements CloseableIterator + { + private final GroupByQuery query; + private final GroupByQueryConfig querySpecificConfig; + private final StorageAdapter storageAdapter; + private final VectorCursor cursor; + private final List selectors; + private final ByteBuffer processingBuffer; + private final DateTime fudgeTimestamp; + private final int keySize; + private final int[] keySpace; + private final Grouper.KeySerde keySerde; + private final VectorGrouper vectorGrouper; + + @Nullable + private final VectorCursorGranularizer granulizer; + + // Granularity-bucket iterator and current bucket. 
+ private final Iterator bucketIterator; + + @Nullable + private Interval bucketInterval; + + private int partiallyAggregatedRows = -1; + + @Nullable + private CloseableGrouperIterator delegate = null; + + VectorGroupByEngineIterator( + final GroupByQuery query, + final GroupByQueryConfig config, + final StorageAdapter storageAdapter, + final VectorCursor cursor, + final Interval queryInterval, + final List selectors, + final ByteBuffer processingBuffer, + @Nullable final DateTime fudgeTimestamp + ) + { + this.query = query; + this.querySpecificConfig = config; + this.storageAdapter = storageAdapter; + this.cursor = cursor; + this.selectors = selectors; + this.processingBuffer = processingBuffer; + this.fudgeTimestamp = fudgeTimestamp; + this.keySize = selectors.stream().mapToInt(GroupByVectorColumnSelector::getGroupingKeySize).sum(); + this.keySpace = new int[keySize * cursor.getMaxVectorSize()]; + this.keySerde = new ByteBufferKeySerde(keySize * Integer.BYTES); + this.vectorGrouper = makeGrouper(); + this.granulizer = VectorCursorGranularizer.create(storageAdapter, cursor, query.getGranularity(), queryInterval); + + if (granulizer != null) { + this.bucketIterator = granulizer.getBucketIterable().iterator(); + } else { + this.bucketIterator = Collections.emptyIterator(); + } + + this.bucketInterval = this.bucketIterator.hasNext() ? this.bucketIterator.next() : null; + } + + @Override + public Row next() + { + if (delegate == null || !delegate.hasNext()) { + throw new NoSuchElementException(); + } + + return delegate.next(); + } + + @Override + public boolean hasNext() + { + if (delegate != null && delegate.hasNext()) { + return true; + } else { + final boolean moreToRead = !cursor.isDone() || partiallyAggregatedRows >= 0; + + if (bucketInterval != null && moreToRead) { + while (delegate == null || !delegate.hasNext()) { + if (delegate != null) { + delegate.close(); + vectorGrouper.reset(); + } + + delegate = initNewDelegate(); + } + return true; + } else { + return false; + } + } + } + + @Override + public void remove() + { + throw new UnsupportedOperationException(); + } + + @Override + public void close() + { + cursor.close(); + + if (delegate != null) { + delegate.close(); + } + } + + private VectorGrouper makeGrouper() + { + final VectorGrouper grouper; + + final int cardinalityForArrayAggregation = GroupByQueryEngineV2.getCardinalityForArrayAggregation( + querySpecificConfig, + query, + storageAdapter, + processingBuffer + ); + + if (cardinalityForArrayAggregation >= 0) { + grouper = new BufferArrayGrouper( + Suppliers.ofInstance(processingBuffer), + AggregatorAdapters.factorizeVector( + cursor.getColumnSelectorFactory(), + query.getAggregatorSpecs() + ), + cardinalityForArrayAggregation + ); + } else { + grouper = new BufferHashGrouper<>( + Suppliers.ofInstance(processingBuffer), + keySerde, + AggregatorAdapters.factorizeVector( + cursor.getColumnSelectorFactory(), + query.getAggregatorSpecs() + ), + querySpecificConfig.getBufferGrouperMaxSize(), + querySpecificConfig.getBufferGrouperMaxLoadFactor(), + querySpecificConfig.getBufferGrouperInitialBuckets(), + true + ); + } + + grouper.initVectorized(cursor.getMaxVectorSize()); + + return grouper; + } + + private CloseableGrouperIterator initNewDelegate() + { + // Method must not be called unless there's a current bucketInterval. + assert bucketInterval != null; + + final DateTime timestamp = fudgeTimestamp != null + ? 
fudgeTimestamp + : query.getGranularity().toDateTime(bucketInterval.getStartMillis()); + + while (!cursor.isDone()) { + final int startOffset; + + if (partiallyAggregatedRows < 0) { + granulizer.setCurrentOffsets(bucketInterval); + startOffset = granulizer.getStartOffset(); + } else { + startOffset = granulizer.getStartOffset() + partiallyAggregatedRows; + } + + if (granulizer.getEndOffset() > startOffset) { + // Write keys to the keySpace. + int keyOffset = 0; + for (final GroupByVectorColumnSelector selector : selectors) { + selector.writeKeys(keySpace, keySize, keyOffset, startOffset, granulizer.getEndOffset()); + keyOffset += selector.getGroupingKeySize(); + } + + // Aggregate this vector. + final AggregateResult result = vectorGrouper.aggregateVector( + keySpace, + startOffset, + granulizer.getEndOffset() + ); + + if (result.isOk()) { + partiallyAggregatedRows = -1; + } else { + if (partiallyAggregatedRows < 0) { + partiallyAggregatedRows = result.getCount(); + } else { + partiallyAggregatedRows += result.getCount(); + } + } + } else { + partiallyAggregatedRows = -1; + } + + if (partiallyAggregatedRows >= 0) { + break; + } else if (!granulizer.advanceCursorWithinBucket()) { + // Advance bucketInterval. + bucketInterval = bucketIterator.hasNext() ? bucketIterator.next() : null; + break; + } + } + + return new CloseableGrouperIterator<>( + vectorGrouper.iterator(), + entry -> { + Map theMap = new LinkedHashMap<>(); + + // Add dimensions. + int keyOffset = 0; + for (int i = 0; i < selectors.size(); i++) { + final GroupByVectorColumnSelector selector = selectors.get(i); + + selector.writeKeyToResultRow( + query.getDimensions().get(i).getOutputName(), + entry.getKey(), + keyOffset, + theMap + ); + + keyOffset += selector.getGroupingKeySize(); + } + + // Convert dimension values to desired output types, possibly. + GroupByQueryEngineV2.convertRowTypesToOutputTypes(query.getDimensions(), theMap); + + // Add aggregations. 
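+ // entry.getValues() is ordered to match query.getAggregatorSpecs(), one value per
+ // aggregator, so index i lines up on both sides.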
+ for (int i = 0; i < entry.getValues().length; i++) { + theMap.put(query.getAggregatorSpecs().get(i).getName(), entry.getValues()[i]); + } + + return new MapBasedRow(timestamp, theMap); + }, + vectorGrouper + ); + } + } +} diff --git a/processing/src/main/java/org/apache/druid/query/search/DefaultSearchQueryMetrics.java b/processing/src/main/java/org/apache/druid/query/search/DefaultSearchQueryMetrics.java index 494c261133c..ca1fe18c583 100644 --- a/processing/src/main/java/org/apache/druid/query/search/DefaultSearchQueryMetrics.java +++ b/processing/src/main/java/org/apache/druid/query/search/DefaultSearchQueryMetrics.java @@ -160,6 +160,12 @@ public class DefaultSearchQueryMetrics implements SearchQueryMetrics delegateQueryMetrics.identity(identity); } + @Override + public void vectorized(final boolean vectorized) + { + delegateQueryMetrics.vectorized(vectorized); + } + @Override public BitmapResultFactory makeBitmapResultFactory(BitmapFactory factory) { diff --git a/processing/src/main/java/org/apache/druid/query/select/DefaultSelectQueryMetrics.java b/processing/src/main/java/org/apache/druid/query/select/DefaultSelectQueryMetrics.java index c522987080b..80d92455a7d 100644 --- a/processing/src/main/java/org/apache/druid/query/select/DefaultSelectQueryMetrics.java +++ b/processing/src/main/java/org/apache/druid/query/select/DefaultSelectQueryMetrics.java @@ -159,6 +159,12 @@ public class DefaultSelectQueryMetrics implements SelectQueryMetrics delegateQueryMetrics.identity(identity); } + @Override + public void vectorized(final boolean vectorized) + { + delegateQueryMetrics.vectorized(vectorized); + } + @Override public BitmapResultFactory makeBitmapResultFactory(BitmapFactory factory) { diff --git a/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryEngine.java b/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryEngine.java index 0de74bbcbbf..87c24ffb11f 100644 --- a/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryEngine.java +++ b/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryEngine.java @@ -19,24 +19,65 @@ package org.apache.druid.query.timeseries; -import com.google.common.base.Function; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Iterables; +import com.google.inject.Inject; +import org.apache.druid.collections.NonBlockingPool; +import org.apache.druid.collections.ResourceHolder; +import org.apache.druid.collections.StupidPool; +import org.apache.druid.guice.annotations.Global; +import org.apache.druid.java.util.common.ISE; +import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.java.util.common.guava.Sequence; +import org.apache.druid.java.util.common.guava.Sequences; +import org.apache.druid.java.util.common.io.Closer; +import org.apache.druid.query.QueryContexts; import org.apache.druid.query.QueryRunnerHelper; import org.apache.druid.query.Result; import org.apache.druid.query.aggregation.Aggregator; +import org.apache.druid.query.aggregation.AggregatorAdapters; import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.filter.Filter; -import org.apache.druid.segment.Cursor; +import org.apache.druid.query.vector.VectorCursorGranularizer; import org.apache.druid.segment.SegmentMissingException; import org.apache.druid.segment.StorageAdapter; import org.apache.druid.segment.filter.Filters; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import 
org.apache.druid.segment.vector.VectorCursor; +import org.joda.time.Interval; +import javax.annotation.Nullable; +import java.nio.ByteBuffer; +import java.util.Collections; import java.util.List; +import java.util.Objects; /** + * */ public class TimeseriesQueryEngine { + private final NonBlockingPool bufferPool; + + /** + * Constructor for tests. In production, the @Inject constructor is used instead. + */ + @VisibleForTesting + public TimeseriesQueryEngine() + { + this.bufferPool = new StupidPool<>("dummy", () -> ByteBuffer.allocate(1000000)); + } + + @Inject + public TimeseriesQueryEngine(final @Global NonBlockingPool bufferPool) + { + this.bufferPool = bufferPool; + } + + /** + * Run a single-segment, single-interval timeseries query on a particular adapter. The query must have been + * scoped down to a single interval before calling this method. + */ public Sequence> process(final TimeseriesQuery query, final StorageAdapter adapter) { if (adapter == null) { @@ -45,65 +86,210 @@ public class TimeseriesQueryEngine ); } - final Filter filter = Filters.convertToCNFFromQueryContext(query, Filters.toFilter(query.getDimensionsFilter())); + final Filter filter = Filters.convertToCNFFromQueryContext(query, Filters.toFilter(query.getFilter())); + final Interval interval = Iterables.getOnlyElement(query.getIntervals()); + final Granularity gran = query.getGranularity(); + final boolean descending = query.isDescending(); + + final boolean doVectorize = QueryContexts.getVectorize(query).shouldVectorize( + adapter.canVectorize(filter, query.getVirtualColumns(), descending) + && query.getAggregatorSpecs().stream().allMatch(AggregatorFactory::canVectorize) + ); + + final Sequence> result; + + if (doVectorize) { + result = processVectorized(query, adapter, filter, interval, gran, descending); + } else { + result = processNonVectorized(query, adapter, filter, interval, gran, descending); + } + final int limit = query.getLimit(); - Sequence> result = generateTimeseriesResult(adapter, query, filter); if (limit < Integer.MAX_VALUE) { return result.limit(limit); + } else { + return result; } - return result; } - private Sequence> generateTimeseriesResult(StorageAdapter adapter, TimeseriesQuery query, Filter filter) + private Sequence> processVectorized( + final TimeseriesQuery query, + final StorageAdapter adapter, + @Nullable final Filter filter, + final Interval queryInterval, + final Granularity gran, + final boolean descending + ) { + final boolean skipEmptyBuckets = query.isSkipEmptyBuckets(); + final List aggregatorSpecs = query.getAggregatorSpecs(); + + final VectorCursor cursor = adapter.makeVectorCursor( + filter, + queryInterval, + query.getVirtualColumns(), + descending, + QueryContexts.getVectorSize(query), + null + ); + + if (cursor == null) { + return Sequences.empty(); + } + + final Closer closer = Closer.create(); + closer.register(cursor); + + try { + final VectorCursorGranularizer granularizer = VectorCursorGranularizer.create( + adapter, + cursor, + gran, + queryInterval + ); + + if (granularizer == null) { + return Sequences.empty(); + } + + final VectorColumnSelectorFactory columnSelectorFactory = cursor.getColumnSelectorFactory(); + final AggregatorAdapters aggregators = closer.register( + AggregatorAdapters.factorizeVector(columnSelectorFactory, query.getAggregatorSpecs()) + ); + + final ResourceHolder bufferHolder = closer.register(bufferPool.take()); + + final ByteBuffer buffer = bufferHolder.get(); + + if (aggregators.spaceNeeded() > buffer.remaining()) { + throw new ISE( + "Not 
enough space for aggregators, needed [%,d] bytes but have only [%,d].", + aggregators.spaceNeeded(), + buffer.remaining() + ); + } + + return Sequences.withBaggage( + Sequences + .simple(granularizer.getBucketIterable()) + .map( + bucketInterval -> { + // Whether or not the current bucket is empty + boolean emptyBucket = true; + + while (!cursor.isDone()) { + granularizer.setCurrentOffsets(bucketInterval); + + if (granularizer.getEndOffset() > granularizer.getStartOffset()) { + if (emptyBucket) { + aggregators.init(buffer, 0); + } + + aggregators.aggregateVector( + buffer, + 0, + granularizer.getStartOffset(), + granularizer.getEndOffset() + ); + + emptyBucket = false; + } + + if (!granularizer.advanceCursorWithinBucket()) { + break; + } + } + + if (emptyBucket && skipEmptyBuckets) { + // Return null, will get filtered out later by the Objects::nonNull filter. + return null; + } + + final TimeseriesResultBuilder bob = new TimeseriesResultBuilder( + gran.toDateTime(bucketInterval.getStartMillis()) + ); + + if (emptyBucket) { + aggregators.init(buffer, 0); + } + + for (int i = 0; i < aggregatorSpecs.size(); i++) { + bob.addMetric( + aggregatorSpecs.get(i).getName(), + aggregators.get(buffer, 0, i) + ); + } + + return bob.build(); + } + ) + .filter(Objects::nonNull), + closer + ); + } + catch (Throwable t1) { + try { + closer.close(); + } + catch (Throwable t2) { + t1.addSuppressed(t2); + } + throw t1; + } + } + + private Sequence> processNonVectorized( + final TimeseriesQuery query, + final StorageAdapter adapter, + @Nullable final Filter filter, + final Interval queryInterval, + final Granularity gran, + final boolean descending + ) + { + final boolean skipEmptyBuckets = query.isSkipEmptyBuckets(); + final List aggregatorSpecs = query.getAggregatorSpecs(); + return QueryRunnerHelper.makeCursorBasedQuery( adapter, - query.getQuerySegmentSpec().getIntervals(), + Collections.singletonList(queryInterval), filter, query.getVirtualColumns(), - query.isDescending(), - query.getGranularity(), - new Function>() - { - private final boolean skipEmptyBuckets = query.isSkipEmptyBuckets(); - private final List aggregatorSpecs = query.getAggregatorSpecs(); + descending, + gran, + cursor -> { + if (skipEmptyBuckets && cursor.isDone()) { + return null; + } - @Override - public Result apply(Cursor cursor) - { - if (skipEmptyBuckets && cursor.isDone()) { - return null; + Aggregator[] aggregators = new Aggregator[aggregatorSpecs.size()]; + String[] aggregatorNames = new String[aggregatorSpecs.size()]; + + for (int i = 0; i < aggregatorSpecs.size(); i++) { + aggregators[i] = aggregatorSpecs.get(i).factorize(cursor.getColumnSelectorFactory()); + aggregatorNames[i] = aggregatorSpecs.get(i).getName(); + } + + try { + while (!cursor.isDone()) { + for (Aggregator aggregator : aggregators) { + aggregator.aggregate(); + } + cursor.advance(); } - Aggregator[] aggregators = new Aggregator[aggregatorSpecs.size()]; - String[] aggregatorNames = new String[aggregatorSpecs.size()]; + TimeseriesResultBuilder bob = new TimeseriesResultBuilder(cursor.getTime()); for (int i = 0; i < aggregatorSpecs.size(); i++) { - aggregators[i] = aggregatorSpecs.get(i).factorize(cursor.getColumnSelectorFactory()); - aggregatorNames[i] = aggregatorSpecs.get(i).getName(); + bob.addMetric(aggregatorNames[i], aggregators[i].get()); } - try { - while (!cursor.isDone()) { - for (Aggregator aggregator : aggregators) { - aggregator.aggregate(); - } - cursor.advance(); - } - TimeseriesResultBuilder bob = new TimeseriesResultBuilder(cursor.getTime()); - 
- for (int i = 0; i < aggregatorSpecs.size(); i++) { - bob.addMetric(aggregatorNames[i], aggregators[i]); - } - - Result retVal = bob.build(); - return retVal; - } - finally { - // cleanup - for (Aggregator agg : aggregators) { - agg.close(); - } + return bob.build(); + } + finally { + // cleanup + for (Aggregator agg : aggregators) { + agg.close(); } } } diff --git a/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryQueryToolChest.java b/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryQueryToolChest.java index 0ae9a707c19..686ed6824ef 100644 --- a/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryQueryToolChest.java +++ b/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryQueryToolChest.java @@ -227,7 +227,7 @@ public class TimeseriesQueryQueryToolChest extends QueryToolChest build() { - return new Result( + return new Result<>( timestamp, new TimeseriesResultValue(metricValues) ); diff --git a/processing/src/main/java/org/apache/druid/query/topn/types/StringTopNColumnSelectorStrategy.java b/processing/src/main/java/org/apache/druid/query/topn/types/StringTopNColumnSelectorStrategy.java index dd30c369fcb..f5e838d42b6 100644 --- a/processing/src/main/java/org/apache/druid/query/topn/types/StringTopNColumnSelectorStrategy.java +++ b/processing/src/main/java/org/apache/druid/query/topn/types/StringTopNColumnSelectorStrategy.java @@ -25,6 +25,7 @@ import org.apache.druid.query.topn.TopNParams; import org.apache.druid.query.topn.TopNQuery; import org.apache.druid.query.topn.TopNResultBuilder; import org.apache.druid.segment.Cursor; +import org.apache.druid.segment.DimensionDictionarySelector; import org.apache.druid.segment.DimensionHandlerUtils; import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.StorageAdapter; @@ -87,7 +88,7 @@ public class StringTopNColumnSelectorStrategy Map, Aggregator[]> aggregatesStore ) { - if (selector.getValueCardinality() != DimensionSelector.CARDINALITY_UNKNOWN) { + if (selector.getValueCardinality() != DimensionDictionarySelector.CARDINALITY_UNKNOWN) { return dimExtractionScanAndAggregateWithCardinalityKnown(query, cursor, selector, rowSelector, aggregatesStore); } else { return dimExtractionScanAndAggregateWithCardinalityUnknown(query, cursor, selector, aggregatesStore); diff --git a/processing/src/main/java/org/apache/druid/query/vector/VectorCursorGranularizer.java b/processing/src/main/java/org/apache/druid/query/vector/VectorCursorGranularizer.java new file mode 100644 index 00000000000..163befcf281 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/vector/VectorCursorGranularizer.java @@ -0,0 +1,173 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.vector; + +import com.google.common.collect.Iterables; +import org.apache.druid.java.util.common.granularity.Granularity; +import org.apache.druid.segment.StorageAdapter; +import org.apache.druid.segment.column.ColumnHolder; +import org.apache.druid.segment.vector.VectorCursor; +import org.apache.druid.segment.vector.VectorValueSelector; +import org.joda.time.DateTime; +import org.joda.time.Interval; + +import javax.annotation.Nullable; + +/** + * Class that helps vectorized query engines handle "granularity" parameters. Nonvectorized engines have it handled + * for them by the StorageAdapter. Vectorized engines don't, because they can get efficiency gains by pushing + * granularity handling into the engine layer. + */ +public class VectorCursorGranularizer +{ + // The cursor being broken up into granularity buckets. + private final VectorCursor cursor; + + // Iterable that iterates over time buckets. + private final Iterable<Interval> bucketIterable; + + // Vector selector for the "__time" column. + @Nullable + private final VectorValueSelector timeSelector; + + // Current time vector. + @Nullable + private long[] timestamps = null; + + // Offset into the vector that we should start reading from. + private int startOffset = 0; + + // Offset into the vector that is one past the last one we should read. + private int endOffset = 0; + + private VectorCursorGranularizer( + VectorCursor cursor, + Iterable<Interval> bucketIterable, + @Nullable VectorValueSelector timeSelector + ) + { + this.cursor = cursor; + this.bucketIterable = bucketIterable; + this.timeSelector = timeSelector; + } + + @Nullable + public static VectorCursorGranularizer create( + final StorageAdapter storageAdapter, + final VectorCursor cursor, + final Granularity granularity, + final Interval queryInterval + ) + { + final DateTime minTime = storageAdapter.getMinTime(); + final DateTime maxTime = storageAdapter.getMaxTime(); + + final Interval storageAdapterInterval = new Interval(minTime, granularity.bucketEnd(maxTime)); + final Interval clippedQueryInterval = queryInterval.overlap(storageAdapterInterval); + + if (clippedQueryInterval == null) { + return null; + } + + final Iterable<Interval> bucketIterable = granularity.getIterable(clippedQueryInterval); + final Interval firstBucket = granularity.bucket(clippedQueryInterval.getStart()); + + final VectorValueSelector timeSelector; + if (firstBucket.contains(clippedQueryInterval)) { + // Only one bucket, no need to read the time column. + assert Iterables.size(bucketIterable) == 1; + timeSelector = null; + } else { + // Multiple buckets, need to read the time column to know when we move from one to the next. + timeSelector = cursor.getColumnSelectorFactory().makeValueSelector(ColumnHolder.TIME_COLUMN_NAME); + } + + return new VectorCursorGranularizer(cursor, bucketIterable, timeSelector); + } + + public void setCurrentOffsets(final Interval bucketInterval) + { + final long timeStart = bucketInterval.getStartMillis(); + final long timeEnd = bucketInterval.getEndMillis(); + + int vectorSize = cursor.getCurrentVectorSize(); + endOffset = 0; + + if (timeSelector != null) { + if (timestamps == null) { + timestamps = timeSelector.getLongVector(); + } + + // Skip "offset" to start of bucketInterval. + while (startOffset < vectorSize && timestamps[startOffset] < timeStart) { + startOffset++; + } + + // Find end of bucketInterval.
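+ // Timestamps are ascending within each vector here, so scan backward from the end until
+ // reaching the last row with timestamp < timeEnd; the "Adjust" step below then makes
+ // endOffset exclusive.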
+ for (endOffset = vectorSize - 1; + endOffset >= startOffset && timestamps[endOffset] >= timeEnd; + endOffset--) { + // nothing needed, "for" is doing the work. + } + + // Adjust: endOffset is now pointing at the last row to aggregate, but we want it + // to be one _past_ the last row. + endOffset++; + } else { + endOffset = vectorSize; + } + } + + /** + * Return true, and advances the cursor, if it can be advanced within the current time bucket. Otherwise, returns + * false and does nothing else. + */ + public boolean advanceCursorWithinBucket() + { + if (endOffset == cursor.getCurrentVectorSize()) { + cursor.advance(); + + if (timeSelector != null && !cursor.isDone()) { + timestamps = timeSelector.getLongVector(); + } + + startOffset = 0; + + return true; + } else { + return false; + } + } + + public Iterable getBucketIterable() + { + return bucketIterable; + } + + public int getStartOffset() + { + return startOffset; + } + + public int getEndOffset() + { + return endOffset; + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/ColumnSelectorFactory.java b/processing/src/main/java/org/apache/druid/segment/ColumnSelectorFactory.java index 3d51c27ee7f..07e66a67225 100644 --- a/processing/src/main/java/org/apache/druid/segment/ColumnSelectorFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/ColumnSelectorFactory.java @@ -27,6 +27,8 @@ import javax.annotation.Nullable; /** * Factory class for MetricSelectors + * + * @see org.apache.druid.segment.vector.VectorColumnSelectorFactory, the vectorized version */ @PublicApi public interface ColumnSelectorFactory diff --git a/processing/src/main/java/org/apache/druid/segment/Cursor.java b/processing/src/main/java/org/apache/druid/segment/Cursor.java index 7964485f529..645caee895f 100644 --- a/processing/src/main/java/org/apache/druid/segment/Cursor.java +++ b/processing/src/main/java/org/apache/druid/segment/Cursor.java @@ -23,13 +23,15 @@ import org.joda.time.DateTime; /** * Cursor is an interface for iteration over a range of data points, used during query execution. {@link - * QueryableIndexStorageAdapter.QueryableIndexCursor} is an implementation for historical segments, and {@link + * QueryableIndexCursorSequenceBuilder.QueryableIndexCursor} is an implementation for historical segments, and {@link * org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter.IncrementalIndexCursor} is an implementation for {@link * org.apache.druid.segment.incremental.IncrementalIndex}. * * Cursor is conceptually similar to {@link TimeAndDimsPointer}, but the latter is used for historical segment creation * rather than query execution (as Cursor). If those abstractions could be collapsed (and if it is worthwhile) is yet to * be determined. 
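+ *
+ * A typical engine-side read loop looks roughly like the following sketch ("someColumn" and the processing step
+ * are hypothetical, for illustration only):
+ *
+ * <pre>
+ *   ColumnValueSelector selector = cursor.getColumnSelectorFactory().makeColumnValueSelector("someColumn");
+ *   while (!cursor.isDone()) {
+ *     doSomethingWith(selector.getObject());
+ *     cursor.advance();
+ *   }
+ * </pre>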
+ * + * @see org.apache.druid.segment.vector.VectorCursor, the vectorized version */ public interface Cursor { diff --git a/processing/src/main/java/org/apache/druid/segment/CursorFactory.java b/processing/src/main/java/org/apache/druid/segment/CursorFactory.java index c3e55031702..809e7b85b06 100644 --- a/processing/src/main/java/org/apache/druid/segment/CursorFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/CursorFactory.java @@ -23,14 +23,36 @@ import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.query.QueryMetrics; import org.apache.druid.query.filter.Filter; +import org.apache.druid.segment.vector.VectorCursor; import org.joda.time.Interval; import javax.annotation.Nullable; /** + * Interface extended by {@link StorageAdapter}, which gives them the power to create cursors. + * + * @see StorageAdapter */ public interface CursorFactory { + /** + * Returns true if the provided combination of parameters can be handled by "makeVectorCursor". + * + * Query engines should use this before running in vectorized mode, and be prepared to fall back to non-vectorized + * mode if this method returns false. + */ + default boolean canVectorize( + @Nullable Filter filter, + VirtualColumns virtualColumns, + boolean descending + ) + { + return false; + } + + /** + * Creates a sequence of Cursors, one for each time-granular bucket (based on the provided Granularity). + */ Sequence makeCursors( @Nullable Filter filter, Interval interval, @@ -39,4 +61,25 @@ public interface CursorFactory boolean descending, @Nullable QueryMetrics queryMetrics ); + + /** + * Creates a VectorCursor. Unlike the Cursor returned by "makeCursor", there is just one of these. Hence, this method + * does not take a "granularity" parameter. Before calling this method, check "canVectorize" to see if the call you + * are about to make will throw an error or not. + * + * Returns null if there is no data to walk over (for example, if the "interval" does not overlap the data interval + * of this segment). + */ + @Nullable + default VectorCursor makeVectorCursor( + @Nullable Filter filter, + Interval interval, + VirtualColumns virtualColumns, + boolean descending, + int vectorSize, + @Nullable QueryMetrics queryMetrics + ) + { + throw new UnsupportedOperationException("Cannot vectorize. Check 'canVectorize' before calling 'makeVectorCursor'."); + } } diff --git a/processing/src/main/java/org/apache/druid/segment/DimensionDictionarySelector.java b/processing/src/main/java/org/apache/druid/segment/DimensionDictionarySelector.java new file mode 100644 index 00000000000..a02c025b4b8 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/DimensionDictionarySelector.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment; + +import org.apache.druid.query.monomorphicprocessing.CalledFromHotLoop; + +import javax.annotation.Nullable; + +/** + * Interface containing dictionary-related methods common to {@link DimensionSelector}, + * {@link org.apache.druid.segment.vector.SingleValueDimensionVectorSelector}, and + * {@link org.apache.druid.segment.vector.MultiValueDimensionVectorSelector}. + */ +public interface DimensionDictionarySelector +{ + int CARDINALITY_UNKNOWN = -1; + + /** + * Value cardinality is the cardinality of the different occurring values. If there were 4 rows: + * + * A,B + * A + * B + * A + * + * Value cardinality would be 2. + * + * Cardinality may be unknown (e.g. the selector used by IncrementalIndex while reading input rows), + * in which case this method will return -1. If cardinality is unknown, you should assume this + * dimension selector has no dictionary, and avoid storing ids, calling "lookupId", or calling "lookupName" + * outside of the context of operating on a single row. + * + * @return the value cardinality, or -1 if unknown. + */ + int getValueCardinality(); + + /** + * The Name is the String name of the actual field. It is assumed that storage layers convert names + * into id values which can then be used to get the string value. For example + * + * A,B + * A + * A,B + * B + * + * getRow() would return + * + * getRow(0) => [0 1] + * getRow(1) => [0] + * getRow(2) => [0 1] + * getRow(3) => [1] + * + * and then lookupName would return: + * + * lookupName(0) => A + * lookupName(1) => B + * + * @param id id to lookup the field name for + * + * @return the field name for the given id + */ + @CalledFromHotLoop + @Nullable + String lookupName(int id); + + /** + * Returns true if it is possible to {@link #lookupName(int)} by ids from 0 to {@link #getValueCardinality()} + * before the rows with those ids are returned. + * + *
Returns false if {@link #lookupName(int)} could be called with ids, returned from the most recent row (or row + * vector) returned by this DimensionSelector, but not earlier. If {@link #getValueCardinality()} of this + * selector additionally returns {@link #CARDINALITY_UNKNOWN}, {@code lookupName()} couldn't be called with + * ids, returned by not the most recent row (or row vector), i. e. names for ids couldn't be looked up "later". If + * {@link #getValueCardinality()} returns a non-negative number, {@code lookupName()} could be called with any ids, + * returned from rows (or row vectors) returned since the creation of this DimensionSelector. + * + *
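+ * For example, a selector backed by a dictionary-encoded segment column can typically return true here, while a
+ * selector reading streaming input rows (where {@link #getValueCardinality()} is {@link #CARDINALITY_UNKNOWN})
+ * typically returns false.
+ *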
If {@link #lookupName(int)} is called with an ineligible id, result is undefined: exception could be thrown, or + * null returned, or some other random value. + */ + boolean nameLookupPossibleInAdvance(); + + /** + * Returns {@link IdLookup} if available for this DimensionSelector, or null. + */ + @Nullable + IdLookup idLookup(); +} diff --git a/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java b/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java index 73f2f1813c8..da4232dfe6b 100644 --- a/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java +++ b/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java @@ -26,15 +26,19 @@ import com.google.common.primitives.Floats; import org.apache.druid.common.guava.GuavaUtils; import org.apache.druid.data.input.impl.DimensionSchema.MultiValueHandling; import org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.guava.Comparators; import org.apache.druid.java.util.common.parsers.ParseException; import org.apache.druid.query.ColumnSelectorPlus; import org.apache.druid.query.dimension.ColumnSelectorStrategy; import org.apache.druid.query.dimension.ColumnSelectorStrategyFactory; +import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.dimension.DimensionSpec; +import org.apache.druid.query.dimension.VectorColumnStrategizer; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.column.ValueType; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; import java.math.BigDecimal; @@ -242,6 +246,80 @@ public final class DimensionHandlerUtils return strategyFactory.makeColumnSelectorStrategy(capabilities, selector); } + /** + * Equivalent to calling makeVectorProcessor(DefaultDimensionSpec.of(column), strategyFactory, selectorFactory). + * + * @see #makeVectorProcessor(DimensionSpec, VectorColumnStrategizer, VectorColumnSelectorFactory) + */ + public static T makeVectorProcessor( + final String column, + final VectorColumnStrategizer strategyFactory, + final VectorColumnSelectorFactory selectorFactory + ) + { + return makeVectorProcessor(DefaultDimensionSpec.of(column), strategyFactory, selectorFactory); + } + + /** + * Creates "vector processors", which are objects that wrap a single vectorized input column and provide some + * functionality on top of it. Used by things like query engines and filter matchers. + * + * Supports the basic types STRING, LONG, DOUBLE, and FLOAT. 
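+   *
+   * An illustrative sketch ("MyProcessor" and "MyStrategizer" are hypothetical names, not part of this patch):
+   *
+   * <pre>
+   *   MyProcessor processor = DimensionHandlerUtils.makeVectorProcessor(
+   *       DefaultDimensionSpec.of("myColumn"),
+   *       new MyStrategizer(), // a VectorColumnStrategizer&lt;MyProcessor&gt;
+   *       vectorColumnSelectorFactory
+   *   );
+   * </pre>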
+ * + * @param dimensionSpec dimensionSpec for the input to the processor + * @param strategyFactory object that encapsulates the knowledge about how to create processors + * @param selectorFactory column selector factory used for creating the vector processor + */ + public static T makeVectorProcessor( + final DimensionSpec dimensionSpec, + final VectorColumnStrategizer strategyFactory, + final VectorColumnSelectorFactory selectorFactory + ) + { + final ColumnCapabilities capabilities = getEffectiveCapabilities( + dimensionSpec, + selectorFactory.getColumnCapabilities(dimensionSpec.getDimension()) + ); + + final ValueType type = capabilities.getType(); + + if (type == ValueType.STRING) { + if (capabilities.hasMultipleValues()) { + return strategyFactory.makeMultiValueDimensionStrategy( + selectorFactory.makeMultiValueDimensionSelector(dimensionSpec) + ); + } else { + return strategyFactory.makeSingleValueDimensionStrategy( + selectorFactory.makeSingleValueDimensionSelector(dimensionSpec) + ); + } + } else { + Preconditions.checkState( + dimensionSpec.getExtractionFn() == null && !dimensionSpec.mustDecorate(), + "Uh oh, was about to try to make a value selector for type[%s] with a dimensionSpec of class[%s] that " + + "requires decoration. Possible bug.", + type, + dimensionSpec.getClass().getName() + ); + + if (type == ValueType.LONG) { + return strategyFactory.makeLongStrategy( + selectorFactory.makeValueSelector(dimensionSpec.getDimension()) + ); + } else if (type == ValueType.FLOAT) { + return strategyFactory.makeFloatStrategy( + selectorFactory.makeValueSelector(dimensionSpec.getDimension()) + ); + } else if (type == ValueType.DOUBLE) { + return strategyFactory.makeDoubleStrategy( + selectorFactory.makeValueSelector(dimensionSpec.getDimension()) + ); + } else { + throw new ISE("Unsupported type[%s]", capabilities.getType()); + } + } + } + @Nullable public static String convertObjectToString(@Nullable Object valObj) { diff --git a/processing/src/main/java/org/apache/druid/segment/DimensionSelector.java b/processing/src/main/java/org/apache/druid/segment/DimensionSelector.java index 1c9bf9765f2..8a0f2a75bdd 100644 --- a/processing/src/main/java/org/apache/druid/segment/DimensionSelector.java +++ b/processing/src/main/java/org/apache/druid/segment/DimensionSelector.java @@ -36,12 +36,15 @@ import javax.annotation.Nullable; import java.util.Arrays; /** + * Selector for a string-typed column, either single- or multi-valued. This is named a "dimension" selector for legacy + * reasons: in the past, all Druid dimensions were string-typed. + * + * @see org.apache.druid.segment.vector.SingleValueDimensionVectorSelector, a vectorized version + * @see org.apache.druid.segment.vector.MultiValueDimensionVectorSelector, another vectorized version */ @PublicApi -public interface DimensionSelector extends ColumnValueSelector, HotLoopCallee +public interface DimensionSelector extends ColumnValueSelector, DimensionDictionarySelector, HotLoopCallee { - int CARDINALITY_UNKNOWN = -1; - /** * Returns the indexed values at the current position in this DimensionSelector. * @@ -63,75 +66,6 @@ public interface DimensionSelector extends ColumnValueSelector, HotLoopC ValueMatcher makeValueMatcher(Predicate predicate); - /** - * Value cardinality is the cardinality of the different occurring values. If there were 4 rows: - * - * A,B - * A - * B - * A - * - * Value cardinality would be 2. - * - * Cardinality may be unknown (e.g. 
the selector used by IncrementalIndex while reading input rows), - * in which case this method will return -1. If cardinality is unknown, you should assume this - * dimension selector has no dictionary, and avoid storing ids, calling "lookupId", or calling "lookupName" - * outside of the context of operating on a single row. - * - * @return the value cardinality, or -1 if unknown. - */ - int getValueCardinality(); - - /** - * The Name is the String name of the actual field. It is assumed that storage layers convert names - * into id values which can then be used to get the string value. For example - * - * A,B - * A - * A,B - * B - * - * getRow() would return - * - * getRow(0) => [0 1] - * getRow(1) => [0] - * getRow(2) => [0 1] - * getRow(3) => [1] - * - * and then lookupName would return: - * - * lookupName(0) => A - * lookupName(1) => B - * - * @param id id to lookup the field name for - * @return the field name for the given id - */ - @CalledFromHotLoop - @Nullable - String lookupName(int id); - - /** - * Returns true if it is possible to {@link #lookupName(int)} by ids from 0 to {@link #getValueCardinality()} - * before the rows with those ids are returned. - * - *
Returns false if {@link #lookupName(int)} could be called with ids, returned from the most recent call of {@link - * #getRow()} on this DimensionSelector, but not earlier. If {@link #getValueCardinality()} of this DimensionSelector - * additionally returns {@link #CARDINALITY_UNKNOWN}, {@code lookupName()} couldn't be called with ids, returned by - * not the most recent call of {@link #getRow()}, i. e. names for ids couldn't be looked up "later". If {@link - * #getValueCardinality()} returns a non-negative number, {@code lookupName()} could be called with any ids, returned - * from {@code #getRow()} since the creation of this DimensionSelector. - * - *
If {@link #lookupName(int)} is called with an ineligible id, result is undefined: exception could be thrown, or - * null returned, or some other random value. - */ - boolean nameLookupPossibleInAdvance(); - - /** - * Returns {@link IdLookup} if available for this DimensionSelector, or null. - */ - @Nullable - IdLookup idLookup(); - /** * @deprecated This method is marked as deprecated in DimensionSelector to minimize the probability of accidental * calling. "Polymorphism" of DimensionSelector should be used only when operating on {@link ColumnValueSelector} diff --git a/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java index 0ddacfb781b..55c0a2c2d11 100644 --- a/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java @@ -80,7 +80,7 @@ public class DoubleDimensionIndexer implements DimensionIndexer @Override public int getCardinality() { - return DimensionSelector.CARDINALITY_UNKNOWN; + return DimensionDictionarySelector.CARDINALITY_UNKNOWN; } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorSequenceBuilder.java b/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorSequenceBuilder.java new file mode 100644 index 00000000000..ba30649e644 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorSequenceBuilder.java @@ -0,0 +1,602 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.segment; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Function; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; +import org.apache.druid.collections.bitmap.ImmutableBitmap; +import org.apache.druid.java.util.common.granularity.Granularity; +import org.apache.druid.java.util.common.guava.Sequence; +import org.apache.druid.java.util.common.guava.Sequences; +import org.apache.druid.java.util.common.io.Closer; +import org.apache.druid.query.BaseQuery; +import org.apache.druid.query.filter.Filter; +import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.column.BaseColumn; +import org.apache.druid.segment.column.ColumnHolder; +import org.apache.druid.segment.column.NumericColumn; +import org.apache.druid.segment.data.Offset; +import org.apache.druid.segment.data.ReadableOffset; +import org.apache.druid.segment.historical.HistoricalCursor; +import org.apache.druid.segment.vector.BitmapVectorOffset; +import org.apache.druid.segment.vector.FilteredVectorOffset; +import org.apache.druid.segment.vector.NoFilterVectorOffset; +import org.apache.druid.segment.vector.QueryableIndexVectorColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorCursor; +import org.apache.druid.segment.vector.VectorOffset; +import org.joda.time.DateTime; +import org.joda.time.Interval; + +import javax.annotation.Nullable; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +public class QueryableIndexCursorSequenceBuilder +{ + private final QueryableIndex index; + private final Interval interval; + private final VirtualColumns virtualColumns; + @Nullable + private final ImmutableBitmap filterBitmap; + private final long minDataTimestamp; + private final long maxDataTimestamp; + private final boolean descending; + @Nullable + private final Filter postFilter; + private final ColumnSelectorBitmapIndexSelector bitmapIndexSelector; + + public QueryableIndexCursorSequenceBuilder( + QueryableIndex index, + Interval interval, + VirtualColumns virtualColumns, + @Nullable ImmutableBitmap filterBitmap, + long minDataTimestamp, + long maxDataTimestamp, + boolean descending, + @Nullable Filter postFilter, + ColumnSelectorBitmapIndexSelector bitmapIndexSelector + ) + { + this.index = index; + this.interval = interval; + this.virtualColumns = virtualColumns; + this.filterBitmap = filterBitmap; + this.minDataTimestamp = minDataTimestamp; + this.maxDataTimestamp = maxDataTimestamp; + this.descending = descending; + this.postFilter = postFilter; + this.bitmapIndexSelector = bitmapIndexSelector; + } + + public Sequence build(final Granularity gran) + { + final Offset baseOffset; + + if (filterBitmap == null) { + baseOffset = descending + ? new SimpleDescendingOffset(index.getNumRows()) + : new SimpleAscendingOffset(index.getNumRows()); + } else { + baseOffset = BitmapOffset.of(filterBitmap, descending, index.getNumRows()); + } + + // Column caches shared amongst all cursors in this sequence. 
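+    // (Sharing one cache means each underlying column object is created at most once, rather than once per
+    // per-bucket cursor.)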
+ final Map columnCache = new HashMap<>(); + + final NumericColumn timestamps = (NumericColumn) index.getColumnHolder(ColumnHolder.TIME_COLUMN_NAME).getColumn(); + + final Closer closer = Closer.create(); + closer.register(timestamps); + + Iterable iterable = gran.getIterable(interval); + if (descending) { + iterable = Lists.reverse(ImmutableList.copyOf(iterable)); + } + + return Sequences.withBaggage( + Sequences.map( + Sequences.simple(iterable), + new Function() + { + @Override + public Cursor apply(final Interval inputInterval) + { + final long timeStart = Math.max(interval.getStartMillis(), inputInterval.getStartMillis()); + final long timeEnd = Math.min( + interval.getEndMillis(), + gran.increment(inputInterval.getStart()).getMillis() + ); + + if (descending) { + for (; baseOffset.withinBounds(); baseOffset.increment()) { + if (timestamps.getLongSingleValueRow(baseOffset.getOffset()) < timeEnd) { + break; + } + } + } else { + for (; baseOffset.withinBounds(); baseOffset.increment()) { + if (timestamps.getLongSingleValueRow(baseOffset.getOffset()) >= timeStart) { + break; + } + } + } + + final Offset offset = descending ? + new DescendingTimestampCheckingOffset( + baseOffset, + timestamps, + timeStart, + minDataTimestamp >= timeStart + ) : + new AscendingTimestampCheckingOffset( + baseOffset, + timestamps, + timeEnd, + maxDataTimestamp < timeEnd + ); + + + final Offset baseCursorOffset = offset.clone(); + final ColumnSelectorFactory columnSelectorFactory = new QueryableIndexColumnSelectorFactory( + index, + virtualColumns, + descending, + closer, + baseCursorOffset.getBaseReadableOffset(), + columnCache + ); + final DateTime myBucket = gran.toDateTime(inputInterval.getStartMillis()); + + if (postFilter == null) { + return new QueryableIndexCursor(baseCursorOffset, columnSelectorFactory, myBucket); + } else { + FilteredOffset filteredOffset = new FilteredOffset( + baseCursorOffset, + columnSelectorFactory, + descending, + postFilter, + bitmapIndexSelector + ); + return new QueryableIndexCursor(filteredOffset, columnSelectorFactory, myBucket); + } + + } + } + ), + closer + ); + } + + public VectorCursor buildVectorized(final int vectorSize) + { + // Sanity check - matches QueryableIndexStorageAdapter.canVectorize + Preconditions.checkState(virtualColumns.size() == 0, "virtualColumns.size == 0"); + Preconditions.checkState(!descending, "!descending"); + + final Map columnCache = new HashMap<>(); + final Closer closer = Closer.create(); + + NumericColumn timestamps = null; + + final int startOffset; + final int endOffset; + + if (interval.getStartMillis() > minDataTimestamp) { + timestamps = (NumericColumn) index.getColumnHolder(ColumnHolder.TIME_COLUMN_NAME).getColumn(); + closer.register(timestamps); + + startOffset = timeSearch(timestamps, interval.getStartMillis(), 0, index.getNumRows()); + } else { + startOffset = 0; + } + + if (interval.getEndMillis() <= maxDataTimestamp) { + if (timestamps == null) { + timestamps = (NumericColumn) index.getColumnHolder(ColumnHolder.TIME_COLUMN_NAME).getColumn(); + closer.register(timestamps); + } + + endOffset = timeSearch(timestamps, interval.getEndMillis(), startOffset, index.getNumRows()); + } else { + endOffset = index.getNumRows(); + } + + final VectorOffset baseOffset = + filterBitmap == null + ? 
new NoFilterVectorOffset(vectorSize, startOffset, endOffset) + : new BitmapVectorOffset(vectorSize, filterBitmap, startOffset, endOffset); + + if (postFilter == null) { + return new QueryableIndexVectorCursor(index, baseOffset, closer, columnCache, vectorSize); + } else { + // baseColumnSelectorFactory using baseOffset is the column selector for filtering. + final VectorColumnSelectorFactory baseColumnSelectorFactory = new QueryableIndexVectorColumnSelectorFactory( + index, + baseOffset, + closer, + columnCache + ); + + final VectorOffset filteredOffset = FilteredVectorOffset.create( + baseOffset, + baseColumnSelectorFactory, + postFilter + ); + + // Now create the cursor and column selector that will be returned to the caller. + // + // There is an inefficiency with how we do things here: this cursor (the one that will be provided to the + // caller) does share a columnCache with "baseColumnSelectorFactory", but it *doesn't* share vector data. This + // means that if the caller wants to read from a column that is also used for filtering, the underlying column + // object will get hit twice for some of the values (anything that matched the filter). This is probably most + // noticeable if it causes thrashing of decompression buffers due to out-of-order reads. I haven't observed + // this directly but it seems possible in principle. + return new QueryableIndexVectorCursor(index, filteredOffset, closer, columnCache, vectorSize); + } + } + + /** + * Search the time column using binary search. Benchmarks on various other approaches (linear search, binary + * search that switches to linear at various closeness thresholds) indicated that a pure binary search worked best. + * + * @param timeColumn the column + * @param timestamp the timestamp to search for + * @param startIndex first index to search, inclusive + * @param endIndex last index to search, exclusive + * + * @return first index that has a timestamp equal to, or greater, than "timestamp" + */ + @VisibleForTesting + static int timeSearch( + final NumericColumn timeColumn, + final long timestamp, + final int startIndex, + final int endIndex + ) + { + final long prevTimestamp = timestamp - 1; + + // Binary search for prevTimestamp. + int minIndex = startIndex; + int maxIndex = endIndex - 1; + + while (minIndex <= maxIndex) { + final int currIndex = (minIndex + maxIndex) >>> 1; + final long currValue = timeColumn.getLongSingleValueRow(currIndex); + + if (currValue < prevTimestamp) { + minIndex = currIndex + 1; + } else if (currValue > prevTimestamp) { + maxIndex = currIndex - 1; + } else { + // The value at currIndex is prevTimestamp. + minIndex = currIndex; + break; + } + } + + // Do linear search for the actual timestamp, then return. + for (; minIndex < endIndex; minIndex++) { + final long currValue = timeColumn.getLongSingleValueRow(minIndex); + if (currValue >= timestamp) { + return minIndex; + } + } + + // Not found. 
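+    // (Reaching this point means every row in [startIndex, endIndex) has a timestamp strictly less than
+    // "timestamp".)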
+ return endIndex; + } + + private static class QueryableIndexVectorCursor implements VectorCursor + { + private final Closer closer; + private final int vectorSize; + private final VectorOffset offset; + private final VectorColumnSelectorFactory columnSelectorFactory; + + public QueryableIndexVectorCursor( + final QueryableIndex index, + final VectorOffset offset, + final Closer closer, + final Map columnCache, + final int vectorSize + ) + { + this.offset = offset; + this.closer = closer; + this.vectorSize = vectorSize; + this.columnSelectorFactory = new QueryableIndexVectorColumnSelectorFactory(index, offset, closer, columnCache); + } + + @Override + public int getMaxVectorSize() + { + return vectorSize; + } + + @Override + public int getCurrentVectorSize() + { + return offset.getCurrentVectorSize(); + } + + @Override + public VectorColumnSelectorFactory getColumnSelectorFactory() + { + return columnSelectorFactory; + } + + @Override + public void advance() + { + offset.advance(); + BaseQuery.checkInterrupted(); + } + + @Override + public boolean isDone() + { + return offset.isDone(); + } + + @Override + public void reset() + { + offset.reset(); + } + + @Override + public void close() + { + try { + closer.close(); + } + catch (IOException e) { + throw new RuntimeException(e); + } + } + } + + private static class QueryableIndexCursor implements HistoricalCursor + { + private final Offset cursorOffset; + private final ColumnSelectorFactory columnSelectorFactory; + private final DateTime bucketStart; + + QueryableIndexCursor(Offset cursorOffset, ColumnSelectorFactory columnSelectorFactory, DateTime bucketStart) + { + this.cursorOffset = cursorOffset; + this.columnSelectorFactory = columnSelectorFactory; + this.bucketStart = bucketStart; + } + + @Override + public Offset getOffset() + { + return cursorOffset; + } + + @Override + public ColumnSelectorFactory getColumnSelectorFactory() + { + return columnSelectorFactory; + } + + @Override + public DateTime getTime() + { + return bucketStart; + } + + @Override + public void advance() + { + cursorOffset.increment(); + // Must call BaseQuery.checkInterrupted() after cursorOffset.increment(), not before, because + // FilteredOffset.increment() is a potentially long, not an "instant" operation (unlike to all other subclasses + // of Offset) and it returns early on interruption, leaving itself in an illegal state. We should not let + // aggregators, etc. access this illegal state and throw a QueryInterruptedException by calling + // BaseQuery.checkInterrupted(). 
+ BaseQuery.checkInterrupted(); + } + + @Override + public void advanceUninterruptibly() + { + cursorOffset.increment(); + } + + @Override + public void advanceTo(int offset) + { + int count = 0; + while (count < offset && !isDone()) { + advance(); + count++; + } + } + + @Override + public boolean isDone() + { + return !cursorOffset.withinBounds(); + } + + @Override + public boolean isDoneOrInterrupted() + { + return isDone() || Thread.currentThread().isInterrupted(); + } + + @Override + public void reset() + { + cursorOffset.reset(); + } + } + + + public abstract static class TimestampCheckingOffset extends Offset + { + final Offset baseOffset; + final NumericColumn timestamps; + final long timeLimit; + final boolean allWithinThreshold; + + TimestampCheckingOffset( + Offset baseOffset, + NumericColumn timestamps, + long timeLimit, + boolean allWithinThreshold + ) + { + this.baseOffset = baseOffset; + this.timestamps = timestamps; + this.timeLimit = timeLimit; + // checks if all the values are within the Threshold specified, skips timestamp lookups and checks if all values are within threshold. + this.allWithinThreshold = allWithinThreshold; + } + + @Override + public int getOffset() + { + return baseOffset.getOffset(); + } + + @Override + public boolean withinBounds() + { + if (!baseOffset.withinBounds()) { + return false; + } + if (allWithinThreshold) { + return true; + } + return timeInRange(timestamps.getLongSingleValueRow(baseOffset.getOffset())); + } + + @Override + public void reset() + { + baseOffset.reset(); + } + + @Override + public ReadableOffset getBaseReadableOffset() + { + return baseOffset.getBaseReadableOffset(); + } + + protected abstract boolean timeInRange(long current); + + @Override + public void increment() + { + baseOffset.increment(); + } + + @SuppressWarnings("MethodDoesntCallSuperMethod") + @Override + public Offset clone() + { + throw new IllegalStateException("clone"); + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + inspector.visit("baseOffset", baseOffset); + inspector.visit("timestamps", timestamps); + inspector.visit("allWithinThreshold", allWithinThreshold); + } + } + + public static class AscendingTimestampCheckingOffset extends TimestampCheckingOffset + { + AscendingTimestampCheckingOffset( + Offset baseOffset, + NumericColumn timestamps, + long timeLimit, + boolean allWithinThreshold + ) + { + super(baseOffset, timestamps, timeLimit, allWithinThreshold); + } + + @Override + protected final boolean timeInRange(long current) + { + return current < timeLimit; + } + + @Override + public String toString() + { + return (baseOffset.withinBounds() ? timestamps.getLongSingleValueRow(baseOffset.getOffset()) : "OOB") + + "<" + timeLimit + "::" + baseOffset; + } + + @SuppressWarnings("MethodDoesntCallSuperMethod") + @Override + public Offset clone() + { + return new AscendingTimestampCheckingOffset(baseOffset.clone(), timestamps, timeLimit, allWithinThreshold); + } + } + + public static class DescendingTimestampCheckingOffset extends TimestampCheckingOffset + { + DescendingTimestampCheckingOffset( + Offset baseOffset, + NumericColumn timestamps, + long timeLimit, + boolean allWithinThreshold + ) + { + super(baseOffset, timestamps, timeLimit, allWithinThreshold); + } + + @Override + protected final boolean timeInRange(long current) + { + return current >= timeLimit; + } + + @Override + public String toString() + { + return timeLimit + ">=" + + (baseOffset.withinBounds() ? 
timestamps.getLongSingleValueRow(baseOffset.getOffset()) : "OOB") + + "::" + baseOffset; + } + + @SuppressWarnings("MethodDoesntCallSuperMethod") + @Override + public Offset clone() + { + return new DescendingTimestampCheckingOffset(baseOffset.clone(), timestamps, timeLimit, allWithinThreshold); + } + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/QueryableIndexSegment.java b/processing/src/main/java/org/apache/druid/segment/QueryableIndexSegment.java index 8f681781ac1..30b3fcba4df 100644 --- a/processing/src/main/java/org/apache/druid/segment/QueryableIndexSegment.java +++ b/processing/src/main/java/org/apache/druid/segment/QueryableIndexSegment.java @@ -27,11 +27,13 @@ import org.joda.time.Interval; public class QueryableIndexSegment extends AbstractSegment { private final QueryableIndex index; + private final QueryableIndexStorageAdapter storageAdapter; private final SegmentId segmentId; public QueryableIndexSegment(QueryableIndex index, final SegmentId segmentId) { this.index = index; + this.storageAdapter = new QueryableIndexStorageAdapter(index); this.segmentId = segmentId; } @@ -56,7 +58,7 @@ public class QueryableIndexSegment extends AbstractSegment @Override public StorageAdapter asStorageAdapter() { - return new QueryableIndexStorageAdapter(index); + return storageAdapter; } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/QueryableIndexStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/QueryableIndexStorageAdapter.java index 779b06033dd..d055b4bb2f6 100644 --- a/processing/src/main/java/org/apache/druid/segment/QueryableIndexStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/QueryableIndexStorageAdapter.java @@ -19,22 +19,18 @@ package org.apache.druid.segment; -import com.google.common.base.Function; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.Lists; import com.google.common.collect.Sets; import org.apache.druid.collections.bitmap.ImmutableBitmap; import org.apache.druid.java.util.common.DateTimes; +import org.apache.druid.java.util.common.ISE; +import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.java.util.common.guava.Sequences; -import org.apache.druid.java.util.common.io.Closer; -import org.apache.druid.query.BaseQuery; import org.apache.druid.query.BitmapResultFactory; import org.apache.druid.query.DefaultBitmapResultFactory; import org.apache.druid.query.QueryMetrics; import org.apache.druid.query.filter.Filter; -import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.column.BaseColumn; import org.apache.druid.segment.column.BitmapIndex; import org.apache.druid.segment.column.ColumnCapabilities; @@ -43,10 +39,8 @@ import org.apache.druid.segment.column.ComplexColumn; import org.apache.druid.segment.column.DictionaryEncodedColumn; import org.apache.druid.segment.column.NumericColumn; import org.apache.druid.segment.data.Indexed; -import org.apache.druid.segment.data.Offset; -import org.apache.druid.segment.data.ReadableOffset; import org.apache.druid.segment.filter.AndFilter; -import org.apache.druid.segment.historical.HistoricalCursor; +import org.apache.druid.segment.vector.VectorCursor; import org.joda.time.DateTime; import org.joda.time.Interval; @@ -55,18 +49,24 @@ import java.io.IOException; import java.io.UncheckedIOException; 
import java.util.ArrayList; import java.util.Collections; -import java.util.HashMap; import java.util.HashSet; import java.util.List; -import java.util.Map; import java.util.Objects; /** */ public class QueryableIndexStorageAdapter implements StorageAdapter { + public static final int DEFAULT_VECTOR_SIZE = 512; + private final QueryableIndex index; + @Nullable + private volatile DateTime minTime; + + @Nullable + private volatile DateTime maxTime; + public QueryableIndexStorageAdapter(QueryableIndex index) { this.index = index; @@ -124,17 +124,23 @@ public class QueryableIndexStorageAdapter implements StorageAdapter @Override public DateTime getMinTime() { - try (final NumericColumn column = (NumericColumn) index.getColumnHolder(ColumnHolder.TIME_COLUMN_NAME).getColumn()) { - return DateTimes.utc(column.getLongSingleValueRow(0)); + if (minTime == null) { + // May be called a few times in parallel when first populating minTime, but this is benign, so allow it. + populateMinMaxTime(); } + + return minTime; } @Override public DateTime getMaxTime() { - try (final NumericColumn column = (NumericColumn) index.getColumnHolder(ColumnHolder.TIME_COLUMN_NAME).getColumn()) { - return DateTimes.utc(column.getLongSingleValueRow(column.length() - 1)); + if (maxTime == null) { + // May be called a few times in parallel when first populating maxTime, but this is benign, so allow it. + populateMinMaxTime(); } + + return maxTime; } @Override @@ -197,6 +203,70 @@ public class QueryableIndexStorageAdapter implements StorageAdapter return getMaxTime(); } + @Override + public boolean canVectorize( + @Nullable final Filter filter, + final VirtualColumns virtualColumns, + final boolean descending + ) + { + if (filter != null) { + final boolean filterCanVectorize = + filter.supportsBitmapIndex(makeBitmapIndexSelector(virtualColumns)) + || filter.canVectorizeMatcher(); + + if (!filterCanVectorize) { + return false; + } + } + + // 1) Virtual columns can't vectorize yet + // 2) Vector cursors can't iterate backwards yet + return virtualColumns.size() == 0 && !descending; + } + + @Override + @Nullable + public VectorCursor makeVectorCursor( + @Nullable final Filter filter, + final Interval interval, + final VirtualColumns virtualColumns, + final boolean descending, + final int vectorSize, + @Nullable final QueryMetrics queryMetrics + ) + { + if (!canVectorize(filter, virtualColumns, descending)) { + throw new ISE("Cannot vectorize. Check 'canVectorize' before calling 'makeVectorCursor'."); + } + + if (queryMetrics != null) { + queryMetrics.vectorized(true); + } + + final Interval actualInterval = computeCursorInterval(Granularities.ALL, interval); + + if (actualInterval == null) { + return null; + } + + final ColumnSelectorBitmapIndexSelector bitmapIndexSelector = makeBitmapIndexSelector(virtualColumns); + + final FilterAnalysis filterAnalysis = analyzeFilter(filter, bitmapIndexSelector, queryMetrics); + + return new QueryableIndexCursorSequenceBuilder( + index, + actualInterval, + virtualColumns, + filterAnalysis.getPreFilterBitmap(), + getMinTime().getMillis(), + getMaxTime().getMillis(), + descending, + filterAnalysis.getPostFilter(), + bitmapIndexSelector + ).buildVectorized(vectorSize > 0 ? 
vectorSize : DEFAULT_VECTOR_SIZE); + } + @Override public Sequence makeCursors( @Nullable Filter filter, @@ -207,25 +277,91 @@ public class QueryableIndexStorageAdapter implements StorageAdapter @Nullable QueryMetrics queryMetrics ) { + if (queryMetrics != null) { + queryMetrics.vectorized(false); + } - DateTime minTime = getMinTime(); - long minDataTimestamp = minTime.getMillis(); - DateTime maxTime = getMaxTime(); - long maxDataTimestamp = maxTime.getMillis(); - final Interval dataInterval = new Interval(minTime, gran.bucketEnd(maxTime)); + final Interval actualInterval = computeCursorInterval(gran, interval); - if (!interval.overlaps(dataInterval)) { + if (actualInterval == null) { return Sequences.empty(); } - final Interval actualInterval = interval.overlap(dataInterval); + final ColumnSelectorBitmapIndexSelector bitmapIndexSelector = makeBitmapIndexSelector(virtualColumns); - final ColumnSelectorBitmapIndexSelector selector = new ColumnSelectorBitmapIndexSelector( + final FilterAnalysis filterAnalysis = analyzeFilter(filter, bitmapIndexSelector, queryMetrics); + + return Sequences.filter( + new QueryableIndexCursorSequenceBuilder( + index, + actualInterval, + virtualColumns, + filterAnalysis.getPreFilterBitmap(), + getMinTime().getMillis(), + getMaxTime().getMillis(), + descending, + filterAnalysis.getPostFilter(), + bitmapIndexSelector + ).build(gran), + Objects::nonNull + ); + } + + @Nullable + public static ColumnCapabilities getColumnCapabilities(ColumnSelector index, String columnName) + { + final ColumnHolder columnHolder = index.getColumnHolder(columnName); + if (columnHolder == null) { + return null; + } + return columnHolder.getCapabilities(); + } + + @Override + public Metadata getMetadata() + { + return index.getMetadata(); + } + + private void populateMinMaxTime() + { + // Compute and cache minTime, maxTime. + final ColumnHolder columnHolder = index.getColumnHolder(ColumnHolder.TIME_COLUMN_NAME); + try (final NumericColumn column = (NumericColumn) columnHolder.getColumn()) { + this.minTime = DateTimes.utc(column.getLongSingleValueRow(0)); + this.maxTime = DateTimes.utc(column.getLongSingleValueRow(column.length() - 1)); + } + } + + @Nullable + private Interval computeCursorInterval(final Granularity gran, final Interval interval) + { + final DateTime minTime = getMinTime(); + final DateTime maxTime = getMaxTime(); + final Interval dataInterval = new Interval(minTime, gran.bucketEnd(maxTime)); + + if (!interval.overlaps(dataInterval)) { + return null; + } + + return interval.overlap(dataInterval); + } + + private ColumnSelectorBitmapIndexSelector makeBitmapIndexSelector(final VirtualColumns virtualColumns) + { + return new ColumnSelectorBitmapIndexSelector( index.getBitmapFactoryForDimensions(), virtualColumns, index ); + } + private FilterAnalysis analyzeFilter( + @Nullable final Filter filter, + ColumnSelectorBitmapIndexSelector bitmapIndexSelector, + @Nullable QueryMetrics queryMetrics + ) + { final int totalRows = index.getNumRows(); /* @@ -242,20 +378,18 @@ public class QueryableIndexStorageAdapter implements StorageAdapter * * Any subfilters that cannot be processed entirely with bitmap indexes will be moved to the post-filtering stage. */ - final Offset offset; final List preFilters; final List postFilters = new ArrayList<>(); int preFilteredRows = totalRows; if (filter == null) { preFilters = Collections.emptyList(); - offset = descending ? 
new SimpleDescendingOffset(totalRows) : new SimpleAscendingOffset(totalRows); } else { preFilters = new ArrayList<>(); if (filter instanceof AndFilter) { // If we get an AndFilter, we can split the subfilters across both filtering stages for (Filter subfilter : ((AndFilter) filter).getFilters()) { - if (subfilter.supportsBitmapIndex(selector)) { + if (subfilter.supportsBitmapIndex(bitmapIndexSelector)) { preFilters.add(subfilter); } else { postFilters.add(subfilter); @@ -263,33 +397,29 @@ public class QueryableIndexStorageAdapter implements StorageAdapter } } else { // If we get an OrFilter or a single filter, handle the filter in one stage - if (filter.supportsBitmapIndex(selector)) { + if (filter.supportsBitmapIndex(bitmapIndexSelector)) { preFilters.add(filter); } else { postFilters.add(filter); } } + } - if (preFilters.size() == 0) { - offset = descending ? new SimpleDescendingOffset(totalRows) : new SimpleAscendingOffset(totalRows); + final ImmutableBitmap preFilterBitmap; + if (preFilters.isEmpty()) { + preFilterBitmap = null; + } else { + if (queryMetrics != null) { + BitmapResultFactory bitmapResultFactory = + queryMetrics.makeBitmapResultFactory(bitmapIndexSelector.getBitmapFactory()); + long bitmapConstructionStartNs = System.nanoTime(); + // Use AndFilter.getBitmapResult to intersect the preFilters to get its short-circuiting behavior. + preFilterBitmap = AndFilter.getBitmapIndex(bitmapIndexSelector, bitmapResultFactory, preFilters); + preFilteredRows = preFilterBitmap.size(); + queryMetrics.reportBitmapConstructionTime(System.nanoTime() - bitmapConstructionStartNs); } else { - if (queryMetrics != null) { - BitmapResultFactory bitmapResultFactory = - queryMetrics.makeBitmapResultFactory(selector.getBitmapFactory()); - long bitmapConstructionStartNs = System.nanoTime(); - // Use AndFilter.getBitmapResult to intersect the preFilters to get its short-circuiting behavior. 
- ImmutableBitmap bitmapIndex = AndFilter.getBitmapIndex(selector, bitmapResultFactory, preFilters); - preFilteredRows = bitmapIndex.size(); - offset = BitmapOffset.of(bitmapIndex, descending, totalRows); - queryMetrics.reportBitmapConstructionTime(System.nanoTime() - bitmapConstructionStartNs); - } else { - BitmapResultFactory bitmapResultFactory = new DefaultBitmapResultFactory(selector.getBitmapFactory()); - offset = BitmapOffset.of( - AndFilter.getBitmapIndex(selector, bitmapResultFactory, preFilters), - descending, - totalRows - ); - } + BitmapResultFactory bitmapResultFactory = new DefaultBitmapResultFactory(bitmapIndexSelector.getBitmapFactory()); + preFilterBitmap = AndFilter.getBitmapIndex(bitmapIndexSelector, bitmapResultFactory, preFilters); } } @@ -309,388 +439,33 @@ public class QueryableIndexStorageAdapter implements StorageAdapter queryMetrics.reportPreFilteredRows(preFilteredRows); } - return Sequences.filter( - new CursorSequenceBuilder( - this, - actualInterval, - virtualColumns, - gran, - offset, - minDataTimestamp, - maxDataTimestamp, - descending, - postFilter, - selector - ).build(), - Objects::nonNull - ); + return new FilterAnalysis(preFilterBitmap, postFilter); } - @Nullable - static ColumnCapabilities getColumnCapabilities(ColumnSelector index, String columnName) + private static class FilterAnalysis { - ColumnHolder columnHolder = index.getColumnHolder(columnName); - if (columnHolder == null) { - return null; - } - return columnHolder.getCapabilities(); - } - - private static class CursorSequenceBuilder - { - private final QueryableIndex index; - private final Interval interval; - private final VirtualColumns virtualColumns; - private final Granularity gran; - private final Offset offset; - private final long minDataTimestamp; - private final long maxDataTimestamp; - private final boolean descending; - @Nullable private final Filter postFilter; - private final ColumnSelectorBitmapIndexSelector bitmapIndexSelector; + private final ImmutableBitmap preFilterBitmap; - public CursorSequenceBuilder( - QueryableIndexStorageAdapter storageAdapter, - Interval interval, - VirtualColumns virtualColumns, - Granularity gran, - Offset offset, - long minDataTimestamp, - long maxDataTimestamp, - boolean descending, - @Nullable Filter postFilter, - ColumnSelectorBitmapIndexSelector bitmapIndexSelector + public FilterAnalysis( + @Nullable final ImmutableBitmap preFilterBitmap, + @Nullable final Filter postFilter ) { - this.index = storageAdapter.index; - this.interval = interval; - this.virtualColumns = virtualColumns; - this.gran = gran; - this.offset = offset; - this.minDataTimestamp = minDataTimestamp; - this.maxDataTimestamp = maxDataTimestamp; - this.descending = descending; + this.preFilterBitmap = preFilterBitmap; this.postFilter = postFilter; - this.bitmapIndexSelector = bitmapIndexSelector; } - public Sequence build() + @Nullable + public ImmutableBitmap getPreFilterBitmap() { - final Offset baseOffset = offset.clone(); - - // Column caches shared amongst all cursors in this sequence. 
- final Map columnCache = new HashMap<>(); - - final NumericColumn timestamps = (NumericColumn) index.getColumnHolder(ColumnHolder.TIME_COLUMN_NAME).getColumn(); - - final Closer closer = Closer.create(); - closer.register(timestamps); - - Iterable iterable = gran.getIterable(interval); - if (descending) { - iterable = Lists.reverse(ImmutableList.copyOf(iterable)); - } - - return Sequences.withBaggage( - Sequences.map( - Sequences.simple(iterable), - new Function() - { - @Override - public Cursor apply(final Interval inputInterval) - { - final long timeStart = Math.max(interval.getStartMillis(), inputInterval.getStartMillis()); - final long timeEnd = Math.min( - interval.getEndMillis(), - gran.increment(inputInterval.getStart()).getMillis() - ); - - if (descending) { - for (; baseOffset.withinBounds(); baseOffset.increment()) { - if (timestamps.getLongSingleValueRow(baseOffset.getOffset()) < timeEnd) { - break; - } - } - } else { - for (; baseOffset.withinBounds(); baseOffset.increment()) { - if (timestamps.getLongSingleValueRow(baseOffset.getOffset()) >= timeStart) { - break; - } - } - } - - final Offset offset = descending ? - new DescendingTimestampCheckingOffset( - baseOffset, - timestamps, - timeStart, - minDataTimestamp >= timeStart - ) : - new AscendingTimestampCheckingOffset( - baseOffset, - timestamps, - timeEnd, - maxDataTimestamp < timeEnd - ); - - - final Offset baseCursorOffset = offset.clone(); - final ColumnSelectorFactory columnSelectorFactory = new QueryableIndexColumnSelectorFactory( - index, - virtualColumns, - descending, - closer, - baseCursorOffset.getBaseReadableOffset(), - columnCache - ); - final DateTime myBucket = gran.toDateTime(inputInterval.getStartMillis()); - - if (postFilter == null) { - return new QueryableIndexCursor(baseCursorOffset, columnSelectorFactory, myBucket); - } else { - FilteredOffset filteredOffset = new FilteredOffset( - baseCursorOffset, - columnSelectorFactory, - descending, - postFilter, - bitmapIndexSelector - ); - return new QueryableIndexCursor(filteredOffset, columnSelectorFactory, myBucket); - } - - } - } - ), - closer - ); + return preFilterBitmap; } - } - private static class QueryableIndexCursor implements HistoricalCursor - { - private final Offset cursorOffset; - private final ColumnSelectorFactory columnSelectorFactory; - private final DateTime bucketStart; - - QueryableIndexCursor(Offset cursorOffset, ColumnSelectorFactory columnSelectorFactory, DateTime bucketStart) + @Nullable + public Filter getPostFilter() { - this.cursorOffset = cursorOffset; - this.columnSelectorFactory = columnSelectorFactory; - this.bucketStart = bucketStart; + return postFilter; } - - @Override - public Offset getOffset() - { - return cursorOffset; - } - - @Override - public ColumnSelectorFactory getColumnSelectorFactory() - { - return columnSelectorFactory; - } - - @Override - public DateTime getTime() - { - return bucketStart; - } - - @Override - public void advance() - { - cursorOffset.increment(); - // Must call BaseQuery.checkInterrupted() after cursorOffset.increment(), not before, because - // FilteredOffset.increment() is a potentially long, not an "instant" operation (unlike to all other subclasses - // of Offset) and it returns early on interruption, leaving itself in an illegal state. We should not let - // aggregators, etc. access this illegal state and throw a QueryInterruptedException by calling - // BaseQuery.checkInterrupted(). 
- BaseQuery.checkInterrupted(); - } - - @Override - public void advanceUninterruptibly() - { - cursorOffset.increment(); - } - - @Override - public void advanceTo(int offset) - { - int count = 0; - while (count < offset && !isDone()) { - advance(); - count++; - } - } - - @Override - public boolean isDone() - { - return !cursorOffset.withinBounds(); - } - - @Override - public boolean isDoneOrInterrupted() - { - return isDone() || Thread.currentThread().isInterrupted(); - } - - @Override - public void reset() - { - cursorOffset.reset(); - } - } - - public abstract static class TimestampCheckingOffset extends Offset - { - final Offset baseOffset; - final NumericColumn timestamps; - final long timeLimit; - final boolean allWithinThreshold; - - TimestampCheckingOffset( - Offset baseOffset, - NumericColumn timestamps, - long timeLimit, - boolean allWithinThreshold - ) - { - this.baseOffset = baseOffset; - this.timestamps = timestamps; - this.timeLimit = timeLimit; - // checks if all the values are within the Threshold specified, skips timestamp lookups and checks if all values - // are within threshold. - this.allWithinThreshold = allWithinThreshold; - } - - @Override - public int getOffset() - { - return baseOffset.getOffset(); - } - - @Override - public boolean withinBounds() - { - if (!baseOffset.withinBounds()) { - return false; - } - if (allWithinThreshold) { - return true; - } - return timeInRange(timestamps.getLongSingleValueRow(baseOffset.getOffset())); - } - - @Override - public void reset() - { - baseOffset.reset(); - } - - @Override - public ReadableOffset getBaseReadableOffset() - { - return baseOffset.getBaseReadableOffset(); - } - - protected abstract boolean timeInRange(long current); - - @Override - public void increment() - { - baseOffset.increment(); - } - - @SuppressWarnings("MethodDoesntCallSuperMethod") - @Override - public Offset clone() - { - throw new IllegalStateException("clone"); - } - - @Override - public void inspectRuntimeShape(RuntimeShapeInspector inspector) - { - inspector.visit("baseOffset", baseOffset); - inspector.visit("timestamps", timestamps); - inspector.visit("allWithinThreshold", allWithinThreshold); - } - } - - public static class AscendingTimestampCheckingOffset extends TimestampCheckingOffset - { - AscendingTimestampCheckingOffset( - Offset baseOffset, - NumericColumn timestamps, - long timeLimit, - boolean allWithinThreshold - ) - { - super(baseOffset, timestamps, timeLimit, allWithinThreshold); - } - - @Override - protected final boolean timeInRange(long current) - { - return current < timeLimit; - } - - @Override - public String toString() - { - return (baseOffset.withinBounds() ? timestamps.getLongSingleValueRow(baseOffset.getOffset()) : "OOB") + - "<" + timeLimit + "::" + baseOffset; - } - - @SuppressWarnings("MethodDoesntCallSuperMethod") - @Override - public Offset clone() - { - return new AscendingTimestampCheckingOffset(baseOffset.clone(), timestamps, timeLimit, allWithinThreshold); - } - } - - public static class DescendingTimestampCheckingOffset extends TimestampCheckingOffset - { - DescendingTimestampCheckingOffset( - Offset baseOffset, - NumericColumn timestamps, - long timeLimit, - boolean allWithinThreshold - ) - { - super(baseOffset, timestamps, timeLimit, allWithinThreshold); - } - - @Override - protected final boolean timeInRange(long current) - { - return current >= timeLimit; - } - - @Override - public String toString() - { - return timeLimit + ">=" + - (baseOffset.withinBounds() ? 
timestamps.getLongSingleValueRow(baseOffset.getOffset()) : "OOB") + - "::" + baseOffset; - } - - @SuppressWarnings("MethodDoesntCallSuperMethod") - @Override - public Offset clone() - { - return new DescendingTimestampCheckingOffset(baseOffset.clone(), timestamps, timeLimit, allWithinThreshold); - } - } - - @Override - public Metadata getMetadata() - { - return index.getMetadata(); } } diff --git a/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java b/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java index 1d0d5bf0246..4e7876b9a83 100644 --- a/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java +++ b/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java @@ -256,6 +256,11 @@ public class VirtualColumns implements Cacheable return virtualColumns.toArray(new VirtualColumn[0]); } + public int size() + { + return virtualColumns.size(); + } + public ColumnSelectorFactory wrap(final ColumnSelectorFactory baseFactory) { return new VirtualizedColumnSelectorFactory(baseFactory, this); diff --git a/processing/src/main/java/org/apache/druid/segment/column/BaseColumn.java b/processing/src/main/java/org/apache/druid/segment/column/BaseColumn.java index d692725fd12..f22693365e1 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/BaseColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/column/BaseColumn.java @@ -19,12 +19,26 @@ package org.apache.druid.segment.column; +import org.apache.druid.java.util.common.UOE; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.data.ReadableOffset; +import org.apache.druid.segment.vector.ReadableVectorOffset; +import org.apache.druid.segment.vector.VectorObjectSelector; +import org.apache.druid.segment.vector.VectorValueSelector; import java.io.Closeable; public interface BaseColumn extends Closeable { ColumnValueSelector makeColumnValueSelector(ReadableOffset offset); + + default VectorValueSelector makeVectorValueSelector(ReadableVectorOffset offset) + { + throw new UOE("Cannot make VectorValueSelector for column with class[%s]", getClass().getName()); + } + + default VectorObjectSelector makeVectorObjectSelector(ReadableVectorOffset offset) + { + throw new UOE("Cannot make VectorObjectSelector for column with class[%s]", getClass().getName()); + } } diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java index 5141dba7308..620a0f6e45c 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java @@ -24,6 +24,7 @@ import com.fasterxml.jackson.annotation.JsonProperty; import org.apache.druid.java.util.common.ISE; /** + * */ public class ColumnCapabilitiesImpl implements ColumnCapabilities { @@ -38,6 +39,13 @@ public class ColumnCapabilitiesImpl implements ColumnCapabilities @JsonIgnore private boolean filterable; + public static ColumnCapabilitiesImpl copyOf(final ColumnCapabilities other) + { + final ColumnCapabilitiesImpl capabilities = new ColumnCapabilitiesImpl(); + capabilities.merge(other); + return capabilities; + } + @JsonIgnore private boolean complete = false; diff --git a/processing/src/main/java/org/apache/druid/segment/column/ComplexColumn.java b/processing/src/main/java/org/apache/druid/segment/column/ComplexColumn.java index acdfcd6b299..1f3c080271b 100644 --- 
a/processing/src/main/java/org/apache/druid/segment/column/ComplexColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ComplexColumn.java @@ -24,6 +24,8 @@ import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.ObjectColumnSelector; import org.apache.druid.segment.data.ReadableOffset; +import org.apache.druid.segment.vector.ReadableVectorOffset; +import org.apache.druid.segment.vector.VectorObjectSelector; import javax.annotation.Nullable; @@ -96,4 +98,54 @@ public interface ComplexColumn extends BaseColumn } }; } + + @Override + default VectorObjectSelector makeVectorObjectSelector(ReadableVectorOffset offset) + { + return new VectorObjectSelector() + { + final Object[] vector = new Object[offset.getMaxVectorSize()]; + + private int id = ReadableVectorOffset.NULL_ID; + + @Override + public Object[] getObjectVector() + { + if (id == offset.getId()) { + return vector; + } + + if (offset.isContiguous()) { + final int startOffset = offset.getStartOffset(); + final int vectorSize = offset.getCurrentVectorSize(); + + for (int i = 0; i < vectorSize; i++) { + vector[i] = getRowValue(startOffset + i); + } + } else { + final int[] offsets = offset.getOffsets(); + final int vectorSize = offset.getCurrentVectorSize(); + + for (int i = 0; i < vectorSize; i++) { + vector[i] = getRowValue(offsets[i]); + } + } + + id = offset.getId(); + return vector; + } + + @Override + public int getCurrentVectorSize() + { + return offset.getCurrentVectorSize(); + } + + @Override + public int getMaxVectorSize() + { + return offset.getMaxVectorSize(); + } + }; + } } diff --git a/processing/src/main/java/org/apache/druid/segment/column/DictionaryEncodedColumn.java b/processing/src/main/java/org/apache/druid/segment/column/DictionaryEncodedColumn.java index 2e67a44ad4f..229d7e35b8d 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/DictionaryEncodedColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/column/DictionaryEncodedColumn.java @@ -24,6 +24,9 @@ import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.data.IndexedInts; import org.apache.druid.segment.data.ReadableOffset; +import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; +import org.apache.druid.segment.vector.ReadableVectorOffset; +import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; import javax.annotation.Nullable; @@ -32,11 +35,17 @@ import javax.annotation.Nullable; public interface DictionaryEncodedColumn> extends BaseColumn { int length(); + boolean hasMultipleValues(); + int getSingleValueRow(int rowNum); + IndexedInts getMultiValueRow(int rowNum); + ActualType lookupName(int id); + int lookupId(ActualType name); + int getCardinality(); DimensionSelector makeDimensionSelector(ReadableOffset offset, @Nullable ExtractionFn extractionFn); @@ -46,4 +55,8 @@ public interface DictionaryEncodedColumn { @Nullable @@ -318,6 +321,164 @@ public class StringDictionaryEncodedColumn implements DictionaryEncodedColumn { private final GenericIndexed> baseDoubleBuffers; + + // The number of rows in this column. private final int totalSize; + + // The number of doubles per buffer. 
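// (Block layout, for reference: the value for row i lives in buffer i / sizePer at slot i % sizePer, so a contiguous read crosses a buffer boundary only once per sizePer rows.)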
private final int sizePer; public BlockLayoutColumnarDoublesSupplier( @@ -80,7 +84,9 @@ public class BlockLayoutColumnarDoublesSupplier implements Supplier holder; - /** doubleBuffer's position must be 0 */ + /** + * doubleBuffer's position must be 0 + */ DoubleBuffer doubleBuffer; @Override @@ -103,6 +109,63 @@ public class BlockLayoutColumnarDoublesSupplier implements Supplier= sizePer) { + break; + } + + out[i] = doubleBuffer.get(index); + } + + assert i > p; + p = i; + } + } + protected void loadBuffer(int bufferNum) { CloseQuietly.close(holder); diff --git a/processing/src/main/java/org/apache/druid/segment/data/BlockLayoutColumnarFloatsSupplier.java b/processing/src/main/java/org/apache/druid/segment/data/BlockLayoutColumnarFloatsSupplier.java index 5673164131e..a7a8deaec1c 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/BlockLayoutColumnarFloatsSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/data/BlockLayoutColumnarFloatsSupplier.java @@ -21,7 +21,6 @@ package org.apache.druid.segment.data; import com.google.common.base.Supplier; import org.apache.druid.collections.ResourceHolder; -import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.guava.CloseQuietly; import java.nio.ByteBuffer; @@ -31,7 +30,11 @@ import java.nio.FloatBuffer; public class BlockLayoutColumnarFloatsSupplier implements Supplier { private final GenericIndexed> baseFloatBuffers; + + // The number of rows in this column. private final int totalSize; + + // The number of floats per buffer. private final int sizePer; public BlockLayoutColumnarFloatsSupplier( @@ -81,7 +84,9 @@ public class BlockLayoutColumnarFloatsSupplier implements Supplier holder; - /** floatBuffer's position must be 0 */ + /** + * floatBuffer's position must be 0 + */ FloatBuffer floatBuffer; @Override @@ -105,17 +110,59 @@ public class BlockLayoutColumnarFloatsSupplier implements Supplier= sizePer) { + break; + } + + out[i] = floatBuffer.get(index); + } + + assert i > p; + p = i; } } diff --git a/processing/src/main/java/org/apache/druid/segment/data/BlockLayoutColumnarLongsSupplier.java b/processing/src/main/java/org/apache/druid/segment/data/BlockLayoutColumnarLongsSupplier.java index b8be1de9627..808e7bedce7 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/BlockLayoutColumnarLongsSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/data/BlockLayoutColumnarLongsSupplier.java @@ -21,7 +21,6 @@ package org.apache.druid.segment.data; import com.google.common.base.Supplier; import org.apache.druid.collections.ResourceHolder; -import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.guava.CloseQuietly; import java.nio.ByteBuffer; @@ -31,7 +30,11 @@ import java.nio.LongBuffer; public class BlockLayoutColumnarLongsSupplier implements Supplier { private final GenericIndexed> baseLongBuffers; + + // The number of rows in this column. private final int totalSize; + + // The number of longs per buffer. 
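// (Same block layout as the double and float suppliers, except that each buffer of longs is decoded through the CompressionFactory.LongEncodingReader; see the reader.read(...) calls below.)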
private final int sizePer; private final CompressionFactory.LongEncodingReader baseReader; @@ -85,6 +88,7 @@ public class BlockLayoutColumnarLongsSupplier implements Supplier buffer = holder.get(); // asLongBuffer() makes the longBuffer's position = 0 longBuffer = buffer.asLongBuffer(); + reader.setBuffer(buffer); currBufferNum = bufferNum; } }; @@ -120,7 +124,9 @@ public class BlockLayoutColumnarLongsSupplier implements Supplier int currBufferNum = -1; ResourceHolder holder; ByteBuffer buffer; - /** longBuffer's position must be 0 */ + /** + * longBuffer's position must be 0 + */ LongBuffer longBuffer; @Override @@ -144,17 +150,41 @@ public class BlockLayoutColumnarLongsSupplier implements Supplier } @Override - public void fill(int index, long[] toFill) + public void get(final long[] out, final int start, final int length) { - if (totalSize - index < toFill.length) { - throw new IndexOutOfBoundsException( - StringUtils.format( - "Cannot fill array of size[%,d] at index[%,d]. Max size[%,d]", toFill.length, index, totalSize - ) - ); + // division + remainder is optimized by the compiler so keep those together + int bufferNum = start / sizePer; + int bufferIndex = start % sizePer; + + int p = 0; + + while (p < length) { + if (bufferNum != currBufferNum) { + loadBuffer(bufferNum); + } + + final int limit = Math.min(length - p, sizePer - bufferIndex); + reader.read(out, p, bufferIndex, limit); + p += limit; + bufferNum++; + bufferIndex = 0; } - for (int i = 0; i < toFill.length; i++) { - toFill[i] = get(index + i); + } + + @Override + public void get(final long[] out, final int[] indexes, final int length) + { + int p = 0; + + while (p < length) { + int bufferNum = indexes[p] / sizePer; + if (bufferNum != currBufferNum) { + loadBuffer(bufferNum); + } + + final int numRead = reader.read(out, p, indexes, length - p, bufferNum * sizePer, sizePer); + assert numRead > 0; + p += numRead; } } diff --git a/processing/src/main/java/org/apache/druid/segment/data/ColumnarDoubles.java b/processing/src/main/java/org/apache/druid/segment/data/ColumnarDoubles.java index 39e9cf03aba..4f357af4ecb 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/ColumnarDoubles.java +++ b/processing/src/main/java/org/apache/druid/segment/data/ColumnarDoubles.java @@ -25,7 +25,12 @@ import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.DoubleColumnSelector; import org.apache.druid.segment.historical.HistoricalColumnSelector; +import org.apache.druid.segment.vector.BaseDoubleVectorValueSelector; +import org.apache.druid.segment.vector.ReadableVectorOffset; +import org.apache.druid.segment.vector.VectorSelectorUtils; +import org.apache.druid.segment.vector.VectorValueSelector; +import javax.annotation.Nullable; import java.io.Closeable; /** @@ -38,6 +43,20 @@ public interface ColumnarDoubles extends Closeable double get(int index); + default void get(double[] out, int start, int length) + { + for (int i = 0; i < length; i++) { + out[i] = get(i + start); + } + } + + default void get(double[] out, int[] indexes, int length) + { + for (int i = 0; i < length; i++) { + out[i] = get(indexes[i]); + } + } + @Override void close(); @@ -106,5 +125,60 @@ public interface ColumnarDoubles extends Closeable return new HistoricalDoubleColumnSelectorWithNulls(); } } -} + default VectorValueSelector makeVectorValueSelector( + final ReadableVectorOffset theOffset, + final ImmutableBitmap nullValueBitmap + ) + { + class 
ColumnarDoublesVectorValueSelector extends BaseDoubleVectorValueSelector + { + private final double[] doubleVector; + + private int id = ReadableVectorOffset.NULL_ID; + + @Nullable + private boolean[] nullVector = null; + + private ColumnarDoublesVectorValueSelector() + { + super(theOffset); + this.doubleVector = new double[offset.getMaxVectorSize()]; + } + + @Nullable + @Override + public boolean[] getNullVector() + { + computeVectorsIfNeeded(); + return nullVector; + } + + @Override + public double[] getDoubleVector() + { + computeVectorsIfNeeded(); + return doubleVector; + } + + private void computeVectorsIfNeeded() + { + if (id == offset.getId()) { + return; + } + + if (offset.isContiguous()) { + ColumnarDoubles.this.get(doubleVector, offset.getStartOffset(), offset.getCurrentVectorSize()); + } else { + ColumnarDoubles.this.get(doubleVector, offset.getOffsets(), offset.getCurrentVectorSize()); + } + + nullVector = VectorSelectorUtils.populateNullVector(nullVector, offset, nullValueBitmap); + + id = offset.getId(); + } + } + + return new ColumnarDoublesVectorValueSelector(); + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/data/ColumnarFloats.java b/processing/src/main/java/org/apache/druid/segment/data/ColumnarFloats.java index f8441a27b68..adcffb4597a 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/ColumnarFloats.java +++ b/processing/src/main/java/org/apache/druid/segment/data/ColumnarFloats.java @@ -25,7 +25,12 @@ import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.FloatColumnSelector; import org.apache.druid.segment.historical.HistoricalColumnSelector; +import org.apache.druid.segment.vector.BaseFloatVectorValueSelector; +import org.apache.druid.segment.vector.ReadableVectorOffset; +import org.apache.druid.segment.vector.VectorSelectorUtils; +import org.apache.druid.segment.vector.VectorValueSelector; +import javax.annotation.Nullable; import java.io.Closeable; /** @@ -38,7 +43,19 @@ public interface ColumnarFloats extends Closeable float get(int index); - void fill(int index, float[] toFill); + default void get(float[] out, int start, int length) + { + for (int i = 0; i < length; i++) { + out[i] = get(i + start); + } + } + + default void get(float[] out, int[] indexes, int length) + { + for (int i = 0; i < length; i++) { + out[i] = get(indexes[i]); + } + } @Override void close(); @@ -108,4 +125,60 @@ public interface ColumnarFloats extends Closeable return new HistoricalFloatColumnSelectorwithNulls(); } } + + default VectorValueSelector makeVectorValueSelector( + final ReadableVectorOffset theOffset, + final ImmutableBitmap nullValueBitmap + ) + { + class ColumnarFloatsVectorValueSelector extends BaseFloatVectorValueSelector + { + private final float[] floatVector; + + private int id = ReadableVectorOffset.NULL_ID; + + @Nullable + private boolean[] nullVector = null; + + private ColumnarFloatsVectorValueSelector() + { + super(theOffset); + this.floatVector = new float[offset.getMaxVectorSize()]; + } + + @Nullable + @Override + public boolean[] getNullVector() + { + computeVectorsIfNeeded(); + return nullVector; + } + + @Override + public float[] getFloatVector() + { + computeVectorsIfNeeded(); + return floatVector; + } + + private void computeVectorsIfNeeded() + { + if (id == offset.getId()) { + return; + } + + if (offset.isContiguous()) { + ColumnarFloats.this.get(floatVector, offset.getStartOffset(), offset.getCurrentVectorSize()); 
+ } else { + ColumnarFloats.this.get(floatVector, offset.getOffsets(), offset.getCurrentVectorSize()); + } + + nullVector = VectorSelectorUtils.populateNullVector(nullVector, offset, nullValueBitmap); + + id = offset.getId(); + } + } + + return new ColumnarFloatsVectorValueSelector(); + } } diff --git a/processing/src/main/java/org/apache/druid/segment/data/ColumnarLongs.java b/processing/src/main/java/org/apache/druid/segment/data/ColumnarLongs.java index 00ff086ddd8..41a3ac10410 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/ColumnarLongs.java +++ b/processing/src/main/java/org/apache/druid/segment/data/ColumnarLongs.java @@ -25,7 +25,12 @@ import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.LongColumnSelector; import org.apache.druid.segment.historical.HistoricalColumnSelector; +import org.apache.druid.segment.vector.BaseLongVectorValueSelector; +import org.apache.druid.segment.vector.ReadableVectorOffset; +import org.apache.druid.segment.vector.VectorSelectorUtils; +import org.apache.druid.segment.vector.VectorValueSelector; +import javax.annotation.Nullable; import java.io.Closeable; /** @@ -38,7 +43,19 @@ public interface ColumnarLongs extends Closeable long get(int index); - void fill(int index, long[] toFill); + default void get(long[] out, int start, int length) + { + for (int i = 0; i < length; i++) { + out[i] = get(i + start); + } + } + + default void get(long[] out, int[] indexes, int length) + { + for (int i = 0; i < length; i++) { + out[i] = get(indexes[i]); + } + } @Override void close(); @@ -108,4 +125,60 @@ public interface ColumnarLongs extends Closeable return new HistoricalLongColumnSelectorWithNulls(); } } + + default VectorValueSelector makeVectorValueSelector( + final ReadableVectorOffset theOffset, + final ImmutableBitmap nullValueBitmap + ) + { + class ColumnarLongsVectorValueSelector extends BaseLongVectorValueSelector + { + private final long[] longVector; + + private int id = ReadableVectorOffset.NULL_ID; + + @Nullable + private boolean[] nullVector = null; + + private ColumnarLongsVectorValueSelector() + { + super(theOffset); + this.longVector = new long[offset.getMaxVectorSize()]; + } + + @Nullable + @Override + public boolean[] getNullVector() + { + computeVectorsIfNeeded(); + return nullVector; + } + + @Override + public long[] getLongVector() + { + computeVectorsIfNeeded(); + return longVector; + } + + private void computeVectorsIfNeeded() + { + if (id == offset.getId()) { + return; + } + + if (offset.isContiguous()) { + ColumnarLongs.this.get(longVector, offset.getStartOffset(), offset.getCurrentVectorSize()); + } else { + ColumnarLongs.this.get(longVector, offset.getOffsets(), offset.getCurrentVectorSize()); + } + + nullVector = VectorSelectorUtils.populateNullVector(nullVector, offset, nullValueBitmap); + + id = offset.getId(); + } + } + + return new ColumnarLongsVectorValueSelector(); + } } diff --git a/processing/src/main/java/org/apache/druid/segment/data/ColumnarMultiInts.java b/processing/src/main/java/org/apache/druid/segment/data/ColumnarMultiInts.java index 8392e133ff7..0064e126cd4 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/ColumnarMultiInts.java +++ b/processing/src/main/java/org/apache/druid/segment/data/ColumnarMultiInts.java @@ -27,6 +27,16 @@ import java.io.Closeable; */ public interface ColumnarMultiInts extends Indexed, Closeable { + /** + * Returns the values at a given row index. 
The IndexedInts object may potentially be reused, so callers should + not keep references to it. + */ @Override IndexedInts get(int index); + + /** + * Returns the values at a given row index. The IndexedInts object will not be reused. This method may be less + * efficient than plain "get". + */ + IndexedInts getUnshared(int index); } diff --git a/processing/src/main/java/org/apache/druid/segment/data/CompressedVSizeColumnarIntsSupplier.java b/processing/src/main/java/org/apache/druid/segment/data/CompressedVSizeColumnarIntsSupplier.java index 335dabfe154..7f10c103b29 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/CompressedVSizeColumnarIntsSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/data/CompressedVSizeColumnarIntsSupplier.java @@ -289,7 +289,9 @@ public class CompressedVSizeColumnarIntsSupplier implements WritableSupplier int currBufferNum = -1; ResourceHolder holder; - /** buffer's position must be 0 */ + /** + * buffer's position must be 0 + */ ByteBuffer buffer; boolean bigEndian; @@ -322,6 +324,66 @@ public class CompressedVSizeColumnarIntsSupplier implements WritableSupplier + @Override + public void get(final int[] out, final int start, final int length) + { + int p = 0; + + while (p < length) { + // assumes the number of entries in each buffer is a power of 2 + final int bufferNum = (start + p) >> div; + if (bufferNum != currBufferNum) { + loadBuffer(bufferNum); + } + + final int currBufferStart = bufferNum * sizePer; + final int nextBufferStart = currBufferStart + sizePer; + + int i; + for (i = p; i < length; i++) { + final int index = start + i; + if (index >= nextBufferStart) { + break; + } + + out[i] = _get(buffer, bigEndian, index - currBufferStart); + } + + assert i > p; + p = i; + } + } + + @Override + public void get(final int[] out, final int[] indexes, final int length) + { + int p = 0; + + while (p < length) { + // assumes the number of entries in each buffer is a power of 2 + final int bufferNum = indexes[p] >> div; + if (bufferNum != currBufferNum) { + loadBuffer(bufferNum); + } + + final int currBufferStart = bufferNum * sizePer; + final int nextBufferStart = currBufferStart + sizePer; + + int i; + for (i = p; i < length; i++) { + final int index = indexes[i]; + if (index >= nextBufferStart) { + break; + } + + out[i] = _get(buffer, bigEndian, index - currBufferStart); + } + + assert i > p; + p = i; + } + } + /** * Returns the value at the given bufferIndex in the current decompression buffer * diff --git a/processing/src/main/java/org/apache/druid/segment/data/CompressedVSizeColumnarMultiIntsSupplier.java b/processing/src/main/java/org/apache/druid/segment/data/CompressedVSizeColumnarMultiIntsSupplier.java index 4df38560646..0017e8d95e3 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/CompressedVSizeColumnarMultiIntsSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/data/CompressedVSizeColumnarMultiIntsSupplier.java @@ -46,9 +46,13 @@ public class CompressedVSizeColumnarMultiIntsSupplier implements WritableSupplier { private static final byte version = 0x2; - /** See class-level comment */ + /** + * See class-level comment + */ private final CompressedVSizeColumnarIntsSupplier offsetSupplier; - /** See class-level comment */ + /** + * See class-level comment + */ private final CompressedVSizeColumnarIntsSupplier valueSupplier; private CompressedVSizeColumnarMultiIntsSupplier( @@ -178,6 +182,39 @@ public class CompressedVSizeColumnarMultiIntsSupplier implements WritableSupplier return rowValues; } + @Override + public IndexedInts getUnshared(int index) + { + final int offset = offsets.get(index); + final int size = offsets.get(index + 1) - offset; + + class UnsharedIndexedInts implements IndexedInts + { + @Override + public int size() + { +
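// "offset" and "size" were captured when getUnshared() was called, so this IndexedInts stays valid even after later calls back into the column.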
return size; + } + + @Override + public int get(int index) + { + if (index >= size) { + throw new IAE("Index[%d] >= size[%d]", index, size); + } + return values.get(index + offset); + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + inspector.visit("values", values); + } + } + + return new UnsharedIndexedInts(); + } + @Override public int indexOf(IndexedInts value) { diff --git a/processing/src/main/java/org/apache/druid/segment/data/CompressionFactory.java b/processing/src/main/java/org/apache/druid/segment/data/CompressionFactory.java index b204bfd97af..b7e7c96b26b 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/CompressionFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/data/CompressionFactory.java @@ -283,6 +283,27 @@ public class CompressionFactory long read(int index); + default void read(long[] out, int outPosition, int startIndex, int length) + { + for (int i = 0; i < length; i++) { + out[outPosition + i] = read(startIndex + i); + } + } + + default int read(long[] out, int outPosition, int[] indexes, int length, int indexOffset, int limit) + { + for (int i = 0; i < length; i++) { + int index = indexes[outPosition + i] - indexOffset; + if (index >= limit) { + return i; + } + + out[outPosition + i] = read(index); + } + + return length; + } + LongEncodingReader duplicate(); } diff --git a/processing/src/main/java/org/apache/druid/segment/data/EntireLayoutColumnarFloatsSupplier.java b/processing/src/main/java/org/apache/druid/segment/data/EntireLayoutColumnarFloatsSupplier.java index 612f8b08ef5..05e35b539fb 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/EntireLayoutColumnarFloatsSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/data/EntireLayoutColumnarFloatsSupplier.java @@ -20,7 +20,6 @@ package org.apache.druid.segment.data; import com.google.common.base.Supplier; -import org.apache.druid.java.util.common.StringUtils; import java.nio.ByteBuffer; import java.nio.ByteOrder; @@ -58,21 +57,6 @@ public class EntireLayoutColumnarFloatsSupplier implements Supplier { @@ -55,21 +54,6 @@ public class EntireLayoutColumnarLongsSupplier implements Supplier 0); if (baseMatchers.length == 1) { @@ -211,5 +232,32 @@ public class AndFilter implements BooleanFilter }; } + private static VectorValueMatcher makeVectorMatcher(final VectorValueMatcher[] baseMatchers) + { + Preconditions.checkState(baseMatchers.length > 0); + if (baseMatchers.length == 1) { + return baseMatchers[0]; + } + return new BaseVectorValueMatcher(baseMatchers[0]) + { + @Override + public ReadableVectorMatch match(final ReadableVectorMatch mask) + { + ReadableVectorMatch match = mask; + + for (VectorValueMatcher matcher : baseMatchers) { + if (match.isAllFalse()) { + // Short-circuit if the entire vector is false. 
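// AND only ever narrows the match, so the remaining matchers could not add any rows back.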
+ break; + } + + match = matcher.match(match); + } + + assert match.isValid(mask); + return match; + } + }; + } } diff --git a/processing/src/main/java/org/apache/druid/segment/filter/BoundFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/BoundFilter.java index 6b7b6fdc1a5..28b8548f9d7 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/BoundFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/BoundFilter.java @@ -35,11 +35,15 @@ import org.apache.druid.query.filter.DruidLongPredicate; import org.apache.druid.query.filter.DruidPredicateFactory; import org.apache.druid.query.filter.Filter; import org.apache.druid.query.filter.ValueMatcher; +import org.apache.druid.query.filter.vector.VectorValueMatcher; +import org.apache.druid.query.filter.vector.VectorValueMatcherColumnStrategizer; import org.apache.druid.query.ordering.StringComparators; import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.DimensionHandlerUtils; import org.apache.druid.segment.IntListUtils; import org.apache.druid.segment.column.BitmapIndex; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import java.util.Comparator; @@ -124,6 +128,22 @@ public class BoundFilter implements Filter return Filters.makeValueMatcher(factory, boundDimFilter.getDimension(), getPredicateFactory()); } + @Override + public VectorValueMatcher makeVectorMatcher(final VectorColumnSelectorFactory factory) + { + return DimensionHandlerUtils.makeVectorProcessor( + boundDimFilter.getDimension(), + VectorValueMatcherColumnStrategizer.instance(), + factory + ).makeMatcher(getPredicateFactory()); + } + + @Override + public boolean canVectorizeMatcher() + { + return true; + } + @Override public boolean supportsBitmapIndex(BitmapIndexSelector selector) { diff --git a/processing/src/main/java/org/apache/druid/segment/filter/DimensionPredicateFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/DimensionPredicateFilter.java index fe2a6a647e1..fbcff2a2daf 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/DimensionPredicateFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/DimensionPredicateFilter.java @@ -31,8 +31,12 @@ import org.apache.druid.query.filter.DruidLongPredicate; import org.apache.druid.query.filter.DruidPredicateFactory; import org.apache.druid.query.filter.Filter; import org.apache.druid.query.filter.ValueMatcher; +import org.apache.druid.query.filter.vector.VectorValueMatcher; +import org.apache.druid.query.filter.vector.VectorValueMatcherColumnStrategizer; import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.DimensionHandlerUtils; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; /** */ @@ -100,6 +104,22 @@ public class DimensionPredicateFilter implements Filter return Filters.makeValueMatcher(factory, dimension, predicateFactory); } + @Override + public VectorValueMatcher makeVectorMatcher(final VectorColumnSelectorFactory factory) + { + return DimensionHandlerUtils.makeVectorProcessor( + dimension, + VectorValueMatcherColumnStrategizer.instance(), + factory + ).makeMatcher(predicateFactory); + } + + @Override + public boolean canVectorizeMatcher() + { + return true; + } + @Override public boolean supportsBitmapIndex(BitmapIndexSelector selector) { diff --git 
a/processing/src/main/java/org/apache/druid/segment/filter/InFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/InFilter.java index 18a2db358c2..8186781ee8a 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/InFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/InFilter.java @@ -33,10 +33,14 @@ import org.apache.druid.query.filter.DruidLongPredicate; import org.apache.druid.query.filter.DruidPredicateFactory; import org.apache.druid.query.filter.Filter; import org.apache.druid.query.filter.ValueMatcher; +import org.apache.druid.query.filter.vector.VectorValueMatcher; +import org.apache.druid.query.filter.vector.VectorValueMatcherColumnStrategizer; import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.DimensionHandlerUtils; import org.apache.druid.segment.IntIteratorUtils; import org.apache.druid.segment.column.BitmapIndex; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import java.util.Iterator; import java.util.Set; @@ -142,6 +146,22 @@ public class InFilter implements Filter return Filters.makeValueMatcher(factory, dimension, getPredicateFactory()); } + @Override + public VectorValueMatcher makeVectorMatcher(final VectorColumnSelectorFactory factory) + { + return DimensionHandlerUtils.makeVectorProcessor( + dimension, + VectorValueMatcherColumnStrategizer.instance(), + factory + ).makeMatcher(getPredicateFactory()); + } + + @Override + public boolean canVectorizeMatcher() + { + return true; + } + @Override public boolean supportsBitmapIndex(BitmapIndexSelector selector) { diff --git a/processing/src/main/java/org/apache/druid/segment/filter/LikeFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/LikeFilter.java index 018102a1833..765bcb430fd 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/LikeFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/LikeFilter.java @@ -30,11 +30,15 @@ import org.apache.druid.query.filter.BitmapIndexSelector; import org.apache.druid.query.filter.Filter; import org.apache.druid.query.filter.LikeDimFilter; import org.apache.druid.query.filter.ValueMatcher; +import org.apache.druid.query.filter.vector.VectorValueMatcher; +import org.apache.druid.query.filter.vector.VectorValueMatcherColumnStrategizer; import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.DimensionHandlerUtils; import org.apache.druid.segment.column.BitmapIndex; import org.apache.druid.segment.data.CloseableIndexed; import org.apache.druid.segment.data.Indexed; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import java.io.IOException; import java.io.UncheckedIOException; @@ -75,6 +79,22 @@ public class LikeFilter implements Filter return Filters.makeValueMatcher(factory, dimension, likeMatcher.predicateFactory(extractionFn)); } + @Override + public VectorValueMatcher makeVectorMatcher(final VectorColumnSelectorFactory factory) + { + return DimensionHandlerUtils.makeVectorProcessor( + dimension, + VectorValueMatcherColumnStrategizer.instance(), + factory + ).makeMatcher(likeMatcher.predicateFactory(extractionFn)); + } + + @Override + public boolean canVectorizeMatcher() + { + return true; + } + @Override public boolean supportsBitmapIndex(BitmapIndexSelector selector) { diff --git a/processing/src/main/java/org/apache/druid/segment/filter/NotFilter.java 
b/processing/src/main/java/org/apache/druid/segment/filter/NotFilter.java index 20ed4ffdc91..ef12e169334 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/NotFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/NotFilter.java @@ -23,9 +23,14 @@ import org.apache.druid.query.BitmapResultFactory; import org.apache.druid.query.filter.BitmapIndexSelector; import org.apache.druid.query.filter.Filter; import org.apache.druid.query.filter.ValueMatcher; +import org.apache.druid.query.filter.vector.BaseVectorValueMatcher; +import org.apache.druid.query.filter.vector.ReadableVectorMatch; +import org.apache.druid.query.filter.vector.VectorMatch; +import org.apache.druid.query.filter.vector.VectorValueMatcher; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; /** */ @@ -70,6 +75,34 @@ public class NotFilter implements Filter }; } + @Override + public VectorValueMatcher makeVectorMatcher(final VectorColumnSelectorFactory factory) + { + final VectorValueMatcher baseMatcher = baseFilter.makeVectorMatcher(factory); + + return new BaseVectorValueMatcher(baseMatcher) + { + final VectorMatch scratch = VectorMatch.wrap(new int[factory.getMaxVectorSize()]); + + @Override + public ReadableVectorMatch match(final ReadableVectorMatch mask) + { + final ReadableVectorMatch baseMatch = baseMatcher.match(mask); + + scratch.copyFrom(mask); + scratch.removeAll(baseMatch); + assert scratch.isValid(mask); + return scratch; + } + }; + } + + @Override + public boolean canVectorizeMatcher() + { + return baseFilter.canVectorizeMatcher(); + } + @Override public boolean supportsBitmapIndex(BitmapIndexSelector selector) { diff --git a/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java index 70d0532d17b..01d82298e40 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java @@ -29,9 +29,14 @@ import org.apache.druid.query.filter.BooleanFilter; import org.apache.druid.query.filter.Filter; import org.apache.druid.query.filter.RowOffsetMatcherFactory; import org.apache.druid.query.filter.ValueMatcher; +import org.apache.druid.query.filter.vector.BaseVectorValueMatcher; +import org.apache.druid.query.filter.vector.ReadableVectorMatch; +import org.apache.druid.query.filter.vector.VectorMatch; +import org.apache.druid.query.filter.vector.VectorValueMatcher; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import java.util.ArrayList; import java.util.List; @@ -77,6 +82,23 @@ public class OrFilter implements BooleanFilter return makeMatcher(matchers); } + @Override + public VectorValueMatcher makeVectorMatcher(final VectorColumnSelectorFactory factory) + { + final VectorValueMatcher[] matchers = new VectorValueMatcher[filters.size()]; + + for (int i = 0; i < filters.size(); i++) { + matchers[i] = filters.get(i).makeVectorMatcher(factory); + } + return makeVectorMatcher(matchers); + } + + @Override + public boolean canVectorizeMatcher() + { + return filters.stream().allMatch(Filter::canVectorizeMatcher); + } + 
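/*
 * A minimal usage sketch (hypothetical caller, names assumed): the vectorized path should only be
 * chosen when the whole filter tree can vectorize, which for AND/OR/NOT means every child reports
 * canVectorizeMatcher() == true; FilteredVectorOffset.create(), later in this patch, checks the
 * same precondition.
 *
 *   if (filter.canVectorizeMatcher()) {
 *     final VectorValueMatcher matcher = filter.makeVectorMatcher(vectorColumnSelectorFactory);
 *     // wrap the base VectorOffset in a FilteredVectorOffset driven by "matcher"
 *   } else {
 *     final ValueMatcher matcher = filter.makeMatcher(columnSelectorFactory);
 *     // fall back to the row-by-row cursor
 *   }
 */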
@Override public ValueMatcher makeMatcher( BitmapIndexSelector selector, @@ -105,39 +127,6 @@ public class OrFilter implements BooleanFilter return makeMatcher(matchers.toArray(AndFilter.EMPTY_VALUE_MATCHER_ARRAY)); } - - private ValueMatcher makeMatcher(final ValueMatcher[] baseMatchers) - { - Preconditions.checkState(baseMatchers.length > 0); - - if (baseMatchers.length == 1) { - return baseMatchers[0]; - } - - return new ValueMatcher() - { - @Override - public boolean matches() - { - for (ValueMatcher matcher : baseMatchers) { - if (matcher.matches()) { - return true; - } - } - return false; - } - - @Override - public void inspectRuntimeShape(RuntimeShapeInspector inspector) - { - inspector.visit("firstBaseMatcher", baseMatchers[0]); - inspector.visit("secondBaseMatcher", baseMatchers[1]); - // Don't inspect the 3rd and all consequent baseMatchers, cut runtime shape combinations at this point. - // Anyway if the filter is so complex, Hotspot won't inline all calls because of the inline limit. - } - }; - } - @Override public List getFilters() { @@ -182,4 +171,74 @@ public class OrFilter implements BooleanFilter { return StringUtils.format("(%s)", OR_JOINER.join(filters)); } + + private static ValueMatcher makeMatcher(final ValueMatcher[] baseMatchers) + { + Preconditions.checkState(baseMatchers.length > 0); + + if (baseMatchers.length == 1) { + return baseMatchers[0]; + } + + return new ValueMatcher() + { + @Override + public boolean matches() + { + for (ValueMatcher matcher : baseMatchers) { + if (matcher.matches()) { + return true; + } + } + return false; + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + inspector.visit("firstBaseMatcher", baseMatchers[0]); + inspector.visit("secondBaseMatcher", baseMatchers[1]); + // Don't inspect the 3rd and all consequent baseMatchers, cut runtime shape combinations at this point. + // Anyway if the filter is so complex, Hotspot won't inline all calls because of the inline limit. + } + }; + } + + private static VectorValueMatcher makeVectorMatcher(final VectorValueMatcher[] baseMatchers) + { + Preconditions.checkState(baseMatchers.length > 0); + if (baseMatchers.length == 1) { + return baseMatchers[0]; + } + + return new BaseVectorValueMatcher(baseMatchers[0]) + { + final VectorMatch currentMask = VectorMatch.wrap(new int[getMaxVectorSize()]); + final VectorMatch scratch = VectorMatch.wrap(new int[getMaxVectorSize()]); + final VectorMatch retVal = VectorMatch.wrap(new int[getMaxVectorSize()]); + + @Override + public ReadableVectorMatch match(final ReadableVectorMatch mask) + { + ReadableVectorMatch currentMatch = baseMatchers[0].match(mask); + + currentMask.copyFrom(mask); + retVal.copyFrom(currentMatch); + + for (int i = 1; i < baseMatchers.length; i++) { + if (retVal.isAllTrue(getCurrentVectorSize())) { + // Short-circuit if the entire vector is true. 
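// OR only ever adds rows to retVal, so once every row of the mask has matched, later matchers cannot change the result.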
+ break; + } + + currentMask.removeAll(currentMatch); + currentMatch = baseMatchers[i].match(currentMask); + retVal.addAll(currentMatch, scratch); + } + + assert retVal.isValid(mask); + return retVal; + } + }; + } } diff --git a/processing/src/main/java/org/apache/druid/segment/filter/SelectorFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/SelectorFilter.java index fe83220fc35..3640b7d3a51 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/SelectorFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/SelectorFilter.java @@ -24,8 +24,12 @@ import org.apache.druid.query.BitmapResultFactory; import org.apache.druid.query.filter.BitmapIndexSelector; import org.apache.druid.query.filter.Filter; import org.apache.druid.query.filter.ValueMatcher; +import org.apache.druid.query.filter.vector.VectorValueMatcher; +import org.apache.druid.query.filter.vector.VectorValueMatcherColumnStrategizer; import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.DimensionHandlerUtils; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; /** */ @@ -55,6 +59,16 @@ public class SelectorFilter implements Filter return Filters.makeValueMatcher(factory, dimension, value); } + @Override + public VectorValueMatcher makeVectorMatcher(final VectorColumnSelectorFactory factory) + { + return DimensionHandlerUtils.makeVectorProcessor( + dimension, + VectorValueMatcherColumnStrategizer.instance(), + factory + ).makeMatcher(value); + } + @Override public boolean supportsBitmapIndex(BitmapIndexSelector selector) { @@ -73,6 +87,12 @@ public class SelectorFilter implements Filter return (double) indexSelector.getBitmapIndex(dimension, value).size() / indexSelector.getNumRows(); } + @Override + public boolean canVectorizeMatcher() + { + return true; + } + @Override public String toString() { diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexColumnSelectorFactory.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexColumnSelectorFactory.java index e1b04c39f58..69c71641c85 100644 --- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexColumnSelectorFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexColumnSelectorFactory.java @@ -38,19 +38,21 @@ import javax.annotation.Nullable; */ class IncrementalIndexColumnSelectorFactory implements ColumnSelectorFactory { + private final IncrementalIndexStorageAdapter adapter; private final IncrementalIndex index; private final VirtualColumns virtualColumns; private final boolean descending; private final IncrementalIndexRowHolder rowHolder; IncrementalIndexColumnSelectorFactory( - IncrementalIndex index, + IncrementalIndexStorageAdapter adapter, VirtualColumns virtualColumns, boolean descending, IncrementalIndexRowHolder rowHolder ) { - this.index = index; + this.adapter = adapter; + this.index = adapter.index; this.virtualColumns = virtualColumns; this.descending = descending; this.rowHolder = rowHolder; @@ -126,6 +128,7 @@ class IncrementalIndexColumnSelectorFactory implements ColumnSelectorFactory return virtualColumns.getColumnCapabilities(columnName); } - return index.getCapabilities(columnName); + // Use adapter.getColumnCapabilities instead of index.getCapabilities (see note in IncrementalIndexStorageAdapter) + return adapter.getColumnCapabilities(columnName); } } diff --git
a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexRowIterator.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexRowIterator.java index 627c63f2d8a..639b6abe175 100644 --- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexRowIterator.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexRowIterator.java @@ -64,7 +64,12 @@ class IncrementalIndexRowIterator implements TransformableRowIterator ) { ColumnSelectorFactory columnSelectorFactory = - new IncrementalIndexColumnSelectorFactory(incrementalIndex, VirtualColumns.EMPTY, false, rowHolder); + new IncrementalIndexColumnSelectorFactory( + new IncrementalIndexStorageAdapter(incrementalIndex), + VirtualColumns.EMPTY, + false, + rowHolder + ); ColumnValueSelector[] dimensionSelectors = incrementalIndex .getDimensions() .stream() diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java index ae8e14a2aad..26835cc2cb2 100644 --- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java @@ -32,13 +32,15 @@ import org.apache.druid.query.filter.ValueMatcher; import org.apache.druid.segment.Capabilities; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.Cursor; +import org.apache.druid.segment.DimensionDictionarySelector; import org.apache.druid.segment.DimensionIndexer; -import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.Metadata; import org.apache.druid.segment.StorageAdapter; import org.apache.druid.segment.VirtualColumns; import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.column.ColumnHolder; +import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.Indexed; import org.apache.druid.segment.data.ListIndexed; import org.apache.druid.segment.filter.BooleanValueMatcher; @@ -52,7 +54,7 @@ import java.util.Iterator; */ public class IncrementalIndexStorageAdapter implements StorageAdapter { - private final IncrementalIndex index; + final IncrementalIndex index; public IncrementalIndexStorageAdapter(IncrementalIndex index) { @@ -97,7 +99,7 @@ public class IncrementalIndexStorageAdapter implements StorageAdapter DimensionIndexer indexer = desc.getIndexer(); int cardinality = indexer.getCardinality(); - return cardinality != DimensionSelector.CARDINALITY_UNKNOWN ? cardinality : Integer.MAX_VALUE; + return cardinality != DimensionDictionarySelector.CARDINALITY_UNKNOWN ? cardinality : Integer.MAX_VALUE; } @Override @@ -154,7 +156,24 @@ public class IncrementalIndexStorageAdapter implements StorageAdapter @Override public ColumnCapabilities getColumnCapabilities(String column) { - return index.getCapabilities(column); + // Different from index.getCapabilities because, in a way, IncrementalIndex's string-typed dimensions + // are always potentially multi-valued at query time. (Missing / null values for a row can potentially be + // represented by an empty array; see StringDimensionIndexer.IndexerDimensionSelector's getRow method.) 
+ // + // We don't want to represent this as having-multiple-values in index.getCapabilities, because that's used + // at index-persisting time to determine if we need a multi-value column or not. However, that means we + // need to tweak the capabilities here in the StorageAdapter (a query-time construct), so at query time + // they appear multi-valued. + + final ColumnCapabilities capabilitiesFromIndex = index.getCapabilities(column); + final IncrementalIndex.DimensionDesc dimensionDesc = index.getDimension(column); + if (dimensionDesc != null && dimensionDesc.getCapabilities().getType() == ValueType.STRING) { + final ColumnCapabilitiesImpl retVal = ColumnCapabilitiesImpl.copyOf(capabilitiesFromIndex); + retVal.setHasMultipleValues(true); + return retVal; + } else { + return capabilitiesFromIndex; + } } @Override @@ -237,7 +256,12 @@ public class IncrementalIndexStorageAdapter implements StorageAdapter ) { currEntry = new IncrementalIndexRowHolder(); - columnSelectorFactory = new IncrementalIndexColumnSelectorFactory(index, virtualColumns, descending, currEntry); + columnSelectorFactory = new IncrementalIndexColumnSelectorFactory( + IncrementalIndexStorageAdapter.this, + virtualColumns, + descending, + currEntry + ); // Set maxRowIndex before creating the filterMatcher. See https://github.com/apache/incubator-druid/pull/6340 maxRowIndex = index.getLastRowIndex(); filterMatcher = filter == null ? BooleanValueMatcher.of(true) : filter.makeMatcher(columnSelectorFactory); diff --git a/processing/src/main/java/org/apache/druid/segment/vector/BaseDoubleVectorValueSelector.java b/processing/src/main/java/org/apache/druid/segment/vector/BaseDoubleVectorValueSelector.java new file mode 100644 index 00000000000..1e94e53c728 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/vector/BaseDoubleVectorValueSelector.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.segment.vector; + +public abstract class BaseDoubleVectorValueSelector implements VectorValueSelector +{ + protected final ReadableVectorOffset offset; + + private int longId = ReadableVectorOffset.NULL_ID; + private int floatId = ReadableVectorOffset.NULL_ID; + + private long[] longVector; + private float[] floatVector; + + public BaseDoubleVectorValueSelector(final ReadableVectorOffset offset) + { + this.offset = offset; + } + + @Override + public int getCurrentVectorSize() + { + return offset.getCurrentVectorSize(); + } + + @Override + public int getMaxVectorSize() + { + return offset.getMaxVectorSize(); + } + + @Override + public long[] getLongVector() + { + if (longId == offset.getId()) { + return longVector; + } + + if (longVector == null) { + longVector = new long[offset.getMaxVectorSize()]; + } + + final double[] doubleVector = getDoubleVector(); + for (int i = 0; i < getCurrentVectorSize(); i++) { + longVector[i] = (long) doubleVector[i]; + } + + longId = offset.getId(); + return longVector; + } + + @Override + public float[] getFloatVector() + { + if (floatId == offset.getId()) { + return floatVector; + } + + if (floatVector == null) { + floatVector = new float[offset.getMaxVectorSize()]; + } + + final double[] doubleVector = getDoubleVector(); + for (int i = 0; i < getCurrentVectorSize(); i++) { + floatVector[i] = (float) doubleVector[i]; + } + + floatId = offset.getId(); + return floatVector; + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/vector/BaseFloatVectorValueSelector.java b/processing/src/main/java/org/apache/druid/segment/vector/BaseFloatVectorValueSelector.java new file mode 100644 index 00000000000..0ec248f39c1 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/vector/BaseFloatVectorValueSelector.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.segment.vector; + +public abstract class BaseFloatVectorValueSelector implements VectorValueSelector +{ + protected final ReadableVectorOffset offset; + + private int longId = ReadableVectorOffset.NULL_ID; + private int doubleId = ReadableVectorOffset.NULL_ID; + + private long[] longVector; + private double[] doubleVector; + + public BaseFloatVectorValueSelector(final ReadableVectorOffset offset) + { + this.offset = offset; + } + + @Override + public int getCurrentVectorSize() + { + return offset.getCurrentVectorSize(); + } + + @Override + public int getMaxVectorSize() + { + return offset.getMaxVectorSize(); + } + + @Override + public long[] getLongVector() + { + if (longId == offset.getId()) { + return longVector; + } + + if (longVector == null) { + longVector = new long[offset.getMaxVectorSize()]; + } + + final float[] floatVector = getFloatVector(); + for (int i = 0; i < getCurrentVectorSize(); i++) { + longVector[i] = (long) floatVector[i]; + } + + longId = offset.getId(); + return longVector; + } + + @Override + public double[] getDoubleVector() + { + if (doubleId == offset.getId()) { + return doubleVector; + } + + if (doubleVector == null) { + doubleVector = new double[offset.getMaxVectorSize()]; + } + + final float[] floatVector = getFloatVector(); + for (int i = 0; i < getCurrentVectorSize(); i++) { + doubleVector[i] = (double) floatVector[i]; + } + + doubleId = offset.getId(); + return doubleVector; + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/vector/BaseLongVectorValueSelector.java b/processing/src/main/java/org/apache/druid/segment/vector/BaseLongVectorValueSelector.java new file mode 100644 index 00000000000..ddf48657744 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/vector/BaseLongVectorValueSelector.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.segment.vector; + +public abstract class BaseLongVectorValueSelector implements VectorValueSelector +{ + protected final ReadableVectorOffset offset; + + private int floatId = ReadableVectorOffset.NULL_ID; + private int doubleId = ReadableVectorOffset.NULL_ID; + + private float[] floatVector; + private double[] doubleVector; + + public BaseLongVectorValueSelector(final ReadableVectorOffset offset) + { + this.offset = offset; + } + + @Override + public int getCurrentVectorSize() + { + return offset.getCurrentVectorSize(); + } + + @Override + public int getMaxVectorSize() + { + return offset.getMaxVectorSize(); + } + + @Override + public float[] getFloatVector() + { + if (floatId == offset.getId()) { + return floatVector; + } + + if (floatVector == null) { + floatVector = new float[offset.getMaxVectorSize()]; + } + + final long[] longVector = getLongVector(); + for (int i = 0; i < getCurrentVectorSize(); i++) { + floatVector[i] = (float) longVector[i]; + } + + floatId = offset.getId(); + return floatVector; + } + + @Override + public double[] getDoubleVector() + { + if (doubleId == offset.getId()) { + return doubleVector; + } + + if (doubleVector == null) { + doubleVector = new double[offset.getMaxVectorSize()]; + } + + final long[] longVector = getLongVector(); + for (int i = 0; i < getCurrentVectorSize(); i++) { + doubleVector[i] = (double) longVector[i]; + } + + doubleId = offset.getId(); + return doubleVector; + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/vector/BitmapVectorOffset.java b/processing/src/main/java/org/apache/druid/segment/vector/BitmapVectorOffset.java new file mode 100644 index 00000000000..7d26480938d --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/vector/BitmapVectorOffset.java @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.vector; + +import com.google.common.base.Preconditions; +import org.apache.druid.collections.bitmap.ImmutableBitmap; +import org.roaringbitmap.BatchIterator; + +public class BitmapVectorOffset implements VectorOffset +{ + private final ImmutableBitmap bitmap; + private final int[] offsets; + private final int startOffset; + private final int endOffset; + + private BatchIterator iterator; + private boolean pastEnd; + private int currentVectorSize; + + public BitmapVectorOffset( + final int vectorSize, + final ImmutableBitmap bitmap, + final int startOffset, + final int endOffset + ) + { + this.bitmap = bitmap; + this.offsets = new int[vectorSize]; + this.startOffset = startOffset; + this.endOffset = endOffset; + reset(); + } + + @Override + public int getId() + { + // Should not be called when the offset is empty. 
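// The bitmap iterator only moves forward, so offsets[0] differs for every vector produced between resets, which makes it usable as the id.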
+ Preconditions.checkState(currentVectorSize > 0, "currentVectorSize > 0"); + return offsets[0]; + } + + @Override + public void advance() + { + currentVectorSize = 0; + + if (pastEnd) { + return; + } + + while (currentVectorSize == 0 && iterator.hasNext()) { + final int numRead = iterator.nextBatch(offsets); + + int from = 0; + while (from < numRead && offsets[from] < startOffset) { + from++; + } + + if (from > 0) { + System.arraycopy(offsets, from, offsets, 0, numRead - from); + } + + int to = numRead - from; + while (to > 0 && offsets[to - 1] >= endOffset) { + pastEnd = true; + to--; + } + + currentVectorSize = to; + } + } + + @Override + public boolean isDone() + { + return currentVectorSize == 0; + } + + @Override + public boolean isContiguous() + { + return false; + } + + @Override + public int getMaxVectorSize() + { + return offsets.length; + } + + @Override + public int getCurrentVectorSize() + { + return currentVectorSize; + } + + @Override + public int getStartOffset() + { + throw new UnsupportedOperationException("not contiguous"); + } + + @Override + public int[] getOffsets() + { + return offsets; + } + + @Override + public void reset() + { + iterator = bitmap.batchIterator(); + currentVectorSize = 0; + pastEnd = false; + advance(); + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/vector/FilteredVectorOffset.java b/processing/src/main/java/org/apache/druid/segment/vector/FilteredVectorOffset.java new file mode 100644 index 00000000000..a6313339c66 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/vector/FilteredVectorOffset.java @@ -0,0 +1,175 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.segment.vector; + +import com.google.common.base.Preconditions; +import org.apache.druid.java.util.common.ISE; +import org.apache.druid.query.filter.Filter; +import org.apache.druid.query.filter.vector.ReadableVectorMatch; +import org.apache.druid.query.filter.vector.VectorMatch; +import org.apache.druid.query.filter.vector.VectorValueMatcher; + +public class FilteredVectorOffset implements VectorOffset +{ + private final VectorOffset baseOffset; + private final VectorValueMatcher filterMatcher; + private final int[] offsets; + private int currentVectorSize = 0; + private boolean allTrue = false; + + private FilteredVectorOffset(final VectorOffset baseOffset, final VectorValueMatcher filterMatcher) + { + this.baseOffset = baseOffset; + this.filterMatcher = filterMatcher; + this.offsets = new int[baseOffset.getMaxVectorSize()]; + advanceWhileVectorIsEmptyAndPopulateOffsets(); + } + + public static FilteredVectorOffset create( + final VectorOffset baseOffset, + final VectorColumnSelectorFactory baseColumnSelectorFactory, + final Filter filter + ) + { + // This is not the same logic as the row-by-row FilteredOffset, which uses bitmaps whenever possible. + // I am not convinced that approach is best in all cases (it's potentially too eager) and also have not implemented + // it for vector matchers yet. So let's keep this method simple for now, and try to harmonize them in the future. + Preconditions.checkState(filter.canVectorizeMatcher(), "Cannot vectorize"); + final VectorValueMatcher filterMatcher = filter.makeVectorMatcher(baseColumnSelectorFactory); + return new FilteredVectorOffset(baseOffset, filterMatcher); + } + + @Override + public int getId() + { + // Should not be called when the offset is empty. + Preconditions.checkState(currentVectorSize > 0, "currentVectorSize > 0"); + return baseOffset.getId(); + } + + @Override + public void advance() + { + baseOffset.advance(); + advanceWhileVectorIsEmptyAndPopulateOffsets(); + } + + @Override + public boolean isDone() + { + return currentVectorSize == 0; + } + + @Override + public boolean isContiguous() + { + return allTrue && baseOffset.isContiguous(); + } + + @Override + public int getMaxVectorSize() + { + return baseOffset.getMaxVectorSize(); + } + + @Override + public int getCurrentVectorSize() + { + return currentVectorSize; + } + + @Override + public int getStartOffset() + { + if (isContiguous()) { + return baseOffset.getStartOffset(); + } else { + throw new ISE("Cannot call getStartOffset when not contiguous!"); + } + } + + @Override + public int[] getOffsets() + { + if (isContiguous()) { + throw new ISE("Cannot call getOffsets when not contiguous!"); + } else if (allTrue) { + return baseOffset.getOffsets(); + } else { + return offsets; + } + } + + private void advanceWhileVectorIsEmptyAndPopulateOffsets() + { + allTrue = false; + + int j = 0; + + while (j == 0) { + if (baseOffset.isDone()) { + currentVectorSize = 0; + return; + } + + final ReadableVectorMatch match = filterMatcher.match(VectorMatch.allTrue(baseOffset.getCurrentVectorSize())); + + if (match.isAllTrue(baseOffset.getCurrentVectorSize())) { + currentVectorSize = baseOffset.getCurrentVectorSize(); + allTrue = true; + return; + } else if (match.isAllFalse()) { + baseOffset.advance(); + } else { + final int[] selection = match.getSelection(); + final int selectionSize = match.getSelectionSize(); + + if (baseOffset.isContiguous()) { + final int startOffset = baseOffset.getStartOffset(); + + for (int i = 0; i < selectionSize; i++) { + 
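+ // selection[i] is a position relative to the start of the current base batch, so adding startOffset yields an absolute row id.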
offsets[j++] = startOffset + selection[i]; + } + } else { + final int[] baseOffsets = baseOffset.getOffsets(); + + for (int i = 0; i < selectionSize; i++) { + offsets[j++] = baseOffsets[selection[i]]; + } + } + + if (j == 0) { + baseOffset.advance(); + } + } + } + + currentVectorSize = j; + } + + @Override + public void reset() + { + currentVectorSize = 0; + allTrue = false; + baseOffset.reset(); + advanceWhileVectorIsEmptyAndPopulateOffsets(); + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/vector/MultiValueDimensionVectorSelector.java b/processing/src/main/java/org/apache/druid/segment/vector/MultiValueDimensionVectorSelector.java new file mode 100644 index 00000000000..baacb3be28b --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/vector/MultiValueDimensionVectorSelector.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.vector; + +import org.apache.druid.segment.DimensionDictionarySelector; +import org.apache.druid.segment.data.IndexedInts; + +/** + * Vectorized selector for a multi-valued string-typed column. + * + * @see org.apache.druid.segment.DimensionSelector, the non-vectorized version. + * @see SingleValueDimensionVectorSelector, the singly-valued version. + */ +public interface MultiValueDimensionVectorSelector extends DimensionDictionarySelector, VectorSizeInspector +{ + /** + * Get the current vector. The array will be reused, so it is not a good idea to retain a reference to it. + */ + IndexedInts[] getRowVector(); +} diff --git a/processing/src/main/java/org/apache/druid/segment/vector/NilVectorSelector.java b/processing/src/main/java/org/apache/druid/segment/vector/NilVectorSelector.java new file mode 100644 index 00000000000..3aeb32c1f26 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/vector/NilVectorSelector.java @@ -0,0 +1,178 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.segment.vector; + +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.segment.IdLookup; +import org.apache.druid.segment.QueryableIndexStorageAdapter; + +import javax.annotation.Nullable; + +public class NilVectorSelector + implements VectorValueSelector, VectorObjectSelector, SingleValueDimensionVectorSelector, IdLookup +{ + private static final boolean[] DEFAULT_NULLS_VECTOR = new boolean[QueryableIndexStorageAdapter.DEFAULT_VECTOR_SIZE]; + private static final int[] DEFAULT_INT_VECTOR = new int[QueryableIndexStorageAdapter.DEFAULT_VECTOR_SIZE]; + private static final long[] DEFAULT_LONG_VECTOR = new long[QueryableIndexStorageAdapter.DEFAULT_VECTOR_SIZE]; + private static final float[] DEFAULT_FLOAT_VECTOR = new float[QueryableIndexStorageAdapter.DEFAULT_VECTOR_SIZE]; + private static final double[] DEFAULT_DOUBLE_VECTOR = new double[QueryableIndexStorageAdapter.DEFAULT_VECTOR_SIZE]; + private static final Object[] DEFAULT_OBJECT_VECTOR = new Object[QueryableIndexStorageAdapter.DEFAULT_VECTOR_SIZE]; + + static { + for (int i = 0; i < DEFAULT_NULLS_VECTOR.length; i++) { + DEFAULT_NULLS_VECTOR[i] = true; + } + } + + private final VectorSizeInspector vectorSizeInspector; + private final boolean[] nulls; + private final int[] ints; + private final long[] longs; + private final float[] floats; + private final double[] doubles; + private final Object[] objects; + + private NilVectorSelector( + final VectorSizeInspector vectorSizeInspector, + final boolean[] nulls, + final int[] ints, + final long[] longs, + final float[] floats, + final double[] doubles, + final Object[] objects + ) + { + this.vectorSizeInspector = vectorSizeInspector; + this.nulls = nulls; + this.ints = ints; + this.longs = longs; + this.floats = floats; + this.doubles = doubles; + this.objects = objects; + } + + public static NilVectorSelector create(final VectorSizeInspector vectorSizeInspector) + { + if (vectorSizeInspector.getMaxVectorSize() <= QueryableIndexStorageAdapter.DEFAULT_VECTOR_SIZE) { + // Reuse static vars when possible. + return new NilVectorSelector( + vectorSizeInspector, + DEFAULT_NULLS_VECTOR, + DEFAULT_INT_VECTOR, + DEFAULT_LONG_VECTOR, + DEFAULT_FLOAT_VECTOR, + DEFAULT_DOUBLE_VECTOR, + DEFAULT_OBJECT_VECTOR + ); + } else { + return new NilVectorSelector( + vectorSizeInspector, + new boolean[vectorSizeInspector.getMaxVectorSize()], + new int[vectorSizeInspector.getMaxVectorSize()], + new long[vectorSizeInspector.getMaxVectorSize()], + new float[vectorSizeInspector.getMaxVectorSize()], + new double[vectorSizeInspector.getMaxVectorSize()], + new Object[vectorSizeInspector.getMaxVectorSize()] + ); + } + } + + @Override + public long[] getLongVector() + { + return longs; + } + + @Override + public float[] getFloatVector() + { + return floats; + } + + @Override + public double[] getDoubleVector() + { + return doubles; + } + + @Nullable + @Override + public boolean[] getNullVector() + { + return nulls; + } + + @Override + public int[] getRowVector() + { + return ints; + } + + @Override + public int getValueCardinality() + { + return 1; + } + + @Nullable + @Override + public String lookupName(final int id) + { + assert id == 0 : "id = " + id; + return null; + } + + @Override + public boolean nameLookupPossibleInAdvance() + { + return false; + } + + @Nullable + @Override + public IdLookup idLookup() + { + return this; + } + + @Override + public int lookupId(@Nullable final String name) + { + return NullHandling.isNullOrEquivalent(name) ? 
0 : -1; + } + + @Override + public Object[] getObjectVector() + { + return objects; + } + + @Override + public int getCurrentVectorSize() + { + return vectorSizeInspector.getCurrentVectorSize(); + } + + @Override + public int getMaxVectorSize() + { + return vectorSizeInspector.getMaxVectorSize(); + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/vector/NoFilterVectorOffset.java b/processing/src/main/java/org/apache/druid/segment/vector/NoFilterVectorOffset.java new file mode 100644 index 00000000000..a24fabcff0e --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/vector/NoFilterVectorOffset.java @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.vector; + +public class NoFilterVectorOffset implements VectorOffset +{ + private final int maxVectorSize; + private final int start; + private final int end; + private int theOffset; + + public NoFilterVectorOffset(final int maxVectorSize, final int start, final int end) + { + this.maxVectorSize = maxVectorSize; + this.start = start; + this.end = end; + reset(); + } + + @Override + public int getId() + { + return theOffset; + } + + @Override + public void advance() + { + theOffset += maxVectorSize; + } + + @Override + public boolean isDone() + { + return theOffset >= end; + } + + @Override + public boolean isContiguous() + { + return true; + } + + @Override + public int getMaxVectorSize() + { + return maxVectorSize; + } + + @Override + public int getCurrentVectorSize() + { + return Math.min(maxVectorSize, end - theOffset); + } + + @Override + public int getStartOffset() + { + return theOffset; + } + + @Override + public int[] getOffsets() + { + throw new UnsupportedOperationException("no filter"); + } + + @Override + public void reset() + { + theOffset = start; + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java b/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java new file mode 100644 index 00000000000..83110499862 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java @@ -0,0 +1,194 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.vector; + +import org.apache.druid.java.util.common.ISE; +import org.apache.druid.java.util.common.io.Closer; +import org.apache.druid.query.dimension.DimensionSpec; +import org.apache.druid.segment.QueryableIndex; +import org.apache.druid.segment.QueryableIndexStorageAdapter; +import org.apache.druid.segment.column.BaseColumn; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnHolder; +import org.apache.druid.segment.column.DictionaryEncodedColumn; +import org.apache.druid.segment.column.ValueType; + +import javax.annotation.Nullable; +import java.util.HashMap; +import java.util.Map; + +public class QueryableIndexVectorColumnSelectorFactory implements VectorColumnSelectorFactory +{ + private final QueryableIndex index; + private final ReadableVectorOffset offset; + private final Closer closer; + private final Map<String, BaseColumn> columnCache; + + // Shared selectors are useful, since they cache vectors internally, and we can avoid recomputation if the same + // selector is used by more than one part of a query. + private final Map<DimensionSpec, SingleValueDimensionVectorSelector> singleValueDimensionSelectorCache; + private final Map<DimensionSpec, MultiValueDimensionVectorSelector> multiValueDimensionSelectorCache; + private final Map<String, VectorValueSelector> valueSelectorCache; + private final Map<String, VectorObjectSelector> objectSelectorCache; + + public QueryableIndexVectorColumnSelectorFactory( + final QueryableIndex index, + final ReadableVectorOffset offset, + final Closer closer, + final Map<String, BaseColumn> columnCache + ) + { + this.index = index; + this.offset = offset; + this.closer = closer; + this.columnCache = columnCache; + this.singleValueDimensionSelectorCache = new HashMap<>(); + this.multiValueDimensionSelectorCache = new HashMap<>(); + this.valueSelectorCache = new HashMap<>(); + this.objectSelectorCache = new HashMap<>(); + } + + @Override + public int getMaxVectorSize() + { + return offset.getMaxVectorSize(); + } + + @Override + public MultiValueDimensionVectorSelector makeMultiValueDimensionSelector(final DimensionSpec dimensionSpec) + { + if (!dimensionSpec.canVectorize()) { + throw new ISE("DimensionSpec[%s] cannot be vectorized", dimensionSpec); + } + + return multiValueDimensionSelectorCache.computeIfAbsent( + dimensionSpec, + spec -> { + final ColumnHolder holder = index.getColumnHolder(spec.getDimension()); + if (holder == null + || !holder.getCapabilities().isDictionaryEncoded() + || holder.getCapabilities().getType() != ValueType.STRING + || !holder.getCapabilities().hasMultipleValues()) { + throw new ISE( + "Column[%s] is not a multi-value string column, do not ask for a multi-value selector", + spec.getDimension() + ); + } + + @SuppressWarnings("unchecked") + final DictionaryEncodedColumn<String> dictionaryEncodedColumn = (DictionaryEncodedColumn<String>) + getCachedColumn(spec.getDimension()); + + final MultiValueDimensionVectorSelector selector = dictionaryEncodedColumn.makeMultiValueDimensionVectorSelector( + offset + ); + + return spec.decorate(selector); + } + ); + } + + @Override + public SingleValueDimensionVectorSelector makeSingleValueDimensionSelector(final DimensionSpec dimensionSpec) + { + if (!dimensionSpec.canVectorize()) {
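+ // Defensive check: callers are expected to consult canVectorize() before requesting selectors.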
throw new ISE("DimensionSpec[%s] cannot be vectorized", dimensionSpec); + } + + return singleValueDimensionSelectorCache.computeIfAbsent( + dimensionSpec, + spec -> { + final ColumnHolder holder = index.getColumnHolder(spec.getDimension()); + if (holder == null + || !holder.getCapabilities().isDictionaryEncoded() + || holder.getCapabilities().getType() != ValueType.STRING) { + // Asking for a single-value dimension selector on a non-string column gets you a bunch of nulls. + return NilVectorSelector.create(offset); + } + + if (holder.getCapabilities().hasMultipleValues()) { + // Asking for a single-value dimension selector on a multi-value column gets you an error. + throw new ISE("Column[%s] is multi-value, do not ask for a single-value selector", spec.getDimension()); + } + + @SuppressWarnings("unchecked") + final DictionaryEncodedColumn dictionaryEncodedColumn = (DictionaryEncodedColumn) + getCachedColumn(spec.getDimension()); + + final SingleValueDimensionVectorSelector selector = + dictionaryEncodedColumn.makeSingleValueDimensionVectorSelector(offset); + + return spec.decorate(selector); + } + ); + } + + @Override + public VectorValueSelector makeValueSelector(final String columnName) + { + return valueSelectorCache.computeIfAbsent( + columnName, + name -> { + final BaseColumn column = getCachedColumn(name); + if (column == null) { + return NilVectorSelector.create(offset); + } else { + return column.makeVectorValueSelector(offset); + } + } + ); + } + + @Override + public VectorObjectSelector makeObjectSelector(final String columnName) + { + return objectSelectorCache.computeIfAbsent( + columnName, + name -> { + final BaseColumn column = getCachedColumn(name); + if (column == null) { + return NilVectorSelector.create(offset); + } else { + return column.makeVectorObjectSelector(offset); + } + } + ); + } + + @Nullable + private BaseColumn getCachedColumn(final String columnName) + { + return columnCache.computeIfAbsent(columnName, name -> { + ColumnHolder holder = index.getColumnHolder(name); + if (holder != null) { + return closer.register(holder.getColumn()); + } else { + return null; + } + }); + } + + @Nullable + @Override + public ColumnCapabilities getColumnCapabilities(final String columnName) + { + return QueryableIndexStorageAdapter.getColumnCapabilities(index, columnName); + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/vector/ReadableVectorOffset.java b/processing/src/main/java/org/apache/druid/segment/vector/ReadableVectorOffset.java new file mode 100644 index 00000000000..4fb494f9bc6 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/vector/ReadableVectorOffset.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.segment.vector; + +/** + * Provides a batch of offsets, ostensibly as indexes into an array. + * + * A ReadableVectorOffset should be given to classes (e.g. column selector objects) by something which keeps a + * reference to the base VectorOffset object and increments it. + * + * @see VectorOffset, the movable version. + * @see org.apache.druid.segment.data.ReadableOffset, the non-vectorized version. + */ +public interface ReadableVectorOffset extends VectorSizeInspector +{ + /** + * A marker value that will never be returned by "getId". + */ + int NULL_ID = -1; + + /** + * Returns an integer that uniquely identifies the current position of the offset. Should *not* be construed as an + * actual offset; for that, use "getStartOffset" or "getOffsets". This is useful for caching: it is safe to assume + * nothing has changed in the offset so long as the id remains the same. + */ + int getId(); + + /** + * Checks if the current batch is a contiguous range or not. This is only good for one batch at a time, since the + * same object may return some contiguous batches and some non-contiguous batches. So, callers must check this method + * each time they want to retrieve the current batch of offsets. + */ + boolean isContiguous(); + + /** + * If "isContiguous" is true, this method returns the start offset of the range. The length of the range is + * given by "getCurrentVectorSize". + * + * Throws an exception if "isContiguous" is false. + */ + int getStartOffset(); + + /** + * If "isContiguous" is false, this method returns a batch of offsets. The array may be longer than the number of + * valid offsets, so callers need to check "getCurrentVectorSize" too. + * + * Throws an exception if "isContiguous" is true. + */ + int[] getOffsets(); +} diff --git a/processing/src/main/java/org/apache/druid/segment/vector/SingleValueDimensionVectorSelector.java b/processing/src/main/java/org/apache/druid/segment/vector/SingleValueDimensionVectorSelector.java new file mode 100644 index 00000000000..f9fd484ad38 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/vector/SingleValueDimensionVectorSelector.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.vector; + +import org.apache.druid.segment.DimensionDictionarySelector; + +/** + * Vectorized selector for a singly-valued string-typed column. Unlike the non-vectorized version, this is done as + * a separate interface, which is useful since it allows "getRowVector" to be a primitive int array. + * + * @see org.apache.druid.segment.DimensionSelector, the non-vectorized version. + * @see MultiValueDimensionVectorSelector, the multi-valued version. 
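+ *
+ * A rough usage sketch (the "selector" variable is hypothetical, not part of this interface):
+ *
+ * <pre>
+ *   final int[] rows = selector.getRowVector();
+ *   for (int i = 0; i < selector.getCurrentVectorSize(); i++) {
+ *     final String value = selector.lookupName(rows[i]);
+ *     // ... process "value" ...
+ *   }
+ * </pre>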
+ */ +public interface SingleValueDimensionVectorSelector extends DimensionDictionarySelector, VectorSizeInspector +{ + /** + * Get the current vector. The array will be reused, so it is not a good idea to retain a reference to it. + */ + int[] getRowVector(); +} diff --git a/processing/src/main/java/org/apache/druid/segment/vector/VectorColumnSelectorFactory.java b/processing/src/main/java/org/apache/druid/segment/vector/VectorColumnSelectorFactory.java new file mode 100644 index 00000000000..1634cc6ab3a --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/vector/VectorColumnSelectorFactory.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.vector; + +import org.apache.druid.query.dimension.DimensionSpec; +import org.apache.druid.segment.column.ColumnCapabilities; + +import javax.annotation.Nullable; + +/** + * Vectorized column selector factory, obtained from {@link VectorCursor#getColumnSelectorFactory()}. + * + * @see org.apache.druid.segment.ColumnSelectorFactory, the non-vectorized version. + */ +public interface VectorColumnSelectorFactory +{ + /** + * Returns the maximum vector size for the {@link VectorCursor} that generated this object. + * + * @see VectorCursor#getMaxVectorSize() + */ + int getMaxVectorSize(); + + /** + * Returns a string-typed, single-value-per-row column selector. + */ + SingleValueDimensionVectorSelector makeSingleValueDimensionSelector(DimensionSpec dimensionSpec); + + /** + * Returns a string-typed, multi-value-per-row column selector. + */ + MultiValueDimensionVectorSelector makeMultiValueDimensionSelector(DimensionSpec dimensionSpec); + + /** + * Returns a primitive column selector. + */ + VectorValueSelector makeValueSelector(String column); + + /** + * Returns an object selector, useful for complex columns. + */ + VectorObjectSelector makeObjectSelector(String column); + + /** + * Returns capabilities of a particular column, or null if the column doesn't exist. Unlike ColumnSelectorFactory, + * null does not potentially indicate a dynamically discovered column. + * + * @return capabilities, or null if the column doesn't exist. + */ + @Nullable + ColumnCapabilities getColumnCapabilities(String column); +} diff --git a/processing/src/main/java/org/apache/druid/segment/vector/VectorCursor.java b/processing/src/main/java/org/apache/druid/segment/vector/VectorCursor.java new file mode 100644 index 00000000000..fde58855a53 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/vector/VectorCursor.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.vector; + +import java.io.Closeable; + +/** + * Vectorized cursor used during query execution. VectorCursors are returned by + * {@link org.apache.druid.segment.StorageAdapter#makeVectorCursor} and are created by + * {@link org.apache.druid.segment.QueryableIndexCursorSequenceBuilder#buildVectorized}. + * + * Unlike the non-vectorized version, VectorCursor does not have a getTime() method. This is because we are trying to + * avoid creating needlessly-small vectors when the time granularity is very fine. See + * {@link org.apache.druid.query.vector.VectorCursorGranularizer} for a helper that makes it easier for query engines to + * do their own time granularization. + * + * An example of how to use the methods in this class: + * + *
+ * <pre>
+ *   try (VectorCursor cursor = adapter.makeVectorCursor(...)) {
+ *     // ProcessorClass is some vectorized processor class.
+ *     ProcessorClass o = makeProcessor(cursor.getColumnSelectorFactory());
+ *     for (; !cursor.isDone(); cursor.advance()) {
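+ *       // Each iteration handles one vector of cursor.getCurrentVectorSize() rows.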
+ *       o.process();
+ *     }
+ *   }
+ * </pre>
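+ *
+ * The try-with-resources block matters: VectorCursor extends Closeable and must be closed when iteration is done.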
+ * + * @see org.apache.druid.segment.Cursor, the non-vectorized version. + */ +public interface VectorCursor extends VectorSizeInspector, Closeable +{ + /** + * Returns a vectorized column selector factory. + */ + VectorColumnSelectorFactory getColumnSelectorFactory(); + + /** + * Advances the cursor, skipping forward a number of rows equal to the current vector size. + */ + void advance(); + + /** + * Returns false if the cursor is readable, true if it has nothing left to read. + */ + boolean isDone(); + + /** + * Resets the cursor back to its original state. Useful for query engines that want to make multiple passes. + */ + @SuppressWarnings("unused") /* Not currently used, but anticipated to be used by topN in the future. */ + void reset(); + + /** + * Close the cursor and release its resources. + */ + @Override + void close(); +} diff --git a/processing/src/main/java/org/apache/druid/segment/vector/VectorObjectSelector.java b/processing/src/main/java/org/apache/druid/segment/vector/VectorObjectSelector.java new file mode 100644 index 00000000000..f64e861396d --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/vector/VectorObjectSelector.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.vector; + +/** + * Vectorized object selector, useful for complex columns. + * + * @see org.apache.druid.segment.ColumnValueSelector, the non-vectorized version. + */ +public interface VectorObjectSelector extends VectorSizeInspector +{ + /** + * Get the current vector. Individual elements of the array may be null. + */ + Object[] getObjectVector(); +} diff --git a/processing/src/main/java/org/apache/druid/segment/vector/VectorOffset.java b/processing/src/main/java/org/apache/druid/segment/vector/VectorOffset.java new file mode 100644 index 00000000000..e2aefc8d97b --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/vector/VectorOffset.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.segment.vector; + +/** + * The movable version of ReadableVectorOffset. + * + * @see org.apache.druid.segment.data.Offset, the non-vectorized version. + */ +public interface VectorOffset extends ReadableVectorOffset +{ + /** + * Advances by one batch. + */ + void advance(); + + /** + * Checks if iteration is "done", meaning the current batch of offsets is empty, and there are no more coming. + */ + boolean isDone(); + + /** + * Resets the object back to its original state. Once this is done, iteration can begin anew. + */ + void reset(); +} diff --git a/processing/src/main/java/org/apache/druid/segment/vector/VectorSelectorUtils.java b/processing/src/main/java/org/apache/druid/segment/vector/VectorSelectorUtils.java new file mode 100644 index 00000000000..112d1b119d5 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/vector/VectorSelectorUtils.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.vector; + +import org.apache.druid.collections.bitmap.ImmutableBitmap; + +import javax.annotation.Nullable; + +public class VectorSelectorUtils +{ + /** + * Helper used by ColumnarLongs, ColumnarDoubles, etc. to populate null-flag vectors. + */ + @Nullable + public static boolean[] populateNullVector( + @Nullable final boolean[] nullVector, + final ReadableVectorOffset offset, + final ImmutableBitmap nullValueBitmap + ) + { + if (nullValueBitmap.isEmpty()) { + return null; + } + + final boolean[] retVal; + + if (nullVector != null) { + retVal = nullVector; + } else { + retVal = new boolean[offset.getMaxVectorSize()]; + } + + // Probably not super efficient to call "get" so much, but, no worse than the non-vectorized version. + if (offset.isContiguous()) { + for (int i = 0; i < offset.getCurrentVectorSize(); i++) { + retVal[i] = nullValueBitmap.get(i + offset.getStartOffset()); + } + } else { + for (int i = 0; i < offset.getCurrentVectorSize(); i++) { + retVal[i] = nullValueBitmap.get(offset.getOffsets()[i]); + } + } + + return retVal; + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/vector/VectorSizeInspector.java b/processing/src/main/java/org/apache/druid/segment/vector/VectorSizeInspector.java new file mode 100644 index 00000000000..a5d6c8a1508 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/vector/VectorSizeInspector.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.vector; + +/** + * Common interface for vectorized column selectors, matchers, etc, where callers are given the ability to inspect + * current and maximum vector sizes. + */ +public interface VectorSizeInspector +{ + /** + * Returns the maximum vector size for this cursor. It will not change for the lifetime of this cursor, and is + * generally used to allocate scratch arrays for later processing. Will always be greater than zero. + */ + int getMaxVectorSize(); + + /** + * Returns the current vector size for this cursor. Will never be larger than the max size returned by + * {@link #getMaxVectorSize()}. + */ + int getCurrentVectorSize(); +} diff --git a/processing/src/main/java/org/apache/druid/segment/vector/VectorValueSelector.java b/processing/src/main/java/org/apache/druid/segment/vector/VectorValueSelector.java new file mode 100644 index 00000000000..216967cc930 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/vector/VectorValueSelector.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.vector; + +import javax.annotation.Nullable; + +/** + * Vectorized selector for primitive columns. + * + * @see org.apache.druid.segment.ColumnValueSelector, the non-vectorized version. + */ +public interface VectorValueSelector extends VectorSizeInspector +{ + /** + * Get the current vector, casting to longs as necessary. The array will be reused, so it is not a good idea to + * retain a reference to it. + */ + long[] getLongVector(); + + /** + * Get the current vector, casting to floats as necessary. The array will be reused, so it is not a good idea to + * retain a reference to it. + */ + float[] getFloatVector(); + + /** + * Get the current vector, casting to doubles as necessary. The array will be reused, so it is not a good idea to + * retain a reference to it. + */ + double[] getDoubleVector(); + + /** + * Gets a vector of booleans signifying which rows are null and which are not (true for null). Returns null if it is + * known that there are no nulls in the vector, possibly because the column is non-nullable. 
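+ *
+ * A null-aware summation sketch (the "selector" variable is hypothetical, not part of this interface):
+ *
+ * <pre>
+ *   final long[] longs = selector.getLongVector();
+ *   final boolean[] nulls = selector.getNullVector();
+ *   long sum = 0;
+ *   for (int i = 0; i < selector.getCurrentVectorSize(); i++) {
+ *     if (nulls == null || !nulls[i]) {
+ *       sum += longs[i];
+ *     }
+ *   }
+ * </pre>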
+ */ + @Nullable + boolean[] getNullVector(); +} diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputCachingExpressionColumnValueSelector.java b/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputCachingExpressionColumnValueSelector.java index d9581e798a8..0b8c4435362 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputCachingExpressionColumnValueSelector.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputCachingExpressionColumnValueSelector.java @@ -27,6 +27,7 @@ import org.apache.druid.math.expr.Expr; import org.apache.druid.math.expr.ExprEval; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.DimensionDictionarySelector; import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.data.IndexedInts; @@ -64,7 +65,7 @@ public class SingleStringInputCachingExpressionColumnValueSelector implements Co final Supplier inputSupplier = ExpressionSelectors.supplierFromDimensionSelector(selector, false); this.bindings = name -> inputSupplier.get(); - if (selector.getValueCardinality() == DimensionSelector.CARDINALITY_UNKNOWN) { + if (selector.getValueCardinality() == DimensionDictionarySelector.CARDINALITY_UNKNOWN) { throw new ISE("Selector must have a dictionary"); } else if (selector.getValueCardinality() <= CACHE_SIZE) { arrayEvalCache = new ExprEval[selector.getValueCardinality()]; diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputDimensionSelector.java b/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputDimensionSelector.java index d6462b14852..074ad739217 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputDimensionSelector.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputDimensionSelector.java @@ -25,6 +25,7 @@ import org.apache.druid.java.util.common.ISE; import org.apache.druid.math.expr.Expr; import org.apache.druid.query.filter.ValueMatcher; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.DimensionDictionarySelector; import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.DimensionSelectorUtils; import org.apache.druid.segment.IdLookup; @@ -53,7 +54,7 @@ public class SingleStringInputDimensionSelector implements DimensionSelector } // Verify selector has a working dictionary. - if (selector.getValueCardinality() == DimensionSelector.CARDINALITY_UNKNOWN + if (selector.getValueCardinality() == DimensionDictionarySelector.CARDINALITY_UNKNOWN || !selector.nameLookupPossibleInAdvance()) { throw new ISE("Selector of class[%s] does not have a dictionary, cannot use it.", selector.getClass().getName()); } diff --git a/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java b/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java index 094e8fc2c32..944139e9cd5 100644 --- a/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java +++ b/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java @@ -322,6 +322,15 @@ public class QueryRunnerTestHelper }; } + /** + * Check if a QueryRunner returned by {@link #makeQueryRunners(QueryRunnerFactory)} is vectorizable. 
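+ * The "rtIndex" and "noRollupRtIndex" runners are excluded because they are backed by an IncrementalIndex,
+ * and vectorized cursors are currently only created for QueryableIndex-based segments.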
+ */ + public static boolean isTestRunnerVectorizable(QueryRunner runner) + { + final String runnerName = runner.toString(); + return !("rtIndex".equals(runnerName) || "noRollupRtIndex".equals(runnerName)); + } + public static <T, QueryType extends Query<T>> List<QueryRunner<T>> makeQueryRunners( QueryRunnerFactory<T, QueryType> factory ) diff --git a/processing/src/test/java/org/apache/druid/query/filter/vector/VectorMatchTest.java b/processing/src/test/java/org/apache/druid/query/filter/vector/VectorMatchTest.java new file mode 100644 index 00000000000..5a675455282 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/query/filter/vector/VectorMatchTest.java @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.filter.vector; + +import org.junit.Assert; +import org.junit.Test; + +public class VectorMatchTest +{ + private static final int VECTOR_SIZE = 10; + + @Test + public void testRemoveAll() + { + assertMatchEquals( + VectorMatch.allFalse(), + copy(VectorMatch.allTrue(VECTOR_SIZE)).removeAll(VectorMatch.allTrue(VECTOR_SIZE)) + ); + + assertMatchEquals( + VectorMatch.allTrue(VECTOR_SIZE), + copy(VectorMatch.allTrue(VECTOR_SIZE)).removeAll(VectorMatch.allFalse()) + ); + + assertMatchEquals( + createMatch(new int[]{3, 6, 7, 8, 10}), + createMatch(new int[]{3, 5, 6, 7, 8, 10}).removeAll(createMatch(new int[]{4, 5, 9})) + ); + + assertMatchEquals( + createMatch(new int[]{3, 6, 7, 8, 10}), + createMatch(new int[]{3, 5, 6, 7, 8, 10}).removeAll(createMatch(new int[]{2, 5, 9})) + ); + + assertMatchEquals( + createMatch(new int[]{6, 7, 8, 10}), + createMatch(new int[]{3, 5, 6, 7, 8, 10}).removeAll(createMatch(new int[]{3, 5, 9})) + ); + + assertMatchEquals( + createMatch(new int[]{6, 7, 8}), + createMatch(new int[]{3, 5, 6, 7, 8, 10}).removeAll(createMatch(new int[]{3, 5, 10})) + ); + } + + @Test + public void testAddAll() + { + final VectorMatch scratch = VectorMatch.wrap(new int[VECTOR_SIZE]); + + assertMatchEquals( + VectorMatch.allTrue(VECTOR_SIZE), + copy(VectorMatch.allTrue(VECTOR_SIZE)).addAll(VectorMatch.allTrue(VECTOR_SIZE), scratch) + ); + + assertMatchEquals( + VectorMatch.allTrue(VECTOR_SIZE), + createMatch(new int[]{}).addAll(VectorMatch.allTrue(VECTOR_SIZE), scratch) + ); + + assertMatchEquals( + createMatch(new int[]{3, 4, 5, 6, 7, 8, 9, 10}), + createMatch(new int[]{3, 5, 6, 7, 8, 10}).addAll(createMatch(new int[]{4, 5, 9}), scratch) + ); + + assertMatchEquals( + createMatch(new int[]{3, 4, 5, 6, 7, 8, 10}), + createMatch(new int[]{3, 5, 6, 7, 8}).addAll(createMatch(new int[]{4, 5, 10}), scratch) + ); + + assertMatchEquals( + createMatch(new int[]{2, 3, 5, 6, 7, 8, 9, 10}), + createMatch(new int[]{3, 5, 6, 7, 8, 10}).addAll(createMatch(new int[]{2, 5, 9}), scratch) + ); + + assertMatchEquals( + createMatch(new int[]{3, 5, 6,
7, 8, 9, 10}), + createMatch(new int[]{3, 5, 6, 7, 8, 10}).addAll(createMatch(new int[]{3, 5, 9}), scratch) + ); + + assertMatchEquals( + createMatch(new int[]{3, 5, 6, 7, 8, 10}), + createMatch(new int[]{3, 5, 6, 7, 8, 10}).addAll(createMatch(new int[]{3, 5, 10}), scratch) + ); + } + + /** + * Useful because VectorMatch equality is based on identity, not value. (Since they are mutable.) + */ + private static void assertMatchEquals(ReadableVectorMatch expected, ReadableVectorMatch actual) + { + Assert.assertEquals(expected.toString(), actual.toString()); + } + + private static VectorMatch copy(final ReadableVectorMatch match) + { + final int[] selection = match.getSelection(); + final int[] newSelection = new int[selection.length]; + System.arraycopy(selection, 0, newSelection, 0, selection.length); + return VectorMatch.wrap(newSelection).setSelectionSize(match.getSelectionSize()); + } + + private static VectorMatch createMatch(final int[] selection) + { + final VectorMatch match = VectorMatch.wrap(new int[VECTOR_SIZE]); + System.arraycopy(selection, 0, match.getSelection(), 0, selection.length); + match.setSelectionSize(selection.length); + return match; + } +} diff --git a/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java index 27d0da9b952..3eb69c0dda4 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java @@ -25,6 +25,7 @@ import com.google.common.base.Supplier; import com.google.common.base.Suppliers; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.common.collect.Ordering; @@ -186,8 +187,10 @@ public class GroupByQueryRunnerTest private static final Closer resourceCloser = Closer.create(); private final QueryRunner runner; + private final String runnerName; private final GroupByQueryRunnerFactory factory; private final GroupByQueryConfig config; + private final boolean vectorize; @Rule public ExpectedException expectedException = ExpectedException.none(); @@ -236,6 +239,13 @@ public class GroupByQueryRunnerTest return GroupByStrategySelector.STRATEGY_V2; } + @Override + public int getBufferGrouperInitialBuckets() + { + // Small initial table to force some growing. 
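+ // A tiny initial bucket count forces the hash table to grow during the tests, exercising the growth code path.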
+ return 4; + } + @Override public String toString() { @@ -400,7 +410,7 @@ public class GroupByQueryRunnerTest } @Parameterized.Parameters(name = "{0}") - public static Collection constructorFeeder() + public static Collection constructorFeeder() { final List constructors = new ArrayList<>(); for (GroupByQueryConfig config : testConfigs()) { @@ -408,12 +418,14 @@ public class GroupByQueryRunnerTest final GroupByQueryRunnerFactory factory = factoryAndCloser.lhs; resourceCloser.register(factoryAndCloser.rhs); for (QueryRunner runner : QueryRunnerTestHelper.makeQueryRunners(factory)) { - final String testName = StringUtils.format( - "config=%s, runner=%s", - config.toString(), - runner.toString() - ); - constructors.add(new Object[]{testName, config, factory, runner}); + for (boolean vectorize : ImmutableList.of(false, true)) { + final String testName = StringUtils.format("config=%s, runner=%s, vectorize=%s", config, runner, vectorize); + + // Add vectorization tests for any indexes that support it. + if (!vectorize || QueryRunnerTestHelper.isTestRunnerVectorizable(runner)) { + constructors.add(new Object[]{testName, config, factory, runner, vectorize}); + } + } } } @@ -430,19 +442,21 @@ public class GroupByQueryRunnerTest String testName, GroupByQueryConfig config, GroupByQueryRunnerFactory factory, - QueryRunner runner + QueryRunner runner, + boolean vectorize ) { this.config = config; this.factory = factory; this.runner = factory.mergeRunners(Execs.directExecutor(), ImmutableList.of(runner)); + this.runnerName = runner.toString(); + this.vectorize = vectorize; } @Test public void testGroupBy() { - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -700,8 +714,10 @@ public class GroupByQueryRunnerTest @Test public void testGroupByOnMissingColumn() { - GroupByQuery query = GroupByQuery - .builder() + // Cannot vectorize due to extraction dimension spec. + cannotVectorize(); + + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions( @@ -727,8 +743,7 @@ public class GroupByQueryRunnerTest @Test public void testGroupByWithStringPostAggregator() { - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -958,8 +973,17 @@ public class GroupByQueryRunnerTest @Test public void testGroupByWithStringVirtualColumn() { - GroupByQuery query = GroupByQuery - .builder() + // Cannot vectorize due to virtual columns. + cannotVectorize(); + + // Cannot run with groupBy v1 on IncrementalIndex, because expressions would turn multi-value inputs + // into cardinalityless selectors, and groupBy v1 requires selectors that have a cardinality. 
+ if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1) + && ImmutableSet.of("rtIndex", "noRollupRtIndex").contains(runnerName)) { + expectedException.expectMessage("GroupBy v1 does not support dimension selectors with unknown cardinality."); + } + + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setVirtualColumns( @@ -1020,8 +1044,7 @@ public class GroupByQueryRunnerTest @Test public void testGroupByWithDurationGranularity() { - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -1061,8 +1084,7 @@ public class GroupByQueryRunnerTest expectedException.expect(IllegalArgumentException.class); expectedException.expectMessage("[alias] already defined"); - GroupByQuery - .builder() + makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -1077,14 +1099,13 @@ public class GroupByQueryRunnerTest if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) { return; } - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("quality", "alias")) .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index")) .setGranularity(QueryRunnerTestHelper.dayGran) - .setContext(ImmutableMap.of("sortByDimsFirst", true)) + .overrideContext(ImmutableMap.of("sortByDimsFirst", true)) .build(); List expectedResults = Arrays.asList( @@ -1123,14 +1144,13 @@ public class GroupByQueryRunnerTest @Test public void testGroupByWithChunkPeriod() { - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("quality", "alias")) .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index")) .setGranularity(QueryRunnerTestHelper.allGran) - .setContext(ImmutableMap.of("chunkPeriod", "P1D")) + .overrideContext(ImmutableMap.of("chunkPeriod", "P1D")) .build(); List expectedResults = Arrays.asList( @@ -1152,8 +1172,7 @@ public class GroupByQueryRunnerTest @Test public void testGroupByNoAggregators() { - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -1189,8 +1208,10 @@ public class GroupByQueryRunnerTest @Test public void testMultiValueDimension() { - GroupByQuery query = GroupByQuery - .builder() + // Cannot vectorize due to multi-value dimensions. 
+    cannotVectorize();
+
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setDimensions(new DefaultDimensionSpec("placementish", "alias"))
@@ -1217,8 +1238,10 @@ public class GroupByQueryRunnerTest
   @Test
   public void testTwoMultiValueDimensions()
   {
-    GroupByQuery query = GroupByQuery
-        .builder()
+    // Cannot vectorize due to multi-value dimensions.
+    cannotVectorize();
+
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setDimFilter(new SelectorDimFilter("placementish", "a", null))
@@ -1283,8 +1306,10 @@ public class GroupByQueryRunnerTest
   @Test
   public void testMultipleDimensionsOneOfWhichIsMultiValue1()
   {
-    GroupByQuery query = GroupByQuery
-        .builder()
+    // Cannot vectorize due to multi-value dimensions.
+    cannotVectorize();
+
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setDimensions(
@@ -1502,8 +1527,10 @@ public class GroupByQueryRunnerTest
   @Test
   public void testMultipleDimensionsOneOfWhichIsMultiValueDifferentOrder()
   {
-    GroupByQuery query = GroupByQuery
-        .builder()
+    // Cannot vectorize due to multi-value dimensions.
+    cannotVectorize();
+
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setDimensions(
@@ -1721,14 +1748,13 @@ public class GroupByQueryRunnerTest
   @Test
   public void testGroupByMaxRowsLimitContextOverride()
   {
-    GroupByQuery query = GroupByQuery
-        .builder()
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setDimensions(new DefaultDimensionSpec("quality", "alias"))
         .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index"))
         .setGranularity(QueryRunnerTestHelper.dayGran)
-        .setContext(ImmutableMap.of("maxResults", 1))
+        .overrideContext(ImmutableMap.of("maxResults", 1))
         .build();
 
     List<Row> expectedResults = null;
@@ -1781,14 +1807,13 @@ public class GroupByQueryRunnerTest
   @Test
   public void testGroupByTimeoutContextOverride()
   {
-    GroupByQuery query = GroupByQuery
-        .builder()
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setDimensions(new DefaultDimensionSpec("quality", "alias"))
         .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index"))
         .setGranularity(QueryRunnerTestHelper.dayGran)
-        .setContext(ImmutableMap.of(QueryContexts.TIMEOUT_KEY, 60000))
+        .overrideContext(ImmutableMap.of(QueryContexts.TIMEOUT_KEY, 60000))
         .build();
 
     List<Row> expectedResults = Arrays.asList(
@@ -1820,14 +1845,13 @@ public class GroupByQueryRunnerTest
   @Test
   public void testGroupByMaxOnDiskStorageContextOverride()
   {
-    GroupByQuery query = GroupByQuery
-        .builder()
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setDimensions(new DefaultDimensionSpec("quality", "alias"))
         .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index"))
         .setGranularity(QueryRunnerTestHelper.dayGran)
-        .setContext(ImmutableMap.of("maxOnDiskStorage", 0, "bufferGrouperMaxSize", 1))
+        .overrideContext(ImmutableMap.of("maxOnDiskStorage", 0, "bufferGrouperMaxSize", 1))
         .build();
 
     List<Row> expectedResults = null;
@@ -1881,14 +1905,13 @@ public class GroupByQueryRunnerTest
   @Test
   public void testNotEnoughDictionarySpaceThroughContextOverride()
   {
-    GroupByQuery query = GroupByQuery
-        .builder()
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setDimensions(new DefaultDimensionSpec("quality", "alias"))
         .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index"))
         .setGranularity(QueryRunnerTestHelper.dayGran)
-        .setContext(ImmutableMap.of("maxOnDiskStorage", 0, "maxMergingDictionarySize", 1))
+        .overrideContext(ImmutableMap.of("maxOnDiskStorage", 0, "maxMergingDictionarySize", 1))
         .build();
 
     List<Row> expectedResults = null;
@@ -1942,14 +1965,13 @@ public class GroupByQueryRunnerTest
   @Test
   public void testNotEnoughDiskSpaceThroughContextOverride()
   {
-    GroupByQuery query = GroupByQuery
-        .builder()
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setDimensions(new DefaultDimensionSpec("quality", "alias"))
         .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index"))
         .setGranularity(QueryRunnerTestHelper.dayGran)
-        .setContext(ImmutableMap.of("maxOnDiskStorage", 1, "maxMergingDictionarySize", 1))
+        .overrideContext(ImmutableMap.of("maxOnDiskStorage", 1, "maxMergingDictionarySize", 1))
         .build();
 
     List<Row> expectedResults = null;
@@ -2008,8 +2030,7 @@ public class GroupByQueryRunnerTest
   @Test
   public void testSubqueryWithOuterMaxOnDiskStorageContextOverride()
   {
-    final GroupByQuery subquery = GroupByQuery
-        .builder()
+    final GroupByQuery subquery = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.fullOnIntervalSpec)
         .setDimensions(new DefaultDimensionSpec("quality", "alias"))
@@ -2020,7 +2041,7 @@ public class GroupByQueryRunnerTest
             null
         )
     )
-        .setContext(
+        .overrideContext(
             ImmutableMap.of(
                 "maxOnDiskStorage", Integer.MAX_VALUE,
                 "bufferGrouperMaxSize", Integer.MAX_VALUE
@@ -2028,13 +2049,12 @@ public class GroupByQueryRunnerTest
         )
         .build();
 
-    final GroupByQuery query = GroupByQuery
-        .builder()
+    final GroupByQuery query = makeQueryBuilder()
         .setDataSource(subquery)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setDimensions(new ArrayList<>()).setAggregatorSpecs(new CountAggregatorFactory("count"))
         .setGranularity(QueryRunnerTestHelper.allGran)
-        .setContext(ImmutableMap.of("maxOnDiskStorage", 0, "bufferGrouperMaxSize", 0))
+        .overrideContext(ImmutableMap.of("maxOnDiskStorage", 0, "bufferGrouperMaxSize", 0))
        .build();
 
     // v1 strategy throws an exception for this query because it tries to merge the noop outer
@@ -2056,6 +2076,9 @@ public class GroupByQueryRunnerTest
   @Test
   public void testGroupByWithRebucketRename()
   {
+    // Cannot vectorize due to extraction dimension spec.
+    cannotVectorize();
+
     Map<String, String> map = new HashMap<>();
     map.put("automotive", "automotive0");
     map.put("business", "business0");
@@ -2066,8 +2089,7 @@ public class GroupByQueryRunnerTest
     map.put("premium", "premium0");
     map.put("technology", "technology0");
     map.put("travel", "travel0");
-    GroupByQuery query = GroupByQuery
-        .builder()
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird).setDimensions(new ExtractionDimensionSpec(
             "quality",
@@ -2123,6 +2145,9 @@ public class GroupByQueryRunnerTest
   @Test
   public void testGroupByWithSimpleRenameRetainMissingNonInjective()
   {
+    // Cannot vectorize due to extraction dimension spec.
+    cannotVectorize();
+
     Map<String, String> map = new HashMap<>();
     map.put("automotive", "automotive0");
     map.put("business", "business0");
@@ -2133,8 +2158,7 @@ public class GroupByQueryRunnerTest
     map.put("premium", "premium0");
     map.put("technology", "technology0");
     map.put("travel", "travel0");
-    GroupByQuery query = GroupByQuery
-        .builder()
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird).setDimensions(new ExtractionDimensionSpec(
             "quality",
@@ -2200,8 +2224,7 @@ public class GroupByQueryRunnerTest
     map.put("premium", "premium0");
     map.put("technology", "technology0");
     map.put("travel", "travel0");
-    GroupByQuery query = GroupByQuery
-        .builder()
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird).setDimensions(new ExtractionDimensionSpec(
             "quality",
@@ -2267,8 +2290,7 @@ public class GroupByQueryRunnerTest
     map.put("premium", "premium0");
     map.put("technology", "technology0");
     map.put("travel", "travel0");
-    GroupByQuery query = GroupByQuery
-        .builder()
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird).setDimensions(new ExtractionDimensionSpec(
             "quality",
@@ -2333,8 +2355,7 @@ public class GroupByQueryRunnerTest
     map.put("premium", "premium0");
     map.put("technology", "technology0");
     map.put("travel", "travel0");
-    GroupByQuery query = GroupByQuery
-        .builder()
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird).setDimensions(new ExtractionDimensionSpec(
             "quality",
@@ -2389,8 +2410,7 @@ public class GroupByQueryRunnerTest
   @Test
   public void testGroupByWithUniques()
   {
-    GroupByQuery query = GroupByQuery
-        .builder()
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, QueryRunnerTestHelper.qualityUniques)
@@ -2414,8 +2434,7 @@ public class GroupByQueryRunnerTest
   @Test(expected = IllegalArgumentException.class)
   public void testGroupByWithUniquesAndPostAggWithSameName()
   {
-    GroupByQuery query = GroupByQuery
-        .builder()
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, new HyperUniquesAggregatorFactory(
@@ -2447,8 +2466,10 @@ public class GroupByQueryRunnerTest
   @Test
   public void testGroupByWithCardinality()
   {
-    GroupByQuery query = GroupByQuery
-        .builder()
+    // Cannot vectorize due to "cardinality" aggregator.
+    cannotVectorize();
+
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, QueryRunnerTestHelper.qualityCardinality)
@@ -2472,8 +2493,10 @@ public class GroupByQueryRunnerTest
   @Test
   public void testGroupByWithFirstLast()
   {
-    GroupByQuery query = GroupByQuery
-        .builder()
+    // Cannot vectorize due to "first", "last" aggregators.
+    cannotVectorize();
+
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.fullOnIntervalSpec)
         .setDimensions(new DefaultDimensionSpec("market", "market"))
@@ -2554,8 +2577,10 @@ public class GroupByQueryRunnerTest
   @Test
   public void testGroupByWithNoResult()
   {
-    GroupByQuery query = GroupByQuery
-        .builder()
+    // Cannot vectorize due to "cardinality" aggregator.
+    cannotVectorize();
+
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.emptyInterval)
         .setDimensions(new DefaultDimensionSpec("market", "market"))
@@ -2577,6 +2602,9 @@ public class GroupByQueryRunnerTest
   @Test
   public void testGroupByWithNullProducingDimExtractionFn()
   {
+    // Cannot vectorize due to extraction dimension spec.
+    cannotVectorize();
+
     final ExtractionFn nullExtractionFn = new RegexDimExtractionFn("(\\w{1})", false, null)
     {
       @Override
@@ -2591,8 +2619,7 @@ public class GroupByQueryRunnerTest
         return "mezzanine".equals(dimValue) ? null : super.apply(dimValue);
       }
     };
-    GroupByQuery query = GroupByQuery
-        .builder()
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index"))
@@ -2651,8 +2678,7 @@ public class GroupByQueryRunnerTest
       }
     };
 
-    GroupByQuery query = GroupByQuery
-        .builder()
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index"))
@@ -2692,22 +2718,22 @@ public class GroupByQueryRunnerTest
   {
     DateTimeZone tz = DateTimes.inferTzFromString("America/Los_Angeles");
 
-    GroupByQuery query = GroupByQuery.builder()
-                                     .setDataSource(QueryRunnerTestHelper.dataSource)
-                                     .setInterval("2011-03-31T00:00:00-07:00/2011-04-02T00:00:00-07:00")
-                                     .setDimensions(new DefaultDimensionSpec("quality", "alias"))
-                                     .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory(
-                                         "idx",
-                                         "index"
-                                     ))
-                                     .setGranularity(
-                                         new PeriodGranularity(
-                                             new Period("P1D"),
-                                             null,
-                                             tz
-                                         )
-                                     )
-                                     .build();
+    GroupByQuery query = makeQueryBuilder()
+        .setDataSource(QueryRunnerTestHelper.dataSource)
+        .setInterval("2011-03-31T00:00:00-07:00/2011-04-02T00:00:00-07:00")
+        .setDimensions(new DefaultDimensionSpec("quality", "alias"))
+        .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory(
+            "idx",
+            "index"
+        ))
+        .setGranularity(
+            new PeriodGranularity(
+                new Period("P1D"),
+                null,
+                tz
+            )
+        )
+        .build();
 
     List<Row> expectedResults = Arrays.asList(
         GroupByQueryRunnerTestHelper.createExpectedRow(
@@ -2882,8 +2908,7 @@ public class GroupByQueryRunnerTest
   @Test
   public void testMergeResults()
   {
-    GroupByQuery.Builder builder = GroupByQuery
-        .builder()
+    GroupByQuery.Builder builder = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setInterval("2011-04-02/2011-04-04")
         .setDimensions(new DefaultDimensionSpec("quality", "alias"))
@@ -2960,8 +2985,7 @@
 
   private void doTestMergeResultsWithValidLimit(final int limit)
   {
-    GroupByQuery.Builder builder = GroupByQuery
-        .builder()
+    GroupByQuery.Builder builder = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setInterval("2011-04-02/2011-04-04")
         .setDimensions(new DefaultDimensionSpec("quality", "alias"))
@@ -2997,8 +3021,7 @@
   public void testMergeResultsAcrossMultipleDaysWithLimitAndOrderBy()
   {
     final int limit = 14;
-    GroupByQuery.Builder builder = GroupByQuery
-        .builder()
+    GroupByQuery.Builder builder = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setInterval(QueryRunnerTestHelper.firstToThird)
         .setDimensions(new DefaultDimensionSpec("quality", "alias"))
@@ -3040,9 +3063,11 @@ public class GroupByQueryRunnerTest
   @Test
   public void testMergeResultsAcrossMultipleDaysWithLimitAndOrderByUsingMathExpressions()
   {
+    // Cannot vectorize due to virtual columns.
+    cannotVectorize();
+
     final int limit = 14;
-    GroupByQuery.Builder builder = GroupByQuery
-        .builder()
+    GroupByQuery.Builder builder = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setInterval(QueryRunnerTestHelper.firstToThird)
         .setVirtualColumns(
@@ -3092,8 +3117,7 @@ public class GroupByQueryRunnerTest
   @Test(expected = IllegalArgumentException.class)
   public void testMergeResultsWithNegativeLimit()
   {
-    GroupByQuery.Builder builder = GroupByQuery
-        .builder()
+    GroupByQuery.Builder builder = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setInterval("2011-04-02/2011-04-04")
         .setDimensions(new DefaultDimensionSpec("quality", "alias"))
@@ -3166,8 +3190,7 @@ public class GroupByQueryRunnerTest
 
   private void doTestMergeResultsWithOrderBy(LimitSpec orderBySpec, List<Row> expectedResults)
   {
-    GroupByQuery.Builder builder = GroupByQuery
-        .builder()
+    GroupByQuery.Builder builder = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setInterval("2011-04-02/2011-04-04")
         .setDimensions(new DefaultDimensionSpec("quality", "alias"))
@@ -3207,8 +3230,10 @@ public class GroupByQueryRunnerTest
   @Test
   public void testGroupByOrderLimit()
   {
-    GroupByQuery.Builder builder = GroupByQuery
-        .builder()
+    // Cannot vectorize due to expression-based aggregator.
+    cannotVectorize();
+
+    GroupByQuery.Builder builder = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setInterval("2011-04-02/2011-04-04")
         .setDimensions(new DefaultDimensionSpec("quality", "alias"))
@@ -3300,8 +3325,7 @@ public class GroupByQueryRunnerTest
   @Test
   public void testGroupByWithOrderLimit2()
   {
-    GroupByQuery.Builder builder = GroupByQuery
-        .builder()
+    GroupByQuery.Builder builder = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setInterval("2011-04-02/2011-04-04")
         .setDimensions(new DefaultDimensionSpec("quality", "alias"))
@@ -3337,8 +3361,7 @@ public class GroupByQueryRunnerTest
   @Test
   public void testGroupByWithOrderLimit3()
   {
-    GroupByQuery.Builder builder = GroupByQuery
-        .builder()
+    GroupByQuery.Builder builder = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setInterval("2011-04-02/2011-04-04")
         .setDimensions(new DefaultDimensionSpec("quality", "alias"))
@@ -3375,8 +3398,7 @@ public class GroupByQueryRunnerTest
   @Test
   public void testGroupByOrderLimitNumeric()
   {
-    GroupByQuery.Builder builder = GroupByQuery
-        .builder()
+    GroupByQuery.Builder builder = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setInterval("2011-04-02/2011-04-04")
         .setDimensions(new DefaultDimensionSpec("quality", "alias"))
@@ -3420,7 +3442,7 @@ public class GroupByQueryRunnerTest
   @Test
   public void testGroupByWithSameCaseOrdering()
   {
-    GroupByQuery query = new GroupByQuery.Builder()
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setGranularity(QueryRunnerTestHelper.allGran).setDimensions(new DefaultDimensionSpec(
             QueryRunnerTestHelper.marketDimension,
@@ -3466,7 +3488,7 @@ public class GroupByQueryRunnerTest
   @Test
   public void testGroupByWithOrderLimit4()
   {
-    GroupByQuery query = new GroupByQuery.Builder()
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setGranularity(QueryRunnerTestHelper.allGran)
         .setDimensions(new DefaultDimensionSpec(
@@ -3503,7 +3525,7 @@ public class GroupByQueryRunnerTest
   @Test
   public void testGroupByWithOrderOnHyperUnique()
   {
-    GroupByQuery query = new GroupByQuery.Builder()
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setGranularity(QueryRunnerTestHelper.allGran)
         .setDimensions(new DefaultDimensionSpec(
@@ -3566,7 +3588,7 @@ public class GroupByQueryRunnerTest
   @Test
   public void testGroupByWithHavingOnHyperUnique()
   {
-    GroupByQuery query = new GroupByQuery.Builder()
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setGranularity(QueryRunnerTestHelper.allGran)
         .setDimensions(new DefaultDimensionSpec(
@@ -3613,7 +3635,7 @@ public class GroupByQueryRunnerTest
   @Test
   public void testGroupByWithHavingOnFinalizedHyperUnique()
   {
-    GroupByQuery query = new GroupByQuery.Builder()
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setGranularity(QueryRunnerTestHelper.allGran)
         .setDimensions(new DefaultDimensionSpec(
@@ -3663,7 +3685,7 @@ public class GroupByQueryRunnerTest
   @Test
   public void testGroupByWithLimitOnFinalizedHyperUnique()
   {
-    GroupByQuery query = new GroupByQuery.Builder()
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setGranularity(QueryRunnerTestHelper.allGran).setDimensions(new DefaultDimensionSpec(
             QueryRunnerTestHelper.marketDimension,
@@ -3728,6 +3750,9 @@ public class GroupByQueryRunnerTest
   @Test
   public void testGroupByWithAlphaNumericDimensionOrder()
   {
+    // Cannot vectorize due to extraction dimension spec.
+    cannotVectorize();
+
     Map<String, String> map = new HashMap<>();
     map.put("automotive", "health105");
     map.put("business", "health20");
@@ -3739,8 +3764,7 @@ public class GroupByQueryRunnerTest
     map.put("technology", "travel123");
     map.put("travel", "travel555");
 
-    GroupByQuery query = GroupByQuery
-        .builder()
+    GroupByQuery query = makeQueryBuilder()
        .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird).setDimensions(new ExtractionDimensionSpec(
             "quality",
@@ -3784,6 +3808,9 @@ public class GroupByQueryRunnerTest
   @Test
   public void testGroupByWithLookupAndLimitAndSortByDimsFirst()
   {
+    // Cannot vectorize due to extraction dimension spec.
+    cannotVectorize();
+
     Map<String, String> map = new HashMap<>();
     map.put("automotive", "9");
     map.put("business", "8");
@@ -3795,8 +3822,7 @@ public class GroupByQueryRunnerTest
     map.put("technology", "2");
     map.put("travel", "1");
 
-    GroupByQuery query = GroupByQuery
-        .builder()
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird).setDimensions(new ExtractionDimensionSpec(
             "quality",
@@ -3810,7 +3836,7 @@ public class GroupByQueryRunnerTest
             )
         )
         .setGranularity(QueryRunnerTestHelper.dayGran)
-        .setContext(ImmutableMap.of("sortByDimsFirst", true))
+        .overrideContext(ImmutableMap.of("sortByDimsFirst", true))
         .build();
 
     List<Row> expectedResults = Arrays.asList(
@@ -3842,7 +3868,7 @@ public class GroupByQueryRunnerTest
   // in time when Druid does support this, we can re-evaluate this test.
   public void testLimitPerGrouping()
   {
-    GroupByQuery query = new GroupByQuery.Builder()
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setGranularity(QueryRunnerTestHelper.dayGran).setDimensions(new DefaultDimensionSpec(
            QueryRunnerTestHelper.marketDimension,
@@ -3905,8 +3931,7 @@ public class GroupByQueryRunnerTest
         )
     );
 
-    GroupByQuery.Builder builder = GroupByQuery
-        .builder()
+    GroupByQuery.Builder builder = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setInterval("2011-04-02/2011-04-04")
         .setDimensions(new DefaultDimensionSpec("quality", "alias"))
@@ -3951,8 +3976,7 @@ public class GroupByQueryRunnerTest
   @Test
   public void testGroupByWithOrderLimitHavingSpec()
   {
-    GroupByQuery.Builder builder = GroupByQuery
-        .builder()
+    GroupByQuery.Builder builder = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setInterval("2011-01-25/2011-01-28")
         .setDimensions(new DefaultDimensionSpec("quality", "alias"))
@@ -4051,8 +4075,7 @@ public class GroupByQueryRunnerTest
         )
     );
 
-    GroupByQuery.Builder builder = GroupByQuery
-        .builder()
+    GroupByQuery.Builder builder = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setInterval("2011-04-02/2011-04-04")
         .setDimensions(new DefaultDimensionSpec("quality", "alias"))
@@ -4079,14 +4102,14 @@ public class GroupByQueryRunnerTest
   @Test
   public void testHavingSpec()
   {
+
     List<Row> expectedResults = Arrays.asList(
         GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "business", "rows", 2L, "idx", 217L),
         GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 6L, "idx", 4420L),
         GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "premium", "rows", 6L, "idx", 4416L)
     );
 
-    GroupByQuery.Builder builder = GroupByQuery
-        .builder()
+    GroupByQuery.Builder builder = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setInterval("2011-04-02/2011-04-04")
         .setDimensions(new DefaultDimensionSpec("quality", "alias"))
@@ -4133,8 +4156,7 @@ public class GroupByQueryRunnerTest
         null
     );
 
-    GroupByQuery.Builder builder = GroupByQuery
-        .builder()
+    GroupByQuery.Builder builder = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setInterval("2011-04-02/2011-04-04")
         .setDimensions(new DefaultDimensionSpec("quality", "alias"))
@@ -4183,8 +4205,7 @@ public class GroupByQueryRunnerTest
         null
     );
 
-    GroupByQuery.Builder builder = GroupByQuery
-        .builder()
+    GroupByQuery.Builder builder = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setInterval("2011-04-02/2011-04-04")
         .setDimensions(new DefaultDimensionSpec("quality", "alias"))
@@ -4209,8 +4230,7 @@ public class GroupByQueryRunnerTest
         GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "premium", "rows", 6L, "idx", 4416L)
     );
 
-    GroupByQuery.Builder builder = GroupByQuery
-        .builder()
+    GroupByQuery.Builder builder = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setInterval("2011-04-02/2011-04-04")
         .setDimensions(new DefaultDimensionSpec("quality", "alias"))
@@ -4293,8 +4313,7 @@ public class GroupByQueryRunnerTest
         )
     );
 
-    GroupByQuery.Builder builder = GroupByQuery
-        .builder()
+    GroupByQuery.Builder builder = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setInterval("2011-04-02/2011-04-04")
         .setDimensions(new DefaultDimensionSpec("quality", "alias"))
@@ -4460,8 +4479,7 @@ public class GroupByQueryRunnerTest
         )
     );
 
-    GroupByQuery query = GroupByQuery
-        .builder()
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setDimensions(new DefaultDimensionSpec("quality", "alias"))
@@ -4490,8 +4508,7 @@ public class GroupByQueryRunnerTest
   @Test
   public void testGroupByWithRegEx()
   {
-    GroupByQuery.Builder builder = GroupByQuery
-        .builder()
+    GroupByQuery.Builder builder = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setInterval("2011-04-02/2011-04-04")
         .setDimFilter(new RegexDimFilter("quality", "auto.*", null))
@@ -4513,8 +4530,7 @@ public class GroupByQueryRunnerTest
   @Test
   public void testGroupByWithNonexistentDimension()
   {
-    GroupByQuery.Builder builder = GroupByQuery
-        .builder()
+    GroupByQuery.Builder builder = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setInterval("2011-04-02/2011-04-04")
         .addDimension("billy")
@@ -4568,8 +4584,7 @@ public class GroupByQueryRunnerTest
   @Test
   public void testIdenticalSubquery()
   {
-    GroupByQuery subquery = GroupByQuery
-        .builder()
+    GroupByQuery subquery = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setDimensions(new DefaultDimensionSpec("quality", "alias"))
@@ -4587,8 +4602,7 @@ public class GroupByQueryRunnerTest
         .setGranularity(QueryRunnerTestHelper.dayGran)
         .build();
 
-    GroupByQuery query = GroupByQuery
-        .builder()
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(subquery)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setDimensions(new DefaultDimensionSpec("alias", "alias"))
@@ -4626,8 +4640,7 @@ public class GroupByQueryRunnerTest
   @Test
   public void testSubqueryWithMultipleIntervalsInOuterQuery()
   {
-    GroupByQuery subquery = GroupByQuery
-        .builder()
+    GroupByQuery subquery = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setDimensions(new DefaultDimensionSpec("quality", "alias"))
@@ -4645,8 +4658,7 @@
         .setGranularity(QueryRunnerTestHelper.dayGran)
         .build();
 
-    GroupByQuery query = GroupByQuery
-        .builder()
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(subquery)
         .setQuerySegmentSpec(
             new MultipleIntervalSegmentSpec(
@@ -4691,8 +4703,7 @@ public class GroupByQueryRunnerTest
   @Test
   public void testSubqueryWithMultipleIntervalsInOuterQueryAndChunkPeriod()
   {
-    GroupByQuery subquery = GroupByQuery
-        .builder()
+    GroupByQuery subquery = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setDimensions(new DefaultDimensionSpec("quality", "alias"))
@@ -4708,11 +4719,10 @@
             new LongSumAggregatorFactory("indexMaxPlusTen", "indexMaxPlusTen")
         )
         .setGranularity(QueryRunnerTestHelper.dayGran)
-        .setContext(ImmutableMap.of("chunkPeriod", "P1D"))
+        .overrideContext(ImmutableMap.of("chunkPeriod", "P1D"))
         .build();
 
-    GroupByQuery query = GroupByQuery
-        .builder()
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(subquery)
         .setQuerySegmentSpec(
             new MultipleIntervalSegmentSpec(
@@ -4759,8 +4769,7 @@
   {
     //https://github.com/apache/incubator-druid/issues/2556
 
-    GroupByQuery subquery = GroupByQuery
-        .builder()
+    GroupByQuery subquery = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setDimensions(new DefaultDimensionSpec("quality", "alias"))
@@ -4778,8 +4787,7 @@
         .setGranularity(QueryRunnerTestHelper.dayGran)
         .build();
 
-    GroupByQuery query = GroupByQuery
-        .builder()
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(subquery)
         .setQuerySegmentSpec(
             new MultipleIntervalSegmentSpec(
@@ -4806,8 +4814,10 @@ public class GroupByQueryRunnerTest
   @Test
   public void testDifferentGroupingSubquery()
   {
-    GroupByQuery subquery = GroupByQuery
-        .builder()
+    // Cannot vectorize due to virtual columns.
+    cannotVectorize();
+
+    GroupByQuery subquery = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setDimensions(new DefaultDimensionSpec("quality", "alias"))
@@ -4819,8 +4829,7 @@ public class GroupByQueryRunnerTest
         .setGranularity(QueryRunnerTestHelper.dayGran)
         .build();
 
-    GroupByQuery query = GroupByQuery
-        .builder()
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(subquery)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setAggregatorSpecs(
@@ -4842,7 +4851,7 @@ public class GroupByQueryRunnerTest
         GroupByQueryRunnerTestHelper.runQuery(factory, runner, query),
         "subquery"
     );
-    subquery = new GroupByQuery.Builder(subquery)
+    subquery = makeQueryBuilder(subquery)
         .setVirtualColumns(
             new ExpressionVirtualColumn("expr", "-index + 100", ValueType.FLOAT, TestExprMacroTable.INSTANCE)
         )
@@ -4869,8 +4878,7 @@ public class GroupByQueryRunnerTest
   @Test
   public void testDifferentGroupingSubqueryMultipleAggregatorsOnSameField()
   {
-    GroupByQuery subquery = GroupByQuery
-        .builder()
+    GroupByQuery subquery = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setDimensions(new DefaultDimensionSpec("quality", "alias"))
@@ -4890,8 +4898,7 @@ public class GroupByQueryRunnerTest
         .setGranularity(QueryRunnerTestHelper.dayGran)
         .build();
 
-    GroupByQuery query = GroupByQuery
-        .builder()
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(subquery)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setAggregatorSpecs(
@@ -4920,8 +4927,7 @@ public class GroupByQueryRunnerTest
   @Test
   public void testDifferentGroupingSubqueryWithFilter()
   {
-    GroupByQuery subquery = GroupByQuery
-        .builder()
+    GroupByQuery subquery = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setDimensions(new DefaultDimensionSpec("quality", "quality"))
@@ -4929,8 +4935,7 @@ public class GroupByQueryRunnerTest
         .setGranularity(QueryRunnerTestHelper.dayGran)
         .build();
 
-    GroupByQuery query = GroupByQuery
-        .builder()
+    GroupByQuery query = makeQueryBuilder()
        .setDataSource(subquery)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setAggregatorSpecs(new DoubleMaxAggregatorFactory("idx", "idx"))
@@ -4964,8 +4969,7 @@ public class GroupByQueryRunnerTest
   @Test
   public void testDifferentIntervalSubquery()
   {
-    GroupByQuery subquery = GroupByQuery
-        .builder()
+    GroupByQuery subquery = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setDimensions(new DefaultDimensionSpec("quality", "alias"))
@@ -4973,8 +4977,7 @@ public class GroupByQueryRunnerTest
         .setGranularity(QueryRunnerTestHelper.dayGran)
         .build();
 
-    GroupByQuery query = GroupByQuery
-        .builder()
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(subquery)
         .setQuerySegmentSpec(QueryRunnerTestHelper.secondOnly)
         .setAggregatorSpecs(new DoubleMaxAggregatorFactory("idx", "idx"))
@@ -4997,8 +5000,7 @@ public class GroupByQueryRunnerTest
         "'__time' cannot be used as an output name for dimensions, aggregators, or post-aggregators."
     );
 
-    GroupByQuery
-        .builder()
+    makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.fullOnIntervalSpec)
         .setDimensions(
@@ -5032,8 +5034,7 @@ public class GroupByQueryRunnerTest
         "'__time' cannot be used as an output name for dimensions, aggregators, or post-aggregators."
     );
 
-    GroupByQuery
-        .builder()
+    makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setDimensions(new DefaultDimensionSpec("quality", "__time"))
@@ -5059,8 +5060,7 @@ public class GroupByQueryRunnerTest
   @Test
   public void testEmptySubquery()
   {
-    GroupByQuery subquery = GroupByQuery
-        .builder()
+    GroupByQuery subquery = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.emptyInterval)
         .setDimensions(new DefaultDimensionSpec("quality", "alias"))
@@ -5068,8 +5068,7 @@ public class GroupByQueryRunnerTest
         .setGranularity(QueryRunnerTestHelper.dayGran)
         .build();
 
-    GroupByQuery query = GroupByQuery
-        .builder()
+    GroupByQuery query = makeQueryBuilder()
        .setDataSource(subquery)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setAggregatorSpecs(new DoubleMaxAggregatorFactory("idx", "idx"))
@@ -5083,8 +5082,7 @@ public class GroupByQueryRunnerTest
   @Test
   public void testSubqueryWithPostAggregators()
   {
-    final GroupByQuery subquery = GroupByQuery
-        .builder()
+    final GroupByQuery subquery = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setDimensions(new DefaultDimensionSpec("quality", "alias"))
@@ -5108,8 +5106,7 @@ public class GroupByQueryRunnerTest
         .setGranularity(QueryRunnerTestHelper.dayGran)
         .build();
 
-    final GroupByQuery query = GroupByQuery
-        .builder()
+    final GroupByQuery query = makeQueryBuilder()
         .setDataSource(subquery)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setDimensions(new DefaultDimensionSpec("alias", "alias"))
@@ -5341,17 +5338,11 @@ public class GroupByQueryRunnerTest
   @Test
   public void testSubqueryWithPostAggregatorsAndHaving()
   {
-    final GroupByQuery subquery = GroupByQuery
-        .builder()
+    final GroupByQuery subquery = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setDimensions(new DefaultDimensionSpec("quality", "alias"))
-        .setDimFilter(new JavaScriptDimFilter(
-            "quality",
-            "function(dim){ return true; }",
-            null,
-            JavaScriptConfig.getEnabledInstance()
-        )).setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx_subagg", "index"))
+        .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx_subagg", "index"))
         .setPostAggregatorSpecs(
             Collections.singletonList(
                 new ArithmeticPostAggregator(
@@ -5385,8 +5376,7 @@ public class GroupByQueryRunnerTest
         .setGranularity(QueryRunnerTestHelper.dayGran)
        .build();
 
-    final GroupByQuery query = GroupByQuery
-        .builder()
+    final GroupByQuery query = makeQueryBuilder()
         .setDataSource(subquery)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setDimensions(new DefaultDimensionSpec("alias", "alias"))
@@ -5596,8 +5586,10 @@ public class GroupByQueryRunnerTest
   @Test
   public void testSubqueryWithMultiColumnAggregators()
   {
-    final GroupByQuery subquery = GroupByQuery
-        .builder()
+    // Cannot vectorize due to javascript functionality.
+    cannotVectorize();
+
+    final GroupByQuery subquery = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setDimensions(new DefaultDimensionSpec("quality", "alias"))
@@ -5652,8 +5644,7 @@ public class GroupByQueryRunnerTest
         .setGranularity(QueryRunnerTestHelper.dayGran)
         .build();
 
-    final GroupByQuery query = GroupByQuery
-        .builder()
+    final GroupByQuery query = makeQueryBuilder()
         .setDataSource(subquery)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setDimensions(new DefaultDimensionSpec("alias", "alias"))
@@ -5763,8 +5754,7 @@ public class GroupByQueryRunnerTest
   @Test
   public void testSubqueryWithOuterFilterAggregator()
   {
-    final GroupByQuery subquery = GroupByQuery
-        .builder()
+    final GroupByQuery subquery = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.fullOnIntervalSpec)
         .setDimensions(new DefaultDimensionSpec("market", "market"), new DefaultDimensionSpec("quality", "quality"))
@@ -5773,8 +5763,7 @@ public class GroupByQueryRunnerTest
         .build();
 
     final DimFilter filter = new SelectorDimFilter("market", "spot", null);
-    final GroupByQuery query = GroupByQuery
-        .builder()
+    final GroupByQuery query = makeQueryBuilder()
         .setDataSource(subquery)
         .setQuerySegmentSpec(QueryRunnerTestHelper.fullOnIntervalSpec)
         .setDimensions(Collections.emptyList())
@@ -5792,8 +5781,7 @@ public class GroupByQueryRunnerTest
   @Test
   public void testSubqueryWithOuterTimeFilter()
   {
-    final GroupByQuery subquery = GroupByQuery
-        .builder()
+    final GroupByQuery subquery = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.fullOnIntervalSpec)
         .setDimensions(new DefaultDimensionSpec("market", "market"), new DefaultDimensionSpec("quality", "quality"))
@@ -5811,8 +5799,7 @@ public class GroupByQueryRunnerTest
         ImmutableList.of("1", "2", "3"),
         new TimeFormatExtractionFn("d", null, null, null, false)
     );
-    final GroupByQuery query = GroupByQuery
-        .builder()
+    final GroupByQuery query = makeQueryBuilder()
         .setDataSource(subquery)
         .setQuerySegmentSpec(QueryRunnerTestHelper.fullOnIntervalSpec)
         .setDimensions(Collections.emptyList())
@@ -5839,21 +5826,19 @@ public class GroupByQueryRunnerTest
   @Test
   public void testSubqueryWithContextTimeout()
   {
-    final GroupByQuery subquery = GroupByQuery
-        .builder()
+    final GroupByQuery subquery = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.fullOnIntervalSpec)
         .setDimensions(new DefaultDimensionSpec("quality", "alias"))
         .setGranularity(QueryRunnerTestHelper.dayGran)
         .build();
 
-    final GroupByQuery query = GroupByQuery
-        .builder()
+    final GroupByQuery query = makeQueryBuilder()
         .setDataSource(subquery)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setDimensions(new ArrayList<>()).setAggregatorSpecs(new CountAggregatorFactory("count"))
         .setGranularity(QueryRunnerTestHelper.allGran)
-        .setContext(ImmutableMap.of(QueryContexts.TIMEOUT_KEY, 10000))
+        .overrideContext(ImmutableMap.of(QueryContexts.TIMEOUT_KEY, 10000))
         .build();
 
     List<Row> expectedResults = Collections.singletonList(
@@ -5866,16 +5851,14 @@ public class GroupByQueryRunnerTest
   @Test
   public void testSubqueryWithOuterVirtualColumns()
   {
-    final GroupByQuery subquery = GroupByQuery
-        .builder()
+    final GroupByQuery subquery = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.fullOnIntervalSpec)
         .setDimensions(new DefaultDimensionSpec("quality", "alias"))
         .setGranularity(QueryRunnerTestHelper.dayGran)
         .build();
 
-    final GroupByQuery query = GroupByQuery
-        .builder()
+    final GroupByQuery query = makeQueryBuilder()
         .setDataSource(subquery)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setVirtualColumns(new ExpressionVirtualColumn("expr", "1", ValueType.FLOAT, TestExprMacroTable.INSTANCE))
@@ -5893,17 +5876,15 @@ public class GroupByQueryRunnerTest
   @Test
   public void testSubqueryWithOuterCardinalityAggregator()
   {
-    final GroupByQuery subquery = GroupByQuery
-        .builder()
+    final GroupByQuery subquery = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.fullOnIntervalSpec)
         .setDimensions(new DefaultDimensionSpec("market", "market"), new DefaultDimensionSpec("quality", "quality"))
         .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("index", "index"))
-        .setGranularity(QueryRunnerTestHelper.dayGran)
+        .setGranularity(QueryRunnerTestHelper.allGran)
        .build();
 
-    final GroupByQuery query = GroupByQuery
-        .builder()
+    final GroupByQuery query = makeQueryBuilder()
         .setDataSource(subquery)
         .setQuerySegmentSpec(QueryRunnerTestHelper.fullOnIntervalSpec)
         .setDimensions(Collections.emptyList())
@@ -5925,8 +5906,7 @@ public class GroupByQueryRunnerTest
   @Test
   public void testSubqueryWithOuterCountAggregator()
   {
-    final GroupByQuery subquery = GroupByQuery
-        .builder()
+    final GroupByQuery subquery = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.fullOnIntervalSpec)
         .setDimensions(new DefaultDimensionSpec("quality", "alias"))
@@ -5939,8 +5919,7 @@ public class GroupByQueryRunnerTest
         )
         .build();
 
-    final GroupByQuery query = GroupByQuery
-        .builder()
+    final GroupByQuery query = makeQueryBuilder()
         .setDataSource(subquery)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setDimensions(new ArrayList<>()).setAggregatorSpecs(new CountAggregatorFactory("count"))
@@ -5968,8 +5947,7 @@ public class GroupByQueryRunnerTest
   @Test
   public void testSubqueryWithOuterDimJavascriptAggregators()
   {
-    final GroupByQuery subquery = GroupByQuery
-        .builder()
+    final GroupByQuery subquery = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setDimensions(new DefaultDimensionSpec("market", "market"), new DefaultDimensionSpec("quality", "quality"))
@@ -5977,8 +5955,7 @@ public class GroupByQueryRunnerTest
         .setGranularity(QueryRunnerTestHelper.dayGran)
         .build();
 
-    final GroupByQuery query = GroupByQuery
-        .builder()
+    final GroupByQuery query = makeQueryBuilder()
         .setDataSource(subquery)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setDimensions(new DefaultDimensionSpec("quality", "quality"))
@@ -6021,8 +5998,7 @@ public class GroupByQueryRunnerTest
   @Test
   public void testSubqueryWithOuterJavascriptAggregators()
   {
-    final GroupByQuery subquery = GroupByQuery
-        .builder()
+    final GroupByQuery subquery = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setDimensions(new DefaultDimensionSpec("market", "market"), new DefaultDimensionSpec("quality", "quality"))
@@ -6030,8 +6006,7 @@ public class GroupByQueryRunnerTest
         .setGranularity(QueryRunnerTestHelper.dayGran)
         .build();
 
-    final GroupByQuery query = GroupByQuery
-        .builder()
+    final GroupByQuery query = makeQueryBuilder()
         .setDataSource(subquery)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setDimensions(new DefaultDimensionSpec("quality", "quality"))
@@ -6074,8 +6049,7 @@ public class GroupByQueryRunnerTest
   @Test
   public void testSubqueryWithHyperUniques()
   {
-    GroupByQuery subquery = GroupByQuery
-        .builder()
+    GroupByQuery subquery = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setDimensions(new DefaultDimensionSpec("quality", "alias"))
@@ -6087,8 +6061,7 @@ public class GroupByQueryRunnerTest
         .setGranularity(QueryRunnerTestHelper.dayGran)
         .build();
 
-    GroupByQuery query = GroupByQuery
-        .builder()
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(subquery)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setDimensions(new DefaultDimensionSpec("alias", "alias"))
@@ -6210,8 +6183,7 @@ public class GroupByQueryRunnerTest
   @Test
   public void testSubqueryWithHyperUniquesPostAggregator()
   {
-    GroupByQuery subquery = GroupByQuery
-        .builder()
+    GroupByQuery subquery = makeQueryBuilder()
        .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setDimensions(new ArrayList<>())
@@ -6228,8 +6200,7 @@ public class GroupByQueryRunnerTest
         .setGranularity(QueryRunnerTestHelper.dayGran)
         .build();
 
-    GroupByQuery query = GroupByQuery
-        .builder()
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(subquery)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setDimensions(new ArrayList<>())
@@ -6268,8 +6239,10 @@ public class GroupByQueryRunnerTest
   @Test
   public void testSubqueryWithFirstLast()
   {
-    GroupByQuery subquery = GroupByQuery
-        .builder()
+    // Cannot vectorize due to "first", "last" aggregators.
+    cannotVectorize();
+
+    GroupByQuery subquery = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.fullOnIntervalSpec)
         .setDimensions(new DefaultDimensionSpec("market", "market"))
@@ -6279,11 +6252,10 @@ public class GroupByQueryRunnerTest
             new LongLastAggregatorFactory("innerlast", "index")
         )
         .setGranularity(QueryRunnerTestHelper.dayGran)
-        .setContext(ImmutableMap.of("finalize", true))
+        .overrideContext(ImmutableMap.of("finalize", true))
        .build();
 
-    GroupByQuery query = GroupByQuery
-        .builder()
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(subquery)
         .setQuerySegmentSpec(QueryRunnerTestHelper.fullOnIntervalSpec)
         .setDimensions(Collections.emptyList())
@@ -6308,12 +6280,14 @@ public class GroupByQueryRunnerTest
   @Test
   public void testGroupByWithSubtotalsSpec()
   {
+    // Cannot vectorize due to usage of expressions.
+    cannotVectorize();
+
     if (!config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V2)) {
       return;
     }
 
-    GroupByQuery query = GroupByQuery
-        .builder()
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setVirtualColumns(new ExpressionVirtualColumn("alias", "quality", ValueType.STRING, TestExprMacroTable.INSTANCE))
@@ -6884,8 +6858,7 @@ public class GroupByQueryRunnerTest
      return;
     }
 
-    GroupByQuery query = GroupByQuery
-        .builder()
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setDimensions(Lists.newArrayList(
@@ -7352,10 +7325,6 @@ public class GroupByQueryRunnerTest
     );
 
     Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
-
-    for (Row row : results) {
-      System.out.println(row);
-    }
 
     TestHelper.assertExpectedObjects(expectedResults, results, "subtotal-long-dim");
   }
@@ -7366,8 +7335,7 @@ public class GroupByQueryRunnerTest
      return;
    }
 
-    GroupByQuery query = GroupByQuery
-        .builder()
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setDimensions(Lists.newArrayList(
@@ -7439,8 +7407,10 @@ public class GroupByQueryRunnerTest
   @Test
   public void testGroupByWithTimeColumn()
   {
-    GroupByQuery query = GroupByQuery
-        .builder()
+    // Cannot vectorize due to javascript aggregator.
+    cannotVectorize();
+
+    GroupByQuery query = makeQueryBuilder()
        .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
         .setAggregatorSpecs(
@@ -7470,8 +7440,10 @@ public class GroupByQueryRunnerTest
   @Test
   public void testGroupByTimeExtraction()
   {
-    GroupByQuery query = GroupByQuery
-        .builder()
+    // Cannot vectorize due to extraction dimension spec.
+    cannotVectorize();
+
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.fullOnIntervalSpec)
         .setDimensions(
@@ -7688,6 +7660,9 @@ public class GroupByQueryRunnerTest
   @Test
   public void testGroupByTimeExtractionWithNulls()
   {
+    // Cannot vectorize due to extraction dimension specs.
+    cannotVectorize();
+
     final DimExtractionFn nullWednesdays = new DimExtractionFn()
     {
       @Override
@@ -7719,8 +7694,7 @@ public class GroupByQueryRunnerTest
       }
     };
 
-    GroupByQuery query = GroupByQuery
-        .builder()
+    GroupByQuery query = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.fullOnIntervalSpec)
         .setDimensions(
@@ -7961,15 +7935,14 @@ public class GroupByQueryRunnerTest
     for (int i = 0; i < segmentCount; i++) {
       bySegmentResults.add(singleSegmentResult);
    }
-    GroupByQuery.Builder builder = GroupByQuery
-        .builder()
+    GroupByQuery.Builder builder = makeQueryBuilder()
        .setDataSource(QueryRunnerTestHelper.dataSource)
         .setInterval("2011-04-02/2011-04-04")
         .setDimensions(new DefaultDimensionSpec("quality", "alias"))
         .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index"))
         .setGranularity(new PeriodGranularity(new Period("P1M"), null, null))
         .setDimFilter(new SelectorDimFilter("quality", "mezzanine", null))
-        .setContext(ImmutableMap.of("bySegment", true));
+        .overrideContext(ImmutableMap.of("bySegment", true));
     final GroupByQuery fullQuery = builder.build();
     QueryToolChest toolChest = factory.getToolchest();
@@ -7997,6 +7970,9 @@ public class GroupByQueryRunnerTest
   @Test
   public void testBySegmentResultsUnOptimizedDimextraction()
   {
+    // Cannot vectorize due to extraction dimension spec.
+    cannotVectorize();
+
     int segmentCount = 32;
     Result<BySegmentResultValue> singleSegmentResult = new Result<>(
         DateTimes.of("2011-01-12T00:00:00.000Z"),
@@ -8020,8 +7996,7 @@ public class GroupByQueryRunnerTest
     for (int i = 0; i < segmentCount; i++) {
       bySegmentResults.add(singleSegmentResult);
     }
-    GroupByQuery.Builder builder = GroupByQuery
-        .builder()
+    GroupByQuery.Builder builder = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setInterval("2011-04-02/2011-04-04").setDimensions(new ExtractionDimensionSpec(
             "quality",
@@ -8036,7 +8011,7 @@ public class GroupByQueryRunnerTest
         )).setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index"))
         .setGranularity(new PeriodGranularity(new Period("P1M"), null, null))
         .setDimFilter(new SelectorDimFilter("quality", "mezzanine", null))
-        .setContext(ImmutableMap.of("bySegment", true));
+        .overrideContext(ImmutableMap.of("bySegment", true));
     final GroupByQuery fullQuery = builder.build();
     QueryToolChest toolChest = factory.getToolchest();
@@ -8086,8 +8061,7 @@ public class GroupByQueryRunnerTest
     for (int i = 0; i < segmentCount; i++) {
      bySegmentResults.add(singleSegmentResult);
     }
-    GroupByQuery.Builder builder = GroupByQuery
-        .builder()
+    GroupByQuery.Builder builder = makeQueryBuilder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setInterval("2011-04-02/2011-04-04").setDimensions(new ExtractionDimensionSpec(
             "quality",
@@ -8102,7 +8076,7 @@ public class GroupByQueryRunnerTest
         )).setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index"))
         .setGranularity(new PeriodGranularity(new Period("P1M"), null, null))
         .setDimFilter(new SelectorDimFilter("quality", "mezzanine", null))
-        .setContext(ImmutableMap.of("bySegment", true));
+        .overrideContext(ImmutableMap.of("bySegment", true));
     final GroupByQuery fullQuery = builder.build();
     QueryToolChest toolChest = factory.getToolchest();
@@ -8149,17 +8123,17 @@ public class GroupByQueryRunnerTest
         new SelectorDimFilter("quality", "travel", null)
     );
 
-    GroupByQuery query = GroupByQuery.builder()
-                                     .setDataSource(QueryRunnerTestHelper.dataSource)
-                                     .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
-                                     .setDimensions(new DefaultDimensionSpec("quality", "alias"))
-                                     .setAggregatorSpecs(
-                                         QueryRunnerTestHelper.rowsCount,
-                                         new LongSumAggregatorFactory("idx", "index")
-                                     )
-                                     .setGranularity(QueryRunnerTestHelper.dayGran)
-                                     .setDimFilter(new OrDimFilter(dimFilters))
-                                     .build();
+    GroupByQuery query = makeQueryBuilder()
+        .setDataSource(QueryRunnerTestHelper.dataSource)
+        .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
+        .setDimensions(new DefaultDimensionSpec("quality", "alias"))
+        .setAggregatorSpecs(
+            QueryRunnerTestHelper.rowsCount,
+            new LongSumAggregatorFactory("idx", "index")
+        )
+        .setGranularity(QueryRunnerTestHelper.dayGran)
+        .setDimFilter(new OrDimFilter(dimFilters))
+        .build();
     List<Row> expectedResults = Arrays.asList(
         GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "automotive", "rows", 1L, "idx", 135L),
         GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "business", "rows", 1L, "idx", 118L),
@@ -8203,17 +8177,17 @@ public class GroupByQueryRunnerTest
     MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap, false);
     LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true, false);
 
-    GroupByQuery query = GroupByQuery.builder()
-                                     .setDataSource(QueryRunnerTestHelper.dataSource)
-                                     .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
-                                     .setDimensions(new DefaultDimensionSpec("quality", "alias"))
-                                     .setAggregatorSpecs(
-                                         QueryRunnerTestHelper.rowsCount,
-                                         new LongSumAggregatorFactory("idx", "index")
-                                     )
-                                     .setGranularity(QueryRunnerTestHelper.dayGran)
-                                     .setDimFilter(new ExtractionDimFilter("quality", "", lookupExtractionFn, null))
-                                     .build();
+    GroupByQuery query = makeQueryBuilder()
+        .setDataSource(QueryRunnerTestHelper.dataSource)
+        .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
+        .setDimensions(new DefaultDimensionSpec("quality", "alias"))
+        .setAggregatorSpecs(
+            QueryRunnerTestHelper.rowsCount,
+            new LongSumAggregatorFactory("idx", "index")
+        )
+        .setGranularity(QueryRunnerTestHelper.dayGran)
+        .setDimFilter(new ExtractionDimFilter("quality", "", lookupExtractionFn, null))
+        .build();
 
     List<Row> expectedResults;
@@ -8243,19 +8217,14 @@ public class GroupByQueryRunnerTest
     MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap, false);
     LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true, false);
 
-    GroupByQuery query = GroupByQuery.builder()
-                                     .setDataSource(QueryRunnerTestHelper.dataSource)
-                                     .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
-                                     .setDimensions(new DefaultDimensionSpec("quality", "alias"))
-                                     .setAggregatorSpecs(
-                                         QueryRunnerTestHelper.rowsCount,
-                                         new LongSumAggregatorFactory("idx", "index")
-                                     )
-                                     .setGranularity(QueryRunnerTestHelper.dayGran)
-                                     .setDimFilter(
-                                         new ExtractionDimFilter("quality", "NOT_THERE", lookupExtractionFn, null)
-                                     )
-                                     .build();
+    GroupByQuery query = makeQueryBuilder()
+        .setDataSource(QueryRunnerTestHelper.dataSource)
+        .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
+        .setDimensions(new DefaultDimensionSpec("quality", "alias"))
+        .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index"))
+        .setGranularity(QueryRunnerTestHelper.dayGran)
+        .setDimFilter(new ExtractionDimFilter("quality", "NOT_THERE", lookupExtractionFn, null))
+        .build();
 
     List<Row> expectedResults = Collections.emptyList();
     Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
@@ -8279,23 +8248,23 @@ public class GroupByQueryRunnerTest
       extractionMap.put("", "NOT_USED");
     }
 
-    GroupByQuery query = GroupByQuery.builder()
-                                     .setDataSource(QueryRunnerTestHelper.dataSource)
-                                     .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
-                                     .setDimensions(new DefaultDimensionSpec("null_column", "alias"))
-                                     .setAggregatorSpecs(
-                                         QueryRunnerTestHelper.rowsCount,
-                                         new LongSumAggregatorFactory("idx", "index")
-                                     )
-                                     .setGranularity(QueryRunnerTestHelper.dayGran)
-                                     .setDimFilter(
-                                         new ExtractionDimFilter(
-                                             "null_column",
-                                             "REPLACED_VALUE",
-                                             lookupExtractionFn,
-                                             null
-                                         )
-                                     ).build();
+    GroupByQuery query = makeQueryBuilder()
+        .setDataSource(QueryRunnerTestHelper.dataSource)
+        .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
+        .setDimensions(new DefaultDimensionSpec("null_column", "alias"))
+        .setAggregatorSpecs(
+            QueryRunnerTestHelper.rowsCount,
+            new LongSumAggregatorFactory("idx", "index")
+        )
+        .setGranularity(QueryRunnerTestHelper.dayGran)
+        .setDimFilter(
+            new ExtractionDimFilter(
+                "null_column",
+                "REPLACED_VALUE",
+                lookupExtractionFn,
+                null
+            )
+        ).build();
 
     List<Row> expectedResults = Arrays
         .asList(
@@ -8324,19 +8293,16 @@ public class GroupByQueryRunnerTest
     MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap, false);
     LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, "missing", true, false);
     DimFilter filter = new ExtractionDimFilter("quality", "mezzanineANDnews", lookupExtractionFn, null);
-    GroupByQuery query = GroupByQuery.builder()
-                                     .setDataSource(QueryRunnerTestHelper.dataSource)
-                                     .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
-                                     .setDimensions(new DefaultDimensionSpec("quality", "alias"))
-                                     .setAggregatorSpecs(new FilteredAggregatorFactory(
-                                         QueryRunnerTestHelper.rowsCount,
-                                         filter
-                                     ), new FilteredAggregatorFactory(
-                                         new LongSumAggregatorFactory("idx", "index"),
-                                         filter
-                                     ))
-                                     .setGranularity(QueryRunnerTestHelper.dayGran)
-                                     .build();
+    GroupByQuery query = makeQueryBuilder()
+        .setDataSource(QueryRunnerTestHelper.dataSource)
+        .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
+        .setDimensions(new DefaultDimensionSpec("quality", "alias"))
+        .setAggregatorSpecs(
+            new FilteredAggregatorFactory(QueryRunnerTestHelper.rowsCount, filter),
+            new FilteredAggregatorFactory(new LongSumAggregatorFactory("idx", "index"), filter)
+        )
+        .setGranularity(QueryRunnerTestHelper.dayGran)
+        .build();
     List<Row> expectedResults = Arrays.asList(
         GroupByQueryRunnerTestHelper.createExpectedRow(
             "2011-04-01",
@@ -8485,24 +8451,24 @@ public class GroupByQueryRunnerTest
     MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap, false);
     LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true, true);
 
-    GroupByQuery query = GroupByQuery.builder()
-                                     .setDataSource(QueryRunnerTestHelper.dataSource)
-                                     .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
-                                     .setDimensions(new DefaultDimensionSpec("quality", "alias"))
-                                     .setAggregatorSpecs(
-                                         QueryRunnerTestHelper.rowsCount,
-                                         new LongSumAggregatorFactory("idx", "index")
-                                     )
-                                     .setGranularity(QueryRunnerTestHelper.dayGran)
-                                     .setDimFilter(
-                                         new ExtractionDimFilter(
-                                             "quality",
-                                             "newsANDmezzanine",
-                                             lookupExtractionFn,
-                                             null
-                                         )
-                                     )
-                                     .build();
+    GroupByQuery query = makeQueryBuilder()
+        .setDataSource(QueryRunnerTestHelper.dataSource)
+        .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
DefaultDimensionSpec("quality", "alias")) + .setAggregatorSpecs( + QueryRunnerTestHelper.rowsCount, + new LongSumAggregatorFactory("idx", "index") + ) + .setGranularity(QueryRunnerTestHelper.dayGran) + .setDimFilter( + new ExtractionDimFilter( + "quality", + "newsANDmezzanine", + lookupExtractionFn, + null + ) + ) + .build(); List expectedResults = Arrays.asList( GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 3L, "idx", 2870L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "news", "rows", 1L, "idx", 121L), @@ -8531,8 +8497,7 @@ public class GroupByQueryRunnerTest lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, "EMPTY", true, true); } - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("null_column", "alias")) @@ -8611,15 +8576,14 @@ public class GroupByQueryRunnerTest superFilterList.add(new JavaScriptDimFilter("quality", jsFn, extractionFn, JavaScriptConfig.getEnabledInstance())); DimFilter superFilter = new AndDimFilter(superFilterList); - GroupByQuery.Builder builder = GroupByQuery - .builder() + GroupByQuery.Builder builder = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setInterval("2011-04-02/2011-04-04") .setDimensions(new DefaultDimensionSpec("quality", "alias")) .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index")) .setGranularity(new PeriodGranularity(new Period("P1M"), null, null)) .setDimFilter(superFilter) - .setContext(ImmutableMap.of("bySegment", true)); + .overrideContext(ImmutableMap.of("bySegment", true)); final GroupByQuery fullQuery = builder.build(); QueryToolChest toolChest = factory.getToolchest(); @@ -8664,24 +8628,21 @@ public class GroupByQueryRunnerTest superFilterList.add( new SearchQueryDimFilter("null_column", new ContainsSearchQuerySpec("EMPTY", true), extractionFn) ); - superFilterList.add(new JavaScriptDimFilter( - "null_column", - jsFn, - extractionFn, - JavaScriptConfig.getEnabledInstance() - )); + superFilterList.add( + new JavaScriptDimFilter("null_column", jsFn, extractionFn, JavaScriptConfig.getEnabledInstance()) + ); DimFilter superFilter = new AndDimFilter(superFilterList); - GroupByQuery query = GroupByQuery.builder() - .setDataSource(QueryRunnerTestHelper.dataSource) - .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) - .setDimensions(new DefaultDimensionSpec("null_column", "alias")) - .setAggregatorSpecs( - QueryRunnerTestHelper.rowsCount, - new LongSumAggregatorFactory("idx", "index") - ) - .setGranularity(QueryRunnerTestHelper.dayGran) - .setDimFilter(superFilter).build(); + GroupByQuery query = makeQueryBuilder() + .setDataSource(QueryRunnerTestHelper.dataSource) + .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) + .setDimensions(new DefaultDimensionSpec("null_column", "alias")) + .setAggregatorSpecs( + QueryRunnerTestHelper.rowsCount, + new LongSumAggregatorFactory("idx", "index") + ) + .setGranularity(QueryRunnerTestHelper.dayGran) + .setDimFilter(superFilter).build(); List expectedResults = Arrays.asList( GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", null, "rows", 13L, "idx", 6619L), @@ -8695,11 +8656,13 @@ public class GroupByQueryRunnerTest @Test public void testGroupByCardinalityAggWithExtractionFn() { + // Cannot vectorize due to "cardinality" aggregator. 
+ cannotVectorize(); + String helloJsFn = "function(str) { return 'hello' }"; ExtractionFn helloFn = new JavaScriptExtractionFn(helloJsFn, false, JavaScriptConfig.getEnabledInstance()); - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("market", "alias")) @@ -8779,8 +8742,10 @@ public class GroupByQueryRunnerTest @Test public void testGroupByCardinalityAggOnFloat() { - GroupByQuery query = GroupByQuery - .builder() + // Cannot vectorize due to "cardinality" aggregator. + cannotVectorize(); + + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("market", "alias")) @@ -8864,8 +8829,7 @@ public class GroupByQueryRunnerTest expectedException.expectMessage("GroupBy v1 only supports dimensions with an outputType of STRING."); } - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("qualityLong", "ql_alias", ValueType.LONG)) @@ -8922,8 +8886,7 @@ public class GroupByQueryRunnerTest expectedException.expectMessage("GroupBy v1 only supports dimensions with an outputType of STRING."); } - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("qualityLong", "ql_alias", ValueType.LONG)) @@ -8975,6 +8938,9 @@ public class GroupByQueryRunnerTest @Test public void testGroupByLongColumnWithExFn() { + // Cannot vectorize due to extraction dimension spec. + cannotVectorize(); + if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) { expectedException.expect(UnsupportedOperationException.class); expectedException.expectMessage("GroupBy v1 does not support dimension selectors with unknown cardinality."); @@ -8983,8 +8949,7 @@ public class GroupByQueryRunnerTest String jsFn = "function(str) { return 'super-' + str; }"; ExtractionFn jsExtractionFn = new JavaScriptExtractionFn(jsFn, false, JavaScriptConfig.getEnabledInstance()); - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new ExtractionDimensionSpec("qualityLong", "ql_alias", jsExtractionFn)) @@ -9025,8 +8990,7 @@ public class GroupByQueryRunnerTest expectedException.expectMessage("GroupBy v1 only supports dimensions with an outputType of STRING."); } - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("__time", "time_alias", ValueType.LONG)) @@ -9062,11 +9026,13 @@ public class GroupByQueryRunnerTest @Test public void testGroupByLongTimeColumnWithExFn() { + // Cannot vectorize due to extraction dimension spec. 
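+ // (Extraction functions such as the JavaScript one below transform values row by row and appear to have + // no vectorized selector counterpart, hence the expected failure under forced vectorization.)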
+ cannotVectorize(); + String jsFn = "function(str) { return 'super-' + str; }"; ExtractionFn jsExtractionFn = new JavaScriptExtractionFn(jsFn, false, JavaScriptConfig.getEnabledInstance()); - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new ExtractionDimensionSpec("__time", "time_alias", jsExtractionFn)) @@ -9107,8 +9073,7 @@ public class GroupByQueryRunnerTest expectedException.expectMessage("GroupBy v1 only supports dimensions with an outputType of STRING."); } - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("index", "index_alias", ValueType.FLOAT)) @@ -9166,8 +9131,7 @@ public class GroupByQueryRunnerTest expectedException.expectMessage("GroupBy v1 only supports dimensions with an outputType of STRING."); } - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("qualityFloat", "qf_alias", ValueType.FLOAT)) @@ -9224,8 +9188,7 @@ public class GroupByQueryRunnerTest expectedException.expectMessage("GroupBy v1 only supports dimensions with an outputType of STRING."); } - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("qualityDouble", "alias", ValueType.DOUBLE)) @@ -9277,6 +9240,9 @@ public class GroupByQueryRunnerTest @Test public void testGroupByFloatColumnWithExFn() { + // Cannot vectorize due to extraction dimension spec. 
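+ // (Same rationale as the other *WithExFn tests above: per-row extraction has no vector path here.)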
+ cannotVectorize(); + if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) { expectedException.expect(UnsupportedOperationException.class); expectedException.expectMessage("GroupBy v1 does not support dimension selectors with unknown cardinality."); @@ -9285,8 +9251,7 @@ public class GroupByQueryRunnerTest String jsFn = "function(str) { return 'super-' + str; }"; ExtractionFn jsExtractionFn = new JavaScriptExtractionFn(jsFn, false, JavaScriptConfig.getEnabledInstance()); - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new ExtractionDimensionSpec("index", "index_alias", jsExtractionFn)) @@ -9330,8 +9295,7 @@ public class GroupByQueryRunnerTest expectedException.expectMessage("GroupBy v1 only supports dimensions with an outputType of STRING."); } - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions( @@ -9387,8 +9351,7 @@ public class GroupByQueryRunnerTest expectedException.expectMessage("GroupBy v1 does not support dimension selectors with unknown cardinality."); } - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions( @@ -9436,8 +9399,7 @@ public class GroupByQueryRunnerTest expectedException.expectMessage("GroupBy v1 does not support dimension selectors with unknown cardinality."); } - GroupByQuery subquery = GroupByQuery - .builder() + GroupByQuery subquery = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions( @@ -9450,8 +9412,7 @@ public class GroupByQueryRunnerTest .setGranularity(QueryRunnerTestHelper.dayGran) .build(); - GroupByQuery outerQuery = GroupByQuery - .builder() + GroupByQuery outerQuery = makeQueryBuilder() .setDataSource(subquery) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions( @@ -9489,6 +9450,9 @@ public class GroupByQueryRunnerTest @Test public void testGroupByNumericStringsAsNumericWithDecoration() { + // Cannot vectorize due to regex-filtered dimension spec. + cannotVectorize(); + if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) { expectedException.expect(UnsupportedOperationException.class); expectedException.expectMessage("GroupBy v1 only supports dimensions with an outputType of STRING."); @@ -9506,8 +9470,7 @@ public class GroupByQueryRunnerTest true ); - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(regexSpec, listFilteredSpec) @@ -9541,6 +9504,9 @@ public class GroupByQueryRunnerTest @Test public void testGroupByDecorationOnNumerics() { + // Cannot vectorize due to filtered dimension spec. 
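+ // (Filtered, i.e. decorated, dimension specs post-process selector values per row; presumably no + // vectorized form of that decoration exists at this point.)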
+ cannotVectorize(); + if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) { expectedException.expect(UnsupportedOperationException.class); expectedException.expectMessage("GroupBy v1 only supports dimensions with an outputType of STRING."); @@ -9557,8 +9523,7 @@ public class GroupByQueryRunnerTest true ); - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(regexSpec, listFilteredSpec) @@ -9611,8 +9576,7 @@ public class GroupByQueryRunnerTest expectedException.expectMessage("GroupBy v1 only supports dimensions with an outputType of STRING."); } - GroupByQuery subquery = GroupByQuery - .builder() + GroupByQuery subquery = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions( @@ -9630,8 +9594,7 @@ public class GroupByQueryRunnerTest .setGranularity(QueryRunnerTestHelper.dayGran) .build(); - GroupByQuery outerQuery = GroupByQuery - .builder() + GroupByQuery outerQuery = makeQueryBuilder() .setDataSource(subquery) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions( @@ -9684,8 +9647,7 @@ public class GroupByQueryRunnerTest expectedException.expectMessage("GroupBy v1 only supports dimensions with an outputType of STRING."); } - GroupByQuery subQuery = GroupByQuery - .builder() + GroupByQuery subQuery = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions( @@ -9696,8 +9658,7 @@ public class GroupByQueryRunnerTest .setGranularity(QueryRunnerTestHelper.allGran) .build(); - GroupByQuery outerQuery = GroupByQuery - .builder() + GroupByQuery outerQuery = makeQueryBuilder() .setDataSource(subQuery) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions( @@ -9763,6 +9724,9 @@ public class GroupByQueryRunnerTest @Test public void testGroupByStringOutputAsLong() { + // Cannot vectorize due to extraction dimension spec. + cannotVectorize(); + if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) { expectedException.expect(UnsupportedOperationException.class); expectedException.expectMessage("GroupBy v1 only supports dimensions with an outputType of STRING."); @@ -9770,8 +9734,7 @@ public class GroupByQueryRunnerTest ExtractionFn strlenFn = StrlenExtractionFn.instance(); - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new ExtractionDimensionSpec( @@ -9813,8 +9776,10 @@ public class GroupByQueryRunnerTest @Test public void testGroupByWithAggsOnNumericDimensions() { - GroupByQuery query = GroupByQuery - .builder() + // Cannot vectorize due to javascript aggregators. + cannotVectorize(); + + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -9877,6 +9842,9 @@ public class GroupByQueryRunnerTest @Test public void testGroupByNestedOuterExtractionFnOnFloatInner() { + // Cannot vectorize due to extraction dimension spec. 
+ cannotVectorize(); + if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) { expectedException.expect(UnsupportedOperationException.class); expectedException.expectMessage("GroupBy v1 only supports dimensions with an outputType of STRING."); @@ -9885,8 +9853,7 @@ public class GroupByQueryRunnerTest String jsFn = "function(obj) { return obj; }"; ExtractionFn jsExtractionFn = new JavaScriptExtractionFn(jsFn, false, JavaScriptConfig.getEnabledInstance()); - GroupByQuery subquery = GroupByQuery - .builder() + GroupByQuery subquery = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("quality", "alias"), new ExtractionDimensionSpec( @@ -9900,8 +9867,7 @@ public class GroupByQueryRunnerTest .setGranularity(QueryRunnerTestHelper.dayGran) .build(); - GroupByQuery outerQuery = GroupByQuery - .builder() + GroupByQuery outerQuery = makeQueryBuilder() .setDataSource(subquery) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("alias", "alias"), new ExtractionDimensionSpec( @@ -9929,13 +9895,15 @@ public class GroupByQueryRunnerTest @Test public void testGroupByNestedDoubleTimeExtractionFnWithLongOutputTypes() { + // Cannot vectorize due to extraction dimension spec. + cannotVectorize(); + if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) { expectedException.expect(UnsupportedOperationException.class); expectedException.expectMessage("GroupBy v1 only supports dimensions with an outputType of STRING."); } - GroupByQuery subquery = GroupByQuery - .builder() + GroupByQuery subquery = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions( @@ -9952,8 +9920,7 @@ public class GroupByQueryRunnerTest .setGranularity(QueryRunnerTestHelper.dayGran) .build(); - GroupByQuery outerQuery = GroupByQuery - .builder() + GroupByQuery outerQuery = makeQueryBuilder() .setDataSource(subquery) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("alias", "alias"), new ExtractionDimensionSpec( @@ -9984,7 +9951,7 @@ public class GroupByQueryRunnerTest if (!config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V2)) { return; } - GroupByQuery query = new GroupByQuery.Builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setGranularity(QueryRunnerTestHelper.allGran).setDimensions(new DefaultDimensionSpec( QueryRunnerTestHelper.marketDimension, @@ -10000,7 +9967,7 @@ public class GroupByQueryRunnerTest 2 ) ).setAggregatorSpecs(QueryRunnerTestHelper.rowsCount) - .setContext(ImmutableMap.of(GroupByQueryConfig.CTX_KEY_FORCE_LIMIT_PUSH_DOWN, true)) + .overrideContext(ImmutableMap.of(GroupByQueryConfig.CTX_KEY_FORCE_LIMIT_PUSH_DOWN, true)) .build(); List expectedResults = Arrays.asList( @@ -10030,8 +9997,7 @@ public class GroupByQueryRunnerTest if (!config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V2)) { return; } - GroupByQuery.Builder builder = GroupByQuery - .builder() + GroupByQuery.Builder builder = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setInterval("2011-04-02/2011-04-04") .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -10042,7 +10008,7 @@ public class GroupByQueryRunnerTest 5 ) ) - .setContext(ImmutableMap.of(GroupByQueryConfig.CTX_KEY_FORCE_LIMIT_PUSH_DOWN, true)) + 
.overrideContext(ImmutableMap.of(GroupByQueryConfig.CTX_KEY_FORCE_LIMIT_PUSH_DOWN, true)) .setGranularity(Granularities.ALL); final GroupByQuery allGranQuery = builder.build(); @@ -10097,8 +10063,7 @@ public class GroupByQueryRunnerTest if (!config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V2)) { return; } - GroupByQuery.Builder builder = GroupByQuery - .builder() + GroupByQuery.Builder builder = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setInterval("2011-04-02/2011-04-04") .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -10109,7 +10074,7 @@ public class GroupByQueryRunnerTest 5 ) ) - .setContext(ImmutableMap.of(GroupByQueryConfig.CTX_KEY_FORCE_LIMIT_PUSH_DOWN, true)) + .overrideContext(ImmutableMap.of(GroupByQueryConfig.CTX_KEY_FORCE_LIMIT_PUSH_DOWN, true)) .setGranularity(Granularities.ALL); final GroupByQuery allGranQuery = builder.build(); @@ -10162,8 +10127,7 @@ public class GroupByQueryRunnerTest if (!config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V2)) { return; } - GroupByQuery.Builder builder = GroupByQuery - .builder() + GroupByQuery.Builder builder = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setInterval("2011-04-02/2011-04-04") .setDimensions(new DefaultDimensionSpec("quality", "alias"), new DefaultDimensionSpec("market", "market")) @@ -10177,7 +10141,7 @@ public class GroupByQueryRunnerTest 5 ) ) - .setContext( + .overrideContext( ImmutableMap.of(GroupByQueryConfig.CTX_KEY_FORCE_LIMIT_PUSH_DOWN, true) ) .setGranularity(Granularities.ALL); @@ -10282,8 +10246,7 @@ public class GroupByQueryRunnerTest if (!config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V2)) { return; } - GroupByQuery.Builder builder = GroupByQuery - .builder() + GroupByQuery.Builder builder = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setInterval("2011-04-02/2011-04-04") .setDimensions(new DefaultDimensionSpec("quality", "alias"), new DefaultDimensionSpec("market", "market")) @@ -10307,7 +10270,7 @@ public class GroupByQueryRunnerTest 5 ) ) - .setContext( + .overrideContext( ImmutableMap.of( GroupByQueryConfig.CTX_KEY_FORCE_LIMIT_PUSH_DOWN, true @@ -10415,7 +10378,7 @@ public class GroupByQueryRunnerTest expectedException.expect(UnsupportedOperationException.class); expectedException.expectMessage("Limit push down when sorting by a post aggregator is not supported."); - GroupByQuery query = new GroupByQuery.Builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setGranularity(QueryRunnerTestHelper.allGran).setDimensions(new DefaultDimensionSpec( QueryRunnerTestHelper.marketDimension, @@ -10434,7 +10397,7 @@ public class GroupByQueryRunnerTest .setPostAggregatorSpecs( Collections.singletonList(new ConstantPostAggregator("constant", 1)) ) - .setContext( + .overrideContext( ImmutableMap.of( GroupByQueryConfig.CTX_KEY_FORCE_LIMIT_PUSH_DOWN, true @@ -10448,8 +10411,7 @@ public class GroupByQueryRunnerTest @Test public void testEmptySubqueryWithLimitPushDown() { - GroupByQuery subquery = GroupByQuery - .builder() + GroupByQuery subquery = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.emptyInterval) .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -10466,8 +10428,7 @@ public class GroupByQueryRunnerTest .setGranularity(QueryRunnerTestHelper.dayGran) .build(); - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = 
makeQueryBuilder() .setDataSource(subquery) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setAggregatorSpecs(new DoubleMaxAggregatorFactory("idx", "idx")) @@ -10483,8 +10444,7 @@ @Test public void testSubqueryWithMultipleIntervalsInOuterQueryWithLimitPushDown() { - GroupByQuery subquery = GroupByQuery - .builder() + GroupByQuery subquery = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -10508,8 +10468,7 @@ .setGranularity(QueryRunnerTestHelper.dayGran) .build(); - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(subquery) .setQuerySegmentSpec( new MultipleIntervalSegmentSpec( @@ -10559,8 +10518,7 @@ expectedException.expect(IAE.class); expectedException.expectMessage("Cannot force limit push down when a having spec is present."); - GroupByQuery - .builder() + makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setGranularity(QueryRunnerTestHelper.allGran) .setDimensions(new DefaultDimensionSpec(QueryRunnerTestHelper.marketDimension, "marketalias")) @@ -10572,7 +10530,7 @@ ) ) .setAggregatorSpecs(QueryRunnerTestHelper.rowsCount) - .setContext(ImmutableMap.of(GroupByQueryConfig.CTX_KEY_FORCE_LIMIT_PUSH_DOWN, true)) + .overrideContext(ImmutableMap.of(GroupByQueryConfig.CTX_KEY_FORCE_LIMIT_PUSH_DOWN, true)) .setHavingSpec(new GreaterThanHavingSpec("rows", 10)) .build(); } @@ -10580,13 +10538,15 @@ @Test public void testTypeConversionWithMergingChainedExecutionRunner() { + // Cannot vectorize due to extraction dimension spec. + cannotVectorize(); + if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) { expectedException.expect(UnsupportedOperationException.class); expectedException.expectMessage("GroupBy v1 only supports dimensions with an outputType of STRING."); } - GroupByQuery query = GroupByQuery - .builder() + GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions( @@ -10636,4 +10596,38 @@ Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, mergingRunner, query); TestHelper.assertExpectedObjects(expectedResults, results, "type-conversion"); } + + /** + * Use this method instead of GroupByQuery.builder() to make sure the context is set properly. Also, avoid + * setContext in tests. Only use overrideContext. + */ + private GroupByQuery.Builder makeQueryBuilder() + { + return GroupByQuery.builder().overrideContext(makeContext()); + } + + /** + * Use this method instead of new GroupByQuery.Builder(query) to make sure the context is set properly. Also, avoid + * setContext in tests. Only use overrideContext. + */ + private GroupByQuery.Builder makeQueryBuilder(final GroupByQuery query) + { + return new GroupByQuery.Builder(query).overrideContext(makeContext()); + } + + private Map<String, Object> makeContext() + { + return ImmutableMap.<String, Object>builder() + .put("vectorize", vectorize ? "force" : "false") + .put("vectorSize", 16) // Small vector size to ensure we use more than one.
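+ // (16 is far below the row count of the test segments, so every vectorized query has to cross + // batch boundaries rather than finishing in a single batch.)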
+ .build(); + } + + private void cannotVectorize() + { + if (vectorize && config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V2)) { + expectedException.expect(RuntimeException.class); + expectedException.expectMessage("Cannot vectorize!"); + } + } } diff --git a/processing/src/test/java/org/apache/druid/query/groupby/GroupByTimeseriesQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/groupby/GroupByTimeseriesQueryRunnerTest.java index b1770919dd5..464fb6b00c4 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/GroupByTimeseriesQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/GroupByTimeseriesQueryRunnerTest.java @@ -21,7 +21,6 @@ package org.apache.druid.query.groupby; import com.google.common.base.Function; import com.google.common.collect.ImmutableList; -import com.google.common.collect.Lists; import org.apache.druid.data.input.MapBasedRow; import org.apache.druid.data.input.Row; import org.apache.druid.java.util.common.DateTimes; @@ -52,6 +51,8 @@ import org.junit.runner.RunWith; import org.junit.runners.Parameterized; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; import java.util.Map; /** @@ -69,7 +70,7 @@ public class GroupByTimeseriesQueryRunnerTest extends TimeseriesQueryRunnerTest } @SuppressWarnings("unchecked") - @Parameterized.Parameters(name = "{0}") + @Parameterized.Parameters(name = "{0}, vectorize = {1}") public static Iterable constructorFeeder() { GroupByQueryConfig config = new GroupByQueryConfig(); @@ -79,74 +80,76 @@ public class GroupByTimeseriesQueryRunnerTest extends TimeseriesQueryRunnerTest ); final GroupByQueryRunnerFactory factory = factoryAndCloser.lhs; resourceCloser.register(factoryAndCloser.rhs); - return QueryRunnerTestHelper.transformToConstructionFeeder( - Lists.transform( - QueryRunnerTestHelper.makeQueryRunners(factory), - new Function, Object>() - { - @Override - public Object apply(final QueryRunner input) + + final List constructors = new ArrayList<>(); + + for (QueryRunner runner : QueryRunnerTestHelper.makeQueryRunners(factory)) { + final QueryRunner modifiedRunner = new QueryRunner() + { + @Override + public Sequence run(QueryPlus queryPlus, Map responseContext) + { + TimeseriesQuery tsQuery = (TimeseriesQuery) queryPlus.getQuery(); + QueryRunner newRunner = factory.mergeRunners( + Execs.directExecutor(), ImmutableList.of(runner) + ); + QueryToolChest toolChest = factory.getToolchest(); + + newRunner = new FinalizeResultsQueryRunner<>( + toolChest.mergeResults(toolChest.preMergeQueryDecoration(newRunner)), + toolChest + ); + + GroupByQuery newQuery = GroupByQuery + .builder() + .setDataSource(tsQuery.getDataSource()) + .setQuerySegmentSpec(tsQuery.getQuerySegmentSpec()) + .setGranularity(tsQuery.getGranularity()) + .setDimFilter(tsQuery.getDimensionsFilter()) + .setAggregatorSpecs(tsQuery.getAggregatorSpecs()) + .setPostAggregatorSpecs(tsQuery.getPostAggregatorSpecs()) + .setVirtualColumns(tsQuery.getVirtualColumns()) + .setContext(tsQuery.getContext()) + .build(); + + return Sequences.map( + newRunner.run(queryPlus.withQuery(newQuery), responseContext), + new Function>() { - return new QueryRunner() + @Override + public Result apply(final Row input) { - @Override - public Sequence run(QueryPlus queryPlus, Map responseContext) - { - TimeseriesQuery tsQuery = (TimeseriesQuery) queryPlus.getQuery(); - QueryRunner newRunner = factory.mergeRunners( - Execs.directExecutor(), ImmutableList.of(input) - ); - QueryToolChest toolChest = 
factory.getToolchest(); + MapBasedRow row = (MapBasedRow) input; - newRunner = new FinalizeResultsQueryRunner<>( - toolChest.mergeResults(toolChest.preMergeQueryDecoration(newRunner)), - toolChest - ); - - GroupByQuery newQuery = GroupByQuery - .builder() - .setDataSource(tsQuery.getDataSource()) - .setQuerySegmentSpec(tsQuery.getQuerySegmentSpec()) - .setGranularity(tsQuery.getGranularity()) - .setDimFilter(tsQuery.getDimensionsFilter()) - .setAggregatorSpecs(tsQuery.getAggregatorSpecs()) - .setPostAggregatorSpecs(tsQuery.getPostAggregatorSpecs()) - .setVirtualColumns(tsQuery.getVirtualColumns()) - .setContext(tsQuery.getContext()) - .build(); - - return Sequences.map( - newRunner.run(queryPlus.withQuery(newQuery), responseContext), - new Function>() - { - @Override - public Result apply(final Row input) - { - MapBasedRow row = (MapBasedRow) input; - - return new Result<>( - row.getTimestamp(), new TimeseriesResultValue(row.getEvent()) - ); - } - } - ); - } - - @Override - public String toString() - { - return input.toString(); - } - }; + return new Result<>( + row.getTimestamp(), new TimeseriesResultValue(row.getEvent()) + ); + } } - } - ) - ); + ); + } + + @Override + public String toString() + { + return runner.toString(); + } + }; + + for (boolean vectorize : ImmutableList.of(false, true)) { + // Add vectorization tests for any indexes that support it. + if (!vectorize || QueryRunnerTestHelper.isTestRunnerVectorizable(runner)) { + constructors.add(new Object[]{modifiedRunner, vectorize}); + } + } + } + + return constructors; } - public GroupByTimeseriesQueryRunnerTest(QueryRunner runner) + public GroupByTimeseriesQueryRunnerTest(QueryRunner runner, boolean vectorize) { - super(runner, false, QueryRunnerTestHelper.commonDoubleAggregators); + super(runner, false, vectorize, QueryRunnerTestHelper.commonDoubleAggregators); } // GroupBy handles timestamps differently when granularity is ALL diff --git a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/BufferArrayGrouperTest.java b/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/BufferArrayGrouperTest.java index 8f2d3f69a57..98d46fc9d12 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/BufferArrayGrouperTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/BufferArrayGrouperTest.java @@ -27,6 +27,7 @@ import com.google.common.collect.Ordering; import com.google.common.primitives.Ints; import org.apache.druid.common.config.NullHandling; import org.apache.druid.data.input.MapBasedRow; +import org.apache.druid.query.aggregation.AggregatorAdapters; import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.aggregation.CountAggregatorFactory; import org.apache.druid.query.aggregation.LongSumAggregatorFactory; @@ -44,7 +45,7 @@ public class BufferArrayGrouperTest public void testAggregate() { final TestColumnSelectorFactory columnSelectorFactory = GrouperTestUtil.newColumnSelectorFactory(); - final IntGrouper grouper = newGrouper(columnSelectorFactory, 1024); + final IntGrouper grouper = newGrouper(columnSelectorFactory, 32768); columnSelectorFactory.setRow(new MapBasedRow(0, ImmutableMap.of("value", 10L))); grouper.aggregate(12); @@ -77,11 +78,13 @@ public class BufferArrayGrouperTest final BufferArrayGrouper grouper = new BufferArrayGrouper( Suppliers.ofInstance(buffer), - columnSelectorFactory, - new AggregatorFactory[]{ - new LongSumAggregatorFactory("valueSum", "value"), - new 
CountAggregatorFactory("count") - }, + AggregatorAdapters.factorizeBuffered( + columnSelectorFactory, + ImmutableList.of( + new LongSumAggregatorFactory("valueSum", "value"), + new CountAggregatorFactory("count") + ) + ), 1000 ); grouper.init(); diff --git a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouperTest.java b/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouperTest.java index 64b270d0f4d..a2275f7f32f 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouperTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouperTest.java @@ -27,7 +27,7 @@ import com.google.common.collect.Ordering; import com.google.common.primitives.Ints; import org.apache.druid.common.config.NullHandling; import org.apache.druid.data.input.MapBasedRow; -import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.AggregatorAdapters; import org.apache.druid.query.aggregation.CountAggregatorFactory; import org.apache.druid.query.aggregation.LongSumAggregatorFactory; import org.apache.druid.segment.CloserRule; @@ -56,11 +56,13 @@ public class BufferHashGrouperTest final Grouper grouper = new BufferHashGrouper<>( Suppliers.ofInstance(ByteBuffer.allocate(1000)), GrouperTestUtil.intKeySerde(), - columnSelectorFactory, - new AggregatorFactory[]{ - new LongSumAggregatorFactory("valueSum", "value"), - new CountAggregatorFactory("count") - }, + AggregatorAdapters.factorizeBuffered( + columnSelectorFactory, + ImmutableList.of( + new LongSumAggregatorFactory("valueSum", "value"), + new CountAggregatorFactory("count") + ) + ), Integer.MAX_VALUE, 0, 0, @@ -187,11 +189,13 @@ public class BufferHashGrouperTest final BufferHashGrouper grouper = new BufferHashGrouper<>( Suppliers.ofInstance(buffer), GrouperTestUtil.intKeySerde(), - columnSelectorFactory, - new AggregatorFactory[]{ - new LongSumAggregatorFactory("valueSum", "value"), - new CountAggregatorFactory("count") - }, + AggregatorAdapters.factorizeBuffered( + columnSelectorFactory, + ImmutableList.of( + new LongSumAggregatorFactory("valueSum", "value"), + new CountAggregatorFactory("count") + ) + ), Integer.MAX_VALUE, maxLoadFactor, initialBuckets, diff --git a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouperTest.java b/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouperTest.java index 4ce6f4593ea..70e1abcde4a 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouperTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouperTest.java @@ -20,12 +20,13 @@ package org.apache.druid.query.groupby.epinephelinae; import com.google.common.base.Suppliers; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import org.apache.druid.common.config.NullHandling; import org.apache.druid.data.input.MapBasedRow; import org.apache.druid.java.util.common.IAE; -import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.AggregatorAdapters; import org.apache.druid.query.aggregation.CountAggregatorFactory; import org.apache.druid.query.aggregation.LongSumAggregatorFactory; import org.junit.Assert; @@ -202,11 +203,13 @@ public class LimitedBufferHashGrouperTest 
LimitedBufferHashGrouper grouper = new LimitedBufferHashGrouper<>( Suppliers.ofInstance(ByteBuffer.allocate(bufferSize)), GrouperTestUtil.intKeySerde(), - columnSelectorFactory, - new AggregatorFactory[]{ - new LongSumAggregatorFactory("valueSum", "value"), - new CountAggregatorFactory("count") - }, + AggregatorAdapters.factorizeBuffered( + columnSelectorFactory, + ImmutableList.of( + new LongSumAggregatorFactory("valueSum", "value"), + new CountAggregatorFactory("count") + ) + ), Integer.MAX_VALUE, 0.5f, initialBuckets, diff --git a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java index 5c613acf658..e0b45c5ecf2 100644 --- a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java @@ -212,7 +212,7 @@ public class SegmentMetadataQueryTest "placement", new ColumnAnalysis( ValueType.STRING.toString(), - false, + !mmap1, preferedSize1, 1, "preferred", @@ -253,7 +253,7 @@ public class SegmentMetadataQueryTest "placement", new ColumnAnalysis( ValueType.STRING.toString(), - false, + !mmap2, placementSize2, 1, null, @@ -299,7 +299,7 @@ public class SegmentMetadataQueryTest "placement", new ColumnAnalysis( ValueType.STRING.toString(), - false, + !mmap1 || !mmap2, 0, 0, null, @@ -367,7 +367,7 @@ public class SegmentMetadataQueryTest "placement", new ColumnAnalysis( ValueType.STRING.toString(), - false, + !mmap1 || !mmap2, 0, 1, null, @@ -435,7 +435,7 @@ public class SegmentMetadataQueryTest "placement", new ColumnAnalysis( ValueType.STRING.toString(), - false, + !mmap1 || !mmap2, 0, 1, null, @@ -504,7 +504,7 @@ public class SegmentMetadataQueryTest } ColumnAnalysis analysis = new ColumnAnalysis( ValueType.STRING.toString(), - false, + !mmap1 || !mmap2, size1 + size2, 1, "preferred", @@ -525,7 +525,7 @@ public class SegmentMetadataQueryTest } ColumnAnalysis analysis = new ColumnAnalysis( ValueType.STRING.toString(), - false, + !mmap1 || !mmap2, size1 + size2, 3, "spot", @@ -546,7 +546,7 @@ public class SegmentMetadataQueryTest } ColumnAnalysis analysis = new ColumnAnalysis( ValueType.STRING.toString(), - false, + !mmap1 || !mmap2, size1 + size2, 9, "automotive", @@ -632,7 +632,7 @@ public class SegmentMetadataQueryTest "placement", new ColumnAnalysis( ValueType.STRING.toString(), - false, + !mmap1 || !mmap2, 0, 0, null, @@ -694,7 +694,7 @@ public class SegmentMetadataQueryTest "placement", new ColumnAnalysis( ValueType.STRING.toString(), - false, + !mmap1 || !mmap2, 0, 0, null, @@ -752,7 +752,7 @@ public class SegmentMetadataQueryTest "placement", new ColumnAnalysis( ValueType.STRING.toString(), - false, + !mmap1 || !mmap2, 0, 0, null, @@ -810,7 +810,7 @@ public class SegmentMetadataQueryTest "placement", new ColumnAnalysis( ValueType.STRING.toString(), - false, + !mmap1 || !mmap2, 0, 0, null, diff --git a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataUnionQueryTest.java b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataUnionQueryTest.java index 15588a9fdae..c1b37b21581 100644 --- a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataUnionQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataUnionQueryTest.java @@ -97,7 +97,7 @@ public class SegmentMetadataUnionQueryTest "placement", new ColumnAnalysis( ValueType.STRING.toString(), - false, + 
!mmap, mmap ? 43524 : 43056, 1, "preferred", diff --git a/processing/src/test/java/org/apache/druid/query/spec/SpecificSegmentQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/spec/SpecificSegmentQueryRunnerTest.java index af64fe2706f..90f9730bc8d 100644 --- a/processing/src/test/java/org/apache/druid/query/spec/SpecificSegmentQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/spec/SpecificSegmentQueryRunnerTest.java @@ -144,7 +144,7 @@ public class SpecificSegmentQueryRunnerTest ); CountAggregator rows = new CountAggregator(); rows.aggregate(); - builder.addMetric("rows", rows); + builder.addMetric("rows", rows.get()); final Result value = builder.build(); final SpecificSegmentQueryRunner queryRunner = new SpecificSegmentQueryRunner( diff --git a/processing/src/test/java/org/apache/druid/query/timeseries/TimeseriesQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/timeseries/TimeseriesQueryRunnerTest.java index c24fc72c0da..4c8d91c0fe5 100644 --- a/processing/src/test/java/org/apache/druid/query/timeseries/TimeseriesQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/timeseries/TimeseriesQueryRunnerTest.java @@ -66,7 +66,9 @@ import org.joda.time.DateTimeZone; import org.joda.time.Interval; import org.joda.time.Period; import org.junit.Assert; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.ExpectedException; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -76,19 +78,23 @@ import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; /** */ @RunWith(Parameterized.class) public class TimeseriesQueryRunnerTest { - public static final Map CONTEXT = ImmutableMap.of(); - @Parameterized.Parameters(name = "{0}:descending={1}") + @Rule + public ExpectedException expectedException = ExpectedException.none(); + + @Parameterized.Parameters(name = "{0}:descending={1},vectorize={2}") public static Iterable constructorFeeder() { - return QueryRunnerTestHelper.cartesian( + final Iterable baseConstructors = QueryRunnerTestHelper.cartesian( // runners QueryRunnerTestHelper.makeQueryRunners( new TimeseriesQueryRunnerFactory( @@ -101,8 +107,25 @@ public class TimeseriesQueryRunnerTest ), // descending? Arrays.asList(false, true), + // vectorize? + Arrays.asList(false, true), + // double vs. float Arrays.asList(QueryRunnerTestHelper.commonDoubleAggregators, QueryRunnerTestHelper.commonFloatAggregators) ); + + // Add vectorization tests for any indexes that support it. 
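+ // (The filter below also drops descending runs: the vectorized cursors in this patch appear to + // support ascending time order only.)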
+ return StreamSupport + .stream(baseConstructors.spliterator(), false) + .filter( + constructor -> { + boolean canVectorize = + QueryRunnerTestHelper.isTestRunnerVectorizable((QueryRunner) constructor[0]) + && !(boolean) constructor[1] /* descending */; + final boolean vectorize = (boolean) constructor[2]; /* vectorize */ + return !vectorize || canVectorize; + } + ) + .collect(Collectors.toList()); } private void assertExpectedResults(Iterable> expectedResults, Iterable> results) @@ -115,22 +138,28 @@ public class TimeseriesQueryRunnerTest protected final QueryRunner runner; protected final boolean descending; + protected final boolean vectorize; private final List aggregatorFactoryList; public TimeseriesQueryRunnerTest( QueryRunner runner, boolean descending, + boolean vectorize, List aggregatorFactoryList ) { this.runner = runner; this.descending = descending; + this.vectorize = vectorize; this.aggregatorFactoryList = aggregatorFactoryList; } @Test public void testEmptyTimeseries() { + // Cannot vectorize due to "doubleFirst" aggregator. + cannotVectorize(); + TimeseriesQuery query = Druids.newTimeseriesQueryBuilder() .dataSource(QueryRunnerTestHelper.dataSource) .granularity(QueryRunnerTestHelper.allGran) @@ -144,6 +173,7 @@ public class TimeseriesQueryRunnerTest ) ) .descending(descending) + .context(makeContext()) .build(); Map resultMap = new HashMap<>(); resultMap.put("rows", 0L); @@ -177,6 +207,7 @@ public class TimeseriesQueryRunnerTest ) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); Iterable> results = runner.run(QueryPlus.wrap(query), CONTEXT).toList(); @@ -277,6 +308,7 @@ public class TimeseriesQueryRunnerTest .granularity(gran) .intervals(QueryRunnerTestHelper.fullOnIntervalSpec) .descending(descending) + .context(makeContext()) .build(); Iterable> results = runner.run(QueryPlus.wrap(query), CONTEXT).toList(); @@ -302,6 +334,9 @@ public class TimeseriesQueryRunnerTest @Test public void testFullOnTimeseriesMaxMin() { + // Cannot vectorize due to "doubleMin", "doubleMax" aggregators. + cannotVectorize(); + TimeseriesQuery query = Druids.newTimeseriesQueryBuilder() .dataSource(QueryRunnerTestHelper.dataSource) .granularity(Granularities.ALL) @@ -313,6 +348,7 @@ public class TimeseriesQueryRunnerTest ) ) .descending(descending) + .context(makeContext()) .build(); DateTime expectedEarliest = DateTimes.of("2011-01-12"); @@ -349,6 +385,7 @@ public class TimeseriesQueryRunnerTest ) ) .descending(descending) + .context(makeContext()) .build(); Assert.assertEquals( @@ -405,6 +442,7 @@ public class TimeseriesQueryRunnerTest ) ) .descending(descending) + .context(makeContext()) .build(); List> expectedResults = Arrays.asList( @@ -575,6 +613,9 @@ public class TimeseriesQueryRunnerTest @Test public void testTimeseriesWithVirtualColumn() { + // Cannot vectorize due to virtual columns. 
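+ // (Virtual columns are assumed to have no vectorized selector implementations here, so forcing + // vectorization is expected to fail rather than silently fall back.)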
+ cannotVectorize(); + TimeseriesQuery query = Druids.newTimeseriesQueryBuilder() .dataSource(QueryRunnerTestHelper.dataSource) .granularity(QueryRunnerTestHelper.dayGran) @@ -595,6 +636,7 @@ public class TimeseriesQueryRunnerTest TestExprMacroTable.INSTANCE ) ) + .context(makeContext()) .build(); List> expectedResults = Arrays.asList( @@ -640,6 +682,7 @@ public class TimeseriesQueryRunnerTest ) ) .descending(descending) + .context(makeContext()) .build(); List> expectedResults = Arrays.asList( @@ -684,6 +727,7 @@ public class TimeseriesQueryRunnerTest ) ) .descending(descending) + .context(makeContext()) .build(); List> expectedResults1 = Collections.singletonList( @@ -716,6 +760,7 @@ public class TimeseriesQueryRunnerTest QueryRunnerTestHelper.qualityUniques ) ) + .context(makeContext()) .build(); List> expectedResults2 = Collections.singletonList( @@ -759,6 +804,7 @@ public class TimeseriesQueryRunnerTest ) ) .descending(descending) + .context(makeContext()) .build(); List> expectedResults1 = Arrays.asList( @@ -802,6 +848,7 @@ public class TimeseriesQueryRunnerTest ) ) .descending(descending) + .context(makeContext()) .build(); List> lotsOfZeroes = new ArrayList<>(); @@ -870,6 +917,7 @@ public class TimeseriesQueryRunnerTest ) ) .descending(descending) + .context(makeContext()) .build(); List> expectedResults1 = Collections.singletonList( @@ -908,6 +956,7 @@ public class TimeseriesQueryRunnerTest ) ) .descending(descending) + .context(makeContext()) .build(); List> expectedResults1 = Collections.singletonList( @@ -940,6 +989,7 @@ public class TimeseriesQueryRunnerTest QueryRunnerTestHelper.qualityUniques ) ) + .context(makeContext()) .build(); List> expectedResults2 = Collections.singletonList( @@ -976,6 +1026,7 @@ public class TimeseriesQueryRunnerTest ) ) .descending(descending) + .context(makeContext()) .build(); List> expectedResults = Collections.emptyList(); @@ -999,6 +1050,7 @@ public class TimeseriesQueryRunnerTest ) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); List> expectedResults = Arrays.asList( @@ -1036,16 +1088,21 @@ public class TimeseriesQueryRunnerTest TimeseriesQuery query = Druids.newTimeseriesQueryBuilder() .dataSource(QueryRunnerTestHelper.dataSource) .granularity(QueryRunnerTestHelper.dayGran) - .filters(new RegexDimFilter(QueryRunnerTestHelper.marketDimension, "^.p.*$", null)) // spot and upfront - .intervals(QueryRunnerTestHelper.firstToThird) - .aggregators( - QueryRunnerTestHelper.rowsCount, - QueryRunnerTestHelper.indexLongSum, - QueryRunnerTestHelper.qualityUniques - ) - .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) - .descending(descending) - .build(); + .filters(new RegexDimFilter( + QueryRunnerTestHelper.marketDimension, + "^.p.*$", + null + )) // spot and upfront + .intervals(QueryRunnerTestHelper.firstToThird) + .aggregators( + QueryRunnerTestHelper.rowsCount, + QueryRunnerTestHelper.indexLongSum, + QueryRunnerTestHelper.qualityUniques + ) + .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) + .descending(descending) + .context(makeContext()) + .build(); List> expectedResults = Arrays.asList( new Result<>( @@ -1091,6 +1148,7 @@ public class TimeseriesQueryRunnerTest ) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); List> expectedResults = Arrays.asList( @@ -1137,6 +1195,7 @@ public class TimeseriesQueryRunnerTest ) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) 
.descending(descending) + .context(makeContext()) .build(); List> expectedResults = Arrays.asList( @@ -1183,6 +1242,7 @@ public class TimeseriesQueryRunnerTest ) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); List> expectedResults = Arrays.asList( @@ -1229,6 +1289,7 @@ public class TimeseriesQueryRunnerTest .aggregators(aggregatorFactoryList) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); List> expectedResults = Arrays.asList( @@ -1275,6 +1336,7 @@ public class TimeseriesQueryRunnerTest .aggregators(aggregatorFactoryList) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); List> expectedResults = Arrays.asList( @@ -1321,6 +1383,7 @@ public class TimeseriesQueryRunnerTest .aggregators(QueryRunnerTestHelper.commonDoubleAggregators) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); List> expectedResults = Arrays.asList( @@ -1373,6 +1436,7 @@ public class TimeseriesQueryRunnerTest ) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); List> expectedResults = Arrays.asList( @@ -1431,6 +1495,7 @@ public class TimeseriesQueryRunnerTest ) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); List> expectedResults = Arrays.asList( @@ -1477,6 +1542,7 @@ public class TimeseriesQueryRunnerTest .aggregators(aggregatorFactoryList) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); List> expectedResults = Arrays.asList( @@ -1519,6 +1585,7 @@ public class TimeseriesQueryRunnerTest .aggregators(aggregatorFactoryList) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); Map resultMap = new HashMap<>(); @@ -1558,11 +1625,13 @@ public class TimeseriesQueryRunnerTest .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .context(ImmutableMap.of("skipEmptyBuckets", "true")) .descending(descending) + .context(makeContext(ImmutableMap.of("skipEmptyBuckets", "true"))) .build(); List> expectedResults = Collections.emptyList(); - Iterable> results = runner.run(QueryPlus.wrap(query), new HashMap()).toList(); + Iterable> results = runner.run(QueryPlus.wrap(query), new HashMap()) + .toList(); assertExpectedResults(expectedResults, results); } @@ -1577,6 +1646,7 @@ public class TimeseriesQueryRunnerTest .aggregators(aggregatorFactoryList) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); List> expectedResults = Arrays.asList( @@ -1604,7 +1674,8 @@ public class TimeseriesQueryRunnerTest ) ); - Iterable> results = runner.run(QueryPlus.wrap(query), new HashMap()).toList(); + Iterable> results = runner.run(QueryPlus.wrap(query), new HashMap()) + .toList(); assertExpectedResults(expectedResults, results); } @@ -1619,6 +1690,7 @@ public class TimeseriesQueryRunnerTest .aggregators(aggregatorFactoryList) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); List> expectedResults = Arrays.asList( @@ -1646,7 +1718,8 @@ public class TimeseriesQueryRunnerTest ) ); - Iterable> results = runner.run(QueryPlus.wrap(query), new 
HashMap()).toList(); + Iterable> results = runner.run(QueryPlus.wrap(query), new HashMap()) + .toList(); assertExpectedResults(expectedResults, results); } @@ -1661,6 +1734,7 @@ public class TimeseriesQueryRunnerTest .aggregators(aggregatorFactoryList) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); Map resultMap = new HashMap<>(); resultMap.put("rows", 0L); @@ -1702,6 +1776,7 @@ public class TimeseriesQueryRunnerTest .aggregators(aggregatorFactoryList) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); Map resultMap = new HashMap<>(); resultMap.put("rows", 0L); @@ -1731,6 +1806,9 @@ public class TimeseriesQueryRunnerTest @Test public void testTimeseriesWithMultiValueFilteringJavascriptAggregator() { + // Cannot vectorize due to JavaScript aggregators. + cannotVectorize(); + TimeseriesQuery query = Druids.newTimeseriesQueryBuilder() .dataSource(QueryRunnerTestHelper.dataSource) .granularity(QueryRunnerTestHelper.allGran) @@ -1743,6 +1821,7 @@ public class TimeseriesQueryRunnerTest ) ) .descending(descending) + .context(makeContext()) .build(); Iterable> expectedResults = ImmutableList.of( @@ -1765,6 +1844,9 @@ public class TimeseriesQueryRunnerTest @Test public void testTimeseriesWithMultiValueFilteringJavascriptAggregatorAndAlsoRegularFilters() { + // Cannot vectorize due to JavaScript aggregators. + cannotVectorize(); + TimeseriesQuery query = Druids.newTimeseriesQueryBuilder() .dataSource(QueryRunnerTestHelper.dataSource) .granularity(QueryRunnerTestHelper.allGran) @@ -1778,6 +1860,7 @@ public class TimeseriesQueryRunnerTest ) ) .descending(descending) + .context(makeContext()) .build(); List> expectedResults = ImmutableList.of( @@ -1800,6 +1883,9 @@ public class TimeseriesQueryRunnerTest @Test public void testTimeseriesWithFirstLastAggregator() { + // Cannot vectorize due to "doubleFirst", "doubleLast" aggregators. 
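+ // (First/last aggregators pair each value with its row timestamp; that pairing is presumably not + // covered by the vectorized aggregator adapters yet.)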
+ cannotVectorize(); + TimeseriesQuery query = Druids.newTimeseriesQueryBuilder() .dataSource(QueryRunnerTestHelper.dataSource) .granularity(QueryRunnerTestHelper.monthGran) @@ -1811,6 +1897,7 @@ public class TimeseriesQueryRunnerTest ) ) .descending(descending) + .context(makeContext()) .build(); // There's a difference between ascending and descending results since granularity of druid.sample.tsv is days, @@ -1914,6 +2001,7 @@ public class TimeseriesQueryRunnerTest .aggregators(aggregatorFactoryList) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); TimeseriesQuery query1 = Druids @@ -1924,6 +2012,7 @@ public class TimeseriesQueryRunnerTest .aggregators(aggregatorFactoryList) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); Iterable> expectedResults = runner.run(QueryPlus.wrap(query1), CONTEXT).toList(); Iterable> actualResults = runner.run(QueryPlus.wrap(query), CONTEXT).toList(); @@ -1942,6 +2031,7 @@ public class TimeseriesQueryRunnerTest .aggregators(aggregatorFactoryList) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); TimeseriesQuery query1 = Druids @@ -1953,6 +2043,7 @@ public class TimeseriesQueryRunnerTest .aggregators(aggregatorFactoryList) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); Iterable> expectedResults = runner.run(QueryPlus.wrap(query1), CONTEXT).toList(); Iterable> actualResults = runner.run(QueryPlus.wrap(query), CONTEXT).toList(); @@ -1975,6 +2066,7 @@ public class TimeseriesQueryRunnerTest .aggregators(aggregatorFactoryList) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); AndDimFilter andDimFilter2 = new AndDimFilter( @@ -1991,6 +2083,7 @@ public class TimeseriesQueryRunnerTest .aggregators(aggregatorFactoryList) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); Iterable> expectedResults = runner.run(QueryPlus.wrap(query2), CONTEXT).toList(); Iterable> actualResults = runner.run(QueryPlus.wrap(query), CONTEXT).toList(); @@ -2013,6 +2106,7 @@ public class TimeseriesQueryRunnerTest .aggregators(aggregatorFactoryList) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); AndDimFilter andDimFilter2 = new AndDimFilter( @@ -2029,6 +2123,7 @@ public class TimeseriesQueryRunnerTest .aggregators(aggregatorFactoryList) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); Iterable> expectedResults = runner.run(QueryPlus.wrap(query2), CONTEXT).toList(); Iterable> actualResults = runner.run(QueryPlus.wrap(query), CONTEXT).toList(); @@ -2048,14 +2143,15 @@ public class TimeseriesQueryRunnerTest Iterables.concat( aggregatorFactoryList, Collections.singletonList(new FilteredAggregatorFactory( - new CountAggregatorFactory("filteredAgg"), - new SelectorDimFilter(QueryRunnerTestHelper.marketDimension, "spot", null) - )) + new CountAggregatorFactory("filteredAgg"), + new SelectorDimFilter(QueryRunnerTestHelper.marketDimension, "spot", null) + )) ) ) ) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); Iterable> actualResults = 
runner.run(QueryPlus.wrap(query), CONTEXT).toList(); @@ -2100,6 +2196,7 @@ public class TimeseriesQueryRunnerTest ) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); Iterable> actualResults = runner.run(QueryPlus.wrap(query), CONTEXT).toList(); @@ -2145,6 +2242,7 @@ public class TimeseriesQueryRunnerTest ) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); Iterable> actualResults = runner.run(QueryPlus.wrap(query), CONTEXT).toList(); @@ -2192,6 +2290,7 @@ public class TimeseriesQueryRunnerTest ) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); Iterable> actualResults = runner.run(QueryPlus.wrap(query), CONTEXT).toList(); @@ -2236,6 +2335,7 @@ public class TimeseriesQueryRunnerTest ) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) .descending(descending) + .context(makeContext()) .build(); Iterable> actualResults = runner.run(QueryPlus.wrap(query), CONTEXT).toList(); @@ -2260,6 +2360,9 @@ public class TimeseriesQueryRunnerTest @Test public void testTimeseriesWithTimeColumn() { + // Cannot vectorize due to JavaScript aggregators. + cannotVectorize(); + TimeseriesQuery query = Druids.newTimeseriesQueryBuilder() .dataSource(QueryRunnerTestHelper.dataSource) .intervals(QueryRunnerTestHelper.firstToThird) @@ -2270,6 +2373,7 @@ public class TimeseriesQueryRunnerTest ) .granularity(QueryRunnerTestHelper.allGran) .descending(descending) + .context(makeContext()) .build(); List> expectedResults = Collections.singletonList( @@ -2342,6 +2446,7 @@ public class TimeseriesQueryRunnerTest QueryRunnerTestHelper.qualityUniques ) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) + .context(makeContext()) .build(); List> expectedResults = Arrays.asList( @@ -2385,7 +2490,11 @@ public class TimeseriesQueryRunnerTest .dataSource(QueryRunnerTestHelper.dataSource) .granularity(QueryRunnerTestHelper.dayGran) .filters( - new SelectorDimFilter(QueryRunnerTestHelper.marketDimension, "upfront", lookupExtractionFn) + new SelectorDimFilter( + QueryRunnerTestHelper.marketDimension, + "upfront", + lookupExtractionFn + ) ) .intervals(QueryRunnerTestHelper.firstToThird) .aggregators( @@ -2394,6 +2503,7 @@ public class TimeseriesQueryRunnerTest QueryRunnerTestHelper.qualityUniques ) .postAggregators(QueryRunnerTestHelper.addRowsIndexConstant) + .context(makeContext()) .build(); List> expectedResults = Arrays.asList( @@ -2451,6 +2561,7 @@ public class TimeseriesQueryRunnerTest ) .descending(descending) .limit(10) + .context(makeContext()) .build(); // Must create a toolChest so we can run mergeResults. @@ -2467,4 +2578,26 @@ public class TimeseriesQueryRunnerTest final List list = finalRunner.run(QueryPlus.wrap(query), CONTEXT).toList(); Assert.assertEquals(10, list.size()); } + + private Map makeContext() + { + return makeContext(ImmutableMap.of()); + } + + private Map makeContext(final Map myContext) + { + final Map context = new HashMap<>(); + context.put("vectorize", vectorize ? "force" : "false"); + context.put("vectorSize", 16); // Small vector size to ensure we use more than one. 
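+    // Note: "vectorize" accepts "false", "true", and "force". These tests use "force" so that a
+    // query with no vectorized implementation fails with "Cannot vectorize!" (see cannotVectorize()
+    // below) rather than silently falling back to the non-vectorized path.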
+ context.putAll(myContext); + return context; + } + + private void cannotVectorize() + { + if (vectorize) { + expectedException.expect(RuntimeException.class); + expectedException.expectMessage("Cannot vectorize!"); + } + } } diff --git a/processing/src/test/java/org/apache/druid/segment/QueryableIndexCursorSequenceBuilderTest.java b/processing/src/test/java/org/apache/druid/segment/QueryableIndexCursorSequenceBuilderTest.java new file mode 100644 index 00000000000..ea93e1b75c2 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/segment/QueryableIndexCursorSequenceBuilderTest.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment; + +import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.column.NumericColumn; +import org.apache.druid.segment.data.ReadableOffset; +import org.junit.Assert; +import org.junit.Test; + +public class QueryableIndexCursorSequenceBuilderTest +{ + @Test + public void testTimeSearch() + { + final int[] values = new int[]{0, 1, 1, 1, 1, 1, 1, 1, 5, 7, 10}; + final NumericColumn column = new NumericColumn() + { + @Override + public int length() + { + return values.length; + } + + @Override + public long getLongSingleValueRow(int rowNum) + { + return values[rowNum]; + } + + @Override + public void close() + { + throw new UnsupportedOperationException(); + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + throw new UnsupportedOperationException(); + } + + @Override + public ColumnValueSelector makeColumnValueSelector(ReadableOffset offset) + { + throw new UnsupportedOperationException(); + } + }; + + Assert.assertEquals( + 0, + QueryableIndexCursorSequenceBuilder.timeSearch(column, 0, 0, values.length) + ); + + Assert.assertEquals( + 2, + QueryableIndexCursorSequenceBuilder.timeSearch(column, 0, 2, values.length) + ); + + Assert.assertEquals( + 0, + QueryableIndexCursorSequenceBuilder.timeSearch(column, 0, 0, values.length / 2) + ); + + Assert.assertEquals( + 1, + QueryableIndexCursorSequenceBuilder.timeSearch(column, 1, 0, values.length) + ); + + Assert.assertEquals( + 2, + QueryableIndexCursorSequenceBuilder.timeSearch(column, 1, 2, values.length) + ); + + Assert.assertEquals( + 1, + QueryableIndexCursorSequenceBuilder.timeSearch(column, 1, 0, values.length / 2) + ); + + Assert.assertEquals( + 1, + QueryableIndexCursorSequenceBuilder.timeSearch(column, 1, 1, 8) + ); + + Assert.assertEquals( + 8, + QueryableIndexCursorSequenceBuilder.timeSearch(column, 2, 0, values.length) + ); + + Assert.assertEquals( + 10, + QueryableIndexCursorSequenceBuilder.timeSearch(column, 10, 0, values.length) + ); + + Assert.assertEquals( + 11, + 
QueryableIndexCursorSequenceBuilder.timeSearch(column, 15, 0, values.length) + ); + } +} diff --git a/processing/src/test/java/org/apache/druid/segment/data/CompressedFloatsSerdeTest.java b/processing/src/test/java/org/apache/druid/segment/data/CompressedFloatsSerdeTest.java index ef041a2ae15..72e3cf5703f 100644 --- a/processing/src/test/java/org/apache/druid/segment/data/CompressedFloatsSerdeTest.java +++ b/processing/src/test/java/org/apache/druid/segment/data/CompressedFloatsSerdeTest.java @@ -144,7 +144,7 @@ public class CompressedFloatsSerdeTest private void tryFill(ColumnarFloats indexed, float[] vals, final int startIndex, final int size) { float[] filled = new float[size]; - indexed.fill(startIndex, filled); + indexed.get(filled, startIndex, filled.length); for (int i = startIndex; i < filled.length; i++) { Assert.assertEquals(vals[i + startIndex], filled[i], DELTA); diff --git a/processing/src/test/java/org/apache/druid/segment/data/CompressedLongsSerdeTest.java b/processing/src/test/java/org/apache/druid/segment/data/CompressedLongsSerdeTest.java index 02e30f6ee49..1480db6a7d0 100644 --- a/processing/src/test/java/org/apache/druid/segment/data/CompressedLongsSerdeTest.java +++ b/processing/src/test/java/org/apache/druid/segment/data/CompressedLongsSerdeTest.java @@ -167,7 +167,7 @@ public class CompressedLongsSerdeTest private void tryFill(ColumnarLongs indexed, long[] vals, final int startIndex, final int size) { long[] filled = new long[size]; - indexed.fill(startIndex, filled); + indexed.get(filled, startIndex, size); for (int i = startIndex; i < filled.length; i++) { Assert.assertEquals(vals[i + startIndex], filled[i]); diff --git a/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java index 9200ec48b92..335ca490319 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java @@ -26,6 +26,7 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; import org.apache.druid.common.guava.SettableSupplier; import org.apache.druid.data.input.InputRow; +import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.StringUtils; @@ -36,12 +37,14 @@ import org.apache.druid.query.BitmapResultFactory; import org.apache.druid.query.aggregation.Aggregator; import org.apache.druid.query.aggregation.CountAggregatorFactory; import org.apache.druid.query.aggregation.FilteredAggregatorFactory; +import org.apache.druid.query.aggregation.VectorAggregator; import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.expression.TestExprMacroTable; import org.apache.druid.query.filter.BitmapIndexSelector; import org.apache.druid.query.filter.DimFilter; import org.apache.druid.query.filter.Filter; import org.apache.druid.query.filter.ValueMatcher; +import org.apache.druid.query.filter.vector.VectorValueMatcher; import org.apache.druid.query.groupby.RowBasedColumnSelectorFactory; import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; @@ -60,6 +63,9 @@ import org.apache.druid.segment.data.IndexedInts; import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; import org.apache.druid.segment.incremental.IncrementalIndex; import 
org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter; +import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorCursor; import org.apache.druid.segment.virtual.ExpressionVirtualColumn; import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; import org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory; @@ -71,7 +77,9 @@ import org.junit.rules.TemporaryFolder; import org.junit.runners.Parameterized; import java.io.Closeable; +import java.nio.ByteBuffer; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.HashMap; import java.util.List; @@ -94,11 +102,12 @@ public abstract class BaseFilterTest protected final IndexBuilder indexBuilder; protected final Function> finisher; - protected StorageAdapter adapter; - protected boolean cnf; - protected boolean optimize; + protected final boolean cnf; + protected final boolean optimize; protected final String testName; + protected StorageAdapter adapter; + // JUnit creates a new test instance for every test method call. // For filter tests, the test setup creates a segment. // Creating a new segment for every test method call is pretty slow, so cache the StorageAdapters. @@ -204,10 +213,11 @@ public abstract class BaseFilterTest for (boolean cnf : ImmutableList.of(false, true)) { for (boolean optimize : ImmutableList.of(false, true)) { final String testName = StringUtils.format( - "bitmaps[%s], indexMerger[%s], finisher[%s], optimize[%s]", + "bitmaps[%s], indexMerger[%s], finisher[%s], cnf[%s], optimize[%s]", bitmapSerdeFactoryEntry.getKey(), segmentWriteOutMediumFactoryEntry.getKey(), finisherEntry.getKey(), + cnf, optimize ); final IndexBuilder indexBuilder = IndexBuilder @@ -256,6 +266,20 @@ public abstract class BaseFilterTest ); } + private VectorCursor makeVectorCursor(final Filter filter) + { + return adapter.makeVectorCursor( + filter, + Intervals.ETERNITY, + // VirtualColumns do not support vectorization yet. Avoid passing them in, and any tests that need virtual + // columns should skip vectorization tests. + VirtualColumns.EMPTY, + false, + 3, // Vector size smaller than the number of rows, to ensure we use more than one. + null + ); + } + /** * Selects elements from "selectColumn" from rows matching a filter. selectColumn must be a single valued dimension. 
*/ @@ -291,30 +315,66 @@ public abstract class BaseFilterTest private long selectCountUsingFilteredAggregator(final DimFilter filter) { - final Sequence cursors = makeCursorSequence(makeFilter(filter)); + final Sequence cursors = makeCursorSequence(null); Sequence aggSeq = Sequences.map( cursors, - new Function() - { - @Override - public Aggregator apply(Cursor input) - { - Aggregator agg = new FilteredAggregatorFactory( - new CountAggregatorFactory("count"), - maybeOptimize(filter) - ).factorize(input.getColumnSelectorFactory()); + cursor -> { + Aggregator agg = new FilteredAggregatorFactory( + new CountAggregatorFactory("count"), + maybeOptimize(filter) + ).factorize(cursor.getColumnSelectorFactory()); - for (; !input.isDone(); input.advance()) { - agg.aggregate(); - } - - return agg; + for (; !cursor.isDone(); cursor.advance()) { + agg.aggregate(); } + + return agg; } ); return aggSeq.toList().get(0).getLong(); } + private long selectCountUsingVectorizedFilteredAggregator(final DimFilter dimFilter) + { + Preconditions.checkState(makeFilter(dimFilter).canVectorizeMatcher(), "Cannot vectorize filter: %s", dimFilter); + + try (final VectorCursor cursor = makeVectorCursor(null)) { + final FilteredAggregatorFactory aggregatorFactory = new FilteredAggregatorFactory( + new CountAggregatorFactory("count"), + maybeOptimize(dimFilter) + ); + final VectorAggregator aggregator = aggregatorFactory.factorizeVector(cursor.getColumnSelectorFactory()); + final ByteBuffer buf = ByteBuffer.allocate(aggregatorFactory.getMaxIntermediateSizeWithNulls() * 2); + + // Use two slots: one for each form of aggregate. + aggregator.init(buf, 0); + aggregator.init(buf, aggregatorFactory.getMaxIntermediateSizeWithNulls()); + + for (; !cursor.isDone(); cursor.advance()) { + aggregator.aggregate(buf, 0, 0, cursor.getCurrentVectorSize()); + + final int[] positions = new int[cursor.getCurrentVectorSize()]; + Arrays.fill(positions, aggregatorFactory.getMaxIntermediateSizeWithNulls()); + + final int[] allRows = new int[cursor.getCurrentVectorSize()]; + for (int i = 0; i < allRows.length; i++) { + allRows[i] = i; + } + + aggregator.aggregate(buf, cursor.getCurrentVectorSize(), positions, allRows, 0); + } + + final long val1 = (long) aggregator.get(buf, 0); + final long val2 = (long) aggregator.get(buf, aggregatorFactory.getMaxIntermediateSizeWithNulls()); + + if (val1 != val2) { + throw new ISE("Oh no, val1[%d] != val2[%d]", val1, val2); + } + + return val1; + } + } + private List selectColumnValuesMatchingFilterUsingPostFiltering( final DimFilter filter, final String selectColumn @@ -382,6 +442,100 @@ public abstract class BaseFilterTest return seq.toList().get(0); } + private List selectColumnValuesMatchingFilterUsingVectorizedPostFiltering( + final DimFilter filter, + final String selectColumn + ) + { + final Filter theFilter = makeFilter(filter); + final Filter postFilteringFilter = new Filter() + { + @Override + public T getBitmapResult(BitmapIndexSelector selector, BitmapResultFactory bitmapResultFactory) + { + throw new UnsupportedOperationException(); + } + + @Override + public ValueMatcher makeMatcher(ColumnSelectorFactory factory) + { + return theFilter.makeMatcher(factory); + } + + @Override + public boolean supportsBitmapIndex(BitmapIndexSelector selector) + { + return false; + } + + @Override + public VectorValueMatcher makeVectorMatcher(VectorColumnSelectorFactory factory) + { + return theFilter.makeVectorMatcher(factory); + } + + @Override + public boolean canVectorizeMatcher() + { + return 
theFilter.canVectorizeMatcher(); + } + + @Override + public boolean supportsSelectivityEstimation(ColumnSelector columnSelector, BitmapIndexSelector indexSelector) + { + return false; + } + + @Override + public double estimateSelectivity(BitmapIndexSelector indexSelector) + { + return 1.0; + } + }; + + try (final VectorCursor cursor = makeVectorCursor(postFilteringFilter)) { + final SingleValueDimensionVectorSelector selector = cursor + .getColumnSelectorFactory() + .makeSingleValueDimensionSelector(new DefaultDimensionSpec(selectColumn, selectColumn)); + + final List values = new ArrayList<>(); + + while (!cursor.isDone()) { + final int[] rowVector = selector.getRowVector(); + for (int i = 0; i < cursor.getCurrentVectorSize(); i++) { + values.add(selector.lookupName(rowVector[i])); + } + cursor.advance(); + } + + return values; + } + } + + private List selectColumnValuesMatchingFilterUsingVectorCursor( + final DimFilter filter, + final String selectColumn + ) + { + try (final VectorCursor cursor = makeVectorCursor(makeFilter(filter))) { + final SingleValueDimensionVectorSelector selector = cursor + .getColumnSelectorFactory() + .makeSingleValueDimensionSelector(new DefaultDimensionSpec(selectColumn, selectColumn)); + + final List values = new ArrayList<>(); + + while (!cursor.isDone()) { + final int[] rowVector = selector.getRowVector(); + for (int i = 0; i < cursor.getCurrentVectorSize(); i++) { + values.add(selector.lookupName(rowVector[i])); + } + cursor.advance(); + } + + return values; + } + } + private List selectColumnValuesMatchingFilterUsingRowBasedColumnSelectorFactory( final DimFilter filter, final String selectColumn @@ -412,22 +566,68 @@ public abstract class BaseFilterTest final DimFilter filter, final List expectedRows ) + { + // IncrementalIndex cannot ever vectorize. 
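+    // (In this patch only QueryableIndex-backed storage adapters provide vectorized cursors, so
+    // the incremental-index variants of each test run the non-vectorized checks only.)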
+ final boolean testVectorized = !(adapter instanceof IncrementalIndexStorageAdapter); + assertFilterMatches(filter, expectedRows, testVectorized); + } + + protected void assertFilterMatchesSkipVectorize( + final DimFilter filter, + final List expectedRows + ) + { + assertFilterMatches(filter, expectedRows, false); + } + + private void assertFilterMatches( + final DimFilter filter, + final List expectedRows, + final boolean testVectorized + ) { Assert.assertEquals( "Cursor: " + filter, expectedRows, selectColumnValuesMatchingFilter(filter, "dim0") ); + + if (testVectorized) { + Assert.assertEquals( + "Cursor (vectorized): " + filter, + expectedRows, + selectColumnValuesMatchingFilterUsingVectorCursor(filter, "dim0") + ); + } + Assert.assertEquals( "Cursor with postFiltering: " + filter, expectedRows, selectColumnValuesMatchingFilterUsingPostFiltering(filter, "dim0") ); + + if (testVectorized) { + Assert.assertEquals( + "Cursor with postFiltering (vectorized): " + filter, + expectedRows, + selectColumnValuesMatchingFilterUsingVectorizedPostFiltering(filter, "dim0") + ); + } + Assert.assertEquals( "Filtered aggregator: " + filter, expectedRows.size(), selectCountUsingFilteredAggregator(filter) ); + + if (testVectorized) { + Assert.assertEquals( + "Filtered aggregator (vectorized): " + filter, + expectedRows.size(), + selectCountUsingVectorizedFilteredAggregator(filter) + ); + } + Assert.assertEquals( "RowBasedColumnSelectorFactory: " + filter, expectedRows, diff --git a/processing/src/test/java/org/apache/druid/segment/filter/BoundFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/BoundFilterTest.java index 6e0a3cf050c..0da79d0fd9f 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/BoundFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/BoundFilterTest.java @@ -436,12 +436,12 @@ public class BoundFilterTest extends BaseFilterTest @Test public void testNumericMatchVirtualColumn() { - assertFilterMatches( + assertFilterMatchesSkipVectorize( new BoundDimFilter("expr", "1", "2", false, false, false, null, StringComparators.NUMERIC), ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7") ); - assertFilterMatches( + assertFilterMatchesSkipVectorize( new BoundDimFilter("expr", "2", "3", false, false, false, null, StringComparators.NUMERIC), ImmutableList.of() ); diff --git a/processing/src/test/java/org/apache/druid/segment/filter/ColumnComparisonFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/ColumnComparisonFilterTest.java index a57d36f731e..676fa4d4743 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/ColumnComparisonFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/ColumnComparisonFilterTest.java @@ -45,6 +45,7 @@ import org.junit.runner.RunWith; import org.junit.runners.Parameterized; import java.io.Closeable; +import java.util.Collections; import java.util.List; import java.util.Map; @@ -97,19 +98,19 @@ public class ColumnComparisonFilterTest extends BaseFilterTest @Test public void testColumnsWithoutNulls() { - assertFilterMatches(new ColumnComparisonDimFilter(ImmutableList.of( + assertFilterMatchesSkipVectorize(new ColumnComparisonDimFilter(ImmutableList.of( DefaultDimensionSpec.of("dim0"), DefaultDimensionSpec.of("dim1") )), ImmutableList.of("2", "5", "8")); - assertFilterMatches(new ColumnComparisonDimFilter(ImmutableList.of( + assertFilterMatchesSkipVectorize(new ColumnComparisonDimFilter(ImmutableList.of( DefaultDimensionSpec.of("dim0"), 
DefaultDimensionSpec.of("dim2") )), ImmutableList.of("3", "4", "5")); - assertFilterMatches(new ColumnComparisonDimFilter(ImmutableList.of( + assertFilterMatchesSkipVectorize(new ColumnComparisonDimFilter(ImmutableList.of( DefaultDimensionSpec.of("dim1"), DefaultDimensionSpec.of("dim2") )), ImmutableList.of("5", "9")); - assertFilterMatches(new ColumnComparisonDimFilter(ImmutableList.of( + assertFilterMatchesSkipVectorize(new ColumnComparisonDimFilter(ImmutableList.of( DefaultDimensionSpec.of("dim0"), DefaultDimensionSpec.of("dim1"), DefaultDimensionSpec.of("dim2") @@ -119,35 +120,56 @@ public class ColumnComparisonFilterTest extends BaseFilterTest @Test public void testMissingColumnNotSpecifiedInDimensionList() { - assertFilterMatches( - new ColumnComparisonDimFilter( - ImmutableList.of(DefaultDimensionSpec.of("dim6"), DefaultDimensionSpec.of("dim7")) - ), - ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7", "8", "9") - ); + assertFilterMatchesSkipVectorize(new ColumnComparisonDimFilter(ImmutableList.of( + DefaultDimensionSpec.of("dim6"), + DefaultDimensionSpec.of("dim7") + )), ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7", "8", "9")); + if (NullHandling.replaceWithDefault()) { - assertFilterMatches( + // "" is equivalent to null which is equivalent to a missing dimension + assertFilterMatchesSkipVectorize(new ColumnComparisonDimFilter(ImmutableList.of( + DefaultDimensionSpec.of("dim1"), + DefaultDimensionSpec.of("dim6") + )), ImmutableList.of("0")); + + assertFilterMatchesSkipVectorize(new ColumnComparisonDimFilter(ImmutableList.of( + DefaultDimensionSpec.of("dim2"), + DefaultDimensionSpec.of("dim6") + )), ImmutableList.of("1", "2", "6", "7", "8")); + + assertFilterMatchesSkipVectorize( new ColumnComparisonDimFilter( ImmutableList.of(DefaultDimensionSpec.of("dim1"), DefaultDimensionSpec.of("dim6")) ), ImmutableList.of("0") ); - assertFilterMatches( + assertFilterMatchesSkipVectorize( new ColumnComparisonDimFilter( ImmutableList.of(DefaultDimensionSpec.of("dim2"), DefaultDimensionSpec.of("dim6")) ), ImmutableList.of("1", "2", "6", "7", "8") ); } else { - assertFilterMatches( + // "" is not equivalent to a missing dimension + assertFilterMatchesSkipVectorize(new ColumnComparisonDimFilter(ImmutableList.of( + DefaultDimensionSpec.of("dim1"), + DefaultDimensionSpec.of("dim6") + )), Collections.emptyList()); + + assertFilterMatchesSkipVectorize(new ColumnComparisonDimFilter(ImmutableList.of( + DefaultDimensionSpec.of("dim2"), + DefaultDimensionSpec.of("dim6") + )), ImmutableList.of("1", "6", "7", "8")); + + assertFilterMatchesSkipVectorize( new ColumnComparisonDimFilter( ImmutableList.of(DefaultDimensionSpec.of("dim1"), DefaultDimensionSpec.of("dim6")) ), ImmutableList.of() ); - assertFilterMatches( + assertFilterMatchesSkipVectorize( new ColumnComparisonDimFilter( ImmutableList.of(DefaultDimensionSpec.of("dim2"), DefaultDimensionSpec.of("dim6")) ), @@ -165,7 +187,7 @@ public class ColumnComparisonFilterTest extends BaseFilterTest LookupExtractor mapExtractor = new MapLookupExtractor(stringMap, false); LookupExtractionFn lookupFn = new LookupExtractionFn(mapExtractor, true, null, false, true); - assertFilterMatches(new ColumnComparisonDimFilter(ImmutableList.of( + assertFilterMatchesSkipVectorize(new ColumnComparisonDimFilter(ImmutableList.of( new ExtractionDimensionSpec("dim0", "dim0", lookupFn), new ExtractionDimensionSpec("dim1", "dim1", lookupFn) )), ImmutableList.of("2", "5", "7", "8")); diff --git 
a/processing/src/test/java/org/apache/druid/segment/filter/ExpressionFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/ExpressionFilterTest.java
index 91b89652392..578e4d2af0c 100644
--- a/processing/src/test/java/org/apache/druid/segment/filter/ExpressionFilterTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/filter/ExpressionFilterTest.java
@@ -117,128 +117,141 @@ public class ExpressionFilterTest extends BaseFilterTest
   @Test
   public void testOneSingleValuedStringColumn()
   {
-    assertFilterMatches(edf("dim3 == ''"), ImmutableList.of("0"));
-    assertFilterMatches(edf("dim3 == '1'"), ImmutableList.of("3", "4", "6"));
-    assertFilterMatches(edf("dim3 == 'a'"), ImmutableList.of("7"));
-    assertFilterMatches(edf("dim3 == 1"), ImmutableList.of("3", "4", "6"));
-    assertFilterMatches(edf("dim3 == 1.0"), ImmutableList.of("3", "4", "6"));
-    assertFilterMatches(edf("dim3 == 1.234"), ImmutableList.of("9"));
-    assertFilterMatches(edf("dim3 < '2'"), ImmutableList.of("0", "1", "3", "4", "6", "9"));
+    assertFilterMatchesSkipVectorize(edf("dim3 == ''"), ImmutableList.of("0"));
+    assertFilterMatchesSkipVectorize(edf("dim3 == '1'"), ImmutableList.of("3", "4", "6"));
+    assertFilterMatchesSkipVectorize(edf("dim3 == 'a'"), ImmutableList.of("7"));
+    assertFilterMatchesSkipVectorize(edf("dim3 == 1"), ImmutableList.of("3", "4", "6"));
+    assertFilterMatchesSkipVectorize(edf("dim3 == 1.0"), ImmutableList.of("3", "4", "6"));
+    assertFilterMatchesSkipVectorize(edf("dim3 == 1.234"), ImmutableList.of("9"));
+    assertFilterMatchesSkipVectorize(edf("dim3 < '2'"), ImmutableList.of("0", "1", "3", "4", "6", "9"));
     if (NullHandling.replaceWithDefault()) {
-      assertFilterMatches(edf("dim3 < 2"), ImmutableList.of("0", "3", "4", "6", "7", "9"));
-      assertFilterMatches(edf("dim3 < 2.0"), ImmutableList.of("0", "3", "4", "6", "7", "9"));
+      assertFilterMatchesSkipVectorize(edf("dim3 < 2"), ImmutableList.of("0", "3", "4", "6", "7", "9"));
+      assertFilterMatchesSkipVectorize(edf("dim3 < 2.0"), ImmutableList.of("0", "3", "4", "6", "7", "9"));
     } else {
       // The empty string and "a" will not match
-      assertFilterMatches(edf("dim3 < 2"), ImmutableList.of("3", "4", "6", "9"));
-      assertFilterMatches(edf("dim3 < 2.0"), ImmutableList.of("3", "4", "6", "9"));
+      assertFilterMatchesSkipVectorize(edf("dim3 < 2"), ImmutableList.of("3", "4", "6", "9"));
+      assertFilterMatchesSkipVectorize(edf("dim3 < 2.0"), ImmutableList.of("3", "4", "6", "9"));
     }
-    assertFilterMatches(edf("like(dim3, '1%')"), ImmutableList.of("1", "3", "4", "6", "9"));
+    assertFilterMatchesSkipVectorize(edf("like(dim3, '1%')"), ImmutableList.of("1", "3", "4", "6", "9"));
   }
   @Test
   public void testOneMultiValuedStringColumn()
   {
     if (NullHandling.replaceWithDefault()) {
-      assertFilterMatches(edf("dim4 == ''"), ImmutableList.of("1", "2", "6", "7", "8"));
+      assertFilterMatchesSkipVectorize(edf("dim4 == ''"), ImmutableList.of("1", "2", "6", "7", "8"));
     } else {
-      assertFilterMatches(edf("dim4 == ''"), ImmutableList.of("2"));
+      assertFilterMatchesSkipVectorize(edf("dim4 == ''"), ImmutableList.of("2"));
       // As per the SQL standard, null == null returns false.
- assertFilterMatches(edf("dim4 == null"), ImmutableList.of()); + assertFilterMatchesSkipVectorize(edf("dim4 == null"), ImmutableList.of()); } - assertFilterMatches(edf("dim4 == '1'"), ImmutableList.of("0")); - assertFilterMatches(edf("dim4 == '3'"), ImmutableList.of("3")); + assertFilterMatchesSkipVectorize(edf("dim4 == '1'"), ImmutableList.of("0")); + assertFilterMatchesSkipVectorize(edf("dim4 == '3'"), ImmutableList.of("3")); } @Test public void testOneLongColumn() { if (NullHandling.replaceWithDefault()) { - assertFilterMatches(edf("dim1 == ''"), ImmutableList.of("0")); + assertFilterMatchesSkipVectorize(edf("dim1 == ''"), ImmutableList.of("0")); } else { // A long does not match empty string - assertFilterMatches(edf("dim1 == ''"), ImmutableList.of()); + assertFilterMatchesSkipVectorize(edf("dim1 == ''"), ImmutableList.of()); } - assertFilterMatches(edf("dim1 == '1'"), ImmutableList.of("1")); - assertFilterMatches(edf("dim1 == 2"), ImmutableList.of("2")); - assertFilterMatches(edf("dim1 < '2'"), ImmutableList.of("0", "1")); - assertFilterMatches(edf("dim1 < 2"), ImmutableList.of("0", "1")); - assertFilterMatches(edf("dim1 < 2.0"), ImmutableList.of("0", "1")); - assertFilterMatches(edf("like(dim1, '1%')"), ImmutableList.of("1")); + assertFilterMatchesSkipVectorize(edf("dim1 == '1'"), ImmutableList.of("1")); + assertFilterMatchesSkipVectorize(edf("dim1 == 2"), ImmutableList.of("2")); + assertFilterMatchesSkipVectorize(edf("dim1 < '2'"), ImmutableList.of("0", "1")); + assertFilterMatchesSkipVectorize(edf("dim1 < 2"), ImmutableList.of("0", "1")); + assertFilterMatchesSkipVectorize(edf("dim1 < 2.0"), ImmutableList.of("0", "1")); + assertFilterMatchesSkipVectorize(edf("like(dim1, '1%')"), ImmutableList.of("1")); } @Test public void testOneFloatColumn() { if (NullHandling.replaceWithDefault()) { - assertFilterMatches(edf("dim2 == ''"), ImmutableList.of("0")); + assertFilterMatchesSkipVectorize(edf("dim2 == ''"), ImmutableList.of("0")); } else { // A float does not match empty string - assertFilterMatches(edf("dim2 == ''"), ImmutableList.of()); + assertFilterMatchesSkipVectorize(edf("dim2 == ''"), ImmutableList.of()); } - assertFilterMatches(edf("dim2 == '1'"), ImmutableList.of("1")); - assertFilterMatches(edf("dim2 == 2"), ImmutableList.of("2")); - assertFilterMatches(edf("dim2 < '2'"), ImmutableList.of("0", "1")); - assertFilterMatches(edf("dim2 < 2"), ImmutableList.of("0", "1")); - assertFilterMatches(edf("dim2 < 2.0"), ImmutableList.of("0", "1")); - assertFilterMatches(edf("like(dim2, '1%')"), ImmutableList.of("1")); + assertFilterMatchesSkipVectorize(edf("dim2 == '1'"), ImmutableList.of("1")); + assertFilterMatchesSkipVectorize(edf("dim2 == 2"), ImmutableList.of("2")); + assertFilterMatchesSkipVectorize(edf("dim2 < '2'"), ImmutableList.of("0", "1")); + assertFilterMatchesSkipVectorize(edf("dim2 < 2"), ImmutableList.of("0", "1")); + assertFilterMatchesSkipVectorize(edf("dim2 < 2.0"), ImmutableList.of("0", "1")); + assertFilterMatchesSkipVectorize(edf("like(dim2, '1%')"), ImmutableList.of("1")); } @Test public void testConstantExpression() { - assertFilterMatches(edf("1 + 1"), ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7", "8", "9")); - assertFilterMatches(edf("0 + 0"), ImmutableList.of()); + assertFilterMatchesSkipVectorize(edf("1 + 1"), ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7", "8", "9")); + assertFilterMatchesSkipVectorize(edf("0 + 0"), ImmutableList.of()); } @Test public void testCompareColumns() { // String vs string - assertFilterMatches(edf("dim0 == dim3"), 
ImmutableList.of("2", "5", "8")); + assertFilterMatchesSkipVectorize(edf("dim0 == dim3"), ImmutableList.of("2", "5", "8")); if (NullHandling.replaceWithDefault()) { // String vs long - assertFilterMatches(edf("dim1 == dim3"), ImmutableList.of("0", "2", "5", "8")); + assertFilterMatchesSkipVectorize(edf("dim1 == dim3"), ImmutableList.of("0", "2", "5", "8")); // String vs float - assertFilterMatches(edf("dim2 == dim3"), ImmutableList.of("0", "2", "5", "8")); + assertFilterMatchesSkipVectorize(edf("dim2 == dim3"), ImmutableList.of("0", "2", "5", "8")); } else { // String vs long - assertFilterMatches(edf("dim1 == dim3"), ImmutableList.of("2", "5", "8")); + assertFilterMatchesSkipVectorize(edf("dim1 == dim3"), ImmutableList.of("2", "5", "8")); // String vs float - assertFilterMatches(edf("dim2 == dim3"), ImmutableList.of("2", "5", "8")); + assertFilterMatchesSkipVectorize(edf("dim2 == dim3"), ImmutableList.of("2", "5", "8")); } - assertFilterMatches(edf("dim0 == dim4"), ImmutableList.of("3", "4", "5")); + // String vs. multi-value string + assertFilterMatchesSkipVectorize(edf("dim0 == dim4"), ImmutableList.of("3", "4", "5")); } @Test public void testMissingColumn() { if (NullHandling.replaceWithDefault()) { - assertFilterMatches(edf("missing == ''"), ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7", "8", "9")); + assertFilterMatchesSkipVectorize( + edf("missing == ''"), + ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7", "8", "9") + ); } else { // AS per SQL standard null == null returns false. - assertFilterMatches(edf("missing == null"), ImmutableList.of()); + assertFilterMatchesSkipVectorize(edf("missing == null"), ImmutableList.of()); } - assertFilterMatches(edf("missing == '1'"), ImmutableList.of()); - assertFilterMatches(edf("missing == 2"), ImmutableList.of()); + assertFilterMatchesSkipVectorize(edf("missing == '1'"), ImmutableList.of()); + assertFilterMatchesSkipVectorize(edf("missing == 2"), ImmutableList.of()); if (NullHandling.replaceWithDefault()) { // missing equivaluent to 0 - assertFilterMatches(edf("missing < '2'"), ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7", "8", "9")); - assertFilterMatches(edf("missing < 2"), ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7", "8", "9")); - assertFilterMatches(edf("missing < 2.0"), ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7", "8", "9")); + assertFilterMatchesSkipVectorize( + edf("missing < '2'"), + ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7", "8", "9") + ); + assertFilterMatchesSkipVectorize( + edf("missing < 2"), + ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7", "8", "9") + ); + assertFilterMatchesSkipVectorize( + edf("missing < 2.0"), + ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7", "8", "9") + ); } else { // missing equivalent to null - assertFilterMatches(edf("missing < '2'"), ImmutableList.of()); - assertFilterMatches(edf("missing < 2"), ImmutableList.of()); - assertFilterMatches(edf("missing < 2.0"), ImmutableList.of()); + assertFilterMatchesSkipVectorize(edf("missing < '2'"), ImmutableList.of()); + assertFilterMatchesSkipVectorize(edf("missing < 2"), ImmutableList.of()); + assertFilterMatchesSkipVectorize(edf("missing < 2.0"), ImmutableList.of()); } - assertFilterMatches(edf("missing > '2'"), ImmutableList.of()); - assertFilterMatches(edf("missing > 2"), ImmutableList.of()); - assertFilterMatches(edf("missing > 2.0"), ImmutableList.of()); - assertFilterMatches(edf("like(missing, '1%')"), ImmutableList.of()); + assertFilterMatchesSkipVectorize(edf("missing > 
'2'"), ImmutableList.of()); + assertFilterMatchesSkipVectorize(edf("missing > 2"), ImmutableList.of()); + assertFilterMatchesSkipVectorize(edf("missing > 2.0"), ImmutableList.of()); + assertFilterMatchesSkipVectorize(edf("like(missing, '1%')"), ImmutableList.of()); } @Test diff --git a/processing/src/test/java/org/apache/druid/segment/filter/FloatAndDoubleFilteringTest.java b/processing/src/test/java/org/apache/druid/segment/filter/FloatAndDoubleFilteringTest.java index 797ffe68c31..1ef93152f36 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/FloatAndDoubleFilteringTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/FloatAndDoubleFilteringTest.java @@ -211,13 +211,13 @@ public class FloatAndDoubleFilteringTest extends BaseFilterTest String jsFn = "function(x) { return(x === 3 || x === 5) }"; - assertFilterMatches( + assertFilterMatchesSkipVectorize( new JavaScriptDimFilter(columnName, jsFn, null, JavaScriptConfig.getEnabledInstance()), ImmutableList.of("3", "5") ); String jsFn2 = "function(x) { return(x === 3.0 || x === 5.0) }"; - assertFilterMatches( + assertFilterMatchesSkipVectorize( new JavaScriptDimFilter(columnName, jsFn2, null, JavaScriptConfig.getEnabledInstance()), ImmutableList.of("3", "5") ); @@ -338,7 +338,7 @@ public class FloatAndDoubleFilteringTest extends BaseFilterTest ); String jsFn = "function(x) { return(x === 'Wednesday' || x === 'Thursday') }"; - assertFilterMatches( + assertFilterMatchesSkipVectorize( new JavaScriptDimFilter(columnName, jsFn, exfn, JavaScriptConfig.getEnabledInstance()), ImmutableList.of("3", "4") ); diff --git a/processing/src/test/java/org/apache/druid/segment/filter/JavaScriptFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/JavaScriptFilterTest.java index f06da4f4494..2786edbb324 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/JavaScriptFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/JavaScriptFilterTest.java @@ -101,27 +101,27 @@ public class JavaScriptFilterTest extends BaseFilterTest @Test public void testSingleValueStringColumnWithoutNulls() { - assertFilterMatches(newJavaScriptDimFilter("dim0", jsNullFilter, null), ImmutableList.of()); - assertFilterMatches(newJavaScriptDimFilter("dim0", jsValueFilter(""), null), ImmutableList.of()); - assertFilterMatches(newJavaScriptDimFilter("dim0", jsValueFilter("0"), null), ImmutableList.of("0")); - assertFilterMatches(newJavaScriptDimFilter("dim0", jsValueFilter("1"), null), ImmutableList.of("1")); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim0", jsNullFilter, null), ImmutableList.of()); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim0", jsValueFilter(""), null), ImmutableList.of()); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim0", jsValueFilter("0"), null), ImmutableList.of("0")); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim0", jsValueFilter("1"), null), ImmutableList.of("1")); } @Test public void testSingleValueStringColumnWithNulls() { if (NullHandling.replaceWithDefault()) { - assertFilterMatches(newJavaScriptDimFilter("dim1", jsNullFilter, null), ImmutableList.of("0")); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim1", jsNullFilter, null), ImmutableList.of("0")); } else { - assertFilterMatches(newJavaScriptDimFilter("dim1", jsNullFilter, null), ImmutableList.of()); - assertFilterMatches(newJavaScriptDimFilter("dim1", jsValueFilter(""), null), ImmutableList.of("0")); + 
assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim1", jsNullFilter, null), ImmutableList.of()); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim1", jsValueFilter(""), null), ImmutableList.of("0")); } - assertFilterMatches(newJavaScriptDimFilter("dim1", jsValueFilter("10"), null), ImmutableList.of("1")); - assertFilterMatches(newJavaScriptDimFilter("dim1", jsValueFilter("2"), null), ImmutableList.of("2")); - assertFilterMatches(newJavaScriptDimFilter("dim1", jsValueFilter("1"), null), ImmutableList.of("3")); - assertFilterMatches(newJavaScriptDimFilter("dim1", jsValueFilter("def"), null), ImmutableList.of("4")); - assertFilterMatches(newJavaScriptDimFilter("dim1", jsValueFilter("abc"), null), ImmutableList.of("5")); - assertFilterMatches(newJavaScriptDimFilter("dim1", jsValueFilter("ab"), null), ImmutableList.of()); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim1", jsValueFilter("10"), null), ImmutableList.of("1")); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim1", jsValueFilter("2"), null), ImmutableList.of("2")); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim1", jsValueFilter("1"), null), ImmutableList.of("3")); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim1", jsValueFilter("def"), null), ImmutableList.of("4")); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim1", jsValueFilter("abc"), null), ImmutableList.of("5")); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim1", jsValueFilter("ab"), null), ImmutableList.of()); } @Test @@ -129,33 +129,45 @@ public class JavaScriptFilterTest extends BaseFilterTest { // multi-val null...... if (NullHandling.replaceWithDefault()) { - assertFilterMatches(newJavaScriptDimFilter("dim2", jsNullFilter, null), ImmutableList.of("1", "2", "5")); + assertFilterMatchesSkipVectorize( + newJavaScriptDimFilter("dim2", jsNullFilter, null), + ImmutableList.of("1", "2", "5") + ); } else { - assertFilterMatches(newJavaScriptDimFilter("dim2", jsNullFilter, null), ImmutableList.of("1", "5")); - assertFilterMatches(newJavaScriptDimFilter("dim2", jsValueFilter(""), null), ImmutableList.of("2")); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim2", jsNullFilter, null), ImmutableList.of("1", "5")); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim2", jsValueFilter(""), null), ImmutableList.of("2")); } - assertFilterMatches(newJavaScriptDimFilter("dim2", jsValueFilter("a"), null), ImmutableList.of("0", "3")); - assertFilterMatches(newJavaScriptDimFilter("dim2", jsValueFilter("b"), null), ImmutableList.of("0")); - assertFilterMatches(newJavaScriptDimFilter("dim2", jsValueFilter("c"), null), ImmutableList.of("4")); - assertFilterMatches(newJavaScriptDimFilter("dim2", jsValueFilter("d"), null), ImmutableList.of()); + assertFilterMatchesSkipVectorize( + newJavaScriptDimFilter("dim2", jsValueFilter("a"), null), + ImmutableList.of("0", "3") + ); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim2", jsValueFilter("b"), null), ImmutableList.of("0")); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim2", jsValueFilter("c"), null), ImmutableList.of("4")); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim2", jsValueFilter("d"), null), ImmutableList.of()); } @Test public void testMissingColumnSpecifiedInDimensionList() { - assertFilterMatches(newJavaScriptDimFilter("dim3", jsNullFilter, null), ImmutableList.of("0", "1", "2", "3", "4", "5")); - assertFilterMatches(newJavaScriptDimFilter("dim3", jsValueFilter("a"), 
null), ImmutableList.of()); - assertFilterMatches(newJavaScriptDimFilter("dim3", jsValueFilter("b"), null), ImmutableList.of()); - assertFilterMatches(newJavaScriptDimFilter("dim3", jsValueFilter("c"), null), ImmutableList.of()); + assertFilterMatchesSkipVectorize( + newJavaScriptDimFilter("dim3", jsNullFilter, null), + ImmutableList.of("0", "1", "2", "3", "4", "5") + ); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim3", jsValueFilter("a"), null), ImmutableList.of()); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim3", jsValueFilter("b"), null), ImmutableList.of()); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim3", jsValueFilter("c"), null), ImmutableList.of()); } @Test public void testMissingColumnNotSpecifiedInDimensionList() { - assertFilterMatches(newJavaScriptDimFilter("dim4", jsNullFilter, null), ImmutableList.of("0", "1", "2", "3", "4", "5")); - assertFilterMatches(newJavaScriptDimFilter("dim4", jsValueFilter("a"), null), ImmutableList.of()); - assertFilterMatches(newJavaScriptDimFilter("dim4", jsValueFilter("b"), null), ImmutableList.of()); - assertFilterMatches(newJavaScriptDimFilter("dim4", jsValueFilter("c"), null), ImmutableList.of()); + assertFilterMatchesSkipVectorize( + newJavaScriptDimFilter("dim4", jsNullFilter, null), + ImmutableList.of("0", "1", "2", "3", "4", "5") + ); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim4", jsValueFilter("a"), null), ImmutableList.of()); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim4", jsValueFilter("b"), null), ImmutableList.of()); + assertFilterMatchesSkipVectorize(newJavaScriptDimFilter("dim4", jsValueFilter("c"), null), ImmutableList.of()); } @Test @@ -170,20 +182,50 @@ public class JavaScriptFilterTest extends BaseFilterTest LookupExtractor mapExtractor = new MapLookupExtractor(stringMap, false); LookupExtractionFn lookupFn = new LookupExtractionFn(mapExtractor, false, "UNKNOWN", false, true); - assertFilterMatches(newJavaScriptDimFilter("dim0", jsValueFilter("HELLO"), lookupFn), ImmutableList.of("1")); - assertFilterMatches(newJavaScriptDimFilter("dim0", jsValueFilter("UNKNOWN"), lookupFn), ImmutableList.of("0", "2", "3", "4", "5")); + assertFilterMatchesSkipVectorize( + newJavaScriptDimFilter("dim0", jsValueFilter("HELLO"), lookupFn), + ImmutableList.of("1") + ); + assertFilterMatchesSkipVectorize( + newJavaScriptDimFilter("dim0", jsValueFilter("UNKNOWN"), lookupFn), + ImmutableList.of("0", "2", "3", "4", "5") + ); - assertFilterMatches(newJavaScriptDimFilter("dim1", jsValueFilter("HELLO"), lookupFn), ImmutableList.of("3", "4")); - assertFilterMatches(newJavaScriptDimFilter("dim1", jsValueFilter("UNKNOWN"), lookupFn), ImmutableList.of("0", "1", "2", "5")); + assertFilterMatchesSkipVectorize( + newJavaScriptDimFilter("dim1", jsValueFilter("HELLO"), lookupFn), + ImmutableList.of("3", "4") + ); + assertFilterMatchesSkipVectorize( + newJavaScriptDimFilter("dim1", jsValueFilter("UNKNOWN"), lookupFn), + ImmutableList.of("0", "1", "2", "5") + ); - assertFilterMatches(newJavaScriptDimFilter("dim2", jsValueFilter("HELLO"), lookupFn), ImmutableList.of("0", "3")); - assertFilterMatches(newJavaScriptDimFilter("dim2", jsValueFilter("UNKNOWN"), lookupFn), ImmutableList.of("0", "1", "2", "4", "5")); + assertFilterMatchesSkipVectorize( + newJavaScriptDimFilter("dim2", jsValueFilter("HELLO"), lookupFn), + ImmutableList.of("0", "3") + ); + assertFilterMatchesSkipVectorize( + newJavaScriptDimFilter("dim2", jsValueFilter("UNKNOWN"), lookupFn), + ImmutableList.of("0", "1", "2", 
"4", "5") + ); - assertFilterMatches(newJavaScriptDimFilter("dim3", jsValueFilter("HELLO"), lookupFn), ImmutableList.of()); - assertFilterMatches(newJavaScriptDimFilter("dim3", jsValueFilter("UNKNOWN"), lookupFn), ImmutableList.of("0", "1", "2", "3", "4", "5")); + assertFilterMatchesSkipVectorize( + newJavaScriptDimFilter("dim3", jsValueFilter("HELLO"), lookupFn), + ImmutableList.of() + ); + assertFilterMatchesSkipVectorize( + newJavaScriptDimFilter("dim3", jsValueFilter("UNKNOWN"), lookupFn), + ImmutableList.of("0", "1", "2", "3", "4", "5") + ); - assertFilterMatches(newJavaScriptDimFilter("dim4", jsValueFilter("HELLO"), lookupFn), ImmutableList.of()); - assertFilterMatches(newJavaScriptDimFilter("dim4", jsValueFilter("UNKNOWN"), lookupFn), ImmutableList.of("0", "1", "2", "3", "4", "5")); + assertFilterMatchesSkipVectorize( + newJavaScriptDimFilter("dim4", jsValueFilter("HELLO"), lookupFn), + ImmutableList.of() + ); + assertFilterMatchesSkipVectorize( + newJavaScriptDimFilter("dim4", jsValueFilter("UNKNOWN"), lookupFn), + ImmutableList.of("0", "1", "2", "3", "4", "5") + ); } private JavaScriptDimFilter newJavaScriptDimFilter( diff --git a/processing/src/test/java/org/apache/druid/segment/filter/LongFilteringTest.java b/processing/src/test/java/org/apache/druid/segment/filter/LongFilteringTest.java index 94c5fbde17c..1cee93ba72e 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/LongFilteringTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/LongFilteringTest.java @@ -255,7 +255,7 @@ public class LongFilteringTest extends BaseFilterTest ); String jsFn = "function(x) { return(x === 3 || x === 5) }"; - assertFilterMatches( + assertFilterMatchesSkipVectorize( new JavaScriptDimFilter(LONG_COLUMN, jsFn, null, JavaScriptConfig.getEnabledInstance()), ImmutableList.of("3", "5") ); @@ -363,7 +363,7 @@ public class LongFilteringTest extends BaseFilterTest ); String jsFn = "function(x) { return(x === 'Wednesday' || x === 'Thursday') }"; - assertFilterMatches( + assertFilterMatchesSkipVectorize( new JavaScriptDimFilter(LONG_COLUMN, jsFn, exfn, JavaScriptConfig.getEnabledInstance()), ImmutableList.of("3", "4") ); diff --git a/processing/src/test/java/org/apache/druid/segment/filter/SelectorFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/SelectorFilterTest.java index 55f8c09b9c9..a36aeb910a9 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/SelectorFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/SelectorFilterTest.java @@ -69,9 +69,20 @@ public class SelectorFilterTest extends BaseFilterTest ); private static final List ROWS = ImmutableList.of( - PARSER.parseBatch(ImmutableMap.of("dim0", "0", "dim1", "", "dim2", ImmutableList.of("a", "b"), "dim6", "2017-07-25")).get(0), - PARSER.parseBatch(ImmutableMap.of("dim0", "1", "dim1", "10", "dim2", ImmutableList.of(), "dim6", "2017-07-25")).get(0), - PARSER.parseBatch(ImmutableMap.of("dim0", "2", "dim1", "2", "dim2", ImmutableList.of(""), "dim6", "2017-05-25")).get(0), + PARSER.parseBatch(ImmutableMap.of( + "dim0", + "0", + "dim1", + "", + "dim2", + ImmutableList.of("a", "b"), + "dim6", + "2017-07-25" + )).get(0), + PARSER.parseBatch(ImmutableMap.of("dim0", "1", "dim1", "10", "dim2", ImmutableList.of(), "dim6", "2017-07-25")) + .get(0), + PARSER.parseBatch(ImmutableMap.of("dim0", "2", "dim1", "2", "dim2", ImmutableList.of(""), "dim6", "2017-05-25")) + .get(0), PARSER.parseBatch(ImmutableMap.of("dim0", "3", "dim1", "1", "dim2", 
ImmutableList.of("a"))).get(0), PARSER.parseBatch(ImmutableMap.of("dim0", "4", "dim1", "def", "dim2", ImmutableList.of("c"))).get(0), PARSER.parseBatch(ImmutableMap.of("dim0", "5", "dim1", "abc")).get(0) @@ -107,10 +118,24 @@ public class SelectorFilterTest extends BaseFilterTest @Test public void testWithTimeExtractionFnNull() { - assertFilterMatches(new SelectorDimFilter("dim0", null, new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true)), ImmutableList.of()); - assertFilterMatches(new SelectorDimFilter("dim6", null, new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true)), ImmutableList.of("3", "4", "5")); - assertFilterMatches(new SelectorDimFilter("dim6", "2017-07", new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true)), ImmutableList.of("0", "1")); - assertFilterMatches(new SelectorDimFilter("dim6", "2017-05", new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true)), ImmutableList.of("2")); + assertFilterMatches( + new SelectorDimFilter("dim0", null, new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true)), + ImmutableList.of() + ); + assertFilterMatches( + new SelectorDimFilter("dim6", null, new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true)), + ImmutableList.of("3", "4", "5") + ); + assertFilterMatches(new SelectorDimFilter( + "dim6", + "2017-07", + new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true) + ), ImmutableList.of("0", "1")); + assertFilterMatches(new SelectorDimFilter( + "dim6", + "2017-05", + new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true) + ), ImmutableList.of("2")); } @Test @@ -187,8 +212,11 @@ public class SelectorFilterTest extends BaseFilterTest @Test public void testExpressionVirtualColumn() { - assertFilterMatches(new SelectorDimFilter("expr", "1.1", null), ImmutableList.of("0", "1", "2", "3", "4", "5")); - assertFilterMatches(new SelectorDimFilter("expr", "1.2", null), ImmutableList.of()); + assertFilterMatchesSkipVectorize( + new SelectorDimFilter("expr", "1.1", null), + ImmutableList.of("0", "1", "2", "3", "4", "5") + ); + assertFilterMatchesSkipVectorize(new SelectorDimFilter("expr", "1.2", null), ImmutableList.of()); } @Test @@ -213,10 +241,16 @@ public class SelectorFilterTest extends BaseFilterTest assertFilterMatches(new SelectorDimFilter("dim2", "UNKNOWN", lookupFn), ImmutableList.of("0", "1", "2", "4", "5")); assertFilterMatches(new SelectorDimFilter("dim3", "HELLO", lookupFn), ImmutableList.of()); - assertFilterMatches(new SelectorDimFilter("dim3", "UNKNOWN", lookupFn), ImmutableList.of("0", "1", "2", "3", "4", "5")); + assertFilterMatches( + new SelectorDimFilter("dim3", "UNKNOWN", lookupFn), + ImmutableList.of("0", "1", "2", "3", "4", "5") + ); assertFilterMatches(new SelectorDimFilter("dim4", "HELLO", lookupFn), ImmutableList.of()); - assertFilterMatches(new SelectorDimFilter("dim4", "UNKNOWN", lookupFn), ImmutableList.of("0", "1", "2", "3", "4", "5")); + assertFilterMatches( + new SelectorDimFilter("dim4", "UNKNOWN", lookupFn), + ImmutableList.of("0", "1", "2", "3", "4", "5") + ); final Map stringMap2 = ImmutableMap.of( "2", "5" @@ -299,7 +333,10 @@ public class SelectorFilterTest extends BaseFilterTest // tests that ExtractionDimFilter (identical to SelectorDimFilter now) optimize() with lookup works // remove these when ExtractionDimFilter is removed. 
- assertFilterMatches(new ExtractionDimFilter("dim1", "UNKNOWN", lookupFn, null), ImmutableList.of("0", "1", "2", "5")); + assertFilterMatches( + new ExtractionDimFilter("dim1", "UNKNOWN", lookupFn, null), + ImmutableList.of("0", "1", "2", "5") + ); assertFilterMatches(new ExtractionDimFilter("dim0", "5", lookupFn2, null), ImmutableList.of("2", "5")); if (NullHandling.replaceWithDefault()) { assertFilterMatches( diff --git a/processing/src/test/java/org/apache/druid/segment/filter/TimeFilteringTest.java b/processing/src/test/java/org/apache/druid/segment/filter/TimeFilteringTest.java index 1b192f1db7c..0cb15ccda73 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/TimeFilteringTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/TimeFilteringTest.java @@ -142,7 +142,7 @@ public class TimeFilteringTest extends BaseFilterTest ); String jsFn = "function(x) { return(x === 3 || x === 5) }"; - assertFilterMatches( + assertFilterMatchesSkipVectorize( new JavaScriptDimFilter(ColumnHolder.TIME_COLUMN_NAME, jsFn, null, JavaScriptConfig.getEnabledInstance()), ImmutableList.of("3", "5") ); @@ -206,7 +206,7 @@ public class TimeFilteringTest extends BaseFilterTest ); String jsFn = "function(x) { return(x === 'Wednesday' || x === 'Thursday') }"; - assertFilterMatches( + assertFilterMatchesSkipVectorize( new JavaScriptDimFilter(ColumnHolder.TIME_COLUMN_NAME, jsFn, exfn, JavaScriptConfig.getEnabledInstance()), ImmutableList.of("2", "3") ); @@ -337,7 +337,7 @@ public class TimeFilteringTest extends BaseFilterTest // increment timestamp by 2 hours String timeBoosterJsFn = "function(x) { return(Number(x) + 7200000) }"; ExtractionFn exFn = new JavaScriptExtractionFn(timeBoosterJsFn, true, JavaScriptConfig.getEnabledInstance()); - assertFilterMatches( + assertFilterMatchesSkipVectorize( new IntervalDimFilter( "dim0", Collections.singletonList(Intervals.of("1970-01-01T02:00:00.001Z/1970-01-01T02:00:00.005Z")), diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/VirtualColumnsTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/VirtualColumnsTest.java index 3363a570110..2b42c0350e7 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/VirtualColumnsTest.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/VirtualColumnsTest.java @@ -36,6 +36,7 @@ import org.apache.druid.segment.BaseLongColumnValueSelector; import org.apache.druid.segment.BaseObjectColumnValueSelector; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.DimensionDictionarySelector; import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.DimensionSelectorUtils; import org.apache.druid.segment.IdLookup; @@ -319,7 +320,7 @@ public class VirtualColumnsTest @Override public int getValueCardinality() { - return DimensionSelector.CARDINALITY_UNKNOWN; + return DimensionDictionarySelector.CARDINALITY_UNKNOWN; } @Override diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java index 8c4482286e5..8bffa486210 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java @@ -35,6 +35,7 @@ import org.apache.druid.math.expr.ExprMacroTable; import org.apache.druid.query.Druids; import org.apache.druid.query.Query; import 
org.apache.druid.query.QueryContexts; +import org.apache.druid.query.QueryDataSource; import org.apache.druid.query.QueryRunnerFactoryConglomerate; import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.aggregation.post.ExpressionPostAggregator; @@ -89,6 +90,7 @@ import org.junit.rules.ExpectedException; import org.junit.rules.TemporaryFolder; import java.io.IOException; +import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; @@ -222,6 +224,10 @@ public class BaseCalciteQueryTest extends CalciteTestBase @Rule public TemporaryFolder temporaryFolder = new TemporaryFolder(); + + public boolean cannotVectorize = false; + public boolean skipVectorize = false; + public SpecificSegmentsQuerySegmentWalker walker = null; public QueryLogHook queryLogHook; @@ -493,6 +499,19 @@ public class BaseCalciteQueryTest extends CalciteTestBase testQuery(plannerConfig, QUERY_CONTEXT_DEFAULT, sql, authenticationResult, expectedQueries, expectedResults); } + private Query recursivelyOverrideContext(final Query q, final Map context) + { + final Query q2; + if (q.getDataSource() instanceof QueryDataSource) { + final Query subQuery = ((QueryDataSource) q.getDataSource()).getQuery(); + q2 = q.withDataSource(new QueryDataSource(recursivelyOverrideContext(subQuery, context))); + } else { + q2 = q; + } + + return q2.withOverriddenContext(context); + } + public void testQuery( final PlannerConfig plannerConfig, final Map queryContext, @@ -503,9 +522,38 @@ public class BaseCalciteQueryTest extends CalciteTestBase ) throws Exception { log.info("SQL: %s", sql); - queryLogHook.clearRecordedQueries(); - final List plannerResults = getResults(plannerConfig, queryContext, sql, authenticationResult); - verifyResults(sql, expectedQueries, expectedResults, plannerResults); + + final List vectorizeValues = new ArrayList<>(); + + vectorizeValues.add("false"); + + if (!skipVectorize) { + vectorizeValues.add("force"); + } + + for (final String vectorize : vectorizeValues) { + queryLogHook.clearRecordedQueries(); + + final Map theQueryContext = new HashMap<>(queryContext); + theQueryContext.put("vectorize", vectorize); + + if (!"false".equals(vectorize)) { + theQueryContext.put("vectorSize", 2); // Small vector size to ensure we use more than one. 
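+        // (With two rows per vector, any result set of three or more rows spans multiple batches,
+        // so batch-boundary handling is exercised as well as the single-batch path.)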
+      }
+
+      final List<Query> theQueries = new ArrayList<>();
+      for (Query query : expectedQueries) {
+        theQueries.add(recursivelyOverrideContext(query, theQueryContext));
+      }
+
+      if (cannotVectorize && "force".equals(vectorize)) {
+        expectedException.expect(RuntimeException.class);
+        expectedException.expectMessage("Cannot vectorize");
+      }
+
+      final List<Object[]> plannerResults = getResults(plannerConfig, theQueryContext, sql, authenticationResult);
+      verifyResults(sql, theQueries, expectedResults, plannerResults);
+    }
   }
 
   public List<Object[]> getResults(
@@ -608,4 +656,14 @@ public class BaseCalciteQueryTest extends CalciteTestBase
       }
     }
   }
+
+  protected void cannotVectorize()
+  {
+    cannotVectorize = true;
+  }
+
+  protected void skipVectorize()
+  {
+    skipVectorize = true;
+  }
 }
diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java
index 7ec81620afd..009c8ec103b 100644
--- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java
+++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java
@@ -557,12 +557,15 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
   @Test
   public void testExplainSelectStar() throws Exception
   {
+    // Skip vectorization since otherwise the "context" will change for each subtest.
+    skipVectorize();
+
     testQuery(
         "EXPLAIN PLAN FOR SELECT * FROM druid.foo",
         ImmutableList.of(),
         ImmutableList.of(
             new Object[]{
-                "DruidQueryRel(query=[{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"resultFormat\":\"compactedList\",\"batchSize\":20480,\"limit\":9223372036854775807,\"order\":\"none\",\"filter\":null,\"columns\":[\"__time\",\"cnt\",\"dim1\",\"dim2\",\"dim3\",\"m1\",\"m2\",\"unique_dim1\"],\"legacy\":false,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\"},\"descending\":false,\"granularity\":{\"type\":\"all\"}}], signature=[{__time:LONG, cnt:LONG, dim1:STRING, dim2:STRING, dim3:STRING, m1:FLOAT, m2:DOUBLE, unique_dim1:COMPLEX}])\n"
+                "DruidQueryRel(query=[{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"resultFormat\":\"compactedList\",\"batchSize\":20480,\"limit\":9223372036854775807,\"order\":\"none\",\"filter\":null,\"columns\":[\"__time\",\"cnt\",\"dim1\",\"dim2\",\"dim3\",\"m1\",\"m2\",\"unique_dim1\"],\"legacy\":false,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\"},\"descending\":false,\"granularity\":{\"type\":\"all\"}}], signature=[{__time:LONG, cnt:LONG, dim1:STRING, dim2:STRING, dim3:STRING, m1:FLOAT, m2:DOUBLE, unique_dim1:COMPLEX}])\n"
             }
         )
     );
@@ -1173,6 +1176,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
   @Test
   public void testHavingOnApproximateCountDistinct() throws Exception
   {
+    // Cannot vectorize due to "cardinality" aggregator.
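+    // (COUNT(DISTINCT m1) plans to the approximate "cardinality" aggregator,
+    // which has no vectorized implementation, so the harness expects the
+    // forced-vectorization pass to throw "Cannot vectorize".)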
+ cannotVectorize(); + testQuery( "SELECT dim2, COUNT(DISTINCT m1) FROM druid.foo GROUP BY dim2 HAVING COUNT(DISTINCT m1) > 1", ImmutableList.of( @@ -1321,6 +1327,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testColumnComparison() throws Exception { + // Cannot vectorize due to expression filter. + cannotVectorize(); + testQuery( "SELECT dim1, m1, COUNT(*) FROM druid.foo WHERE m1 - 1 = dim1 GROUP BY dim1, m1", ImmutableList.of( @@ -1686,6 +1695,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testGroupByCaseWhen() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT\n" + " CASE EXTRACT(DAY FROM __time)\n" @@ -1736,6 +1748,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testGroupByCaseWhenOfTripleAnd() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT\n" + " CASE WHEN m1 > 1 AND m1 < 5 AND cnt = 1 THEN 'x' ELSE NULL END," @@ -1856,6 +1871,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest // Doesn't conform to the SQL standard, but it's how we do it. // This example is used in the sql.md doc. + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT COALESCE(dim2, dim1), COUNT(*) FROM druid.foo GROUP BY COALESCE(dim2, dim1)\n", ImmutableList.of( @@ -2033,6 +2051,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest // This query should actually return [0, null] rather than an empty result set, but it doesn't. // This test just "documents" the current behavior. + // Cannot vectorize due to "longMax" aggregator. + cannotVectorize(); + testQuery( "SELECT COUNT(*), MAX(cnt) FROM druid.foo WHERE dim1 = 'foobar'", ImmutableList.of( @@ -2055,6 +2076,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testGroupByWithFilterMatchingNothingWithGroupByLiteral() throws Exception { + // Cannot vectorize due to "longMax" aggregator. + cannotVectorize(); + testQuery( "SELECT COUNT(*), MAX(cnt) FROM druid.foo WHERE dim1 = 'foobar' GROUP BY 'dummy'", ImmutableList.of( @@ -2126,6 +2150,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testCountNullableExpression() throws Exception { + // Cannot vectorize due to expression filter. + cannotVectorize(); + testQuery( "SELECT COUNT(CASE WHEN dim2 = 'abc' THEN 'yes' WHEN dim2 = 'def' THEN 'yes' END) FROM druid.foo", ImmutableList.of( @@ -2229,6 +2256,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testExplainCountStarOnView() throws Exception { + // Skip vectorization since otherwise the "context" will change for each subtest. 
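+    // (The expected plan string below embeds the query context verbatim,
+    // including the "vectorize" key, so one string cannot match both passes.)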
+ skipVectorize(); + final String explanation = "DruidQueryRel(query=[{" + "\"queryType\":\"timeseries\"," @@ -2241,7 +2271,7 @@ public class CalciteQueryTest extends BaseCalciteQueryTest + "\"aggregations\":[{\"type\":\"count\",\"name\":\"a0\"}]," + "\"postAggregations\":[]," + "\"limit\":2147483647," - + "\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"skipEmptyBuckets\":true,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\"}}]" + + "\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"skipEmptyBuckets\":true,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\"}}]" + ", signature=[{a0:LONG}])\n"; testQuery( @@ -2405,6 +2435,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testFilterOnStringAsNumber() throws Exception { + // Cannot vectorize due to usage of expressions. + cannotVectorize(); + testQuery( "SELECT distinct dim1 FROM druid.foo WHERE " + "dim1 = 10 OR " @@ -2443,6 +2476,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testSimpleAggregations() throws Exception { + // Cannot vectorize due to "longMax" aggregator. + cannotVectorize(); + testQuery( "SELECT COUNT(*), COUNT(cnt), COUNT(dim1), AVG(cnt), SUM(cnt), SUM(cnt) + MIN(cnt) + MAX(cnt), COUNT(dim2) FROM druid.foo", ImmutableList.of( @@ -2532,6 +2568,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest { // Use PlannerConfig to disable topN, so this query becomes a groupBy. + // Cannot vectorize due to "floatMin", "floatMax" aggregators. + cannotVectorize(); + testQuery( PLANNER_CONFIG_NO_TOPN, "SELECT dim1, MIN(m1) + MAX(m1) AS x FROM druid.foo GROUP BY dim1 ORDER BY x LIMIT 3", @@ -2575,6 +2614,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest { // Use context to disable topN, so this query becomes a groupBy. + // Cannot vectorize due to "floatMin", "floatMax" aggregators. + cannotVectorize(); + testQuery( PLANNER_CONFIG_DEFAULT, QUERY_CONTEXT_NO_TOPN, @@ -2621,6 +2663,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testFilteredAggregations() throws Exception { + // Cannot vectorize due to "cardinality", "longMax" aggregators. + cannotVectorize(); + testQuery( "SELECT " + "SUM(case dim1 when 'abc' then cnt end), " @@ -2792,6 +2837,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testExpressionAggregations() throws Exception { + // Cannot vectorize due to "doubleMax" aggregator. + cannotVectorize(); + final ExprMacroTable macroTable = CalciteTests.createExprMacroTable(); testQuery( @@ -2830,6 +2878,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testExpressionFilteringAndGrouping() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT\n" + " FLOOR(m1 / 2) * 2,\n" @@ -2876,6 +2927,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testExpressionFilteringAndGroupingUsingCastToLong() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT\n" + " CAST(m1 AS BIGINT) / 2 * 2,\n" @@ -2924,6 +2978,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testExpressionFilteringAndGroupingOnStringCastToNumber() throws Exception { + // Cannot vectorize due to virtual columns. 
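+    // (FLOOR(CAST(dim1 AS FLOAT) / 2) * 2 is planned as an expression virtual
+    // column, and virtual columns do not yet offer vectorized selectors.)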
+ cannotVectorize(); + testQuery( "SELECT\n" + " FLOOR(CAST(dim1 AS FLOAT) / 2) * 2,\n" @@ -3612,6 +3669,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testSumOfString() throws Exception { + // Cannot vectorize due to expressions in aggregators. + cannotVectorize(); + testQuery( "SELECT SUM(CAST(dim1 AS INTEGER)) FROM druid.foo", ImmutableList.of( @@ -3639,6 +3699,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testSumOfExtractionFn() throws Exception { + // Cannot vectorize due to expressions in aggregators. + cannotVectorize(); + testQuery( "SELECT SUM(CAST(SUBSTRING(dim1, 1, 10) AS INTEGER)) FROM druid.foo", ImmutableList.of( @@ -3666,6 +3729,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testTimeseriesWithTimeFilterOnLongColumnUsingMillisToTimestamp() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT\n" + " FLOOR(MILLIS_TO_TIMESTAMP(cnt) TO YEAR),\n" @@ -3745,6 +3811,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testSelectDistinctWithStrlenFilter() throws Exception { + // Cannot vectorize due to usage of expressions. + cannotVectorize(); + testQuery( "SELECT distinct dim1 FROM druid.foo " + "WHERE CHARACTER_LENGTH(dim1) = 3 OR CAST(CHARACTER_LENGTH(dim1) AS varchar) = 3", @@ -3915,6 +3984,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testCountDistinct() throws Exception { + // Cannot vectorize due to "cardinality" aggregator. + cannotVectorize(); + testQuery( "SELECT SUM(cnt), COUNT(distinct dim2), COUNT(distinct unique_dim1) FROM druid.foo", ImmutableList.of( @@ -3947,6 +4019,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testCountDistinctOfCaseWhen() throws Exception { + // Cannot vectorize due to "cardinality" aggregator. + cannotVectorize(); + testQuery( "SELECT\n" + "COUNT(DISTINCT CASE WHEN m1 >= 4 THEN m1 END),\n" @@ -4039,6 +4114,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest { // When HLL is disabled, APPROX_COUNT_DISTINCT is still approximate. + // Cannot vectorize due to "cardinality" aggregator. + cannotVectorize(); + testQuery( PLANNER_CONFIG_NO_HLL, "SELECT APPROX_COUNT_DISTINCT(dim2) FROM druid.foo", @@ -4125,6 +4203,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testApproxCountDistinct() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT\n" + " SUM(cnt),\n" @@ -4202,6 +4283,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testNestedGroupBy() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT\n" + " FLOOR(__time to hour) AS __time,\n" @@ -4320,10 +4404,13 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testExplainDoubleNestedGroupBy() throws Exception { + // Skip vectorization since otherwise the "context" will change for each subtest. 
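+    // (All three nested plan strings below hard-code "vectorize":"false" in
+    // their contexts, so a "force" pass would need a second copy of each.)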
+ skipVectorize(); + final String explanation = - "DruidOuterQueryRel(query=[{\"queryType\":\"timeseries\",\"dataSource\":{\"type\":\"table\",\"name\":\"__subquery__\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"descending\":false,\"virtualColumns\":[],\"filter\":null,\"granularity\":{\"type\":\"all\"},\"aggregations\":[{\"type\":\"longSum\",\"name\":\"a0\",\"fieldName\":\"cnt\",\"expression\":null},{\"type\":\"count\",\"name\":\"a1\"}],\"postAggregations\":[],\"limit\":2147483647,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"skipEmptyBuckets\":true,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\"}}], signature=[{a0:LONG, a1:LONG}])\n" - + " DruidOuterQueryRel(query=[{\"queryType\":\"groupBy\",\"dataSource\":{\"type\":\"table\",\"name\":\"__subquery__\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"filter\":null,\"granularity\":{\"type\":\"all\"},\"dimensions\":[{\"type\":\"default\",\"dimension\":\"dim2\",\"outputName\":\"d0\",\"outputType\":\"STRING\"}],\"aggregations\":[{\"type\":\"longSum\",\"name\":\"a0\",\"fieldName\":\"cnt\",\"expression\":null}],\"postAggregations\":[],\"having\":null,\"limitSpec\":{\"type\":\"NoopLimitSpec\"},\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\"},\"descending\":false}], signature=[{d0:STRING, a0:LONG}])\n" - + " DruidQueryRel(query=[{\"queryType\":\"groupBy\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"filter\":null,\"granularity\":{\"type\":\"all\"},\"dimensions\":[{\"type\":\"default\",\"dimension\":\"dim1\",\"outputName\":\"d0\",\"outputType\":\"STRING\"},{\"type\":\"default\",\"dimension\":\"dim2\",\"outputName\":\"d1\",\"outputType\":\"STRING\"}],\"aggregations\":[{\"type\":\"count\",\"name\":\"a0\"}],\"postAggregations\":[],\"having\":null,\"limitSpec\":{\"type\":\"NoopLimitSpec\"},\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\"},\"descending\":false}], signature=[{d0:STRING, d1:STRING, a0:LONG}])\n"; + "DruidOuterQueryRel(query=[{\"queryType\":\"timeseries\",\"dataSource\":{\"type\":\"table\",\"name\":\"__subquery__\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"descending\":false,\"virtualColumns\":[],\"filter\":null,\"granularity\":{\"type\":\"all\"},\"aggregations\":[{\"type\":\"longSum\",\"name\":\"a0\",\"fieldName\":\"cnt\",\"expression\":null},{\"type\":\"count\",\"name\":\"a1\"}],\"postAggregations\":[],\"limit\":2147483647,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"skipEmptyBuckets\":true,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\"}}], signature=[{a0:LONG, a1:LONG}])\n" + + " 
DruidOuterQueryRel(query=[{\"queryType\":\"groupBy\",\"dataSource\":{\"type\":\"table\",\"name\":\"__subquery__\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"filter\":null,\"granularity\":{\"type\":\"all\"},\"dimensions\":[{\"type\":\"default\",\"dimension\":\"dim2\",\"outputName\":\"d0\",\"outputType\":\"STRING\"}],\"aggregations\":[{\"type\":\"longSum\",\"name\":\"a0\",\"fieldName\":\"cnt\",\"expression\":null}],\"postAggregations\":[],\"having\":null,\"limitSpec\":{\"type\":\"NoopLimitSpec\"},\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\"},\"descending\":false}], signature=[{d0:STRING, a0:LONG}])\n" + + " DruidQueryRel(query=[{\"queryType\":\"groupBy\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"filter\":null,\"granularity\":{\"type\":\"all\"},\"dimensions\":[{\"type\":\"default\",\"dimension\":\"dim1\",\"outputName\":\"d0\",\"outputType\":\"STRING\"},{\"type\":\"default\",\"dimension\":\"dim2\",\"outputName\":\"d1\",\"outputType\":\"STRING\"}],\"aggregations\":[{\"type\":\"count\",\"name\":\"a0\"}],\"postAggregations\":[],\"having\":null,\"limitSpec\":{\"type\":\"NoopLimitSpec\"},\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\"},\"descending\":false}], signature=[{d0:STRING, d1:STRING, a0:LONG}])\n"; testQuery( "EXPLAIN PLAN FOR SELECT SUM(cnt), COUNT(*) FROM (\n" @@ -4390,6 +4477,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testMinMaxAvgDailyCountWithLimit() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT * FROM (" + " SELECT max(cnt), min(cnt), avg(cnt), TIME_EXTRACT(max(t), 'EPOCH') last_time, count(1) num_days FROM (\n" @@ -4457,6 +4547,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testAvgDailyCountDistinct() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT\n" + " AVG(u)\n" @@ -4716,6 +4809,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testExactCountDistinctOfSemiJoinResult() throws Exception { + // Cannot vectorize due to extraction dimension spec. + cannotVectorize(); + testQuery( "SELECT COUNT(*)\n" + "FROM (\n" @@ -4790,10 +4886,13 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testExplainExactCountDistinctOfSemiJoinResult() throws Exception { + // Skip vectorization since otherwise the "context" will change for each subtest. 
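+    // (As in the other EXPLAIN tests: the outer timeseries, the semi-join, and
+    // the inner groupBy below all embed the non-vectorized pass's context.)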
+ skipVectorize(); + final String explanation = - "DruidOuterQueryRel(query=[{\"queryType\":\"timeseries\",\"dataSource\":{\"type\":\"table\",\"name\":\"__subquery__\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"descending\":false,\"virtualColumns\":[],\"filter\":null,\"granularity\":{\"type\":\"all\"},\"aggregations\":[{\"type\":\"count\",\"name\":\"a0\"}],\"postAggregations\":[],\"limit\":2147483647,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"skipEmptyBuckets\":true,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\"}}], signature=[{a0:LONG}])\n" - + " DruidSemiJoin(query=[{\"queryType\":\"groupBy\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"filter\":null,\"granularity\":{\"type\":\"all\"},\"dimensions\":[{\"type\":\"default\",\"dimension\":\"dim2\",\"outputName\":\"d0\",\"outputType\":\"STRING\"}],\"aggregations\":[],\"postAggregations\":[],\"having\":null,\"limitSpec\":{\"type\":\"NoopLimitSpec\"},\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\"},\"descending\":false}], leftExpressions=[[SUBSTRING($3, 1, 1)]], rightKeys=[[0]])\n" - + " DruidQueryRel(query=[{\"queryType\":\"groupBy\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"filter\":{\"type\":\"not\",\"field\":{\"type\":\"selector\",\"dimension\":\"dim1\",\"value\":null,\"extractionFn\":null}},\"granularity\":{\"type\":\"all\"},\"dimensions\":[{\"type\":\"extraction\",\"dimension\":\"dim1\",\"outputName\":\"d0\",\"outputType\":\"STRING\",\"extractionFn\":{\"type\":\"substring\",\"index\":0,\"length\":1}}],\"aggregations\":[],\"postAggregations\":[],\"having\":null,\"limitSpec\":{\"type\":\"NoopLimitSpec\"},\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\"},\"descending\":false}], signature=[{d0:STRING}])\n"; + "DruidOuterQueryRel(query=[{\"queryType\":\"timeseries\",\"dataSource\":{\"type\":\"table\",\"name\":\"__subquery__\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"descending\":false,\"virtualColumns\":[],\"filter\":null,\"granularity\":{\"type\":\"all\"},\"aggregations\":[{\"type\":\"count\",\"name\":\"a0\"}],\"postAggregations\":[],\"limit\":2147483647,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"skipEmptyBuckets\":true,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\"}}], signature=[{a0:LONG}])\n" + + " 
DruidSemiJoin(query=[{\"queryType\":\"groupBy\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"filter\":null,\"granularity\":{\"type\":\"all\"},\"dimensions\":[{\"type\":\"default\",\"dimension\":\"dim2\",\"outputName\":\"d0\",\"outputType\":\"STRING\"}],\"aggregations\":[],\"postAggregations\":[],\"having\":null,\"limitSpec\":{\"type\":\"NoopLimitSpec\"},\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\"},\"descending\":false}], leftExpressions=[[SUBSTRING($3, 1, 1)]], rightKeys=[[0]])\n" + + " DruidQueryRel(query=[{\"queryType\":\"groupBy\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"filter\":{\"type\":\"not\",\"field\":{\"type\":\"selector\",\"dimension\":\"dim1\",\"value\":null,\"extractionFn\":null}},\"granularity\":{\"type\":\"all\"},\"dimensions\":[{\"type\":\"extraction\",\"dimension\":\"dim1\",\"outputName\":\"d0\",\"outputType\":\"STRING\",\"extractionFn\":{\"type\":\"substring\",\"index\":0,\"length\":1}}],\"aggregations\":[],\"postAggregations\":[],\"having\":null,\"limitSpec\":{\"type\":\"NoopLimitSpec\"},\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\"},\"descending\":false}], signature=[{d0:STRING}])\n"; testQuery( "EXPLAIN PLAN FOR SELECT COUNT(*)\n" @@ -5086,6 +5185,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testCountDistinctArithmetic() throws Exception { + // Cannot vectorize due to "cardinality" aggregator. + cannotVectorize(); + testQuery( "SELECT\n" + " SUM(cnt),\n" @@ -5130,6 +5232,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testCountDistinctOfSubstring() throws Exception { + // Cannot vectorize due to "cardinality" aggregator. + cannotVectorize(); + testQuery( "SELECT COUNT(DISTINCT SUBSTRING(dim1, 1, 1)) FROM druid.foo WHERE dim1 <> ''", ImmutableList.of( @@ -5169,6 +5274,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest { // Test a couple different syntax variants of TRIM. + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT COUNT(DISTINCT TRIM(BOTH ' ' FROM dim1)) FROM druid.foo WHERE TRIM(dim1) <> ''", ImmutableList.of( @@ -5203,6 +5311,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest { // Like FLOOR(__time TO QUARTER) but silly. + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT CAST((EXTRACT(MONTH FROM __time) - 1 ) / 3 + 1 AS INTEGER) AS quarter, COUNT(*)\n" + "FROM foo\n" @@ -5231,6 +5342,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testRegexpExtract() throws Exception { + // Cannot vectorize due to extractionFn in dimension spec. + cannotVectorize(); + String nullValue = NullHandling.replaceWithDefault() ? "" : null; testQuery( "SELECT DISTINCT\n" @@ -5610,6 +5724,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testFilterOnTimeExtract() throws Exception { + // Cannot vectorize due to expression filter. 
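+    // (The EXTRACT(... FROM __time) comparison is planned as an expression
+    // filter, whose value matcher is not vectorized.)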
+ cannotVectorize(); + testQuery( "SELECT COUNT(*) FROM druid.foo\n" + "WHERE EXTRACT(YEAR FROM __time) = 2000\n" @@ -5642,6 +5759,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testFilterOnTimeExtractWithMultipleDays() throws Exception { + // Cannot vectorize due to expression filters. + cannotVectorize(); + testQuery( "SELECT COUNT(*) FROM druid.foo\n" + "WHERE EXTRACT(YEAR FROM __time) = 2000\n" @@ -5701,6 +5821,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testGroupByFloor() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( PLANNER_CONFIG_NO_SUBQUERIES, // Sanity check; this simple query should work with subqueries disabled. "SELECT floor(CAST(dim1 AS float)), COUNT(*) FROM druid.foo GROUP BY floor(CAST(dim1 AS float))", @@ -5730,6 +5853,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testGroupByFloorWithOrderBy() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT floor(CAST(dim1 AS float)) AS fl, COUNT(*) FROM druid.foo GROUP BY floor(CAST(dim1 AS float)) ORDER BY fl DESC", ImmutableList.of( @@ -5781,6 +5907,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testGroupByFloorTimeAndOneOtherDimensionWithOrderBy() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT floor(__time TO year), dim2, COUNT(*)" + " FROM druid.foo" @@ -5856,6 +5985,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testGroupByStringLength() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT CHARACTER_LENGTH(dim1), COUNT(*) FROM druid.foo GROUP BY CHARACTER_LENGTH(dim1)", ImmutableList.of( @@ -5881,6 +6013,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testFilterAndGroupByLookup() throws Exception { + // Cannot vectorize due to extraction dimension specs. + cannotVectorize(); + String nullValue = NullHandling.replaceWithDefault() ? "" : null; final RegisteredLookupExtractionFn extractionFn = new RegisteredLookupExtractionFn( null, @@ -5935,6 +6070,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testCountDistinctOfLookup() throws Exception { + // Cannot vectorize due to "cardinality" aggregator. + cannotVectorize(); + final RegisteredLookupExtractionFn extractionFn = new RegisteredLookupExtractionFn( null, "lookyloo", @@ -6142,6 +6280,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testTimeseriesUsingTimeFloorWithTimeShift() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT SUM(cnt), gran FROM (\n" + " SELECT TIME_FLOOR(TIME_SHIFT(__time, 'P1D', -1), 'P1M') AS gran,\n" @@ -6190,6 +6331,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testTimeseriesUsingTimeFloorWithTimestampAdd() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT SUM(cnt), gran FROM (\n" + " SELECT TIME_FLOOR(TIMESTAMPADD(DAY, -1, __time), 'P1M') AS gran,\n" @@ -6440,6 +6584,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testTimeseriesDescending() throws Exception { + // Cannot vectorize due to descending order. 
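+    // (Vectorized cursors only iterate in ascending time order, so a
+    // descending timeseries query cannot use the vectorized engine.)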
+ cannotVectorize(); + testQuery( "SELECT gran, SUM(cnt) FROM (\n" + " SELECT floor(__time TO month) AS gran,\n" @@ -6467,6 +6614,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testGroupByExtractYear() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT\n" + " EXTRACT(YEAR FROM __time) AS \"year\",\n" @@ -6513,6 +6663,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testGroupByFormatYearAndMonth() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT\n" + " TIME_FORMAt(__time, 'yyyy MM') AS \"year\",\n" @@ -6559,6 +6712,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testGroupByExtractFloorTime() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT\n" + "EXTRACT(YEAR FROM FLOOR(__time TO YEAR)) AS \"year\", SUM(cnt)\n" @@ -6591,6 +6747,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testGroupByExtractFloorTimeLosAngeles() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( PLANNER_CONFIG_DEFAULT, QUERY_CONTEXT_LOS_ANGELES, @@ -6712,6 +6871,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testGroupByTimeAndOtherDimension() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT dim2, gran, SUM(cnt)\n" + "FROM (SELECT FLOOR(__time TO MONTH) AS gran, dim2, cnt FROM druid.foo) AS x\n" @@ -7016,6 +7178,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testSemiJoinWithOuterTimeExtractAggregateWithOrderBy() throws Exception { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + testQuery( "SELECT COUNT(DISTINCT dim1), EXTRACT(MONTH FROM __time) FROM druid.foo\n" + " WHERE dim2 IN (\n" @@ -7087,6 +7252,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testUsingSubqueryWithExtractionFns() throws Exception { + // Cannot vectorize due to extraction dimension specs. + cannotVectorize(); + testQuery( "SELECT dim2, COUNT(*) FROM druid.foo " + "WHERE substring(dim2, 1, 1) IN (SELECT substring(dim1, 1, 1) FROM druid.foo WHERE dim1 <> '')" @@ -7353,6 +7521,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testPostAggWithTimeseries() throws Exception { + // Cannot vectorize due to descending order. + cannotVectorize(); + testQuery( "SELECT " + " FLOOR(__time TO YEAR), " @@ -7622,6 +7793,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest ) ); + // Cannot vectorize next test due to "cardinality" aggregator. + cannotVectorize(); + // semi-join requires time condition on both left and right query testQuery( PLANNER_CONFIG_REQUIRE_TIME_CONDITION, @@ -7956,6 +8130,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testNvlColumns() throws Exception { + // Cannot vectorize due to usage of expressions. + cannotVectorize(); + testQuery( "SELECT NVL(dim2, dim1), COUNT(*) FROM druid.foo GROUP BY NVL(dim2, dim1)\n", ImmutableList.of( @@ -7994,6 +8171,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testMultiValueStringWorksLikeStringGroupBy() throws Exception { + // Cannot vectorize due to usage of expressions. 
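+    // (Grouping on concat(dim3, 'foo') requires an expression virtual column,
+    // which is not vectorized.)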
+ cannotVectorize(); + List expected; if (NullHandling.replaceWithDefault()) { expected = ImmutableList.of( @@ -8045,6 +8225,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testMultiValueStringWorksLikeStringGroupByWithFilter() throws Exception { + // Cannot vectorize due to usage of expressions. + cannotVectorize(); + testQuery( "SELECT concat(dim3, 'foo'), SUM(cnt) FROM druid.numfoo where concat(dim3, 'foo') = 'bfoo' GROUP BY 1 ORDER BY 2 DESC", ImmutableList.of( @@ -8325,6 +8508,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testMultiValueStringLength() throws Exception { + // Cannot vectorize due to usage of expressions. + cannotVectorize(); + testQuery( "SELECT dim1, MV_LENGTH(dim3), SUM(cnt) FROM druid.numfoo GROUP BY 1, 2 ORDER BY 2 DESC", ImmutableList.of( @@ -8365,6 +8551,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testMultiValueStringAppend() throws Exception { + // Cannot vectorize due to usage of expressions. + cannotVectorize(); + ImmutableList results; if (NullHandling.replaceWithDefault()) { results = ImmutableList.of( @@ -8418,6 +8607,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testMultiValueStringPrepend() throws Exception { + // Cannot vectorize due to usage of expressions. + cannotVectorize(); + ImmutableList results; if (NullHandling.replaceWithDefault()) { results = ImmutableList.of( @@ -8471,6 +8663,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testMultiValueStringPrependAppend() throws Exception { + // Cannot vectorize due to usage of expressions. + cannotVectorize(); + ImmutableList results; if (NullHandling.replaceWithDefault()) { results = ImmutableList.of( @@ -8524,6 +8719,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testMultiValueStringConcat() throws Exception { + // Cannot vectorize due to usage of expressions. + cannotVectorize(); + ImmutableList results; if (NullHandling.replaceWithDefault()) { results = ImmutableList.of( @@ -8575,6 +8773,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testMultiValueStringOffset() throws Exception { + // Cannot vectorize due to usage of expressions. + cannotVectorize(); + testQuery( "SELECT MV_OFFSET(dim3, 1), SUM(cnt) FROM druid.numfoo GROUP BY 1 ORDER BY 2 DESC", ImmutableList.of( @@ -8611,6 +8812,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testMultiValueStringOrdinal() throws Exception { + // Cannot vectorize due to usage of expressions. + cannotVectorize(); + testQuery( "SELECT MV_ORDINAL(dim3, 2), SUM(cnt) FROM druid.numfoo GROUP BY 1 ORDER BY 2 DESC", ImmutableList.of( @@ -8647,6 +8851,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testMultiValueStringOffsetOf() throws Exception { + // Cannot vectorize due to usage of expressions. + cannotVectorize(); + testQuery( "SELECT MV_OFFSET_OF(dim3, 'b'), SUM(cnt) FROM druid.numfoo GROUP BY 1 ORDER BY 2 DESC", ImmutableList.of( @@ -8683,6 +8890,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testMultiValueStringOrdinalOf() throws Exception { + // Cannot vectorize due to usage of expressions. 
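+    // (MV_ORDINAL_OF, like the other multi-value string functions above, is
+    // evaluated through the non-vectorized expression system.)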
+ cannotVectorize(); + testQuery( "SELECT MV_ORDINAL_OF(dim3, 'b'), SUM(cnt) FROM druid.numfoo GROUP BY 1 ORDER BY 2 DESC", ImmutableList.of( @@ -8719,6 +8929,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testMultiValueStringToString() throws Exception { + // Cannot vectorize due to usage of expressions. + cannotVectorize(); + ImmutableList results; if (NullHandling.replaceWithDefault()) { results = ImmutableList.of( @@ -8768,6 +8981,9 @@ public class CalciteQueryTest extends BaseCalciteQueryTest @Test public void testMultiValueStringToStringToMultiValueString() throws Exception { + // Cannot vectorize due to usage of expressions. + cannotVectorize(); + ImmutableList results; if (NullHandling.replaceWithDefault()) { results = ImmutableList.of(