From a42ccb6d19997bb67622e0444ca564e974ddf24c Mon Sep 17 00:00:00 2001 From: Jonathan Wei Date: Wed, 20 Jul 2016 17:08:49 -0700 Subject: [PATCH] Support filtering on long columns (including __time) (#3180) * Support filtering on __time column * Rename DruidPredicate * Add docs for ValueMatcherFactory, add comment on getColumnCapabilities * Combine ValueMatcherFactory predicate methods to accept DruidCompositePredicate * Address PR comments (support filter on all long columns) * Use predicate factory instead of composite predicate * Address PR comments * Lazily initialize long handling in selector/in filter * Move long value parsing from InFilter to InDimFilter, make long value parsing thread-safe * Add multithreaded selector/in filter test * Fix non-final lock object in SelectorDimFilter --- .../DimensionPredicateFilterBenchmark.java | 33 +- .../benchmark/FilterPartitionBenchmark.java | 120 ++++++- .../FilteredAggregatorBenchmark.java | 316 +++++++++++++++++ .../IncrementalIndexReadBenchmark.java | 46 +++ docs/content/querying/filters.md | 37 ++ .../FilteredAggregatorFactory.java | 49 ++- .../query/filter/DruidLongPredicate.java | 28 ++ .../query/filter/DruidPredicateFactory.java | 29 ++ .../io/druid/query/filter/InDimFilter.java | 88 ++++- .../query/filter/JavaScriptDimFilter.java | 47 ++- .../druid/query/filter/SelectorDimFilter.java | 63 +++- .../query/filter/ValueMatcherFactory.java | 39 ++- .../GroupByMergingQueryRunnerV2.java | 10 + .../ColumnSelectorBitmapIndexSelector.java | 21 +- .../druid/segment/ColumnSelectorFactory.java | 2 + .../segment/QueryableIndexStorageAdapter.java | 70 +++- .../io/druid/segment/filter/BoundFilter.java | 59 ++-- .../filter/DimensionPredicateFilter.java | 46 ++- .../java/io/druid/segment/filter/Filters.java | 53 +++ .../io/druid/segment/filter/InFilter.java | 85 +++-- .../segment/filter/JavaScriptFilter.java | 13 +- .../io/druid/segment/filter/RegexFilter.java | 28 +- .../segment/filter/SearchQueryFilter.java | 28 +- .../druid/segment/filter/SpatialFilter.java | 40 ++- .../segment/incremental/IncrementalIndex.java | 15 + .../IncrementalIndexStorageAdapter.java | 61 +++- .../incremental/OnheapIncrementalIndex.java | 7 + .../aggregation/FilteredAggregatorTest.java | 21 ++ .../aggregation/JavaScriptAggregatorTest.java | 7 + .../TestColumnSelectorFactory.java | 7 + .../segment/filter/FilterPartitionTest.java | 38 ++- .../segment/filter/InvalidFilteringTest.java | 179 ++++++++++ .../segment/filter/LongFilteringTest.java | 320 ++++++++++++++++++ .../segment/filter/TimeFilteringTest.java | 240 +++++++++++++ 34 files changed, 2092 insertions(+), 153 deletions(-) create mode 100644 benchmarks/src/main/java/io/druid/benchmark/FilteredAggregatorBenchmark.java create mode 100644 processing/src/main/java/io/druid/query/filter/DruidLongPredicate.java create mode 100644 processing/src/main/java/io/druid/query/filter/DruidPredicateFactory.java create mode 100644 processing/src/test/java/io/druid/segment/filter/InvalidFilteringTest.java create mode 100644 processing/src/test/java/io/druid/segment/filter/LongFilteringTest.java create mode 100644 processing/src/test/java/io/druid/segment/filter/TimeFilteringTest.java diff --git a/benchmarks/src/main/java/io/druid/benchmark/DimensionPredicateFilterBenchmark.java b/benchmarks/src/main/java/io/druid/benchmark/DimensionPredicateFilterBenchmark.java index cf21147fd65..424c003e369 100644 --- a/benchmarks/src/main/java/io/druid/benchmark/DimensionPredicateFilterBenchmark.java +++ b/benchmarks/src/main/java/io/druid/benchmark/DimensionPredicateFilterBenchmark.java @@ -29,6 +29,8 @@ import com.metamx.collections.bitmap.MutableBitmap; import com.metamx.collections.bitmap.RoaringBitmapFactory; import com.metamx.collections.spatial.ImmutableRTree; import io.druid.query.filter.BitmapIndexSelector; +import io.druid.query.filter.DruidLongPredicate; +import io.druid.query.filter.DruidPredicateFactory; import io.druid.segment.column.BitmapIndex; import io.druid.segment.data.BitmapSerdeFactory; import io.druid.segment.data.GenericIndexed; @@ -63,16 +65,35 @@ public class DimensionPredicateFilterBenchmark private static final DimensionPredicateFilter IS_EVEN = new DimensionPredicateFilter( "foo", - new Predicate() + new DruidPredicateFactory() { @Override - public boolean apply(String input) + public Predicate makeStringPredicate() { - if (input == null) { - return false; - } + return new Predicate() + { + @Override + public boolean apply(String input) + { + if (input == null) { + return false; + } + return Integer.parseInt(input.toString()) % 2 == 0; + } + }; + } - return Integer.parseInt(input) % 2 == 0; + @Override + public DruidLongPredicate makeLongPredicate() + { + return new DruidLongPredicate() + { + @Override + public boolean applyLong(long input) + { + return false; + } + }; } }, null diff --git a/benchmarks/src/main/java/io/druid/benchmark/FilterPartitionBenchmark.java b/benchmarks/src/main/java/io/druid/benchmark/FilterPartitionBenchmark.java index cd3608e1cbe..7169a12d10f 100644 --- a/benchmarks/src/main/java/io/druid/benchmark/FilterPartitionBenchmark.java +++ b/benchmarks/src/main/java/io/druid/benchmark/FilterPartitionBenchmark.java @@ -43,7 +43,10 @@ import io.druid.query.extraction.ExtractionFn; import io.druid.query.extraction.JavaScriptExtractionFn; import io.druid.query.filter.AndDimFilter; import io.druid.query.filter.BitmapIndexSelector; +import io.druid.query.filter.BoundDimFilter; import io.druid.query.filter.DimFilter; +import io.druid.query.filter.DruidLongPredicate; +import io.druid.query.filter.DruidPredicateFactory; import io.druid.query.filter.Filter; import io.druid.query.filter.OrDimFilter; import io.druid.query.filter.SelectorDimFilter; @@ -56,9 +59,11 @@ import io.druid.segment.LongColumnSelector; import io.druid.segment.QueryableIndex; import io.druid.segment.QueryableIndexStorageAdapter; import io.druid.segment.StorageAdapter; +import io.druid.segment.column.Column; import io.druid.segment.column.ColumnConfig; import io.druid.segment.data.IndexedInts; import io.druid.segment.filter.AndFilter; +import io.druid.segment.filter.BoundFilter; import io.druid.segment.filter.DimensionPredicateFilter; import io.druid.segment.filter.Filters; import io.druid.segment.filter.OrFilter; @@ -67,6 +72,7 @@ import io.druid.segment.incremental.IncrementalIndex; import io.druid.segment.incremental.IncrementalIndexSchema; import io.druid.segment.incremental.OnheapIncrementalIndex; import io.druid.segment.serde.ComplexMetrics; +import org.joda.time.Interval; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Fork; @@ -109,6 +115,10 @@ public class FilterPartitionBenchmark private QueryableIndex qIndex; private File indexFile; + private Filter timeFilterNone; + private Filter timeFilterHalf; + private Filter timeFilterAll; + private BenchmarkSchemaInfo schemaInfo; private static String JS_FN = "function(str) { return 'super-' + str; }"; @@ -168,6 +178,38 @@ public class FilterPartitionBenchmark new IndexSpec() ); qIndex = INDEX_IO.loadIndex(indexFile); + + Interval interval = schemaInfo.getDataInterval(); + timeFilterNone = new BoundFilter(new BoundDimFilter( + Column.TIME_COLUMN_NAME, + String.valueOf(Long.MAX_VALUE), + String.valueOf(Long.MAX_VALUE), + true, + true, + true, + null + )); + + long halfEnd = (interval.getEndMillis() + interval.getStartMillis()) / 2; + timeFilterHalf = new BoundFilter(new BoundDimFilter( + Column.TIME_COLUMN_NAME, + String.valueOf(interval.getStartMillis()), + String.valueOf(halfEnd), + true, + true, + true, + null + )); + + timeFilterAll = new BoundFilter(new BoundDimFilter( + Column.TIME_COLUMN_NAME, + String.valueOf(interval.getStartMillis()), + String.valueOf(interval.getEndMillis()), + true, + true, + true, + null + )); } private IncrementalIndex makeIncIndex() @@ -215,6 +257,51 @@ public class FilterPartitionBenchmark } } + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public void timeFilterNone(Blackhole blackhole) throws Exception + { + StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex); + Sequence cursors = sa.makeCursors(timeFilterNone, schemaInfo.getDataInterval(), QueryGranularities.ALL, false); + + Sequence> longListSeq = readCursorsLong(cursors, blackhole); + List strings = Sequences.toList(Sequences.limit(longListSeq, 1), Lists.>newArrayList()).get(0); + for (Long st : strings) { + blackhole.consume(st); + } + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public void timeFilterHalf(Blackhole blackhole) throws Exception + { + StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex); + Sequence cursors = sa.makeCursors(timeFilterHalf, schemaInfo.getDataInterval(), QueryGranularities.ALL, false); + + Sequence> longListSeq = readCursorsLong(cursors, blackhole); + List strings = Sequences.toList(Sequences.limit(longListSeq, 1), Lists.>newArrayList()).get(0); + for (Long st : strings) { + blackhole.consume(st); + } + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public void timeFilterAll(Blackhole blackhole) throws Exception + { + StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex); + Sequence cursors = sa.makeCursors(timeFilterAll, schemaInfo.getDataInterval(), QueryGranularities.ALL, false); + + Sequence> longListSeq = readCursorsLong(cursors, blackhole); + List strings = Sequences.toList(Sequences.limit(longListSeq, 1), Lists.>newArrayList()).get(0); + for (Long st : strings) { + blackhole.consume(st); + } + } + @Benchmark @BenchmarkMode(Mode.AverageTime) @OutputTimeUnit(TimeUnit.MICROSECONDS) @@ -442,7 +529,6 @@ public class FilterPartitionBenchmark public List apply(Cursor input) { List longvals = new ArrayList(); - LongColumnSelector selector = input.makeLongColumnSelector("sumLongSequential"); while (!input.isDone()) { long rowval = selector.get(); @@ -476,11 +562,11 @@ public class FilterPartitionBenchmark { public NoBitmapDimensionPredicateFilter( final String dimension, - final Predicate predicate, + final DruidPredicateFactory predicateFactory, final ExtractionFn extractionFn ) { - super(dimension, predicate, extractionFn); + super(dimension, predicateFactory, extractionFn); } @Override @@ -510,21 +596,37 @@ public class FilterPartitionBenchmark return new NoBitmapSelectorFilter(dimension, value); } else { final String valueOrNull = Strings.emptyToNull(value); - final Predicate predicate = new Predicate() + + final DruidPredicateFactory predicateFactory = new DruidPredicateFactory() { @Override - public boolean apply(String input) + public Predicate makeStringPredicate() { - return Objects.equals(valueOrNull, input); + return new Predicate() + { + @Override + public boolean apply(String input) + { + return Objects.equals(valueOrNull, input); + } + }; } @Override - public String toString() + public DruidLongPredicate makeLongPredicate() { - return value; + return new DruidLongPredicate() + { + @Override + public boolean applyLong(long input) + { + return false; + } + }; } }; - return new NoBitmapDimensionPredicateFilter(dimension, predicate, extractionFn); + + return new NoBitmapDimensionPredicateFilter(dimension, predicateFactory, extractionFn); } } } diff --git a/benchmarks/src/main/java/io/druid/benchmark/FilteredAggregatorBenchmark.java b/benchmarks/src/main/java/io/druid/benchmark/FilteredAggregatorBenchmark.java new file mode 100644 index 00000000000..4639da5fc84 --- /dev/null +++ b/benchmarks/src/main/java/io/druid/benchmark/FilteredAggregatorBenchmark.java @@ -0,0 +1,316 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.benchmark; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.base.Function; +import com.google.common.base.Predicate; +import com.google.common.base.Strings; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.hash.Hashing; +import com.google.common.io.Files; +import com.metamx.common.guava.Sequence; +import com.metamx.common.guava.Sequences; +import com.metamx.common.logger.Logger; +import io.druid.benchmark.datagen.BenchmarkDataGenerator; +import io.druid.benchmark.datagen.BenchmarkSchemaInfo; +import io.druid.benchmark.datagen.BenchmarkSchemas; +import io.druid.benchmark.query.QueryBenchmarkUtil; +import io.druid.data.input.InputRow; +import io.druid.data.input.impl.DimensionsSpec; +import io.druid.granularity.QueryGranularities; +import io.druid.jackson.DefaultObjectMapper; +import io.druid.js.JavaScriptConfig; +import io.druid.query.Druids; +import io.druid.query.FinalizeResultsQueryRunner; +import io.druid.query.Query; +import io.druid.query.QueryRunner; +import io.druid.query.QueryRunnerFactory; +import io.druid.query.QueryToolChest; +import io.druid.query.Result; +import io.druid.query.aggregation.AggregatorFactory; +import io.druid.query.aggregation.CountAggregatorFactory; +import io.druid.query.aggregation.FilteredAggregatorFactory; +import io.druid.query.aggregation.hyperloglog.HyperUniquesSerde; +import io.druid.query.dimension.DefaultDimensionSpec; +import io.druid.query.extraction.ExtractionFn; +import io.druid.query.extraction.JavaScriptExtractionFn; +import io.druid.query.filter.AndDimFilter; +import io.druid.query.filter.BitmapIndexSelector; +import io.druid.query.filter.BoundDimFilter; +import io.druid.query.filter.DimFilter; +import io.druid.query.filter.Filter; +import io.druid.query.filter.InDimFilter; +import io.druid.query.filter.JavaScriptDimFilter; +import io.druid.query.filter.OrDimFilter; +import io.druid.query.filter.RegexDimFilter; +import io.druid.query.filter.SearchQueryDimFilter; +import io.druid.query.filter.SelectorDimFilter; +import io.druid.query.search.search.ContainsSearchQuerySpec; +import io.druid.query.spec.MultipleIntervalSegmentSpec; +import io.druid.query.spec.QuerySegmentSpec; +import io.druid.query.timeseries.TimeseriesQuery; +import io.druid.query.timeseries.TimeseriesQueryEngine; +import io.druid.query.timeseries.TimeseriesQueryQueryToolChest; +import io.druid.query.timeseries.TimeseriesQueryRunnerFactory; +import io.druid.query.timeseries.TimeseriesResultValue; +import io.druid.segment.Cursor; +import io.druid.segment.DimensionSelector; +import io.druid.segment.IncrementalIndexSegment; +import io.druid.segment.IndexIO; +import io.druid.segment.IndexMergerV9; +import io.druid.segment.IndexSpec; +import io.druid.segment.LongColumnSelector; +import io.druid.segment.QueryableIndex; +import io.druid.segment.QueryableIndexSegment; +import io.druid.segment.QueryableIndexStorageAdapter; +import io.druid.segment.StorageAdapter; +import io.druid.segment.column.ColumnConfig; +import io.druid.segment.data.IndexedInts; +import io.druid.segment.filter.AndFilter; +import io.druid.segment.filter.DimensionPredicateFilter; +import io.druid.segment.filter.Filters; +import io.druid.segment.filter.OrFilter; +import io.druid.segment.filter.SelectorFilter; +import io.druid.segment.incremental.IncrementalIndex; +import io.druid.segment.incremental.IncrementalIndexSchema; +import io.druid.segment.incremental.IncrementalIndexStorageAdapter; +import io.druid.segment.incremental.OnheapIncrementalIndex; +import io.druid.segment.serde.ComplexMetrics; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Objects; +import java.util.concurrent.TimeUnit; + +@State(Scope.Benchmark) +@Fork(jvmArgsPrepend = "-server", value = 1) +@Warmup(iterations = 10) +@Measurement(iterations = 25) +public class FilteredAggregatorBenchmark +{ + @Param({"75000"}) + private int rowsPerSegment; + + @Param({"basic"}) + private String schema; + + private static final Logger log = new Logger(FilteredAggregatorBenchmark.class); + private static final int RNG_SEED = 9999; + private static final IndexMergerV9 INDEX_MERGER_V9; + private static final IndexIO INDEX_IO; + public static final ObjectMapper JSON_MAPPER; + private IncrementalIndex incIndex; + private IncrementalIndex incIndexFilteredAgg; + private AggregatorFactory[] filteredMetrics; + private QueryableIndex qIndex; + private File indexFile; + private DimFilter filter; + private List inputRows; + private QueryRunnerFactory factory; + private BenchmarkSchemaInfo schemaInfo; + private TimeseriesQuery query; + + private static String JS_FN = "function(str) { return 'super-' + str; }"; + private static ExtractionFn JS_EXTRACTION_FN = new JavaScriptExtractionFn(JS_FN, false, JavaScriptConfig.getDefault()); + + static { + JSON_MAPPER = new DefaultObjectMapper(); + INDEX_IO = new IndexIO( + JSON_MAPPER, + new ColumnConfig() + { + @Override + public int columnCacheSizeBytes() + { + return 0; + } + } + ); + INDEX_MERGER_V9 = new IndexMergerV9(JSON_MAPPER, INDEX_IO); + } + + @Setup + public void setup() throws IOException + { + log.info("SETUP CALLED AT " + System.currentTimeMillis()); + + if (ComplexMetrics.getSerdeForType("hyperUnique") == null) { + ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde(Hashing.murmur3_128())); + } + + schemaInfo = BenchmarkSchemas.SCHEMA_MAP.get(schema); + + BenchmarkDataGenerator gen = new BenchmarkDataGenerator( + schemaInfo.getColumnSchemas(), + RNG_SEED, + schemaInfo.getDataInterval(), + rowsPerSegment + ); + + incIndex = makeIncIndex(schemaInfo.getAggsArray()); + + filter = new OrDimFilter( + Arrays.asList( + new BoundDimFilter("dimSequential", "-1", "-1", true, true, true, null), + new JavaScriptDimFilter("dimSequential", "function(x) { return false }", null, JavaScriptConfig.getDefault()), + new RegexDimFilter("dimSequential", "X", null), + new SearchQueryDimFilter("dimSequential", new ContainsSearchQuerySpec("X", false), null), + new InDimFilter("dimSequential", Arrays.asList("X"), null) + ) + ); + filteredMetrics = new AggregatorFactory[1]; + filteredMetrics[0] = new FilteredAggregatorFactory(new CountAggregatorFactory("rows"), filter); + incIndexFilteredAgg = makeIncIndex(filteredMetrics); + + inputRows = new ArrayList<>(); + for (int j = 0; j < rowsPerSegment; j++) { + InputRow row = gen.nextRow(); + if (j % 10000 == 0) { + log.info(j + " rows generated."); + } + incIndex.add(row); + inputRows.add(row); + } + + File tmpFile = Files.createTempDir(); + log.info("Using temp dir: " + tmpFile.getAbsolutePath()); + tmpFile.deleteOnExit(); + + indexFile = INDEX_MERGER_V9.persist( + incIndex, + tmpFile, + new IndexSpec() + ); + qIndex = INDEX_IO.loadIndex(indexFile); + + factory = new TimeseriesQueryRunnerFactory( + new TimeseriesQueryQueryToolChest( + QueryBenchmarkUtil.NoopIntervalChunkingQueryRunnerDecorator() + ), + new TimeseriesQueryEngine(), + QueryBenchmarkUtil.NOOP_QUERYWATCHER + ); + + BenchmarkSchemaInfo basicSchema = BenchmarkSchemas.SCHEMA_MAP.get("basic"); + QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval())); + List queryAggs = new ArrayList<>(); + queryAggs.add(filteredMetrics[0]); + + query = Druids.newTimeseriesQueryBuilder() + .dataSource("blah") + .granularity(QueryGranularities.ALL) + .intervals(intervalSpec) + .aggregators(queryAggs) + .descending(false) + .build(); + } + + private IncrementalIndex makeIncIndex(AggregatorFactory[] metrics) + { + return new OnheapIncrementalIndex( + new IncrementalIndexSchema.Builder() + .withQueryGranularity(QueryGranularities.NONE) + .withMetrics(metrics) + .withDimensionsSpec(new DimensionsSpec(null, null, null)) + .build(), + true, + false, + true, + rowsPerSegment + ); + } + + private static List runQuery(QueryRunnerFactory factory, QueryRunner runner, Query query) + { + QueryToolChest toolChest = factory.getToolchest(); + QueryRunner theRunner = new FinalizeResultsQueryRunner<>( + toolChest.mergeResults(toolChest.preMergeQueryDecoration(runner)), + toolChest + ); + + Sequence queryResult = theRunner.run(query, Maps.newHashMap()); + return Sequences.toList(queryResult, Lists.newArrayList()); + } + + // Filtered agg doesn't work with ingestion, cardinality is not supported in incremental index + // See https://github.com/druid-io/druid/issues/3164 + // @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public void ingest(Blackhole blackhole) throws Exception + { + incIndexFilteredAgg = makeIncIndex(filteredMetrics); + for (InputRow row : inputRows) { + int rv = incIndexFilteredAgg.add(row); + blackhole.consume(rv); + } + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public void querySingleIncrementalIndex(Blackhole blackhole) throws Exception + { + QueryRunner> runner = QueryBenchmarkUtil.makeQueryRunner( + factory, + "incIndex", + new IncrementalIndexSegment(incIndex, "incIndex") + ); + + List> results = FilteredAggregatorBenchmark.runQuery(factory, runner, query); + for (Result result : results) { + blackhole.consume(result); + } + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public void querySingleQueryableIndex(Blackhole blackhole) throws Exception + { + final QueryRunner> runner = QueryBenchmarkUtil.makeQueryRunner( + factory, + "qIndex", + new QueryableIndexSegment("qIndex", qIndex) + ); + + List> results = FilteredAggregatorBenchmark.runQuery(factory, runner, query); + for (Result result : results) { + blackhole.consume(result); + } + } +} diff --git a/benchmarks/src/main/java/io/druid/benchmark/indexing/IncrementalIndexReadBenchmark.java b/benchmarks/src/main/java/io/druid/benchmark/indexing/IncrementalIndexReadBenchmark.java index e64775809f1..1cd13a4027d 100644 --- a/benchmarks/src/main/java/io/druid/benchmark/indexing/IncrementalIndexReadBenchmark.java +++ b/benchmarks/src/main/java/io/druid/benchmark/indexing/IncrementalIndexReadBenchmark.java @@ -30,8 +30,18 @@ import io.druid.benchmark.datagen.BenchmarkSchemas; import io.druid.data.input.InputRow; import io.druid.data.input.impl.DimensionsSpec; import io.druid.granularity.QueryGranularities; +import io.druid.js.JavaScriptConfig; import io.druid.query.aggregation.hyperloglog.HyperUniquesSerde; import io.druid.query.dimension.DefaultDimensionSpec; +import io.druid.query.filter.AndDimFilter; +import io.druid.query.filter.BoundDimFilter; +import io.druid.query.filter.DimFilter; +import io.druid.query.filter.InDimFilter; +import io.druid.query.filter.JavaScriptDimFilter; +import io.druid.query.filter.OrDimFilter; +import io.druid.query.filter.RegexDimFilter; +import io.druid.query.filter.SearchQueryDimFilter; +import io.druid.query.search.search.ContainsSearchQuerySpec; import io.druid.segment.Cursor; import io.druid.segment.DimensionSelector; import io.druid.segment.data.IndexedInts; @@ -55,6 +65,7 @@ import org.openjdk.jmh.infra.Blackhole; import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.concurrent.TimeUnit; @@ -145,4 +156,39 @@ public class IncrementalIndexReadBenchmark cursor.advance(); } } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public void readWithFilters(Blackhole blackhole) throws Exception + { + DimFilter filter = new OrDimFilter( + Arrays.asList( + new BoundDimFilter("dimSequential", "-1", "-1", true, true, true, null), + new JavaScriptDimFilter("dimSequential", "function(x) { return false }", null, JavaScriptConfig.getDefault()), + new RegexDimFilter("dimSequential", "X", null), + new SearchQueryDimFilter("dimSequential", new ContainsSearchQuerySpec("X", false), null), + new InDimFilter("dimSequential", Arrays.asList("X"), null) + ) + ); + + IncrementalIndexStorageAdapter sa = new IncrementalIndexStorageAdapter(incIndex); + Sequence cursors = sa.makeCursors(filter.toFilter(), schemaInfo.getDataInterval(), QueryGranularities.ALL, false); + Cursor cursor = Sequences.toList(Sequences.limit(cursors, 1), Lists.newArrayList()).get(0); + + List selectors = new ArrayList<>(); + selectors.add(cursor.makeDimensionSelector(new DefaultDimensionSpec("dimSequential", null))); + selectors.add(cursor.makeDimensionSelector(new DefaultDimensionSpec("dimZipf", null))); + selectors.add(cursor.makeDimensionSelector(new DefaultDimensionSpec("dimUniform", null))); + selectors.add(cursor.makeDimensionSelector(new DefaultDimensionSpec("dimSequentialHalfNull", null))); + + cursor.reset(); + while (!cursor.isDone()) { + for (DimensionSelector selector : selectors) { + IndexedInts row = selector.getRow(); + blackhole.consume(selector.lookupName(row.get(0))); + } + cursor.advance(); + } + } } diff --git a/docs/content/querying/filters.md b/docs/content/querying/filters.md index 48b18ce6879..30c9edda714 100644 --- a/docs/content/querying/filters.md +++ b/docs/content/querying/filters.md @@ -332,3 +332,40 @@ The following matches dimension values in `[product_1, product_3, product_5]` fo } } ``` + +### Filtering on the Timestamp Column +Filters can also be applied to the timestamp column. The timestamp column has long millisecond values. + +To refer to the timestamp column, use the string `__time` as the dimension name. + +The filter parameters (e.g., the selector value for the SelectorFilter) should be provided as Strings. + +If the user wishes to interpret the timestamp with a specific format, timezone, or locale, the [Time Format Extraction Function](./dimensionspecs.html#time-format-extraction-function) is useful. + +Note that the timestamp column does not have a bitmap index. Thus, filtering on timestamp in a query requires a scan of the column, and performance will be affected accordingly. If possible, excluding time ranges by specifying the query interval will be faster. + +**Example** + +Filtering on a long timestamp value: +```json +"filter": { + "type": "selector", + "dimension": "__time", + "value": "124457387532" +} +``` + +Filtering on day of week: +```json +"filter": { + "type": "selector", + "dimension": "__time", + "value": "Friday", + "extractionFn": { + "type": "timeFormat", + "format": "EEEE", + "timeZone": "America/New_York", + "locale": "en" + } +} +``` diff --git a/processing/src/main/java/io/druid/query/aggregation/FilteredAggregatorFactory.java b/processing/src/main/java/io/druid/query/aggregation/FilteredAggregatorFactory.java index a9679b7cdee..4918d968eb1 100644 --- a/processing/src/main/java/io/druid/query/aggregation/FilteredAggregatorFactory.java +++ b/processing/src/main/java/io/druid/query/aggregation/FilteredAggregatorFactory.java @@ -23,12 +23,18 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Preconditions; import com.google.common.base.Predicate; import com.google.common.base.Strings; +import com.metamx.common.UOE; import io.druid.query.dimension.DefaultDimensionSpec; import io.druid.query.filter.DimFilter; +import io.druid.query.filter.DruidLongPredicate; +import io.druid.query.filter.DruidPredicateFactory; import io.druid.query.filter.ValueMatcher; import io.druid.query.filter.ValueMatcherFactory; import io.druid.segment.ColumnSelectorFactory; import io.druid.segment.DimensionSelector; +import io.druid.segment.column.Column; +import io.druid.segment.column.ColumnCapabilities; +import io.druid.segment.column.ValueType; import io.druid.segment.data.IndexedInts; import io.druid.segment.filter.BooleanValueMatcher; import io.druid.segment.filter.Filters; @@ -221,6 +227,13 @@ public class FilteredAggregatorFactory extends AggregatorFactory @Override public ValueMatcher makeValueMatcher(final String dimension, final Comparable value) { + if (getTypeForDimension(dimension) == ValueType.LONG) { + return Filters.getLongValueMatcher( + columnSelectorFactory.makeLongColumnSelector(dimension), + value + ); + } + final DimensionSelector selector = columnSelectorFactory.makeDimensionSelector( new DefaultDimensionSpec(dimension, dimension) ); @@ -256,8 +269,20 @@ public class FilteredAggregatorFactory extends AggregatorFactory }; } - @Override - public ValueMatcher makeValueMatcher(final String dimension, final Predicate predicate) + public ValueMatcher makeValueMatcher(final String dimension, final DruidPredicateFactory predicateFactory) + { + ValueType type = getTypeForDimension(dimension); + switch (type) { + case LONG: + return makeLongValueMatcher(dimension, predicateFactory.makeLongPredicate()); + case STRING: + return makeStringValueMatcher(dimension, predicateFactory.makeStringPredicate()); + default: + return new BooleanValueMatcher(predicateFactory.makeStringPredicate().apply(null)); + } + } + + public ValueMatcher makeStringValueMatcher(final String dimension, final Predicate predicate) { final DimensionSelector selector = columnSelectorFactory.makeDimensionSelector( new DefaultDimensionSpec(dimension, dimension) @@ -299,5 +324,25 @@ public class FilteredAggregatorFactory extends AggregatorFactory } }; } + + private ValueMatcher makeLongValueMatcher(String dimension, DruidLongPredicate predicate) + { + return Filters.getLongPredicateMatcher( + columnSelectorFactory.makeLongColumnSelector(dimension), + predicate + ); + } + + private ValueType getTypeForDimension(String dimension) + { + // FilteredAggregatorFactory is sometimes created from a ColumnSelectorFactory that + // has no knowledge of column capabilities/types. + // Default to LONG for __time, STRING for everything else. + if (dimension.equals(Column.TIME_COLUMN_NAME)) { + return ValueType.LONG; + } + ColumnCapabilities capabilities = columnSelectorFactory.getColumnCapabilities(dimension); + return capabilities == null ? ValueType.STRING : capabilities.getType(); + } } } diff --git a/processing/src/main/java/io/druid/query/filter/DruidLongPredicate.java b/processing/src/main/java/io/druid/query/filter/DruidLongPredicate.java new file mode 100644 index 00000000000..1651776777f --- /dev/null +++ b/processing/src/main/java/io/druid/query/filter/DruidLongPredicate.java @@ -0,0 +1,28 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.query.filter; + +/** + * LongPredicate is only supported in Java 8+, so use this to avoid boxing when a long predicate is needed. + */ +public interface DruidLongPredicate +{ + boolean applyLong(long input); +} diff --git a/processing/src/main/java/io/druid/query/filter/DruidPredicateFactory.java b/processing/src/main/java/io/druid/query/filter/DruidPredicateFactory.java new file mode 100644 index 00000000000..bc94931167c --- /dev/null +++ b/processing/src/main/java/io/druid/query/filter/DruidPredicateFactory.java @@ -0,0 +1,29 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.query.filter; + +import com.google.common.base.Predicate; + +public interface DruidPredicateFactory +{ + public Predicate makeStringPredicate(); + + public DruidLongPredicate makeLongPredicate(); +} diff --git a/processing/src/main/java/io/druid/query/filter/InDimFilter.java b/processing/src/main/java/io/druid/query/filter/InDimFilter.java index 9ee4d5d8da0..9b2f4a337cf 100644 --- a/processing/src/main/java/io/druid/query/filter/InDimFilter.java +++ b/processing/src/main/java/io/druid/query/filter/InDimFilter.java @@ -24,11 +24,13 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Function; import com.google.common.base.Preconditions; import com.google.common.base.Strings; +import com.google.common.base.Supplier; import com.google.common.collect.ImmutableSortedSet; import com.google.common.collect.Iterables; import com.google.common.collect.Range; import com.google.common.collect.RangeSet; import com.google.common.collect.TreeRangeSet; +import com.google.common.primitives.Longs; import com.metamx.common.StringUtils; import io.druid.query.extraction.ExtractionFn; import io.druid.query.lookup.LookupExtractionFn; @@ -37,15 +39,22 @@ import io.druid.segment.filter.InFilter; import java.nio.ByteBuffer; import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; import java.util.List; import java.util.Objects; import java.util.Set; public class InDimFilter implements DimFilter { + // determined through benchmark that binary search on long[] is faster than HashSet until ~16 elements + // Hashing threshold is not applied to String for now, String still uses ImmutableSortedSet + public static final int LONG_HASHING_THRESHOLD = 16; + private final ImmutableSortedSet values; private final String dimension; private final ExtractionFn extractionFn; + private final Supplier longPredicateSupplier; @JsonCreator public InDimFilter( @@ -71,6 +80,7 @@ public class InDimFilter implements DimFilter ); this.dimension = dimension; this.extractionFn = extractionFn; + this.longPredicateSupplier = getLongPredicateSupplier(); } @JsonProperty @@ -170,7 +180,7 @@ public class InDimFilter implements DimFilter @Override public Filter toFilter() { - return new InFilter(dimension, values, extractionFn); + return new InFilter(dimension, values, longPredicateSupplier, extractionFn); } @Override @@ -216,4 +226,80 @@ public class InDimFilter implements DimFilter result = 31 * result + (extractionFn != null ? extractionFn.hashCode() : 0); return result; } + + // As the set of filtered values can be large, parsing them as longs should be done only if needed, and only once. + // Pass in a common long predicate supplier to all filters created by .toFilter(), so that + // we only compute the long hashset/array once per query. + // This supplier must be thread-safe, since this DimFilter will be accessed in the query runners. + private Supplier getLongPredicateSupplier() + { + return new Supplier() + { + private final Object initLock = new Object(); + private volatile boolean longsInitialized = false; + private volatile boolean useLongHash; + private volatile long[] longArray; + private volatile HashSet longHashSet; + + private void initLongValues() + { + if (longsInitialized) { + return; + } + + synchronized (initLock) { + if (longsInitialized) { + return; + } + + List longs = new ArrayList<>(); + for (String value : values) { + Long longValue = Longs.tryParse(value); + if (longValue != null) { + longs.add(longValue); + } + } + + useLongHash = longs.size() > LONG_HASHING_THRESHOLD; + if (useLongHash) { + longHashSet = new HashSet(longs); + } else { + longArray = new long[longs.size()]; + for (int i = 0; i < longs.size(); i++) { + longArray[i] = longs.get(i).longValue(); + } + Arrays.sort(longArray); + } + + longsInitialized = true; + } + } + + @Override + public DruidLongPredicate get() + { + initLongValues(); + + if (useLongHash) { + return new DruidLongPredicate() + { + @Override + public boolean applyLong(long input) + { + return longHashSet.contains(input); + } + }; + } else { + return new DruidLongPredicate() + { + @Override + public boolean applyLong(long input) + { + return Arrays.binarySearch(longArray, input) >= 0; + } + }; + } + } + }; + } } diff --git a/processing/src/main/java/io/druid/query/filter/JavaScriptDimFilter.java b/processing/src/main/java/io/druid/query/filter/JavaScriptDimFilter.java index 6442c68da42..03c8e35c17f 100644 --- a/processing/src/main/java/io/druid/query/filter/JavaScriptDimFilter.java +++ b/processing/src/main/java/io/druid/query/filter/JavaScriptDimFilter.java @@ -34,6 +34,7 @@ import org.mozilla.javascript.Context; import org.mozilla.javascript.Function; import org.mozilla.javascript.ScriptableObject; +import javax.annotation.Nullable; import java.nio.ByteBuffer; public class JavaScriptDimFilter implements DimFilter @@ -43,7 +44,7 @@ public class JavaScriptDimFilter implements DimFilter private final ExtractionFn extractionFn; private final JavaScriptConfig config; - private final JavaScriptPredicate predicate; + private final JavaScriptPredicateFactory predicateFactory; @JsonCreator public JavaScriptDimFilter( @@ -61,9 +62,9 @@ public class JavaScriptDimFilter implements DimFilter this.config = config; if (config.isDisabled()) { - this.predicate = null; + this.predicateFactory = null; } else { - this.predicate = new JavaScriptPredicate(function, extractionFn); + this.predicateFactory = new JavaScriptPredicateFactory(function, extractionFn); } } @@ -115,7 +116,7 @@ public class JavaScriptDimFilter implements DimFilter throw new ISE("JavaScript is disabled"); } - return new JavaScriptFilter(dimension, predicate); + return new JavaScriptFilter(dimension, predicateFactory); } @Override @@ -165,14 +166,14 @@ public class JavaScriptDimFilter implements DimFilter return result; } - public static class JavaScriptPredicate implements Predicate + public static class JavaScriptPredicateFactory implements DruidPredicateFactory { final ScriptableObject scope; final Function fnApply; final String script; final ExtractionFn extractionFn; - public JavaScriptPredicate(final String script, final ExtractionFn extractionFn) + public JavaScriptPredicateFactory(final String script, final ExtractionFn extractionFn) { Preconditions.checkNotNull(script, "script must not be null"); this.script = script; @@ -191,7 +192,33 @@ public class JavaScriptDimFilter implements DimFilter } @Override - public boolean apply(final String input) + public Predicate makeStringPredicate() + { + return new Predicate() + { + @Override + public boolean apply(String input) + { + return applyObject(input); + } + }; + } + + @Override + public DruidLongPredicate makeLongPredicate() + { + return new DruidLongPredicate() + { + @Override + public boolean applyLong(long input) + { + // Can't avoid boxing here because the Mozilla JS Function.call() only accepts Object[] + return applyObject(input); + } + }; + } + + public boolean applyObject(final Object input) { // one and only one context per thread final Context cx = Context.enter(); @@ -203,12 +230,12 @@ public class JavaScriptDimFilter implements DimFilter } } - public boolean applyInContext(Context cx, String input) + public boolean applyInContext(Context cx, Object input) { if (extractionFn != null) { input = extractionFn.apply(input); } - return Context.toBoolean(fnApply.call(cx, scope, scope, new String[]{input})); + return Context.toBoolean(fnApply.call(cx, scope, scope, new Object[]{input})); } @Override @@ -221,7 +248,7 @@ public class JavaScriptDimFilter implements DimFilter return false; } - JavaScriptPredicate that = (JavaScriptPredicate) o; + JavaScriptPredicateFactory that = (JavaScriptPredicateFactory) o; if (!script.equals(that.script)) { return false; diff --git a/processing/src/main/java/io/druid/query/filter/SelectorDimFilter.java b/processing/src/main/java/io/druid/query/filter/SelectorDimFilter.java index a4826c47f6b..5b91398048f 100644 --- a/processing/src/main/java/io/druid/query/filter/SelectorDimFilter.java +++ b/processing/src/main/java/io/druid/query/filter/SelectorDimFilter.java @@ -28,6 +28,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.Range; import com.google.common.collect.RangeSet; import com.google.common.collect.TreeRangeSet; +import com.google.common.primitives.Longs; import com.metamx.common.StringUtils; import io.druid.query.extraction.ExtractionFn; import io.druid.segment.filter.DimensionPredicateFilter; @@ -44,6 +45,11 @@ public class SelectorDimFilter implements DimFilter private final String value; private final ExtractionFn extractionFn; + private final Object initLock = new Object(); + private volatile boolean longsInitialized = false; + private volatile Long valueAsLong; + + @JsonCreator public SelectorDimFilter( @JsonProperty("dimension") String dimension, @@ -88,21 +94,51 @@ public class SelectorDimFilter implements DimFilter return new SelectorFilter(dimension, value); } else { final String valueOrNull = Strings.emptyToNull(value); - final Predicate predicate = new Predicate() + + final DruidPredicateFactory predicateFactory = new DruidPredicateFactory() { @Override - public boolean apply(String input) + public Predicate makeStringPredicate() { - return Objects.equals(valueOrNull, input); + return new Predicate() + { + @Override + public boolean apply(String input) + { + return Objects.equals(valueOrNull, input); + } + }; } @Override - public String toString() + public DruidLongPredicate makeLongPredicate() { - return value; + initLongValue(); + + if (valueAsLong == null) { + return new DruidLongPredicate() + { + @Override + public boolean applyLong(long input) + { + return false; + } + }; + } else { + // store the primitive, so we don't unbox for every comparison + final long unboxedLong = valueAsLong.longValue(); + return new DruidLongPredicate() + { + @Override + public boolean applyLong(long input) + { + return input == unboxedLong; + } + }; + } } }; - return new DimensionPredicateFilter(dimension, predicate, extractionFn); + return new DimensionPredicateFilter(dimension, predicateFactory, extractionFn); } } @@ -174,4 +210,19 @@ public class SelectorDimFilter implements DimFilter result = 31 * result + (extractionFn != null ? extractionFn.hashCode() : 0); return result; } + + + private void initLongValue() + { + if (longsInitialized) { + return; + } + synchronized (initLock) { + if (longsInitialized) { + return; + } + valueAsLong = Longs.tryParse(value); + longsInitialized = true; + } + } } diff --git a/processing/src/main/java/io/druid/query/filter/ValueMatcherFactory.java b/processing/src/main/java/io/druid/query/filter/ValueMatcherFactory.java index 3da61be99b1..06c82af8d5f 100644 --- a/processing/src/main/java/io/druid/query/filter/ValueMatcherFactory.java +++ b/processing/src/main/java/io/druid/query/filter/ValueMatcherFactory.java @@ -20,11 +20,48 @@ package io.druid.query.filter; import com.google.common.base.Predicate; +import io.druid.segment.column.ValueType; /** + * A ValueMatcherFactory is an object associated with a collection of rows (e.g., an IncrementalIndexStorageAdapter) + * that generates ValueMatchers for filtering on the associated collection of rows. + * + * A ValueMatcher is an object that decides whether a row value matches a value or predicate + * associated with the ValueMatcher. + * + * The ValueMatcher is expected to track the current row to be matched with a stateful + * object (e.g., a ColumnSelectorFactory). The ValueMatcher has no responsibility for moving the current + * "row pointer", this is handled outside of the ValueMatcher. + * + * The ValueMatcherFactory/ValueMatcher classes are used for filtering rows during column scans. */ public interface ValueMatcherFactory { + /** + * Create a ValueMatcher that compares row values to the provided value. + * + * An implementation of this method should be able to handle dimensions of various types. + * + * @param dimension The dimension to filter. + * @param value The value to match against. + * + * @return An object that matches row values on the provided value. + */ public ValueMatcher makeValueMatcher(String dimension, Comparable value); - public ValueMatcher makeValueMatcher(String dimension, Predicate predicate); + + + /** + * Create a ValueMatcher that applies a predicate to row values. + * + * The caller provides a predicate factory that can create a predicate for each value type supported by Druid. + * See {@link DruidPredicateFactory} for more information. + * + * When creating the ValueMatcher, the ValueMatcherFactory implementation should decide what type of predicate + * to create from the predicate factory based on the ValueType of the specified dimension. + * + * @param dimension The dimension to filter. + * @param predicateFactory Predicate factory + * @return An object that applies a predicate to row values + */ + public ValueMatcher makeValueMatcher(String dimension, DruidPredicateFactory predicateFactory); } diff --git a/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByMergingQueryRunnerV2.java b/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByMergingQueryRunnerV2.java index 285c87a5a1a..3cd9e02879a 100644 --- a/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByMergingQueryRunnerV2.java +++ b/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByMergingQueryRunnerV2.java @@ -66,6 +66,7 @@ import io.druid.segment.DimensionSelector; import io.druid.segment.FloatColumnSelector; import io.druid.segment.LongColumnSelector; import io.druid.segment.ObjectColumnSelector; +import io.druid.segment.column.ColumnCapabilities; import java.io.Closeable; import java.io.File; @@ -669,5 +670,14 @@ public class GroupByMergingQueryRunnerV2 implements QueryRunner } }; } + + @Override + public ColumnCapabilities getColumnCapabilities(String columnName) + { + // getColumnCapabilities() is only used by FilteredAggregatorFactory for determining dimension types. + // Since FilteredAggregatorFactory only works with ColumnSelectorFactory implementations + // that support makeDimensionSelector(), this method is also left unsupported. + throw new UnsupportedOperationException(); + } } } diff --git a/processing/src/main/java/io/druid/segment/ColumnSelectorBitmapIndexSelector.java b/processing/src/main/java/io/druid/segment/ColumnSelectorBitmapIndexSelector.java index 79c2018e001..84c44420993 100644 --- a/processing/src/main/java/io/druid/segment/ColumnSelectorBitmapIndexSelector.java +++ b/processing/src/main/java/io/druid/segment/ColumnSelectorBitmapIndexSelector.java @@ -25,12 +25,15 @@ import com.metamx.collections.bitmap.ImmutableBitmap; import com.metamx.collections.spatial.ImmutableRTree; import com.metamx.common.guava.CloseQuietly; import io.druid.query.filter.BitmapIndexSelector; +import io.druid.query.filter.Filter; import io.druid.segment.column.BitmapIndex; import io.druid.segment.column.Column; import io.druid.segment.column.DictionaryEncodedColumn; import io.druid.segment.column.GenericColumn; +import io.druid.segment.column.ValueType; import io.druid.segment.data.Indexed; import io.druid.segment.data.IndexedIterable; +import io.druid.segment.filter.Filters; import java.util.Iterator; @@ -115,12 +118,14 @@ public class ColumnSelectorBitmapIndexSelector implements BitmapIndexSelector public BitmapIndex getBitmapIndex(String dimension) { final Column column = index.getColumn(dimension); - - if (column == null) { - // Create a BitmapIndex for null columns so that filters applied to null columns can use + if (column == null || !columnSupportsFiltering(column)) { + // for missing columns and columns with types that do not support filtering, + // treat the column as if it were a String column full of nulls. + // Create a BitmapIndex so that filters applied to null columns can use // bitmap indexes. Filters check for the presence of a bitmap index, this is used to determine // whether the filter is applied in the pre or post filtering stage. - return new BitmapIndex() { + return new BitmapIndex() + { @Override public int getCardinality() { @@ -175,7 +180,7 @@ public class ColumnSelectorBitmapIndexSelector implements BitmapIndexSelector public ImmutableBitmap getBitmapIndex(String dimension, String value) { final Column column = index.getColumn(dimension); - if (column == null) { + if (column == null || !columnSupportsFiltering(column)) { if (Strings.isNullOrEmpty(value)) { return bitmapFactory.complement(bitmapFactory.makeEmptyImmutableBitmap(), getNumRows()); } else { @@ -201,4 +206,10 @@ public class ColumnSelectorBitmapIndexSelector implements BitmapIndexSelector return column.getSpatialIndex().getRTree(); } + + private static boolean columnSupportsFiltering(Column column) + { + ValueType columnType = column.getCapabilities().getType(); + return Filters.FILTERABLE_TYPES.contains(columnType); + } } diff --git a/processing/src/main/java/io/druid/segment/ColumnSelectorFactory.java b/processing/src/main/java/io/druid/segment/ColumnSelectorFactory.java index 2aff7f831c1..f550fef14e5 100644 --- a/processing/src/main/java/io/druid/segment/ColumnSelectorFactory.java +++ b/processing/src/main/java/io/druid/segment/ColumnSelectorFactory.java @@ -20,6 +20,7 @@ package io.druid.segment; import io.druid.query.dimension.DimensionSpec; +import io.druid.segment.column.ColumnCapabilities; /** * Factory class for MetricSelectors @@ -30,4 +31,5 @@ public interface ColumnSelectorFactory public FloatColumnSelector makeFloatColumnSelector(String columnName); public LongColumnSelector makeLongColumnSelector(String columnName); public ObjectColumnSelector makeObjectColumnSelector(String columnName); + public ColumnCapabilities getColumnCapabilities(String columnName); } diff --git a/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java b/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java index 91aa9188cab..50cade3c131 100644 --- a/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java +++ b/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java @@ -29,6 +29,8 @@ import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; import com.metamx.collections.bitmap.ImmutableBitmap; +import com.metamx.common.IAE; +import com.metamx.common.UOE; import com.metamx.common.guava.CloseQuietly; import com.metamx.common.guava.Sequence; import com.metamx.common.guava.Sequences; @@ -38,6 +40,8 @@ import io.druid.query.dimension.DefaultDimensionSpec; import io.druid.query.dimension.DimensionSpec; import io.druid.query.extraction.ExtractionFn; import io.druid.query.filter.BooleanFilter; +import io.druid.query.filter.DruidLongPredicate; +import io.druid.query.filter.DruidPredicateFactory; import io.druid.query.filter.Filter; import io.druid.query.filter.RowOffsetMatcherFactory; import io.druid.query.filter.ValueMatcher; @@ -54,6 +58,7 @@ import io.druid.segment.data.IndexedInts; import io.druid.segment.data.Offset; import io.druid.segment.filter.AndFilter; import io.druid.segment.filter.BooleanValueMatcher; +import io.druid.segment.filter.Filters; import org.joda.time.DateTime; import org.joda.time.Interval; import org.roaringbitmap.IntIterator; @@ -184,11 +189,7 @@ public class QueryableIndexStorageAdapter implements StorageAdapter @Override public ColumnCapabilities getColumnCapabilities(String column) { - Column columnObj = index.getColumn(column); - if (columnObj == null) { - return null; - } - return columnObj.getCapabilities(); + return getColumnCapabilites(index, column); } @Override @@ -314,6 +315,15 @@ public class QueryableIndexStorageAdapter implements StorageAdapter ); } + private static ColumnCapabilities getColumnCapabilites(ColumnSelector index, String columnName) + { + Column columnObj = index.getColumn(columnName); + if (columnObj == null) { + return null; + } + return columnObj.getCapabilities(); + } + private interface CursorAdvancer { public void advance(); @@ -790,6 +800,12 @@ public class QueryableIndexStorageAdapter implements StorageAdapter } }; } + + @Override + public ColumnCapabilities getColumnCapabilities(String columnName) + { + return getColumnCapabilites(index, columnName); + } } if (postFilter == null) { @@ -841,6 +857,7 @@ public class QueryableIndexStorageAdapter implements StorageAdapter return new QueryableIndexBaseCursor() { CursorOffsetHolderValueMatcherFactory valueMatcherFactory = new CursorOffsetHolderValueMatcherFactory( + index, this ); RowOffsetMatcherFactory rowOffsetMatcherFactory = new CursorOffsetHolderRowOffsetMatcherFactory( @@ -981,20 +998,28 @@ public class QueryableIndexStorageAdapter implements StorageAdapter private static class CursorOffsetHolderValueMatcherFactory implements ValueMatcherFactory { + private final ColumnSelector index; private final ColumnSelectorFactory cursor; public CursorOffsetHolderValueMatcherFactory( + ColumnSelector index, ColumnSelectorFactory cursor ) { + this.index = index; this.cursor = cursor; } - // Currently unused, except by unit tests, since filters always support bitmap indexes currently. - // This will change when non-String dimensions are added. @Override public ValueMatcher makeValueMatcher(String dimension, final Comparable value) { + if (getTypeForDimension(dimension) == ValueType.LONG) { + return Filters.getLongValueMatcher( + cursor.makeLongColumnSelector(dimension), + value + ); + } + final DimensionSelector selector = cursor.makeDimensionSelector( new DefaultDimensionSpec(dimension, dimension) ); @@ -1026,10 +1051,21 @@ public class QueryableIndexStorageAdapter implements StorageAdapter } } - // Currently unused, except by unit tests, since filters always support bitmap indexes currently. - // This will change when non-String dimensions are added. @Override - public ValueMatcher makeValueMatcher(String dimension, final Predicate predicate) + public ValueMatcher makeValueMatcher(String dimension, final DruidPredicateFactory predicateFactory) + { + ValueType type = getTypeForDimension(dimension); + switch (type) { + case LONG: + return makeLongValueMatcher(dimension, predicateFactory.makeLongPredicate()); + case STRING: + return makeStringValueMatcher(dimension, predicateFactory.makeStringPredicate()); + default: + return new BooleanValueMatcher(predicateFactory.makeStringPredicate().apply(null)); + } + } + + private ValueMatcher makeStringValueMatcher(String dimension, final Predicate predicate) { final DimensionSelector selector = cursor.makeDimensionSelector( new DefaultDimensionSpec(dimension, dimension) @@ -1055,6 +1091,20 @@ public class QueryableIndexStorageAdapter implements StorageAdapter } }; } + + private ValueMatcher makeLongValueMatcher(String dimension, final DruidLongPredicate predicate) + { + return Filters.getLongPredicateMatcher( + cursor.makeLongColumnSelector(dimension), + predicate + ); + } + + private ValueType getTypeForDimension(String dimension) + { + ColumnCapabilities capabilities = getColumnCapabilites(index, dimension); + return capabilities == null ? ValueType.STRING : capabilities.getType(); + } } private static class CursorOffsetHolderRowOffsetMatcherFactory implements RowOffsetMatcherFactory diff --git a/processing/src/main/java/io/druid/segment/filter/BoundFilter.java b/processing/src/main/java/io/druid/segment/filter/BoundFilter.java index 362d09e929f..107419cf1bf 100644 --- a/processing/src/main/java/io/druid/segment/filter/BoundFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/BoundFilter.java @@ -24,13 +24,13 @@ import com.metamx.collections.bitmap.ImmutableBitmap; import io.druid.query.extraction.ExtractionFn; import io.druid.query.filter.BitmapIndexSelector; import io.druid.query.filter.BoundDimFilter; +import io.druid.query.filter.DruidLongPredicate; +import io.druid.query.filter.DruidPredicateFactory; import io.druid.query.filter.Filter; -import io.druid.query.filter.RowOffsetMatcherFactory; import io.druid.query.filter.ValueMatcher; import io.druid.query.filter.ValueMatcherFactory; import io.druid.query.ordering.StringComparators; import io.druid.segment.column.BitmapIndex; -import io.druid.segment.column.ColumnCapabilities; import java.util.Comparator; import java.util.Iterator; @@ -57,14 +57,7 @@ public class BoundFilter implements Filter return Filters.matchPredicate( boundDimFilter.getDimension(), selector, - new Predicate() - { - @Override - public boolean apply(String input) - { - return doesMatch(input); - } - } + getPredicateFactory().makeStringPredicate() ); } else { final BitmapIndex bitmapIndex = selector.getBitmapIndex(boundDimFilter.getDimension()); @@ -142,17 +135,7 @@ public class BoundFilter implements Filter @Override public ValueMatcher makeMatcher(ValueMatcherFactory factory) { - return factory.makeValueMatcher( - boundDimFilter.getDimension(), - new Predicate() - { - @Override - public boolean apply(String input) - { - return doesMatch(input); - } - } - ); + return factory.makeValueMatcher(boundDimFilter.getDimension(), getPredicateFactory()); } @Override @@ -161,6 +144,40 @@ public class BoundFilter implements Filter return selector.getBitmapIndex(boundDimFilter.getDimension()) != null; } + private DruidPredicateFactory getPredicateFactory() + { + return new DruidPredicateFactory() + { + @Override + public Predicate makeStringPredicate() + { + return new Predicate() + { + @Override + public boolean apply(String input) + { + return doesMatch(input); + } + }; + } + + @Override + public DruidLongPredicate makeLongPredicate() + { + return new DruidLongPredicate() + { + @Override + public boolean applyLong(long input) + { + // When BoundFilter has a 'numeric' comparator (see https://github.com/druid-io/druid/issues/2989) + // this should be optimized to compare on longs instead of using string conversion. + return doesMatch(String.valueOf(input)); + } + }; + } + }; + } + private boolean doesMatch(String input) { if (extractionFn != null) { diff --git a/processing/src/main/java/io/druid/segment/filter/DimensionPredicateFilter.java b/processing/src/main/java/io/druid/segment/filter/DimensionPredicateFilter.java index e16c5ccd3c3..1443ef9f4b6 100644 --- a/processing/src/main/java/io/druid/segment/filter/DimensionPredicateFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/DimensionPredicateFilter.java @@ -24,41 +24,63 @@ import com.google.common.base.Predicate; import com.metamx.collections.bitmap.ImmutableBitmap; import io.druid.query.extraction.ExtractionFn; import io.druid.query.filter.BitmapIndexSelector; +import io.druid.query.filter.DruidLongPredicate; +import io.druid.query.filter.DruidPredicateFactory; import io.druid.query.filter.Filter; -import io.druid.query.filter.RowOffsetMatcherFactory; import io.druid.query.filter.ValueMatcher; import io.druid.query.filter.ValueMatcherFactory; -import io.druid.segment.column.ColumnCapabilities; /** */ public class DimensionPredicateFilter implements Filter { private final String dimension; - private final Predicate predicate; + private final DruidPredicateFactory predicateFactory; private final String basePredicateString; private final ExtractionFn extractionFn; public DimensionPredicateFilter( final String dimension, - final Predicate predicate, + final DruidPredicateFactory predicateFactory, final ExtractionFn extractionFn ) { - Preconditions.checkNotNull(predicate, "predicate"); + Preconditions.checkNotNull(predicateFactory, "predicateFactory"); this.dimension = Preconditions.checkNotNull(dimension, "dimension"); - this.basePredicateString = predicate.toString(); + this.basePredicateString = predicateFactory.toString(); this.extractionFn = extractionFn; if (extractionFn == null) { - this.predicate = predicate; + this.predicateFactory = predicateFactory; } else { - this.predicate = new Predicate() + this.predicateFactory = new DruidPredicateFactory() { + final Predicate baseStringPredicate = predicateFactory.makeStringPredicate(); + @Override - public boolean apply(String input) + public Predicate makeStringPredicate() { - return predicate.apply(extractionFn.apply(input)); + return new Predicate() + { + @Override + public boolean apply(String input) + { + return baseStringPredicate.apply(extractionFn.apply(input)); + } + }; + } + + @Override + public DruidLongPredicate makeLongPredicate() + { + return new DruidLongPredicate() + { + @Override + public boolean applyLong(long input) + { + return baseStringPredicate.apply(extractionFn.apply(input)); + } + }; } }; } @@ -67,13 +89,13 @@ public class DimensionPredicateFilter implements Filter @Override public ImmutableBitmap getBitmapIndex(final BitmapIndexSelector selector) { - return Filters.matchPredicate(dimension, selector, predicate); + return Filters.matchPredicate(dimension, selector, predicateFactory.makeStringPredicate()); } @Override public ValueMatcher makeMatcher(ValueMatcherFactory factory) { - return factory.makeValueMatcher(dimension, predicate); + return factory.makeValueMatcher(dimension, predicateFactory); } @Override diff --git a/processing/src/main/java/io/druid/segment/filter/Filters.java b/processing/src/main/java/io/druid/segment/filter/Filters.java index 002caaee8d2..c995055e39d 100644 --- a/processing/src/main/java/io/druid/segment/filter/Filters.java +++ b/processing/src/main/java/io/druid/segment/filter/Filters.java @@ -24,18 +24,28 @@ import com.google.common.base.Preconditions; import com.google.common.base.Predicate; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; +import com.google.common.primitives.Longs; import com.metamx.collections.bitmap.ImmutableBitmap; import com.metamx.common.IAE; import com.metamx.common.guava.FunctionalIterable; +import com.metamx.common.parsers.ParseException; import io.druid.query.Query; import io.druid.query.filter.BitmapIndexSelector; import io.druid.query.filter.BooleanFilter; import io.druid.query.filter.DimFilter; +import io.druid.query.filter.DruidLongPredicate; import io.druid.query.filter.Filter; +import io.druid.query.filter.ValueMatcher; +import io.druid.segment.ColumnSelectorFactory; +import io.druid.segment.LongColumnSelector; import io.druid.segment.column.BitmapIndex; +import io.druid.segment.column.Column; +import io.druid.segment.column.ValueType; import io.druid.segment.data.Indexed; +import io.druid.segment.incremental.IncrementalIndexStorageAdapter; import java.util.ArrayList; +import java.util.Arrays; import java.util.Iterator; import java.util.List; @@ -43,6 +53,7 @@ import java.util.List; */ public class Filters { + public static final List FILTERABLE_TYPES = ImmutableList.of(ValueType.STRING, ValueType.LONG); private static final String CTX_KEY_USE_FILTER_CNF = "useFilterCNF"; /** @@ -157,6 +168,48 @@ public class Filters ); } + public static ValueMatcher getLongValueMatcher( + final LongColumnSelector longSelector, + Comparable value + ) + { + if (value == null) { + return new BooleanValueMatcher(false); + } + + final Long longValue = Longs.tryParse(value.toString()); + if (longValue == null) { + return new BooleanValueMatcher(false); + } + + return new ValueMatcher() + { + // store the primitive, so we don't unbox for every comparison + final long unboxedLong = longValue.longValue(); + + @Override + public boolean matches() + { + return longSelector.get() == unboxedLong; + } + }; + } + + public static ValueMatcher getLongPredicateMatcher( + final LongColumnSelector longSelector, + final DruidLongPredicate predicate + ) + { + return new ValueMatcher() + { + @Override + public boolean matches() + { + return predicate.applyLong(longSelector.get()); + } + }; + } + public static Filter convertToCNFFromQueryContext(Query query, Filter filter) { if (filter == null) { diff --git a/processing/src/main/java/io/druid/segment/filter/InFilter.java b/processing/src/main/java/io/druid/segment/filter/InFilter.java index 07be80e7881..0b34c6fddaa 100644 --- a/processing/src/main/java/io/druid/segment/filter/InFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/InFilter.java @@ -22,15 +22,16 @@ package io.druid.segment.filter; import com.google.common.base.Function; import com.google.common.base.Predicate; import com.google.common.base.Strings; +import com.google.common.base.Supplier; import com.google.common.collect.Iterables; import com.metamx.collections.bitmap.ImmutableBitmap; import io.druid.query.extraction.ExtractionFn; import io.druid.query.filter.BitmapIndexSelector; +import io.druid.query.filter.DruidLongPredicate; +import io.druid.query.filter.DruidPredicateFactory; import io.druid.query.filter.Filter; -import io.druid.query.filter.RowOffsetMatcherFactory; import io.druid.query.filter.ValueMatcher; import io.druid.query.filter.ValueMatcherFactory; -import io.druid.segment.column.ColumnCapabilities; import java.util.Set; @@ -41,12 +42,19 @@ public class InFilter implements Filter private final String dimension; private final Set values; private final ExtractionFn extractionFn; + private final Supplier longPredicateSupplier; - public InFilter(String dimension, Set values, ExtractionFn extractionFn) + public InFilter( + String dimension, + Set values, + Supplier longPredicateSupplier, + ExtractionFn extractionFn + ) { this.dimension = dimension; this.values = values; this.extractionFn = extractionFn; + this.longPredicateSupplier = longPredicateSupplier; } @Override @@ -69,15 +77,7 @@ public class InFilter implements Filter return Filters.matchPredicate( dimension, selector, - new Predicate() - { - @Override - public boolean apply(String input) - { - // InDimFilter converts all null "values" to empty. - return values.contains(Strings.nullToEmpty(extractionFn.apply(input))); - } - } + getPredicateFactory().makeStringPredicate() ); } } @@ -85,19 +85,7 @@ public class InFilter implements Filter @Override public ValueMatcher makeMatcher(ValueMatcherFactory factory) { - return factory.makeValueMatcher( - dimension, new Predicate() - { - @Override - public boolean apply(String input) - { - if (extractionFn != null) { - input = extractionFn.apply(input); - } - return values.contains(Strings.nullToEmpty(input)); - } - } - ); + return factory.makeValueMatcher(dimension, getPredicateFactory()); } @Override @@ -105,4 +93,51 @@ public class InFilter implements Filter { return selector.getBitmapIndex(dimension) != null; } + + private DruidPredicateFactory getPredicateFactory() + { + return new DruidPredicateFactory() + { + @Override + public Predicate makeStringPredicate() + { + if (extractionFn != null) { + return new Predicate() + { + @Override + public boolean apply(String input) + { + return values.contains(Strings.nullToEmpty(extractionFn.apply(input))); + } + }; + } else { + return new Predicate() + { + @Override + public boolean apply(String input) + { + return values.contains(Strings.nullToEmpty(input)); + } + }; + } + } + + @Override + public DruidLongPredicate makeLongPredicate() + { + if (extractionFn != null) { + return new DruidLongPredicate() + { + @Override + public boolean applyLong(long input) + { + return values.contains(extractionFn.apply(input)); + } + }; + } else { + return longPredicateSupplier.get(); + } + } + }; + } } diff --git a/processing/src/main/java/io/druid/segment/filter/JavaScriptFilter.java b/processing/src/main/java/io/druid/segment/filter/JavaScriptFilter.java index 69125e1e119..bd5fdaaf54e 100644 --- a/processing/src/main/java/io/druid/segment/filter/JavaScriptFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/JavaScriptFilter.java @@ -20,28 +20,31 @@ package io.druid.segment.filter; import com.google.common.base.Predicate; +import com.google.common.base.Strings; import com.metamx.collections.bitmap.ImmutableBitmap; import io.druid.query.filter.BitmapIndexSelector; +import io.druid.query.filter.DruidLongPredicate; import io.druid.query.filter.Filter; import io.druid.query.filter.JavaScriptDimFilter; import io.druid.query.filter.RowOffsetMatcherFactory; import io.druid.query.filter.ValueMatcher; import io.druid.query.filter.ValueMatcherFactory; import io.druid.segment.column.ColumnCapabilities; +import io.druid.segment.column.ValueType; import org.mozilla.javascript.Context; public class JavaScriptFilter implements Filter { private final String dimension; - private final JavaScriptDimFilter.JavaScriptPredicate predicate; + private final JavaScriptDimFilter.JavaScriptPredicateFactory predicateFactory; public JavaScriptFilter( String dimension, - JavaScriptDimFilter.JavaScriptPredicate predicate + JavaScriptDimFilter.JavaScriptPredicateFactory predicate ) { this.dimension = dimension; - this.predicate = predicate; + this.predicateFactory = predicate; } @Override @@ -54,7 +57,7 @@ public class JavaScriptFilter implements Filter @Override public boolean apply(String input) { - return predicate.applyInContext(cx, input); + return predicateFactory.applyInContext(cx, input); } }; @@ -69,7 +72,7 @@ public class JavaScriptFilter implements Filter public ValueMatcher makeMatcher(ValueMatcherFactory factory) { // suboptimal, since we need create one context per call to predicate.apply() - return factory.makeValueMatcher(dimension, predicate); + return factory.makeValueMatcher(dimension, predicateFactory); } @Override diff --git a/processing/src/main/java/io/druid/segment/filter/RegexFilter.java b/processing/src/main/java/io/druid/segment/filter/RegexFilter.java index ee5549eb424..0c7eb7e5384 100644 --- a/processing/src/main/java/io/druid/segment/filter/RegexFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/RegexFilter.java @@ -21,6 +21,8 @@ package io.druid.segment.filter; import com.google.common.base.Predicate; import io.druid.query.extraction.ExtractionFn; +import io.druid.query.filter.DruidLongPredicate; +import io.druid.query.filter.DruidPredicateFactory; import java.util.regex.Pattern; @@ -36,12 +38,32 @@ public class RegexFilter extends DimensionPredicateFilter { super( dimension, - new Predicate() + new DruidPredicateFactory() { @Override - public boolean apply(String input) + public Predicate makeStringPredicate() { - return (input != null) && pattern.matcher(input).find(); + return new Predicate() + { + @Override + public boolean apply(String input) + { + return (input != null) && pattern.matcher(input).find(); + } + }; + } + + @Override + public DruidLongPredicate makeLongPredicate() + { + return new DruidLongPredicate() + { + @Override + public boolean applyLong(long input) + { + return pattern.matcher(String.valueOf(input)).find(); + } + }; } @Override diff --git a/processing/src/main/java/io/druid/segment/filter/SearchQueryFilter.java b/processing/src/main/java/io/druid/segment/filter/SearchQueryFilter.java index a2e37270b14..215e5cd5962 100644 --- a/processing/src/main/java/io/druid/segment/filter/SearchQueryFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/SearchQueryFilter.java @@ -23,6 +23,8 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Predicate; import io.druid.query.extraction.ExtractionFn; +import io.druid.query.filter.DruidLongPredicate; +import io.druid.query.filter.DruidPredicateFactory; import io.druid.query.search.search.SearchQuerySpec; import javax.annotation.Nullable; @@ -40,20 +42,32 @@ public class SearchQueryFilter extends DimensionPredicateFilter { super( dimension, - new Predicate() + new DruidPredicateFactory() { @Override - public boolean apply(@Nullable String input) + public Predicate makeStringPredicate() { - return query.accept(input); + return new Predicate() + { + @Override + public boolean apply(@Nullable String input) + { + return query.accept(input); + } + }; } @Override - public String toString() + public DruidLongPredicate makeLongPredicate() { - return "SearchQueryFilter{" + - ", query=" + query + - '}'; + return new DruidLongPredicate() + { + @Override + public boolean applyLong(long input) + { + return query.accept(String.valueOf(input)); + } + }; } }, extractionFn diff --git a/processing/src/main/java/io/druid/segment/filter/SpatialFilter.java b/processing/src/main/java/io/druid/segment/filter/SpatialFilter.java index 35d5aeb223f..988f61eccb8 100644 --- a/processing/src/main/java/io/druid/segment/filter/SpatialFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/SpatialFilter.java @@ -23,11 +23,11 @@ import com.google.common.base.Predicate; import com.metamx.collections.bitmap.ImmutableBitmap; import com.metamx.collections.spatial.search.Bound; import io.druid.query.filter.BitmapIndexSelector; +import io.druid.query.filter.DruidLongPredicate; +import io.druid.query.filter.DruidPredicateFactory; import io.druid.query.filter.Filter; -import io.druid.query.filter.RowOffsetMatcherFactory; import io.druid.query.filter.ValueMatcher; import io.druid.query.filter.ValueMatcherFactory; -import io.druid.segment.column.ColumnCapabilities; import io.druid.segment.incremental.SpatialDimensionRowTransformer; /** @@ -58,17 +58,37 @@ public class SpatialFilter implements Filter { return factory.makeValueMatcher( dimension, - new Predicate() + new DruidPredicateFactory() { @Override - public boolean apply(Object input) + public Predicate makeStringPredicate() { - if (input instanceof String) { - final float[] coordinate = SpatialDimensionRowTransformer.decode((String) input); - return bound.contains(coordinate); - } else { - return false; - } + return new Predicate() + { + @Override + public boolean apply(String input) + { + if (input == null) { + return false; + } + final float[] coordinate = SpatialDimensionRowTransformer.decode(input); + return bound.contains(coordinate); + } + }; + } + + @Override + public DruidLongPredicate makeLongPredicate() + { + return new DruidLongPredicate() + { + @Override + public boolean applyLong(long input) + { + // SpatialFilter does not currently support longs + return false; + } + }; } } ); diff --git a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java index 9707fada524..45cef452b27 100644 --- a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java +++ b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java @@ -253,6 +253,16 @@ public abstract class IncrementalIndex implements Iterable, } } + @Override + public ColumnCapabilities getColumnCapabilities(String columnName) + { + // This ColumnSelectorFactory implementation has no knowledge of column capabilities. + // However, this method may still be called by FilteredAggregatorFactory's ValueMatcherFactory + // to check column types. + // Just return null, the caller will assume default types in that case. + return null; + } + @Override public DimensionSelector makeDimensionSelector( DimensionSpec dimensionSpec @@ -429,6 +439,11 @@ public abstract class IncrementalIndex implements Iterable, columnCapabilities.put(dimSchema.getName(), capabilities); } + //__time capabilites + ColumnCapabilitiesImpl timeCapabilities = new ColumnCapabilitiesImpl(); + timeCapabilities.setType(ValueType.LONG); + columnCapabilities.put(Column.TIME_COLUMN_NAME, timeCapabilities); + // This should really be more generic List spatialDimensions = dimensionsSpec.getSpatialDimensions(); if (!spatialDimensions.isEmpty()) { diff --git a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java index ca2b22db7c6..f749bf9554a 100644 --- a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java +++ b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java @@ -27,12 +27,15 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterators; import com.google.common.collect.Lists; import com.google.common.primitives.Ints; +import com.metamx.common.UOE; import com.metamx.common.guava.Sequence; import com.metamx.common.guava.Sequences; import io.druid.granularity.QueryGranularity; import io.druid.query.QueryInterruptedException; import io.druid.query.dimension.DimensionSpec; import io.druid.query.extraction.ExtractionFn; +import io.druid.query.filter.DruidLongPredicate; +import io.druid.query.filter.DruidPredicateFactory; import io.druid.query.filter.Filter; import io.druid.query.filter.ValueMatcher; import io.druid.query.filter.ValueMatcherFactory; @@ -48,10 +51,12 @@ import io.druid.segment.SingleScanTimeDimSelector; import io.druid.segment.StorageAdapter; import io.druid.segment.column.Column; import io.druid.segment.column.ColumnCapabilities; +import io.druid.segment.column.ValueType; import io.druid.segment.data.Indexed; import io.druid.segment.data.IndexedInts; import io.druid.segment.data.ListIndexed; import io.druid.segment.filter.BooleanValueMatcher; +import io.druid.segment.filter.Filters; import io.druid.segment.serde.ComplexMetricSerde; import io.druid.segment.serde.ComplexMetrics; import org.joda.time.DateTime; @@ -213,11 +218,6 @@ public class IncrementalIndexStorageAdapter implements StorageAdapter new Function() { EntryHolder currEntry = new EntryHolder(); - private final ValueMatcher filterMatcher; - - { - filterMatcher = makeFilterMatcher(filter, currEntry); - } @Override public Cursor apply(@Nullable final Long input) @@ -226,6 +226,7 @@ public class IncrementalIndexStorageAdapter implements StorageAdapter return new Cursor() { + private final ValueMatcher filterMatcher = makeFilterMatcher(filter, this, currEntry); private Iterator> baseIter; private ConcurrentNavigableMap cursorMap; final DateTime time; @@ -599,6 +600,12 @@ public class IncrementalIndexStorageAdapter implements StorageAdapter return null; } + + @Override + public ColumnCapabilities getColumnCapabilities(String columnName) + { + return index.getCapabilities(columnName); + } }; } } @@ -613,11 +620,11 @@ public class IncrementalIndexStorageAdapter implements StorageAdapter return value == null; } - private ValueMatcher makeFilterMatcher(final Filter filter, final EntryHolder holder) + private ValueMatcher makeFilterMatcher(final Filter filter, final Cursor cursor, final EntryHolder holder) { return filter == null ? new BooleanValueMatcher(true) - : filter.makeMatcher(new EntryHolderValueMatcherFactory(holder)); + : filter.makeMatcher(new CursorAndEntryHolderValueMatcherFactory(cursor, holder)); } private static class EntryHolder @@ -645,20 +652,28 @@ public class IncrementalIndexStorageAdapter implements StorageAdapter } } - private class EntryHolderValueMatcherFactory implements ValueMatcherFactory + + private class CursorAndEntryHolderValueMatcherFactory implements ValueMatcherFactory { private final EntryHolder holder; + private final Cursor cursor; - public EntryHolderValueMatcherFactory( + public CursorAndEntryHolderValueMatcherFactory( + Cursor cursor, EntryHolder holder ) { + this.cursor = cursor; this.holder = holder; } @Override public ValueMatcher makeValueMatcher(String dimension, final Comparable value) { + if (getTypeForDimension(dimension) == ValueType.LONG) { + return Filters.getLongValueMatcher(cursor.makeLongColumnSelector(dimension), value); + } + IncrementalIndex.DimensionDesc dimensionDesc = index.getDimension(dimension); if (dimensionDesc == null) { return new BooleanValueMatcher(isComparableNullOrEmpty(value)); @@ -701,7 +716,20 @@ public class IncrementalIndexStorageAdapter implements StorageAdapter } @Override - public ValueMatcher makeValueMatcher(String dimension, final Predicate predicate) + public ValueMatcher makeValueMatcher(String dimension, final DruidPredicateFactory predicateFactory) + { + ValueType type = getTypeForDimension(dimension); + switch (type) { + case LONG: + return makeLongValueMatcher(dimension, predicateFactory.makeLongPredicate()); + case STRING: + return makeStringValueMatcher(dimension, predicateFactory.makeStringPredicate()); + default: + return new BooleanValueMatcher(predicateFactory.makeStringPredicate().apply(null)); + } + } + + private ValueMatcher makeStringValueMatcher(String dimension, final Predicate predicate) { IncrementalIndex.DimensionDesc dimensionDesc = index.getDimension(dimension); if (dimensionDesc == null) { @@ -721,7 +749,7 @@ public class IncrementalIndexStorageAdapter implements StorageAdapter } for (int dimVal : dims[dimIndex]) { - if (predicate.apply(dimDim.getValue(dimVal))) { + if (predicate.apply((String) dimDim.getValue(dimVal))) { return true; } } @@ -729,6 +757,17 @@ public class IncrementalIndexStorageAdapter implements StorageAdapter } }; } + + private ValueMatcher makeLongValueMatcher(String dimension, DruidLongPredicate predicate) + { + return Filters.getLongPredicateMatcher(cursor.makeLongColumnSelector(dimension), predicate); + } + + private ValueType getTypeForDimension(String dimension) + { + ColumnCapabilities capabilities = index.getCapabilities(dimension); + return capabilities == null ? ValueType.STRING : capabilities.getType(); + } } @Override diff --git a/processing/src/main/java/io/druid/segment/incremental/OnheapIncrementalIndex.java b/processing/src/main/java/io/druid/segment/incremental/OnheapIncrementalIndex.java index 7fe2a8ee64f..98a6361c9cb 100644 --- a/processing/src/main/java/io/druid/segment/incremental/OnheapIncrementalIndex.java +++ b/processing/src/main/java/io/druid/segment/incremental/OnheapIncrementalIndex.java @@ -34,6 +34,7 @@ import io.druid.segment.DimensionSelector; import io.druid.segment.FloatColumnSelector; import io.druid.segment.LongColumnSelector; import io.druid.segment.ObjectColumnSelector; +import io.druid.segment.column.ColumnCapabilities; import java.util.List; import java.util.Map; @@ -514,6 +515,12 @@ public class OnheapIncrementalIndex extends IncrementalIndex return prev != null ? prev : newSelector; } } + + @Override + public ColumnCapabilities getColumnCapabilities(String columnName) + { + return delegate.getColumnCapabilities(columnName); + } } } diff --git a/processing/src/test/java/io/druid/query/aggregation/FilteredAggregatorTest.java b/processing/src/test/java/io/druid/query/aggregation/FilteredAggregatorTest.java index 18236669c53..11c0a6fb28f 100644 --- a/processing/src/test/java/io/druid/query/aggregation/FilteredAggregatorTest.java +++ b/processing/src/test/java/io/druid/query/aggregation/FilteredAggregatorTest.java @@ -40,6 +40,9 @@ import io.druid.segment.DimensionSelector; import io.druid.segment.FloatColumnSelector; import io.druid.segment.LongColumnSelector; import io.druid.segment.ObjectColumnSelector; +import io.druid.segment.column.ColumnCapabilities; +import io.druid.segment.column.ColumnCapabilitiesImpl; +import io.druid.segment.column.ValueType; import io.druid.segment.data.ArrayBasedIndexedInts; import io.druid.segment.data.IndexedInts; import org.junit.Assert; @@ -162,6 +165,24 @@ public class FilteredAggregatorTest { throw new UnsupportedOperationException(); } + + @Override + public ColumnCapabilities getColumnCapabilities(String columnName) + { + ColumnCapabilitiesImpl caps; + if (columnName.equals("value")) { + caps = new ColumnCapabilitiesImpl(); + caps.setType(ValueType.FLOAT); + caps.setDictionaryEncoded(false); + caps.setHasBitmapIndexes(false); + } else { + caps = new ColumnCapabilitiesImpl(); + caps.setType(ValueType.STRING); + caps.setDictionaryEncoded(true); + caps.setHasBitmapIndexes(true); + } + return caps; + } }; } diff --git a/processing/src/test/java/io/druid/query/aggregation/JavaScriptAggregatorTest.java b/processing/src/test/java/io/druid/query/aggregation/JavaScriptAggregatorTest.java index 7d9272c03ac..fb8004e89ed 100644 --- a/processing/src/test/java/io/druid/query/aggregation/JavaScriptAggregatorTest.java +++ b/processing/src/test/java/io/druid/query/aggregation/JavaScriptAggregatorTest.java @@ -29,6 +29,7 @@ import io.druid.segment.DimensionSelector; import io.druid.segment.FloatColumnSelector; import io.druid.segment.LongColumnSelector; import io.druid.segment.ObjectColumnSelector; +import io.druid.segment.column.ColumnCapabilities; import org.junit.Assert; import org.junit.Rule; import org.junit.Test; @@ -69,6 +70,12 @@ public class JavaScriptAggregatorTest { return null; } + + @Override + public ColumnCapabilities getColumnCapabilities(String columnName) + { + return null; + } }; static { diff --git a/processing/src/test/java/io/druid/query/groupby/epinephelinae/TestColumnSelectorFactory.java b/processing/src/test/java/io/druid/query/groupby/epinephelinae/TestColumnSelectorFactory.java index 1b97037c192..77c57c0bf3b 100644 --- a/processing/src/test/java/io/druid/query/groupby/epinephelinae/TestColumnSelectorFactory.java +++ b/processing/src/test/java/io/druid/query/groupby/epinephelinae/TestColumnSelectorFactory.java @@ -26,6 +26,7 @@ import io.druid.segment.DimensionSelector; import io.druid.segment.FloatColumnSelector; import io.druid.segment.LongColumnSelector; import io.druid.segment.ObjectColumnSelector; +import io.druid.segment.column.ColumnCapabilities; public class TestColumnSelectorFactory implements ColumnSelectorFactory { @@ -86,4 +87,10 @@ public class TestColumnSelectorFactory implements ColumnSelectorFactory } }; } + + @Override + public ColumnCapabilities getColumnCapabilities(String columnName) + { + return null; + } } diff --git a/processing/src/test/java/io/druid/segment/filter/FilterPartitionTest.java b/processing/src/test/java/io/druid/segment/filter/FilterPartitionTest.java index 1f9c6d8a37d..022a2c7c39b 100644 --- a/processing/src/test/java/io/druid/segment/filter/FilterPartitionTest.java +++ b/processing/src/test/java/io/druid/segment/filter/FilterPartitionTest.java @@ -36,15 +36,14 @@ import io.druid.data.input.impl.MapInputRowParser; import io.druid.data.input.impl.TimeAndDimsParseSpec; import io.druid.data.input.impl.TimestampSpec; import io.druid.js.JavaScriptConfig; -import io.druid.query.aggregation.Aggregator; -import io.druid.query.aggregation.CountAggregatorFactory; -import io.druid.query.aggregation.FilteredAggregatorFactory; import io.druid.query.dimension.DefaultDimensionSpec; import io.druid.query.extraction.ExtractionFn; import io.druid.query.extraction.JavaScriptExtractionFn; import io.druid.query.filter.AndDimFilter; import io.druid.query.filter.BitmapIndexSelector; import io.druid.query.filter.DimFilter; +import io.druid.query.filter.DruidLongPredicate; +import io.druid.query.filter.DruidPredicateFactory; import io.druid.query.filter.Filter; import io.druid.query.filter.OrDimFilter; import io.druid.query.filter.SelectorDimFilter; @@ -91,11 +90,11 @@ public class FilterPartitionTest extends BaseFilterTest { public NoBitmapDimensionPredicateFilter( final String dimension, - final Predicate predicate, + final DruidPredicateFactory predicateFactory, final ExtractionFn extractionFn ) { - super(dimension, predicate, extractionFn); + super(dimension, predicateFactory, extractionFn); } @Override @@ -125,15 +124,36 @@ public class FilterPartitionTest extends BaseFilterTest return new NoBitmapSelectorFilter(dimension, value); } else { final String valueOrNull = Strings.emptyToNull(value); - final Predicate predicate = new Predicate() + final DruidPredicateFactory predicateFactory = new DruidPredicateFactory() { @Override - public boolean apply(String input) + public Predicate makeStringPredicate() { - return Objects.equals(valueOrNull, input); + return new Predicate() + { + @Override + public boolean apply(String input) + { + return Objects.equals(valueOrNull, input); + } + }; + } + + @Override + public DruidLongPredicate makeLongPredicate() + { + return new DruidLongPredicate() + { + @Override + public boolean applyLong(long input) + { + return Objects.equals(valueOrNull, String.valueOf(input)); + } + }; } }; - return new NoBitmapDimensionPredicateFilter(dimension, predicate, extractionFn); + + return new NoBitmapDimensionPredicateFilter(dimension, predicateFactory, extractionFn); } } } diff --git a/processing/src/test/java/io/druid/segment/filter/InvalidFilteringTest.java b/processing/src/test/java/io/druid/segment/filter/InvalidFilteringTest.java new file mode 100644 index 00000000000..394036aad82 --- /dev/null +++ b/processing/src/test/java/io/druid/segment/filter/InvalidFilteringTest.java @@ -0,0 +1,179 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.segment.filter; + +import com.google.common.base.Function; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.metamx.common.Pair; +import io.druid.data.input.InputRow; +import io.druid.data.input.impl.DimensionsSpec; +import io.druid.data.input.impl.InputRowParser; +import io.druid.data.input.impl.MapInputRowParser; +import io.druid.data.input.impl.TimeAndDimsParseSpec; +import io.druid.data.input.impl.TimestampSpec; +import io.druid.js.JavaScriptConfig; +import io.druid.query.aggregation.AggregatorFactory; +import io.druid.query.aggregation.CountAggregatorFactory; +import io.druid.query.aggregation.DoubleMaxAggregatorFactory; +import io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory; +import io.druid.query.extraction.MapLookupExtractor; +import io.druid.query.filter.BoundDimFilter; +import io.druid.query.filter.DimFilter; +import io.druid.query.filter.InDimFilter; +import io.druid.query.filter.JavaScriptDimFilter; +import io.druid.query.filter.RegexDimFilter; +import io.druid.query.filter.SearchQueryDimFilter; +import io.druid.query.filter.SelectorDimFilter; +import io.druid.query.lookup.LookupExtractionFn; +import io.druid.query.lookup.LookupExtractor; +import io.druid.query.search.search.ContainsSearchQuerySpec; +import io.druid.segment.IndexBuilder; +import io.druid.segment.StorageAdapter; +import io.druid.segment.incremental.IncrementalIndexSchema; +import org.joda.time.DateTime; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.Closeable; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +@RunWith(Parameterized.class) +public class InvalidFilteringTest extends BaseFilterTest +{ + private static final String COUNT_COLUMN = "count"; + private static final String TIMESTAMP_COLUMN = "ts"; + + private static final InputRowParser> PARSER = new MapInputRowParser( + new TimeAndDimsParseSpec( + new TimestampSpec(TIMESTAMP_COLUMN, "millis", new DateTime("2000")), + new DimensionsSpec( + DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim0", "dim1", "dim2", "dim3")), + null, + null + ) + ) + ); + + private static final InputRow row0 = PARSER.parse(ImmutableMap.of("ts", 1L, "dim0", "1", "dim1", "", "dim2", ImmutableList.of("a", "b"))); + private static final InputRow row1 = PARSER.parse(ImmutableMap.of("ts", 2L, "dim0", "2", "dim1", "10", "dim2", ImmutableList.of())); + private static final InputRow row2 = PARSER.parse(ImmutableMap.of("ts", 3L, "dim0", "3", "dim1", "2", "dim2", ImmutableList.of(""))); + private static final InputRow row3 = PARSER.parse(ImmutableMap.of("ts", 4L, "dim0", "4", "dim1", "1", "dim2", ImmutableList.of("a"))); + private static final InputRow row4 = PARSER.parse(ImmutableMap.of("ts", 5L, "dim0", "5", "dim1", "def", "dim2", ImmutableList.of("c"))); + private static final InputRow row5 = PARSER.parse(ImmutableMap.of("ts", 6L, "dim0", "6", "dim1", "abc")); + + private static final List ROWS = ImmutableList.of( + row0, + row1, + row2, + row3, + row4, + row5 + ); + + public InvalidFilteringTest( + String testName, + IndexBuilder indexBuilder, + Function> finisher, + boolean optimize + ) + { + super(testName, ROWS, overrideIndexBuilderSchema(indexBuilder), finisher, optimize); + } + + private static IndexBuilder overrideIndexBuilderSchema(IndexBuilder indexBuilder) + { + IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder().withMetrics(new AggregatorFactory[]{ + new CountAggregatorFactory("count"), + new HyperUniquesAggregatorFactory("hyperion", "dim1"), + new DoubleMaxAggregatorFactory("dmax", "dim0") + }).build(); + + return indexBuilder.schema(schema); + } + + @AfterClass + public static void tearDown() throws Exception + { + BaseFilterTest.tearDown(InvalidFilteringTest.class.getName()); + } + + @Test + public void testFilterTheUnfilterable() + { + // single value matching + assertFilterMatches( + new SelectorDimFilter("hyperion", "a string", null), + ImmutableList.of() + ); + + assertFilterMatches( + new SelectorDimFilter("hyperion", null, null), + ImmutableList.of("1", "2", "3", "4", "5", "6") + ); + + assertFilterMatches( + new SelectorDimFilter("dmax", "another string", null), + ImmutableList.of() + ); + + assertFilterMatches( + new SelectorDimFilter("dmax", null, null), + ImmutableList.of("1", "2", "3", "4", "5", "6") + ); + + // predicate based matching + assertFilterMatches( + new InDimFilter("hyperion", Arrays.asList("hello", "world"), null), + ImmutableList.of() + ); + + assertFilterMatches( + new InDimFilter("hyperion", Arrays.asList("hello", "world", null), null), + ImmutableList.of("1", "2", "3", "4", "5", "6") + ); + + assertFilterMatches( + new InDimFilter("dmax", Arrays.asList("hello", "world"), null), + ImmutableList.of() + ); + + assertFilterMatches( + new InDimFilter("dmax", Arrays.asList("hello", "world", null), null), + ImmutableList.of("1", "2", "3", "4", "5", "6") + ); + } + + private void assertFilterMatches( + final DimFilter filter, + final List expectedRows + ) + { + Assert.assertEquals(filter.toString(), expectedRows, selectColumnValuesMatchingFilter(filter, "dim0")); + Assert.assertEquals(filter.toString(), expectedRows.size(), selectCountUsingFilteredAggregator(filter)); + } +} diff --git a/processing/src/test/java/io/druid/segment/filter/LongFilteringTest.java b/processing/src/test/java/io/druid/segment/filter/LongFilteringTest.java new file mode 100644 index 00000000000..54a96f64a36 --- /dev/null +++ b/processing/src/test/java/io/druid/segment/filter/LongFilteringTest.java @@ -0,0 +1,320 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.segment.filter; + +import com.google.common.base.Function; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.util.concurrent.Futures; +import com.google.common.util.concurrent.ListenableFuture; +import com.google.common.util.concurrent.ListeningExecutorService; +import com.google.common.util.concurrent.MoreExecutors; +import com.metamx.common.Pair; +import io.druid.data.input.InputRow; +import io.druid.data.input.impl.DimensionsSpec; +import io.druid.data.input.impl.InputRowParser; +import io.druid.data.input.impl.MapInputRowParser; +import io.druid.data.input.impl.TimeAndDimsParseSpec; +import io.druid.data.input.impl.TimestampSpec; +import io.druid.js.JavaScriptConfig; +import io.druid.query.extraction.MapLookupExtractor; +import io.druid.query.filter.BoundDimFilter; +import io.druid.query.filter.DimFilter; +import io.druid.query.filter.InDimFilter; +import io.druid.query.filter.JavaScriptDimFilter; +import io.druid.query.filter.RegexDimFilter; +import io.druid.query.filter.SearchQueryDimFilter; +import io.druid.query.filter.SelectorDimFilter; +import io.druid.query.lookup.LookupExtractionFn; +import io.druid.query.lookup.LookupExtractor; +import io.druid.query.search.search.ContainsSearchQuerySpec; +import io.druid.segment.IndexBuilder; +import io.druid.segment.StorageAdapter; +import org.joda.time.DateTime; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.Closeable; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.RunnableFuture; +import java.util.concurrent.TimeUnit; + +@RunWith(Parameterized.class) +public class LongFilteringTest extends BaseFilterTest +{ + private static final String COUNT_COLUMN = "count"; + private static final String TIMESTAMP_COLUMN = "ts"; + private static int EXECUTOR_NUM_THREADS = 16; + private static int EXECUTOR_NUM_TASKS = 2000; + + private static final InputRowParser> PARSER = new MapInputRowParser( + new TimeAndDimsParseSpec( + new TimestampSpec(TIMESTAMP_COLUMN, "millis", new DateTime("2000")), + new DimensionsSpec( + DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim0", "dim1", "dim2", "dim3")), + null, + null + ) + ) + ); + + private static final InputRow row0 = PARSER.parse(ImmutableMap.of("ts", 1L, "dim0", "1", "dim1", "", "dim2", ImmutableList.of("a", "b"))); + private static final InputRow row1 = PARSER.parse(ImmutableMap.of("ts", 2L, "dim0", "2", "dim1", "10", "dim2", ImmutableList.of())); + private static final InputRow row2 = PARSER.parse(ImmutableMap.of("ts", 3L, "dim0", "3", "dim1", "2", "dim2", ImmutableList.of(""))); + private static final InputRow row3 = PARSER.parse(ImmutableMap.of("ts", 4L, "dim0", "4", "dim1", "1", "dim2", ImmutableList.of("a"))); + private static final InputRow row4 = PARSER.parse(ImmutableMap.of("ts", 5L, "dim0", "5", "dim1", "def", "dim2", ImmutableList.of("c"))); + private static final InputRow row5 = PARSER.parse(ImmutableMap.of("ts", 6L, "dim0", "6", "dim1", "abc")); + + private static final List ROWS = ImmutableList.of( + row0, + row1, row1, + row2, row2, row2, + row3, row3, row3, row3, + row4, row4, row4, row4, row4, + row5, row5, row5, row5, row5, row5 + ); + + public LongFilteringTest( + String testName, + IndexBuilder indexBuilder, + Function> finisher, + boolean optimize + ) + { + super(testName, ROWS, indexBuilder, finisher, optimize); + } + + @AfterClass + public static void tearDown() throws Exception + { + BaseFilterTest.tearDown(LongFilteringTest.class.getName()); + } + + @Test + public void testTimeFilterAsLong() + { + assertFilterMatches( + new SelectorDimFilter(COUNT_COLUMN, "0", null), + ImmutableList.of() + ); + + assertFilterMatches( + new SelectorDimFilter(COUNT_COLUMN, "3", null), + ImmutableList.of("3") + ); + + assertFilterMatches( + new BoundDimFilter(COUNT_COLUMN, "2", "5", false, false, true, null), + ImmutableList.of("2", "3", "4", "5") + ); + + assertFilterMatches( + new BoundDimFilter(COUNT_COLUMN, "1", "4", true, true, true, null), + ImmutableList.of("2", "3") + ); + + assertFilterMatches( + new InDimFilter(COUNT_COLUMN, Arrays.asList("2", "4", "8"), null), + ImmutableList.of("2", "4") + ); + + // cross the hashing threshold to test hashset implementation, filter on even values + List infilterValues = new ArrayList<>(InDimFilter.LONG_HASHING_THRESHOLD * 2); + for (int i = 0; i < InDimFilter.LONG_HASHING_THRESHOLD * 2; i++) { + infilterValues.add(String.valueOf(i * 2)); + } + assertFilterMatches( + new InDimFilter(COUNT_COLUMN, infilterValues, null), + ImmutableList.of("2", "4", "6") + ); + + String jsFn = "function(x) { return(x === 3 || x === 5) }"; + assertFilterMatches( + new JavaScriptDimFilter(COUNT_COLUMN, jsFn, null, JavaScriptConfig.getDefault()), + ImmutableList.of("3", "5") + ); + + assertFilterMatches( + new RegexDimFilter(COUNT_COLUMN, "4", null), + ImmutableList.of("4") + ); + + assertFilterMatches( + new SearchQueryDimFilter(COUNT_COLUMN, new ContainsSearchQuerySpec("2", true), null), + ImmutableList.of("2") + ); + } + + @Test + public void testLongFilterWithExtractionFn() + { + final Map stringMap = new HashMap<>(); + stringMap.put("1", "Monday"); + stringMap.put("2", "Tuesday"); + stringMap.put("3", "Wednesday"); + stringMap.put("4", "Thursday"); + stringMap.put("5", "Friday"); + stringMap.put("6", "Saturday"); + LookupExtractor mapExtractor = new MapLookupExtractor(stringMap, false); + LookupExtractionFn exfn = new LookupExtractionFn(mapExtractor, false, "UNKNOWN", false, true); + + assertFilterMatches( + new SelectorDimFilter(COUNT_COLUMN, "Monday", exfn), + ImmutableList.of("1") + ); + assertFilterMatches( + new SelectorDimFilter(COUNT_COLUMN, "Notaday", exfn), + ImmutableList.of() + ); + + assertFilterMatches( + new BoundDimFilter(COUNT_COLUMN, "Fridax", "Fridaz", false, false, true, exfn), + ImmutableList.of("5") + ); + assertFilterMatches( + new BoundDimFilter(COUNT_COLUMN, "Friday", "Friday", true, true, true, exfn), + ImmutableList.of() + ); + + assertFilterMatches( + new InDimFilter(COUNT_COLUMN, Arrays.asList("Caturday", "Saturday", "Tuesday"), exfn), + ImmutableList.of("2", "6") + ); + + // test InFilter HashSet implementation + List bigList = Arrays.asList( + "Saturday", "Tuesday", + "Caturday", "Xanaday", "Vojuday", "Gribaday", "Kipoday", "Dheferday", "Fakeday", "Qeearaday", + "Hello", "World", "1", "2", "3", "4", "5", "6", "7" + ); + assertFilterMatches( + new InDimFilter(COUNT_COLUMN, bigList, exfn), + ImmutableList.of("2", "6") + ); + + String jsFn = "function(x) { return(x === 'Wednesday' || x === 'Thursday') }"; + assertFilterMatches( + new JavaScriptDimFilter(COUNT_COLUMN, jsFn, exfn, JavaScriptConfig.getDefault()), + ImmutableList.of("3", "4") + ); + + assertFilterMatches( + new RegexDimFilter(COUNT_COLUMN, ".*day", exfn), + ImmutableList.of("1", "2", "3", "4", "5", "6") + ); + + assertFilterMatches( + new SearchQueryDimFilter(COUNT_COLUMN, new ContainsSearchQuerySpec("s", true), exfn), + ImmutableList.of("2", "3", "4") + ); + } + + @Test + public void testSelectorAndInFilterMultithreaded() + { + assertFilterMatchesMultithreaded( + new SelectorDimFilter(COUNT_COLUMN, "3", null), + ImmutableList.of("3") + ); + + assertFilterMatchesMultithreaded( + new InDimFilter(COUNT_COLUMN, Arrays.asList("2", "4", "8"), null), + ImmutableList.of("2", "4") + ); + + // cross the hashing threshold to test hashset implementation, filter on even values + List infilterValues = new ArrayList<>(InDimFilter.LONG_HASHING_THRESHOLD * 2); + for (int i = 0; i < InDimFilter.LONG_HASHING_THRESHOLD * 2; i++) { + infilterValues.add(String.valueOf(i * 2)); + } + assertFilterMatchesMultithreaded( + new InDimFilter(COUNT_COLUMN, infilterValues, null), + ImmutableList.of("2", "4", "6") + ); + } + + private void assertFilterMatches( + final DimFilter filter, + final List expectedRows + ) + { + Assert.assertEquals(filter.toString(), expectedRows, selectColumnValuesMatchingFilter(filter, "dim0")); + Assert.assertEquals(filter.toString(), expectedRows.size(), selectCountUsingFilteredAggregator(filter)); + } + + private void assertFilterMatchesMultithreaded( + final DimFilter filter, + final List expectedRows + ) + { + testWithExecutor(filter, expectedRows); + } + + private Runnable makeFilterRunner( + final DimFilter filter, + final List expectedRows + ) + { + return new Runnable() + { + @Override + public void run() + { + Assert.assertEquals(filter.toString(), expectedRows, selectColumnValuesMatchingFilter(filter, "dim0")); + Assert.assertEquals(filter.toString(), expectedRows.size(), selectCountUsingFilteredAggregator(filter)); + } + }; + } + + private void testWithExecutor( + final DimFilter filter, + final List expectedRows + ) + { + ListeningExecutorService executor = MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(EXECUTOR_NUM_THREADS)); + + List> futures = new ArrayList<>(); + + for (int i = 0; i < EXECUTOR_NUM_TASKS; i++) { + Runnable runnable = makeFilterRunner(filter, expectedRows); + ListenableFuture fut = executor.submit(runnable); + futures.add(fut); + } + + try { + Futures.allAsList(futures).get(60, TimeUnit.SECONDS); + } + catch (Exception ex) { + Assert.fail(ex.getMessage()); + } + + executor.shutdown(); + } +} diff --git a/processing/src/test/java/io/druid/segment/filter/TimeFilteringTest.java b/processing/src/test/java/io/druid/segment/filter/TimeFilteringTest.java new file mode 100644 index 00000000000..de6da1fbece --- /dev/null +++ b/processing/src/test/java/io/druid/segment/filter/TimeFilteringTest.java @@ -0,0 +1,240 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.segment.filter; + +import com.google.common.base.Function; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.metamx.common.Pair; +import io.druid.data.input.InputRow; +import io.druid.data.input.impl.DimensionsSpec; +import io.druid.data.input.impl.InputRowParser; +import io.druid.data.input.impl.MapInputRowParser; +import io.druid.data.input.impl.TimeAndDimsParseSpec; +import io.druid.data.input.impl.TimestampSpec; +import io.druid.js.JavaScriptConfig; +import io.druid.query.extraction.ExtractionFn; +import io.druid.query.extraction.MapLookupExtractor; +import io.druid.query.extraction.TimeFormatExtractionFn; +import io.druid.query.filter.BoundDimFilter; +import io.druid.query.filter.DimFilter; +import io.druid.query.filter.InDimFilter; +import io.druid.query.filter.JavaScriptDimFilter; +import io.druid.query.filter.RegexDimFilter; +import io.druid.query.filter.SearchQueryDimFilter; +import io.druid.query.filter.SelectorDimFilter; +import io.druid.query.lookup.LookupExtractionFn; +import io.druid.query.lookup.LookupExtractor; +import io.druid.query.search.search.ContainsSearchQuerySpec; +import io.druid.segment.IndexBuilder; +import io.druid.segment.StorageAdapter; +import io.druid.segment.column.Column; +import org.joda.time.DateTime; +import org.joda.time.DateTimeZone; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.Closeable; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +@RunWith(Parameterized.class) +public class TimeFilteringTest extends BaseFilterTest +{ + private static final String TIMESTAMP_COLUMN = "ts"; + + private static final InputRowParser> PARSER = new MapInputRowParser( + new TimeAndDimsParseSpec( + new TimestampSpec(TIMESTAMP_COLUMN, "millis", new DateTime("2000")), + new DimensionsSpec( + DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim0", "dim1", "dim2", "dim3")), + null, + null + ) + ) + ); + + private static final List ROWS = ImmutableList.of( + PARSER.parse(ImmutableMap.of("ts", 0L, "dim0", "0", "dim1", "", "dim2", ImmutableList.of("a", "b"))), + PARSER.parse(ImmutableMap.of("ts", 1L, "dim0", "1", "dim1", "10", "dim2", ImmutableList.of())), + PARSER.parse(ImmutableMap.of("ts", 2L, "dim0", "2", "dim1", "2", "dim2", ImmutableList.of(""))), + PARSER.parse(ImmutableMap.of("ts", 3L, "dim0", "3", "dim1", "1", "dim2", ImmutableList.of("a"))), + PARSER.parse(ImmutableMap.of("ts", 4L, "dim0", "4", "dim1", "def", "dim2", ImmutableList.of("c"))), + PARSER.parse(ImmutableMap.of("ts", 5L, "dim0", "5", "dim1", "abc")) + ); + + public TimeFilteringTest( + String testName, + IndexBuilder indexBuilder, + Function> finisher, + boolean optimize + ) + { + super(testName, ROWS, indexBuilder, finisher, optimize); + } + + @AfterClass + public static void tearDown() throws Exception + { + BaseFilterTest.tearDown(TimeFilteringTest.class.getName()); + } + + @Test + public void testTimeFilterAsLong() + { + assertFilterMatches( + new SelectorDimFilter(Column.TIME_COLUMN_NAME, "0", null), + ImmutableList.of("0") + ); + assertFilterMatches( + new SelectorDimFilter(Column.TIME_COLUMN_NAME, "9000", null), + ImmutableList.of() + ); + + assertFilterMatches( + new BoundDimFilter(Column.TIME_COLUMN_NAME, "0", "4", false, false, true, null), + ImmutableList.of("0", "1", "2", "3", "4") + ); + assertFilterMatches( + new BoundDimFilter(Column.TIME_COLUMN_NAME, "0", "4", true, true, true, null), + ImmutableList.of("1", "2", "3") + ); + + assertFilterMatches( + new InDimFilter(Column.TIME_COLUMN_NAME, Arrays.asList("2", "4", "8"), null), + ImmutableList.of("2", "4") + ); + + // cross the hashing threshold to test hashset implementation, filter on even values + List infilterValues = new ArrayList<>(InDimFilter.LONG_HASHING_THRESHOLD * 2); + for (int i = 0; i < InDimFilter.LONG_HASHING_THRESHOLD * 2; i++) { + infilterValues.add(String.valueOf(i*2)); + } + assertFilterMatches( + new InDimFilter(Column.TIME_COLUMN_NAME, infilterValues, null), + ImmutableList.of("0", "2", "4") + ); + + String jsFn = "function(x) { return(x === 3 || x === 5) }"; + assertFilterMatches( + new JavaScriptDimFilter(Column.TIME_COLUMN_NAME, jsFn, null, JavaScriptConfig.getDefault()), + ImmutableList.of("3", "5") + ); + + assertFilterMatches( + new RegexDimFilter(Column.TIME_COLUMN_NAME, "4", null), + ImmutableList.of("4") + ); + + assertFilterMatches( + new SearchQueryDimFilter(Column.TIME_COLUMN_NAME, new ContainsSearchQuerySpec("2", true), null), + ImmutableList.of("2") + ); + } + + @Test + public void testTimeFilterWithExtractionFn() + { + final Map stringMap = new HashMap<>(); + stringMap.put("0", "Monday"); + stringMap.put("1", "Tuesday"); + stringMap.put("2", "Wednesday"); + stringMap.put("3", "Thursday"); + stringMap.put("4", "Friday"); + stringMap.put("5", "Saturday"); + LookupExtractor mapExtractor = new MapLookupExtractor(stringMap, false); + LookupExtractionFn exfn = new LookupExtractionFn(mapExtractor, false, "UNKNOWN", false, true); + + assertFilterMatches( + new SelectorDimFilter(Column.TIME_COLUMN_NAME, "Monday", exfn), + ImmutableList.of("0") + ); + assertFilterMatches( + new SelectorDimFilter(Column.TIME_COLUMN_NAME, "Notaday", exfn), + ImmutableList.of() + ); + + assertFilterMatches( + new BoundDimFilter(Column.TIME_COLUMN_NAME, "Fridax", "Fridaz", false, false, true, exfn), + ImmutableList.of("4") + ); + assertFilterMatches( + new BoundDimFilter(Column.TIME_COLUMN_NAME, "Friday", "Friday", true, true, true, exfn), + ImmutableList.of() + ); + + assertFilterMatches( + new InDimFilter(Column.TIME_COLUMN_NAME, Arrays.asList("Caturday", "Saturday", "Tuesday"), exfn), + ImmutableList.of("1", "5") + ); + + // test InFilter HashSet implementation + List bigList = Arrays.asList( + "Saturday", "Tuesday", + "Caturday", "Xanaday", "Vojuday", "Gribaday", "Kipoday", "Dheferday", "Fakeday", "Qeearaday", + "Hello", "World", "1", "2", "3", "4", "5", "6", "7" + ); + assertFilterMatches( + new InDimFilter(Column.TIME_COLUMN_NAME, bigList, exfn), + ImmutableList.of("1", "5") + ); + + String jsFn = "function(x) { return(x === 'Wednesday' || x === 'Thursday') }"; + assertFilterMatches( + new JavaScriptDimFilter(Column.TIME_COLUMN_NAME, jsFn, exfn, JavaScriptConfig.getDefault()), + ImmutableList.of("2", "3") + ); + + assertFilterMatches( + new RegexDimFilter(Column.TIME_COLUMN_NAME, ".*day", exfn), + ImmutableList.of("0", "1", "2", "3", "4", "5") + ); + + assertFilterMatches( + new SearchQueryDimFilter(Column.TIME_COLUMN_NAME, new ContainsSearchQuerySpec("s", true), exfn), + ImmutableList.of("1", "2", "3") + ); + } + + @Test + public void testTimeFilterWithTimeFormatExtractionFn() + { + ExtractionFn exfn = new TimeFormatExtractionFn("EEEE", DateTimeZone.forID("America/New_York"), "en"); + assertFilterMatches( + new SelectorDimFilter(Column.TIME_COLUMN_NAME, "Wednesday", exfn), + ImmutableList.of("0", "1", "2", "3", "4", "5") + ); + } + + private void assertFilterMatches( + final DimFilter filter, + final List expectedRows + ) + { + Assert.assertEquals(filter.toString(), expectedRows, selectColumnValuesMatchingFilter(filter, "dim0")); + Assert.assertEquals(filter.toString(), expectedRows.size(), selectCountUsingFilteredAggregator(filter)); + } +}