From d3f5bddc5cb30dade742f278c1c00e65b2e9c2e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20L=C3=A9aut=C3=A9?= Date: Thu, 26 Feb 2015 17:13:07 -0800 Subject: [PATCH] Add ability to apply extraction functions to the time dimension - Moves DimExtractionFn under a more generic ExtractionFn interface to support extracting dimension values other than strings - pushes down extractionFn to the storage adapter from query engine - 'dimExtractionFn' parameter has been deprecated in favor of 'extractionFn' - adds a TimeFormatExtractionFn, allowing to project the '__time' dimension - JavascriptDimExtractionFn renamed to JavascriptExtractionFn, adding support for any dimension value types that map directly to Javascript - update documentation for time column extraction and related changes --- docs/content/DimensionSpecs.md | 120 +++++++++-- docs/content/Granularities.md | 1 + .../IngestSegmentFirehoseFactory.java | 2 +- .../CardinalityAggregatorFactory.java | 2 +- .../query/dimension/DefaultDimensionSpec.java | 4 +- .../druid/query/dimension/DimensionSpec.java | 4 +- .../dimension/ExtractionDimensionSpec.java | 28 ++- .../query/extraction/DimExtractionFn.java | 88 +++----- .../druid/query/extraction/ExtractionFn.java | 78 +++++++ ...ionFn.java => JavascriptExtractionFn.java} | 33 +-- .../extraction/MatchingDimExtractionFn.java | 2 +- .../extraction/RegexDimExtractionFn.java | 2 +- .../SearchQuerySpecDimExtractionFn.java | 2 +- .../query/extraction/TimeDimExtractionFn.java | 2 +- .../extraction/TimeFormatExtractionFn.java | 113 +++++++++++ .../query/filter/ExtractionDimFilter.java | 18 +- .../query/groupby/GroupByQueryEngine.java | 18 +- .../druid/query/search/SearchQueryRunner.java | 3 +- .../druid/query/select/SelectQueryEngine.java | 3 +- .../topn/DimExtractionTopNAlgorithm.java | 17 +- .../topn/TimeExtractionTopNAlgorithm.java | 139 +++++++++++++ .../query/topn/TopNAlgorithmSelector.java | 10 +- .../java/io/druid/query/topn/TopNMapFn.java | 5 +- .../java/io/druid/query/topn/TopNQuery.java | 4 +- .../io/druid/query/topn/TopNQueryEngine.java | 20 +- .../druid/segment/ColumnSelectorFactory.java | 6 +- .../segment/QueryableIndexStorageAdapter.java | 29 ++- .../java/io/druid/segment/StorageAdapter.java | 11 +- .../segment/filter/ExtractionFilter.java | 6 +- .../java/io/druid/segment/filter/Filters.java | 2 +- .../druid/segment/filter/SelectorFilter.java | 2 +- .../segment/incremental/IncrementalIndex.java | 9 +- .../IncrementalIndexStorageAdapter.java | 18 +- .../aggregation/FilteredAggregatorTest.java | 3 +- .../TimeFormatExtractionFnTest.java | 98 +++++++++ ...t.java => JavascriptExtractionFnTest.java} | 45 +++- .../MatchingDimExtractionFnTest.java | 6 +- .../extraction/RegexDimExtractionFnTest.java | 14 +- .../SearchQuerySpecDimExtractionFnTest.java | 6 +- .../extraction/TimeDimExtractionFnTest.java | 10 +- .../query/groupby/GroupByQueryRunnerTest.java | 81 +++++++- .../druid/query/topn/TopNQueryRunnerTest.java | 192 ++++++++++++++++-- .../filter/ExtractionDimFilterTest.java | 8 +- .../IncrementalIndexStorageAdapterTest.java | 4 +- 44 files changed, 1037 insertions(+), 231 deletions(-) create mode 100644 processing/src/main/java/io/druid/query/extraction/ExtractionFn.java rename processing/src/main/java/io/druid/query/extraction/{JavascriptDimExtractionFn.java => JavascriptExtractionFn.java} (80%) create mode 100644 processing/src/main/java/io/druid/query/extraction/TimeFormatExtractionFn.java create mode 100644 processing/src/main/java/io/druid/query/topn/TimeExtractionTopNAlgorithm.java create 
mode 100644 processing/src/test/java/io/druid/query/extraction/TimeFormatExtractionFnTest.java rename processing/src/test/java/io/druid/query/extraction/extraction/{JavascriptDimExtractionFnTest.java => JavascriptExtractionFnTest.java} (89%) diff --git a/docs/content/DimensionSpecs.md b/docs/content/DimensionSpecs.md index 690324df456..c4705913480 100644 --- a/docs/content/DimensionSpecs.md +++ b/docs/content/DimensionSpecs.md @@ -1,14 +1,16 @@ --- layout: doc_page --- + # Transforming Dimension Values -The following JSON fields can be used in a query to operate on dimension values. + +The following JSON fields can be used in a query to operate on dimension values. ## DimensionSpec `DimensionSpec`s define how dimension values get transformed prior to aggregation. -### DefaultDimensionSpec +### Default DimensionSpec Returns dimension values as is and optionally renames the dimension. @@ -16,63 +18,139 @@ Returns dimension values as is and optionally renames the dimension. { "type" : "default", "dimension" : <dimension>, "outputName": <output_name> } ``` -### ExtractionDimensionSpec +### Extraction DimensionSpec -Returns dimension values transformed using the given [DimExtractionFn](#toc_3) +Returns dimension values transformed using the given [extraction function](#extraction-functions). ```json { "type" : "extraction", "dimension" : <dimension>, "outputName" : <output_name>, - "dimExtractionFn" : <extraction_function> + "extractionFn" : <extraction_function> } ``` -## DimExtractionFn +## Extraction Functions -`DimExtractionFn`s define the transformation applied to each dimension value +Extraction functions define the transformation applied to each dimension value. -### RegexDimExtractionFn +Transformations can be applied both to regular (string) dimensions and to +the special `__time` dimension, which represents the current time bucket +according to the query [aggregation granularity](Granularities.html). -Returns the first group matched by the given regular expression. If there is no match it returns the dimension value as is. +**Note**: for functions taking string values (such as regular expressions), +`__time` dimension values will be formatted in [ISO-8601 format](https://en.wikipedia.org/wiki/ISO_8601) +before being passed to the extraction function. + +### Regular Expression Extraction Function + +Returns the first matching group for the given regular expression. +If there is no match, it returns the dimension value as is. ```json -{ "type" : "regex", "expr", <regular_expression> } +{ "type" : "regex", "expr" : <regular_expression> } ``` -### PartialDimExtractionFn +For example, using `"expr" : "(\\w\\w\\w).*"` will transform +`'Monday'`, `'Tuesday'`, `'Wednesday'` into `'Mon'`, `'Tue'`, `'Wed'`. -Returns the dimension value as is if there is a match, otherwise returns null. +### Partial Extraction Function + +Returns the dimension value unchanged if the regular expression matches, otherwise returns null. ```json -{ "type" : "partial", "expr", <regular_expression> } +{ "type" : "partial", "expr" : <regular_expression> } ``` -### SearchQuerySpecDimExtractionFn +### Search Query Extraction Function -Returns the dimension value as is if the given [SearchQuerySpec](SearchQuerySpec.html) matches, otherwise returns null. +Returns the dimension value unchanged if the given [`SearchQuerySpec`](SearchQuerySpec.html) +matches, otherwise returns null. ```json { "type" : "searchQuery", "query" : <search_query_spec> } ``` -### TimeDimExtractionFn +### Time Format Extraction Function -Parses dimension values as timestamps using the given input format, and returns them formatted using the given output format. 
Time formats follow the [com.ibm.icu.text.SimpleDateFormat](http://icu-project.org/apiref/icu4j/com/ibm/icu/text/SimpleDateFormat.html) format +Returns the dimension value formatted according to the given format string, time zone, and locale. + +For `__time` dimension values, this formats the time value bucketed by the +[aggregation granularity](Granularities.html). + +For a regular dimension, it assumes the string is formatted in +[ISO-8601 date and time format](https://en.wikipedia.org/wiki/ISO_8601). + +* `format` : date time format for the resulting dimension value, in [Joda Time DateTimeFormat](http://www.joda.org/joda-time/apidocs/org/joda/time/format/DateTimeFormat.html). +* `locale` : locale (language and country) to use, given as an [IETF BCP 47 language tag](http://www.oracle.com/technetwork/java/javase/java8locales-2095355.html#util-text), e.g. `en-US`, `en-GB`, `fr-FR`, `fr-CA`, etc. +* `timeZone` : time zone to use in [IANA tz database format](http://en.wikipedia.org/wiki/List_of_tz_database_time_zones), e.g. `Europe/Berlin` (this can be different from the aggregation time zone) ```json -{ "type" : "time", "timeFormat" : <input_format>, "resultFormat" : <output_format> } +{ "type" : "timeFormat", + "format" : <output_format>, + "timeZone" : <time_zone> (optional), + "locale" : <locale> (optional) } ``` -### JavascriptDimExtractionFn +For example, the following dimension spec returns the day of the week for Montréal in French: -Returns the dimension value as transformed by the given JavaScript function. +```json +{ + "type" : "extraction", + "dimension" : "__time", + "outputName" : "dayOfWeek", + "extractionFn" : { + "type" : "timeFormat", + "format" : "EEEE", + "timeZone" : "America/Montreal", + "locale" : "fr" + } +} +``` -Example +### Time Parsing Extraction Function + +Parses dimension values as timestamps using the given input format, +and returns them formatted using the given output format. + +Note: if you are working with the `__time` dimension, you should consider using the +[time format extraction function](#time-format-extraction-function) instead, +which works on time values directly as opposed to string values. + +Time formats are described in the +[SimpleDateFormat documentation](http://icu-project.org/apiref/icu4j/com/ibm/icu/text/SimpleDateFormat.html). + +```json +{ "type" : "time", + "timeFormat" : <input_format>, + "resultFormat" : <output_format> } +``` + + +### Javascript Extraction Function + +Returns the dimension value as transformed by the given JavaScript function. + +For regular dimensions, the input value is passed as a string. + +For the `__time` dimension, the input value is passed as a number +representing the number of milliseconds since January 1, 1970 UTC. + +Example for a regular dimension: ```json { "type" : "javascript", "function" : "function(str) { return str.substr(0, 3); }" } +``` + +Example for the `__time` dimension: + +```json +{ + "type" : "javascript", + "function" : "function(t) { return 'Second ' + Math.floor((t % 60000) / 1000); }" +} +``` diff --git a/docs/content/Granularities.md b/docs/content/Granularities.md index 162d22bf5c4..c2c6bdbb698 100644 --- a/docs/content/Granularities.md +++ b/docs/content/Granularities.md @@ -1,6 +1,7 @@ --- layout: doc_page --- + # Aggregation Granularity The granularity field determines how data gets bucketed across the time dimension, or how it gets aggregated by hour, day, minute, etc. 
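To show where the new `extractionFn` field sits in a full query, here is a minimal sketch of a groupBy query built around the `dayOfWeek` dimension spec documented above. The `sample_datasource` name, the `longSum` aggregator over a `count` column, the interval, and the `day` granularity are illustrative assumptions, not part of this patch.

```json
{
  "queryType"   : "groupBy",
  "dataSource"  : "sample_datasource",
  "granularity" : "day",
  "dimensions"  : [
    {
      "type" : "extraction",
      "dimension" : "__time",
      "outputName" : "dayOfWeek",
      "extractionFn" : {
        "type" : "timeFormat",
        "format" : "EEEE",
        "timeZone" : "America/Montreal",
        "locale" : "fr"
      }
    }
  ],
  "aggregations" : [
    { "type" : "longSum", "name" : "edits", "fieldName" : "count" }
  ],
  "intervals" : [ "2015-01-01/2015-01-08" ]
}
```

With a `day` query granularity, each daily time bucket should come back labeled with its French weekday name, matching the behavior described for the `__time` dimension above.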
diff --git a/indexing-service/src/main/java/io/druid/indexing/firehose/IngestSegmentFirehoseFactory.java b/indexing-service/src/main/java/io/druid/indexing/firehose/IngestSegmentFirehoseFactory.java index 6eaa1760b0b..a400a00785d 100644 --- a/indexing-service/src/main/java/io/druid/indexing/firehose/IngestSegmentFirehoseFactory.java +++ b/indexing-service/src/main/java/io/druid/indexing/firehose/IngestSegmentFirehoseFactory.java @@ -256,7 +256,7 @@ public class IngestSegmentFirehoseFactory implements FirehoseFactory dimSelectors = Maps.newHashMap(); for (String dim : dims) { - final DimensionSelector dimSelector = cursor.makeDimensionSelector(dim); + final DimensionSelector dimSelector = cursor.makeDimensionSelector(dim, null); // dimSelector is null if the dimension is not present if (dimSelector != null) { dimSelectors.put(dim, dimSelector); diff --git a/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorFactory.java b/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorFactory.java index 1018f6734f7..289120cf976 100644 --- a/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorFactory.java +++ b/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorFactory.java @@ -105,7 +105,7 @@ public class CardinalityAggregatorFactory implements AggregatorFactory @Override public DimensionSelector apply(@Nullable String input) { - return columnFactory.makeDimensionSelector(input); + return columnFactory.makeDimensionSelector(input, null); } } ), Predicates.notNull() diff --git a/processing/src/main/java/io/druid/query/dimension/DefaultDimensionSpec.java b/processing/src/main/java/io/druid/query/dimension/DefaultDimensionSpec.java index 3d08ad817b3..4392ca0ca92 100644 --- a/processing/src/main/java/io/druid/query/dimension/DefaultDimensionSpec.java +++ b/processing/src/main/java/io/druid/query/dimension/DefaultDimensionSpec.java @@ -20,7 +20,7 @@ package io.druid.query.dimension; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.metamx.common.StringUtils; -import io.druid.query.extraction.DimExtractionFn; +import io.druid.query.extraction.ExtractionFn; import java.nio.ByteBuffer; @@ -59,7 +59,7 @@ public class DefaultDimensionSpec implements DimensionSpec } @Override - public DimExtractionFn getDimExtractionFn() + public ExtractionFn getExtractionFn() { return null; } diff --git a/processing/src/main/java/io/druid/query/dimension/DimensionSpec.java b/processing/src/main/java/io/druid/query/dimension/DimensionSpec.java index 184573a72c7..a6d556e5d0a 100644 --- a/processing/src/main/java/io/druid/query/dimension/DimensionSpec.java +++ b/processing/src/main/java/io/druid/query/dimension/DimensionSpec.java @@ -19,7 +19,7 @@ package io.druid.query.dimension; import com.fasterxml.jackson.annotation.JsonSubTypes; import com.fasterxml.jackson.annotation.JsonTypeInfo; -import io.druid.query.extraction.DimExtractionFn; +import io.druid.query.extraction.ExtractionFn; /** */ @@ -34,7 +34,7 @@ public interface DimensionSpec public String getOutputName(); - public DimExtractionFn getDimExtractionFn(); + public ExtractionFn getExtractionFn(); public byte[] getCacheKey(); diff --git a/processing/src/main/java/io/druid/query/dimension/ExtractionDimensionSpec.java b/processing/src/main/java/io/druid/query/dimension/ExtractionDimensionSpec.java index e95b3e25f5e..ec186dca248 100644 --- 
a/processing/src/main/java/io/druid/query/dimension/ExtractionDimensionSpec.java +++ b/processing/src/main/java/io/druid/query/dimension/ExtractionDimensionSpec.java @@ -19,8 +19,9 @@ package io.druid.query.dimension; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Preconditions; import com.metamx.common.StringUtils; -import io.druid.query.extraction.DimExtractionFn; +import io.druid.query.extraction.ExtractionFn; import java.nio.ByteBuffer; @@ -31,18 +32,23 @@ public class ExtractionDimensionSpec implements DimensionSpec private static final byte CACHE_TYPE_ID = 0x1; private final String dimension; - private final DimExtractionFn dimExtractionFn; + private final ExtractionFn extractionFn; private final String outputName; @JsonCreator public ExtractionDimensionSpec( @JsonProperty("dimension") String dimension, @JsonProperty("outputName") String outputName, - @JsonProperty("dimExtractionFn") DimExtractionFn dimExtractionFn + @JsonProperty("extractionFn") ExtractionFn extractionFn, + // for backwards compatibility + @Deprecated @JsonProperty("dimExtractionFn") ExtractionFn dimExtractionFn ) { + Preconditions.checkNotNull(dimension, "dimension must not be null"); + Preconditions.checkArgument(extractionFn != null || dimExtractionFn != null, "extractionFn must not be null"); + this.dimension = dimension; - this.dimExtractionFn = dimExtractionFn; + this.extractionFn = extractionFn != null ? extractionFn : dimExtractionFn; // Do null check for backwards compatibility this.outputName = outputName == null ? dimension : outputName; @@ -64,16 +70,16 @@ public class ExtractionDimensionSpec implements DimensionSpec @Override @JsonProperty - public DimExtractionFn getDimExtractionFn() + public ExtractionFn getExtractionFn() { - return dimExtractionFn; + return extractionFn; } @Override public byte[] getCacheKey() { byte[] dimensionBytes = StringUtils.toUtf8(dimension); - byte[] dimExtractionFnBytes = dimExtractionFn.getCacheKey(); + byte[] dimExtractionFnBytes = extractionFn.getCacheKey(); return ByteBuffer.allocate(1 + dimensionBytes.length + dimExtractionFnBytes.length) .put(CACHE_TYPE_ID) @@ -85,7 +91,7 @@ public class ExtractionDimensionSpec implements DimensionSpec @Override public boolean preservesOrdering() { - return dimExtractionFn.preservesOrdering(); + return extractionFn.preservesOrdering(); } @Override @@ -93,7 +99,7 @@ public class ExtractionDimensionSpec implements DimensionSpec { return "ExtractionDimensionSpec{" + "dimension='" + dimension + '\'' + - ", dimExtractionFn=" + dimExtractionFn + + ", extractionFn=" + extractionFn + ", outputName='" + outputName + '\'' + '}'; } @@ -106,7 +112,7 @@ public class ExtractionDimensionSpec implements DimensionSpec ExtractionDimensionSpec that = (ExtractionDimensionSpec) o; - if (dimExtractionFn != null ? !dimExtractionFn.equals(that.dimExtractionFn) : that.dimExtractionFn != null) + if (extractionFn != null ? !extractionFn.equals(that.extractionFn) : that.extractionFn != null) return false; if (dimension != null ? !dimension.equals(that.dimension) : that.dimension != null) return false; if (outputName != null ? !outputName.equals(that.outputName) : that.outputName != null) return false; @@ -118,7 +124,7 @@ public class ExtractionDimensionSpec implements DimensionSpec public int hashCode() { int result = dimension != null ? dimension.hashCode() : 0; - result = 31 * result + (dimExtractionFn != null ? 
dimExtractionFn.hashCode() : 0); + result = 31 * result + (extractionFn != null ? extractionFn.hashCode() : 0); result = 31 * result + (outputName != null ? outputName.hashCode() : 0); return result; } diff --git a/processing/src/main/java/io/druid/query/extraction/DimExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/DimExtractionFn.java index e6df2e75a9d..a3752f9b4f6 100644 --- a/processing/src/main/java/io/druid/query/extraction/DimExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/DimExtractionFn.java @@ -1,73 +1,35 @@ /* - * Druid - a distributed column store. - * Copyright 2012 - 2015 Metamarkets Group Inc. + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ package io.druid.query.extraction; -import com.fasterxml.jackson.annotation.JsonSubTypes; -import com.fasterxml.jackson.annotation.JsonTypeInfo; - -/** - */ -@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type") -@JsonSubTypes(value = { - @JsonSubTypes.Type(name = "time", value = TimeDimExtractionFn.class), - @JsonSubTypes.Type(name = "regex", value = RegexDimExtractionFn.class), - @JsonSubTypes.Type(name = "partial", value = MatchingDimExtractionFn.class), - @JsonSubTypes.Type(name = "searchQuery", value = SearchQuerySpecDimExtractionFn.class), - @JsonSubTypes.Type(name = "javascript", value = JavascriptDimExtractionFn.class) -}) -/** - * A DimExtractionFn is a function that can be used to modify the values of a dimension column. - * - * A simple example of the type of operation this enables is the RegexDimExtractionFn which applies a - * regular expression with a capture group. When the regular expression matches the value of a dimension, - * the value captured by the group is used for grouping operations instead of the dimension value. - */ -public interface DimExtractionFn +public abstract class DimExtractionFn implements ExtractionFn { - /** - * Returns a byte[] unique to all concrete implementations of DimExtractionFn. This byte[] is used to - * generate a cache key for the specific query. 
- * - * @return a byte[] unit to all concrete implements of DimExtractionFn - */ - public byte[] getCacheKey(); + @Override + public String apply(Object value) + { + return apply(value.toString()); + } - /** - * The "extraction" function. This should map a dimension value into some other value. - * - * In order to maintain the "null and empty string are equivalent" semantics that Druid provides, the - * empty string is considered invalid output for this method and should instead return null. This is - * a contract on the method rather than enforced at a lower level in order to eliminate a global check - * for extraction functions that do not already need one. - * - * - * @param dimValue the original value of the dimension - * @return a value that should be used instead of the original - */ - public String apply(String dimValue); - - /** - * Offers information on whether the extraction will preserve the original ordering of the values. - * - * Some optimizations of queries is possible if ordering is preserved. Null values *do* count towards - * ordering. - * - * @return true if ordering is preserved, false otherwise - */ - public boolean preservesOrdering(); + @Override + public String apply(long value) + { + return apply(Long.toString(value)); + } } diff --git a/processing/src/main/java/io/druid/query/extraction/ExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/ExtractionFn.java new file mode 100644 index 00000000000..de6e047176e --- /dev/null +++ b/processing/src/main/java/io/druid/query/extraction/ExtractionFn.java @@ -0,0 +1,78 @@ +/* + * Druid - a distributed column store. + * Copyright 2012 - 2015 Metamarkets Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.druid.query.extraction; + +import com.fasterxml.jackson.annotation.JsonSubTypes; +import com.fasterxml.jackson.annotation.JsonTypeInfo; + +/** + */ +@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type") +@JsonSubTypes(value = { + @JsonSubTypes.Type(name = "time", value = TimeDimExtractionFn.class), + @JsonSubTypes.Type(name = "regex", value = RegexDimExtractionFn.class), + @JsonSubTypes.Type(name = "partial", value = MatchingDimExtractionFn.class), + @JsonSubTypes.Type(name = "searchQuery", value = SearchQuerySpecDimExtractionFn.class), + @JsonSubTypes.Type(name = "javascript", value = JavascriptExtractionFn.class), + @JsonSubTypes.Type(name = "timeFormat", value = TimeFormatExtractionFn.class) +}) +/** + * An ExtractionFn is a function that can be used to transform the values of a column (typically a dimension) + * + * A simple example of the type of operation this enables is the RegexDimExtractionFn which applies a + * regular expression with a capture group. When the regular expression matches the value of a dimension, + * the value captured by the group is used for grouping operations instead of the dimension value. + */ +public interface ExtractionFn +{ + /** + * Returns a byte[] unique to all concrete implementations of DimExtractionFn. 
This byte[] is used to + * generate a cache key for the specific query. + * + * @return a byte[] unit to all concrete implements of DimExtractionFn + */ + public byte[] getCacheKey(); + + /** + * The "extraction" function. This should map a value into some other String value. + * + * In order to maintain the "null and empty string are equivalent" semantics that Druid provides, the + * empty string is considered invalid output for this method and should instead return null. This is + * a contract on the method rather than enforced at a lower level in order to eliminate a global check + * for extraction functions that do not already need one. + * + * + * @param value the original value of the dimension + * @return a value that should be used instead of the original + */ + public String apply(Object value); + + public String apply(String value); + + public String apply(long value); + + /** + * Offers information on whether the extraction will preserve the original ordering of the values. + * + * Some optimizations of queries is possible if ordering is preserved. Null values *do* count towards + * ordering. + * + * @return true if ordering is preserved, false otherwise + */ + public boolean preservesOrdering(); +} diff --git a/processing/src/main/java/io/druid/query/extraction/JavascriptDimExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/JavascriptExtractionFn.java similarity index 80% rename from processing/src/main/java/io/druid/query/extraction/JavascriptDimExtractionFn.java rename to processing/src/main/java/io/druid/query/extraction/JavascriptExtractionFn.java index 1e7a1bb0e41..8c698b95053 100644 --- a/processing/src/main/java/io/druid/query/extraction/JavascriptDimExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/JavascriptExtractionFn.java @@ -22,17 +22,15 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Function; import com.google.common.base.Strings; import com.metamx.common.StringUtils; -import com.google.common.base.Strings; -import com.metamx.common.StringUtils; import org.mozilla.javascript.Context; import org.mozilla.javascript.ContextFactory; import org.mozilla.javascript.ScriptableObject; import java.nio.ByteBuffer; -public class JavascriptDimExtractionFn implements DimExtractionFn +public class JavascriptExtractionFn implements ExtractionFn { - private static Function compile(String function) { + private static Function compile(String function) { final ContextFactory contextFactory = ContextFactory.getGlobal(); final Context context = contextFactory.enterContext(); context.setOptimizationLevel(9); @@ -43,9 +41,9 @@ public class JavascriptDimExtractionFn implements DimExtractionFn Context.exit(); - return new Function() + return new Function() { - public String apply(String input) + public String apply(Object input) { // ideally we need a close() function to discard the context once it is not used anymore Context cx = Context.getCurrentContext(); @@ -53,7 +51,7 @@ public class JavascriptDimExtractionFn implements DimExtractionFn cx = contextFactory.enterContext(); } - final Object res = fn.call(cx, scope, scope, new String[]{input}); + final Object res = fn.call(cx, scope, scope, new Object[]{input}); return res != null ? 
Context.toString(res) : null; } }; @@ -62,10 +60,10 @@ public class JavascriptDimExtractionFn implements DimExtractionFn private static final byte CACHE_TYPE_ID = 0x4; private final String function; - private final Function fn; + private final Function fn; @JsonCreator - public JavascriptDimExtractionFn( + public JavascriptExtractionFn( @JsonProperty("function") String function ) { @@ -90,10 +88,21 @@ public class JavascriptDimExtractionFn implements DimExtractionFn } @Override - public String apply(String dimValue) + public String apply(Object value) { - String retVal = fn.apply(dimValue); - return Strings.isNullOrEmpty(retVal) ? null : retVal; + return Strings.emptyToNull(fn.apply(value)); + } + + @Override + public String apply(String value) + { + return this.apply((Object)value); + } + + @Override + public String apply(long value) + { + return this.apply((Long)value); } @Override diff --git a/processing/src/main/java/io/druid/query/extraction/MatchingDimExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/MatchingDimExtractionFn.java index 655e76faf45..07f4c38413c 100644 --- a/processing/src/main/java/io/druid/query/extraction/MatchingDimExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/MatchingDimExtractionFn.java @@ -27,7 +27,7 @@ import java.util.regex.Pattern; /** */ -public class MatchingDimExtractionFn implements DimExtractionFn +public class MatchingDimExtractionFn extends DimExtractionFn { private static final byte CACHE_TYPE_ID = 0x2; diff --git a/processing/src/main/java/io/druid/query/extraction/RegexDimExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/RegexDimExtractionFn.java index 7695190fa66..08ff946fabd 100644 --- a/processing/src/main/java/io/druid/query/extraction/RegexDimExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/RegexDimExtractionFn.java @@ -27,7 +27,7 @@ import java.util.regex.Pattern; /** */ -public class RegexDimExtractionFn implements DimExtractionFn +public class RegexDimExtractionFn extends DimExtractionFn { private static final byte CACHE_TYPE_ID = 0x1; diff --git a/processing/src/main/java/io/druid/query/extraction/SearchQuerySpecDimExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/SearchQuerySpecDimExtractionFn.java index 3cdf761cbb1..9d8fe3f8e03 100644 --- a/processing/src/main/java/io/druid/query/extraction/SearchQuerySpecDimExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/SearchQuerySpecDimExtractionFn.java @@ -25,7 +25,7 @@ import java.nio.ByteBuffer; /** */ -public class SearchQuerySpecDimExtractionFn implements DimExtractionFn +public class SearchQuerySpecDimExtractionFn extends DimExtractionFn { private static final byte CACHE_TYPE_ID = 0x3; diff --git a/processing/src/main/java/io/druid/query/extraction/TimeDimExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/TimeDimExtractionFn.java index 4363b1bf004..c11cca2f4e1 100644 --- a/processing/src/main/java/io/druid/query/extraction/TimeDimExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/TimeDimExtractionFn.java @@ -28,7 +28,7 @@ import java.util.Date; /** */ -public class TimeDimExtractionFn implements DimExtractionFn +public class TimeDimExtractionFn extends DimExtractionFn { private static final byte CACHE_TYPE_ID = 0x0; diff --git a/processing/src/main/java/io/druid/query/extraction/TimeFormatExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/TimeFormatExtractionFn.java new file mode 100644 
index 00000000000..1c1a16fde92 --- /dev/null +++ b/processing/src/main/java/io/druid/query/extraction/TimeFormatExtractionFn.java @@ -0,0 +1,113 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.query.extraction; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Preconditions; +import com.metamx.common.StringUtils; +import org.joda.time.DateTime; +import org.joda.time.DateTimeZone; +import org.joda.time.format.DateTimeFormat; +import org.joda.time.format.DateTimeFormatter; + +import java.nio.ByteBuffer; +import java.util.Locale; + +public class TimeFormatExtractionFn implements ExtractionFn +{ + private static final byte CACHE_TYPE_ID = 0x5; + + private final DateTimeZone tz; + private final String pattern; + private final Locale locale; + private final DateTimeFormatter formatter; + + public TimeFormatExtractionFn( + @JsonProperty("format") String pattern, + @JsonProperty("timeZone") DateTimeZone tz, + @JsonProperty("locale") String localeString + ) + { + Preconditions.checkArgument(pattern != null, "format cannot be null"); + + this.pattern = pattern; + this.tz = tz; + this.locale = localeString == null ? null : Locale.forLanguageTag(localeString); + this.formatter = DateTimeFormat.forPattern(pattern) + .withZone(tz == null ? 
DateTimeZone.UTC : tz) + .withLocale(locale); + } + + @JsonProperty + public DateTimeZone getTimeZone() + { + return tz; + } + + @JsonProperty + public String getFormat() + { + return pattern; + } + + @JsonProperty + public String getLocale() + { + if (locale != null) { + return locale.toLanguageTag(); + } else { + return null; + } + } + + @Override + public byte[] getCacheKey() + { + byte[] exprBytes = StringUtils.toUtf8(pattern + "\u0001" + tz.getID() + "\u0001" + locale.toLanguageTag()); + return ByteBuffer.allocate(1 + exprBytes.length) + .put(CACHE_TYPE_ID) + .put(exprBytes) + .array(); + } + + @Override + public String apply(long value) + { + return formatter.print(value); + } + + @Override + public String apply(Object value) + { + return formatter.print(new DateTime(value)); + } + + @Override + public String apply(String value) + { + return apply((Object)value); + } + + @Override + public boolean preservesOrdering() + { + return false; + } +} diff --git a/processing/src/main/java/io/druid/query/filter/ExtractionDimFilter.java b/processing/src/main/java/io/druid/query/filter/ExtractionDimFilter.java index 859732a01f7..7de7d51c8db 100644 --- a/processing/src/main/java/io/druid/query/filter/ExtractionDimFilter.java +++ b/processing/src/main/java/io/druid/query/filter/ExtractionDimFilter.java @@ -21,7 +21,7 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Preconditions; import com.metamx.common.StringUtils; -import io.druid.query.extraction.DimExtractionFn; +import io.druid.query.extraction.ExtractionFn; import java.nio.ByteBuffer; @@ -31,22 +31,24 @@ public class ExtractionDimFilter implements DimFilter { private final String dimension; private final String value; - private final DimExtractionFn dimExtractionFn; + private final ExtractionFn extractionFn; @JsonCreator public ExtractionDimFilter( @JsonProperty("dimension") String dimension, @JsonProperty("value") String value, - @JsonProperty("dimExtractionFn") DimExtractionFn dimExtractionFn + @JsonProperty("extractionFn") ExtractionFn extractionFn, + // for backwards compatibility + @Deprecated @JsonProperty("dimExtractionFn") ExtractionFn dimExtractionFn ) { Preconditions.checkArgument(dimension != null, "dimension must not be null"); Preconditions.checkArgument(value != null, "value must not be null"); - Preconditions.checkArgument(dimExtractionFn != null, "extraction function must not be null"); + Preconditions.checkArgument(extractionFn != null || dimExtractionFn != null, "extraction function must not be null"); this.dimension = dimension; this.value = value; - this.dimExtractionFn = dimExtractionFn; + this.extractionFn = extractionFn != null ? 
extractionFn : dimExtractionFn; } @JsonProperty @@ -62,9 +64,9 @@ public class ExtractionDimFilter implements DimFilter } @JsonProperty - public DimExtractionFn getDimExtractionFn() + public ExtractionFn getExtractionFn() { - return dimExtractionFn; + return extractionFn; } @Override @@ -83,6 +85,6 @@ public class ExtractionDimFilter implements DimFilter @Override public String toString() { - return String.format("%s(%s) = %s", dimExtractionFn, dimension, value); + return String.format("%s(%s) = %s", extractionFn, dimension, value); } } diff --git a/processing/src/main/java/io/druid/query/groupby/GroupByQueryEngine.java b/processing/src/main/java/io/druid/query/groupby/GroupByQueryEngine.java index f5f65425b69..84f877a8e0e 100644 --- a/processing/src/main/java/io/druid/query/groupby/GroupByQueryEngine.java +++ b/processing/src/main/java/io/druid/query/groupby/GroupByQueryEngine.java @@ -42,10 +42,11 @@ import io.druid.query.aggregation.AggregatorFactory; import io.druid.query.aggregation.BufferAggregator; import io.druid.query.aggregation.PostAggregator; import io.druid.query.dimension.DimensionSpec; -import io.druid.query.extraction.DimExtractionFn; +import io.druid.query.extraction.ExtractionFn; import io.druid.segment.Cursor; import io.druid.segment.DimensionSelector; import io.druid.segment.StorageAdapter; +import io.druid.segment.column.Column; import io.druid.segment.data.IndexedInts; import io.druid.segment.filter.Filters; import org.joda.time.DateTime; @@ -300,7 +301,7 @@ public class GroupByQueryEngine private List unprocessedKeys; private Iterator delegate; - public RowIterator(GroupByQuery query, Cursor cursor, ByteBuffer metricsBuffer, GroupByQueryConfig config) + public RowIterator(GroupByQuery query, final Cursor cursor, ByteBuffer metricsBuffer, GroupByQueryConfig config) { this.query = query; this.cursor = cursor; @@ -312,9 +313,13 @@ public class GroupByQueryEngine dimensionSpecs = query.getDimensions(); dimensions = Lists.newArrayListWithExpectedSize(dimensionSpecs.size()); dimNames = Lists.newArrayListWithExpectedSize(dimensionSpecs.size()); + for (int i = 0; i < dimensionSpecs.size(); ++i) { final DimensionSpec dimSpec = dimensionSpecs.get(i); - final DimensionSelector selector = cursor.makeDimensionSelector(dimSpec.getDimension()); + final DimensionSelector selector = cursor.makeDimensionSelector( + dimSpec.getDimension(), + dimSpec.getExtractionFn() + ); if (selector != null) { dimensions.add(selector); dimNames.add(dimSpec.getOutputName()); @@ -395,14 +400,9 @@ public class GroupByQueryEngine ByteBuffer keyBuffer = input.getKey().duplicate(); for (int i = 0; i < dimensions.size(); ++i) { final DimensionSelector dimSelector = dimensions.get(i); - final DimExtractionFn fn = dimensionSpecs.get(i).getDimExtractionFn(); final int dimVal = keyBuffer.getInt(); if (dimSelector.getValueCardinality() != dimVal) { - if (fn != null) { - theEvent.put(dimNames.get(i), fn.apply(dimSelector.lookupName(dimVal))); - } else { - theEvent.put(dimNames.get(i), dimSelector.lookupName(dimVal)); - } + theEvent.put(dimNames.get(i), dimSelector.lookupName(dimVal)); } } diff --git a/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java b/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java index 5b250e7b067..e6167813154 100644 --- a/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java +++ b/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java @@ -161,7 +161,8 @@ public class SearchQueryRunner implements QueryRunner> Map 
dimSelectors = Maps.newHashMap(); for (String dim : dimsToSearch) { - dimSelectors.put(dim, cursor.makeDimensionSelector(dim)); + // switching to using DimensionSpec for search would allow the use of extractionFn here. + dimSelectors.put(dim, cursor.makeDimensionSelector(dim, null)); } while (!cursor.isDone()) { diff --git a/processing/src/main/java/io/druid/query/select/SelectQueryEngine.java b/processing/src/main/java/io/druid/query/select/SelectQueryEngine.java index ab8f526f54b..e9d29cf6aac 100644 --- a/processing/src/main/java/io/druid/query/select/SelectQueryEngine.java +++ b/processing/src/main/java/io/druid/query/select/SelectQueryEngine.java @@ -86,7 +86,8 @@ public class SelectQueryEngine final Map dimSelectors = Maps.newHashMap(); for (String dim : dims) { - final DimensionSelector dimSelector = cursor.makeDimensionSelector(dim); + // switching to using DimensionSpec for select would allow the use of extractionFn here. + final DimensionSelector dimSelector = cursor.makeDimensionSelector(dim, null); dimSelectors.put(dim, dimSelector); } diff --git a/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java b/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java index bf008246132..cacc2815b1f 100644 --- a/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java +++ b/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java @@ -19,6 +19,7 @@ package io.druid.query.topn; import com.google.common.collect.Maps; import io.druid.query.aggregation.Aggregator; +import io.druid.query.extraction.ExtractionFn; import io.druid.segment.Capabilities; import io.druid.segment.Cursor; import io.druid.segment.DimensionSelector; @@ -44,7 +45,8 @@ public class DimExtractionTopNAlgorithm extends BaseTopNAlgorithm, TopNParams> +{ + public static final int[] EMPTY_INTS = new int[]{}; + private final TopNQuery query; + + public TimeExtractionTopNAlgorithm(Capabilities capabilities, TopNQuery query) + { + super(capabilities); + this.query = query; + } + + + @Override + public TopNParams makeInitParams(DimensionSelector dimSelector, Cursor cursor) + { + return new TopNParams( + dimSelector, + cursor, + dimSelector.getValueCardinality(), + Integer.MAX_VALUE + ); + } + + @Override + protected int[] makeDimValSelector(TopNParams params, int numProcessed, int numToProcess) + { + return EMPTY_INTS; + } + + @Override + protected int[] updateDimValSelector(int[] dimValSelector, int numProcessed, int numToProcess) + { + return dimValSelector; + } + + @Override + protected Map makeDimValAggregateStore(TopNParams params) + { + return Maps.newHashMap(); + } + + @Override + protected void scanAndAggregate( + TopNParams params, int[] dimValSelector, Map aggregatesStore, int numProcessed + ) + { + final Cursor cursor = params.getCursor(); + final DimensionSelector dimSelector = params.getDimSelector(); + + while (!cursor.isDone()) { + final String key = dimSelector.lookupName(dimSelector.getRow().get(0)); + + Aggregator[] theAggregators = aggregatesStore.get(key); + if (theAggregators == null) { + theAggregators = makeAggregators(cursor, query.getAggregatorSpecs()); + aggregatesStore.put(key, theAggregators); + } + + for (Aggregator aggregator : theAggregators) { + aggregator.aggregate(); + } + + cursor.advance(); + } + } + + @Override + protected void updateResults( + TopNParams params, + int[] dimValSelector, + Map aggregatesStore, + TopNResultBuilder resultBuilder + ) + { + for (Map.Entry entry : aggregatesStore.entrySet()) { + 
Aggregator[] aggs = entry.getValue(); + if (aggs != null && aggs.length > 0) { + Object[] vals = new Object[aggs.length]; + for (int i = 0; i < aggs.length; i++) { + vals[i] = aggs[i].get(); + } + + resultBuilder.addEntry( + entry.getKey(), + entry.getKey(), + vals + ); + } + } + } + + @Override + protected void closeAggregators(Map stringMap) + { + for (Aggregator[] aggregators : stringMap.values()) { + for (Aggregator agg : aggregators) { + agg.close(); + } + } + } + + @Override + public void cleanup(TopNParams params) + { + + } +} diff --git a/processing/src/main/java/io/druid/query/topn/TopNAlgorithmSelector.java b/processing/src/main/java/io/druid/query/topn/TopNAlgorithmSelector.java index 8e999da9036..b1d16a6f47a 100644 --- a/processing/src/main/java/io/druid/query/topn/TopNAlgorithmSelector.java +++ b/processing/src/main/java/io/druid/query/topn/TopNAlgorithmSelector.java @@ -24,7 +24,7 @@ public class TopNAlgorithmSelector private final int cardinality; private final int numBytesPerRecord; - private volatile boolean hasDimExtractionFn; + private volatile boolean hasExtractionFn; private volatile boolean aggregateAllMetrics; private volatile boolean aggregateTopNMetricFirst; @@ -34,9 +34,9 @@ public class TopNAlgorithmSelector this.numBytesPerRecord = numBytesPerRecord; } - public void setHasDimExtractionFn(boolean hasDimExtractionFn) + public void setHasExtractionFn(boolean hasExtractionFn) { - this.hasDimExtractionFn = hasDimExtractionFn; + this.hasExtractionFn = hasExtractionFn; } public void setAggregateAllMetrics(boolean aggregateAllMetrics) @@ -53,9 +53,9 @@ public class TopNAlgorithmSelector } } - public boolean isHasDimExtractionFn() + public boolean isHasExtractionFn() { - return hasDimExtractionFn; + return hasExtractionFn; } public boolean isAggregateAllMetrics() diff --git a/processing/src/main/java/io/druid/query/topn/TopNMapFn.java b/processing/src/main/java/io/druid/query/topn/TopNMapFn.java index deefa214224..eff973aa485 100644 --- a/processing/src/main/java/io/druid/query/topn/TopNMapFn.java +++ b/processing/src/main/java/io/druid/query/topn/TopNMapFn.java @@ -41,7 +41,10 @@ public class TopNMapFn implements Function> @SuppressWarnings("unchecked") public Result apply(Cursor cursor) { - final DimensionSelector dimSelector = cursor.makeDimensionSelector(query.getDimensionSpec().getDimension()); + final DimensionSelector dimSelector = cursor.makeDimensionSelector( + query.getDimensionSpec().getDimension(), + query.getDimensionSpec().getExtractionFn() + ); if (dimSelector == null) { return null; } diff --git a/processing/src/main/java/io/druid/query/topn/TopNQuery.java b/processing/src/main/java/io/druid/query/topn/TopNQuery.java index b4c576ce026..4a9d024e0cc 100644 --- a/processing/src/main/java/io/druid/query/topn/TopNQuery.java +++ b/processing/src/main/java/io/druid/query/topn/TopNQuery.java @@ -139,8 +139,8 @@ public class TopNQuery extends BaseQuery> public void initTopNAlgorithmSelector(TopNAlgorithmSelector selector) { - if (dimensionSpec.getDimExtractionFn() != null) { - selector.setHasDimExtractionFn(true); + if (dimensionSpec.getExtractionFn() != null) { + selector.setHasExtractionFn(true); } topNMetricSpec.initTopNAlgorithmSelector(selector); } diff --git a/processing/src/main/java/io/druid/query/topn/TopNQueryEngine.java b/processing/src/main/java/io/druid/query/topn/TopNQueryEngine.java index 15f5a97be64..d1627f2d07e 100644 --- a/processing/src/main/java/io/druid/query/topn/TopNQueryEngine.java +++ 
b/processing/src/main/java/io/druid/query/topn/TopNQueryEngine.java @@ -32,6 +32,7 @@ import io.druid.segment.Capabilities; import io.druid.segment.Cursor; import io.druid.segment.SegmentMissingException; import io.druid.segment.StorageAdapter; +import io.druid.segment.column.Column; import io.druid.segment.filter.Filters; import org.joda.time.Interval; @@ -88,7 +89,10 @@ public class TopNQueryEngine private Function> getMapFn(TopNQuery query, final StorageAdapter adapter) { final Capabilities capabilities = adapter.getCapabilities(); - final int cardinality = adapter.getDimensionCardinality(query.getDimensionSpec().getDimension()); + final String dimension = query.getDimensionSpec().getDimension(); + + final int cardinality = adapter.getDimensionCardinality(dimension); + int numBytesPerRecord = 0; for (AggregatorFactory aggregatorFactory : query.getAggregatorSpecs()) { numBytesPerRecord += aggregatorFactory.getMaxIntermediateSize(); @@ -97,8 +101,18 @@ public class TopNQueryEngine final TopNAlgorithmSelector selector = new TopNAlgorithmSelector(cardinality, numBytesPerRecord); query.initTopNAlgorithmSelector(selector); - TopNAlgorithm topNAlgorithm = null; - if (selector.isHasDimExtractionFn()) { + final TopNAlgorithm topNAlgorithm; + if ( + selector.isHasExtractionFn() && + // TimeExtractionTopNAlgorithm can work on any single-value dimension of type long. + // Once we have arbitrary dimension types following check should be replaced by checking + // that the column is of type long and single-value. + dimension.equals(Column.TIME_COLUMN_NAME) + ) { + // A special TimeExtractionTopNAlgorithm is required, since DimExtractionTopNAlgorithm + // currently relies on the dimension cardinality to support lexicographic sorting + topNAlgorithm = new TimeExtractionTopNAlgorithm(capabilities, query); + } else if(selector.isHasExtractionFn()) { topNAlgorithm = new DimExtractionTopNAlgorithm(capabilities, query); } else if (selector.isAggregateAllMetrics()) { topNAlgorithm = new PooledTopNAlgorithm(capabilities, query, bufferPool); diff --git a/processing/src/main/java/io/druid/segment/ColumnSelectorFactory.java b/processing/src/main/java/io/druid/segment/ColumnSelectorFactory.java index 295157c516a..dfa1cecfb57 100644 --- a/processing/src/main/java/io/druid/segment/ColumnSelectorFactory.java +++ b/processing/src/main/java/io/druid/segment/ColumnSelectorFactory.java @@ -17,12 +17,16 @@ package io.druid.segment; +import io.druid.query.extraction.ExtractionFn; + +import javax.annotation.Nullable; + /** * Factory class for MetricSelectors */ public interface ColumnSelectorFactory { - public DimensionSelector makeDimensionSelector(String dimensionName); + public DimensionSelector makeDimensionSelector(String dimensionName, @Nullable ExtractionFn extractionFn); public FloatColumnSelector makeFloatColumnSelector(String columnName); public LongColumnSelector makeLongColumnSelector(String columnName); public ObjectColumnSelector makeObjectColumnSelector(String columnName); diff --git a/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java b/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java index cd5683773ef..77dd57a6285 100644 --- a/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java +++ b/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java @@ -19,6 +19,7 @@ package io.druid.segment; import com.google.common.base.Function; import com.google.common.base.Predicates; +import com.google.common.base.Strings; import 
com.google.common.collect.Iterators; import com.google.common.collect.Maps; import com.google.common.collect.Sets; @@ -27,6 +28,7 @@ import com.metamx.common.guava.Sequence; import com.metamx.common.guava.Sequences; import io.druid.granularity.QueryGranularity; import io.druid.query.QueryInterruptedException; +import io.druid.query.extraction.ExtractionFn; import io.druid.query.filter.Filter; import io.druid.segment.column.Column; import io.druid.segment.column.ColumnCapabilities; @@ -40,6 +42,7 @@ import io.druid.segment.data.Offset; import org.joda.time.DateTime; import org.joda.time.Interval; +import javax.annotation.Nullable; import java.io.Closeable; import java.io.IOException; import java.util.Iterator; @@ -96,7 +99,7 @@ public class QueryableIndexStorageAdapter implements StorageAdapter return 0; } if (!column.getCapabilities().isDictionaryEncoded()) { - throw new UnsupportedOperationException("Only know cardinality of dictionary encoded columns."); + return Integer.MAX_VALUE; } return column.getDictionaryEncoding().getCardinality(); } @@ -272,14 +275,18 @@ public class QueryableIndexStorageAdapter implements StorageAdapter } @Override - public DimensionSelector makeDimensionSelector(String dimension) + public DimensionSelector makeDimensionSelector(final String dimension, @Nullable final ExtractionFn extractionFn) { - DictionaryEncodedColumn cachedColumn = dictionaryColumnCache.get(dimension); final Column columnDesc = index.getColumn(dimension); if (columnDesc == null) { return NULL_DIMENSION_SELECTOR; } + if (dimension.equals(Column.TIME_COLUMN_NAME)) { + return new SingleScanTimeDimSelector(makeLongColumnSelector(dimension), extractionFn); + } + + DictionaryEncodedColumn cachedColumn = dictionaryColumnCache.get(dimension); if (cachedColumn == null) { cachedColumn = columnDesc.getDictionaryEncoding(); dictionaryColumnCache.put(dimension, cachedColumn); @@ -308,13 +315,18 @@ public class QueryableIndexStorageAdapter implements StorageAdapter @Override public String lookupName(int id) { - final String retVal = column.lookupName(id); - return retVal == null ? "" : retVal; + final String value = column.lookupName(id); + return extractionFn == null ? + Strings.nullToEmpty(value) : + extractionFn.apply(Strings.nullToEmpty(value)); } @Override public int lookupId(String name) { + if (extractionFn != null) { + throw new UnsupportedOperationException("cannot perform lookup when applying an extraction function"); + } return column.lookupId(name); } }; @@ -356,12 +368,16 @@ public class QueryableIndexStorageAdapter implements StorageAdapter @Override public String lookupName(int id) { - return column.lookupName(id); + final String value = column.lookupName(id); + return extractionFn == null ? 
value : extractionFn.apply(value); } @Override public int lookupId(String name) { + if (extractionFn != null) { + throw new UnsupportedOperationException("cannot perform lookup when applying an extraction function"); + } return column.lookupId(name); } }; @@ -702,4 +718,5 @@ public class QueryableIndexStorageAdapter implements StorageAdapter return currentOffset; } } + } diff --git a/processing/src/main/java/io/druid/segment/StorageAdapter.java b/processing/src/main/java/io/druid/segment/StorageAdapter.java index bcb446ed22b..13a4fec8771 100644 --- a/processing/src/main/java/io/druid/segment/StorageAdapter.java +++ b/processing/src/main/java/io/druid/segment/StorageAdapter.java @@ -29,7 +29,16 @@ public interface StorageAdapter extends CursorFactory public Interval getInterval(); public Indexed getAvailableDimensions(); public Iterable getAvailableMetrics(); - public int getDimensionCardinality(String dimension); + + /** + * Returns the number of distinct values for the given dimension column + * For dimensions of unknown cardinality, e.g. __time this currently returns + * Integer.MAX_VALUE + * + * @param column + * @return + */ + public int getDimensionCardinality(String column); public DateTime getMinTime(); public DateTime getMaxTime(); public Capabilities getCapabilities(); diff --git a/processing/src/main/java/io/druid/segment/filter/ExtractionFilter.java b/processing/src/main/java/io/druid/segment/filter/ExtractionFilter.java index 405ad92deb8..906b1adb3fd 100644 --- a/processing/src/main/java/io/druid/segment/filter/ExtractionFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/ExtractionFilter.java @@ -20,7 +20,7 @@ package io.druid.segment.filter; import com.google.common.collect.Lists; import com.metamx.collections.bitmap.ImmutableBitmap; import com.metamx.collections.bitmap.WrappedConciseBitmap; -import io.druid.query.extraction.DimExtractionFn; +import io.druid.query.extraction.ExtractionFn; import io.druid.query.filter.BitmapIndexSelector; import io.druid.query.filter.Filter; import io.druid.query.filter.ValueMatcher; @@ -36,12 +36,12 @@ public class ExtractionFilter implements Filter { private final String dimension; private final String value; - private final DimExtractionFn fn; + private final ExtractionFn fn; public ExtractionFilter( String dimension, String value, - DimExtractionFn fn + ExtractionFn fn ) { this.dimension = dimension; diff --git a/processing/src/main/java/io/druid/segment/filter/Filters.java b/processing/src/main/java/io/druid/segment/filter/Filters.java index 406c763d19c..ee93583d9e3 100644 --- a/processing/src/main/java/io/druid/segment/filter/Filters.java +++ b/processing/src/main/java/io/druid/segment/filter/Filters.java @@ -75,7 +75,7 @@ public class Filters filter = new ExtractionFilter( extractionDimFilter.getDimension(), extractionDimFilter.getValue(), - extractionDimFilter.getDimExtractionFn() + extractionDimFilter.getExtractionFn() ); } else if (dimFilter instanceof RegexDimFilter) { final RegexDimFilter regexDimFilter = (RegexDimFilter) dimFilter; diff --git a/processing/src/main/java/io/druid/segment/filter/SelectorFilter.java b/processing/src/main/java/io/druid/segment/filter/SelectorFilter.java index 234278c09b4..2eda28989db 100644 --- a/processing/src/main/java/io/druid/segment/filter/SelectorFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/SelectorFilter.java @@ -58,7 +58,7 @@ public class SelectorFilter implements Filter @Override public ValueMatcher makeMatcher(ColumnSelectorFactory columnSelectorFactory) { 
- final DimensionSelector dimensionSelector = columnSelectorFactory.makeDimensionSelector(dimension); + final DimensionSelector dimensionSelector = columnSelectorFactory.makeDimensionSelector(dimension, null); // Missing columns match a null or empty string value and don't match anything else if (dimensionSelector == null) { diff --git a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java index 32b778f9d93..3d6f25af730 100644 --- a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java +++ b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java @@ -34,6 +34,7 @@ import io.druid.data.input.impl.SpatialDimensionSchema; import io.druid.granularity.QueryGranularity; import io.druid.query.aggregation.AggregatorFactory; import io.druid.query.aggregation.PostAggregator; +import io.druid.query.extraction.ExtractionFn; import io.druid.segment.ColumnSelectorFactory; import io.druid.segment.DimensionSelector; import io.druid.segment.FloatColumnSelector; @@ -163,7 +164,7 @@ public abstract class IncrementalIndex implements Iterable, } @Override - public DimensionSelector makeDimensionSelector(final String dimension) + public DimensionSelector makeDimensionSelector(final String dimension, final ExtractionFn extractionFn) { return new DimensionSelector() { @@ -209,12 +210,16 @@ public abstract class IncrementalIndex implements Iterable, @Override public String lookupName(int id) { - return in.get().getDimension(dimension).get(id); + final String value = in.get().getDimension(dimension).get(id); + return extractionFn == null ? value : extractionFn.apply(value); } @Override public int lookupId(String name) { + if (extractionFn != null) { + throw new UnsupportedOperationException("cannot perform lookup when applying an extraction function"); + } return in.get().getDimension(dimension).indexOf(name); } }; diff --git a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java index 76b97892851..6f7fad3b51b 100644 --- a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java +++ b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java @@ -28,6 +28,7 @@ import com.metamx.common.guava.Sequence; import com.metamx.common.guava.Sequences; import io.druid.granularity.QueryGranularity; import io.druid.query.QueryInterruptedException; +import io.druid.query.extraction.ExtractionFn; import io.druid.query.filter.Filter; import io.druid.query.filter.ValueMatcher; import io.druid.query.filter.ValueMatcherFactory; @@ -38,6 +39,7 @@ import io.druid.segment.FloatColumnSelector; import io.druid.segment.LongColumnSelector; import io.druid.segment.NullDimensionSelector; import io.druid.segment.ObjectColumnSelector; +import io.druid.segment.SingleScanTimeDimSelector; import io.druid.segment.StorageAdapter; import io.druid.segment.column.Column; import io.druid.segment.data.Indexed; @@ -99,6 +101,9 @@ public class IncrementalIndexStorageAdapter implements StorageAdapter @Override public int getDimensionCardinality(String dimension) { + if(dimension.equals(Column.TIME_COLUMN_NAME)) { + return Integer.MAX_VALUE; + } IncrementalIndex.DimDim dimDim = index.getDimension(dimension); if (dimDim == null) { return 0; @@ -272,8 +277,12 @@ public class IncrementalIndexStorageAdapter implements StorageAdapter 
} @Override - public DimensionSelector makeDimensionSelector(String dimension) + public DimensionSelector makeDimensionSelector(final String dimension, @Nullable final ExtractionFn extractionFn) { + if (dimension.equals(Column.TIME_COLUMN_NAME)) { + return new SingleScanTimeDimSelector(makeLongColumnSelector(dimension), extractionFn); + } + final IncrementalIndex.DimDim dimValLookup = index.getDimension(dimension); if (dimValLookup == null) { return NULL_DIMENSION_SELECTOR; @@ -331,12 +340,17 @@ public class IncrementalIndexStorageAdapter implements StorageAdapter @Override public String lookupName(int id) { - return dimValLookup.getValue(id); + final String value = dimValLookup.getValue(id); + return extractionFn == null ? value : extractionFn.apply(value); + } @Override public int lookupId(String name) { + if (extractionFn != null) { + throw new UnsupportedOperationException("cannot perform lookup when applying an extraction function"); + } return dimValLookup.getId(name); } }; diff --git a/processing/src/test/java/io/druid/query/aggregation/FilteredAggregatorTest.java b/processing/src/test/java/io/druid/query/aggregation/FilteredAggregatorTest.java index 34d89c986af..603dec6c057 100644 --- a/processing/src/test/java/io/druid/query/aggregation/FilteredAggregatorTest.java +++ b/processing/src/test/java/io/druid/query/aggregation/FilteredAggregatorTest.java @@ -18,6 +18,7 @@ package io.druid.query.aggregation; import com.google.common.collect.Lists; +import io.druid.query.extraction.ExtractionFn; import io.druid.query.filter.AndDimFilter; import io.druid.query.filter.DimFilter; import io.druid.query.filter.NotDimFilter; @@ -70,7 +71,7 @@ public class FilteredAggregatorTest return new ColumnSelectorFactory() { @Override - public DimensionSelector makeDimensionSelector(String dimensionName) + public DimensionSelector makeDimensionSelector(String dimensionName, ExtractionFn extractionFn) { if (dimensionName.equals("dim")) { return new DimensionSelector() diff --git a/processing/src/test/java/io/druid/query/extraction/TimeFormatExtractionFnTest.java b/processing/src/test/java/io/druid/query/extraction/TimeFormatExtractionFnTest.java new file mode 100644 index 00000000000..36b38a76bed --- /dev/null +++ b/processing/src/test/java/io/druid/query/extraction/TimeFormatExtractionFnTest.java @@ -0,0 +1,98 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package io.druid.query.extraction; + +import com.fasterxml.jackson.databind.ObjectMapper; +import io.druid.jackson.DefaultObjectMapper; +import org.joda.time.DateTime; +import org.joda.time.DateTimeZone; +import org.junit.Assert; +import org.junit.Test; + +public class TimeFormatExtractionFnTest +{ + + private static final long[] timestamps = { + new DateTime("2015-01-01T23:00:00Z").getMillis(), + new DateTime("2015-01-02T23:00:00Z").getMillis(), + new DateTime("2015-03-03T23:00:00Z").getMillis(), + new DateTime("2015-03-04T23:00:00Z").getMillis(), + new DateTime("2015-05-02T23:00:00Z").getMillis(), + new DateTime("2015-12-21T23:00:00Z").getMillis() + }; + + @Test(expected = IllegalArgumentException.class) + public void testIAEForNullPattern() throws Exception + { + new TimeFormatExtractionFn(null, null, null); + } + + @Test + public void testDayOfWeekExtraction() throws Exception + { + TimeFormatExtractionFn fn = new TimeFormatExtractionFn("EEEE", null, null); + Assert.assertEquals("Thursday", fn.apply(timestamps[0])); + Assert.assertEquals("Friday", fn.apply(timestamps[1])); + Assert.assertEquals("Tuesday", fn.apply(timestamps[2])); + Assert.assertEquals("Wednesday", fn.apply(timestamps[3])); + Assert.assertEquals("Saturday", fn.apply(timestamps[4])); + Assert.assertEquals("Monday", fn.apply(timestamps[5])); + + testSerde(fn, "EEEE", null, null); + } + + @Test + public void testLocalizedExtraction() throws Exception + { + TimeFormatExtractionFn fn = new TimeFormatExtractionFn("EEEE", null, "is"); + Assert.assertEquals("fimmtudagur", fn.apply(timestamps[0])); + Assert.assertEquals("föstudagur", fn.apply(timestamps[1])); + Assert.assertEquals("þriðjudagur", fn.apply(timestamps[2])); + Assert.assertEquals("miðvikudagur", fn.apply(timestamps[3])); + Assert.assertEquals("laugardagur", fn.apply(timestamps[4])); + Assert.assertEquals("mánudagur", fn.apply(timestamps[5])); + + testSerde(fn, "EEEE", null, "is"); + } + + @Test + public void testTimeZoneExtraction() throws Exception + { + TimeFormatExtractionFn fn = new TimeFormatExtractionFn("'In Berlin ist es schon 'EEEE", DateTimeZone.forID("Europe/Berlin"), "de"); + Assert.assertEquals("In Berlin ist es schon Freitag", fn.apply(timestamps[0])); + Assert.assertEquals("In Berlin ist es schon Samstag", fn.apply(timestamps[1])); + Assert.assertEquals("In Berlin ist es schon Mittwoch", fn.apply(timestamps[2])); + Assert.assertEquals("In Berlin ist es schon Donnerstag", fn.apply(timestamps[3])); + Assert.assertEquals("In Berlin ist es schon Sonntag", fn.apply(timestamps[4])); + Assert.assertEquals("In Berlin ist es schon Dienstag", fn.apply(timestamps[5])); + + testSerde(fn, "'In Berlin ist es schon 'EEEE", DateTimeZone.forID("Europe/Berlin"), "de"); + } + + public void testSerde(TimeFormatExtractionFn fn, String format, DateTimeZone tz, String locale) throws Exception { + ObjectMapper objectMapper = new DefaultObjectMapper(); + String json = objectMapper.writeValueAsString(fn); + TimeFormatExtractionFn deserialized = objectMapper.readValue(json, TimeFormatExtractionFn.class); + + Assert.assertEquals(format, deserialized.getFormat()); + Assert.assertEquals(tz, deserialized.getTimeZone()); + Assert.assertEquals(locale, deserialized.getLocale()); + } +} diff --git a/processing/src/test/java/io/druid/query/extraction/extraction/JavascriptDimExtractionFnTest.java b/processing/src/test/java/io/druid/query/extraction/extraction/JavascriptExtractionFnTest.java similarity index 89% rename from 
processing/src/test/java/io/druid/query/extraction/extraction/JavascriptDimExtractionFnTest.java rename to processing/src/test/java/io/druid/query/extraction/extraction/JavascriptExtractionFnTest.java index e881c519535..7ad28dd8ea8 100644 --- a/processing/src/test/java/io/druid/query/extraction/extraction/JavascriptDimExtractionFnTest.java +++ b/processing/src/test/java/io/druid/query/extraction/extraction/JavascriptExtractionFnTest.java @@ -19,14 +19,15 @@ package io.druid.query.extraction.extraction; import com.google.common.collect.Iterators; import com.google.common.collect.Lists; -import io.druid.query.extraction.DimExtractionFn; -import io.druid.query.extraction.JavascriptDimExtractionFn; +import io.druid.query.extraction.ExtractionFn; +import io.druid.query.extraction.JavascriptExtractionFn; +import org.joda.time.DateTime; import org.junit.Assert; import org.junit.Test; import java.util.Iterator; -public class JavascriptDimExtractionFnTest +public class JavascriptExtractionFnTest { private static final String[] testStrings = { "Quito", @@ -43,24 +44,46 @@ public class JavascriptDimExtractionFnTest public void testJavascriptSubstring() { String function = "function(str) { return str.substring(0,3); }"; - DimExtractionFn dimExtractionFn = new JavascriptDimExtractionFn(function); + ExtractionFn extractionFn = new JavascriptExtractionFn(function); for (String str : testStrings) { - String res = dimExtractionFn.apply(str); + String res = extractionFn.apply(str); Assert.assertEquals(str.substring(0, 3), res); } } + @Test + public void testTimeExample() throws Exception + { + String utcHour = "function(t) {\nreturn 'Second ' + Math.floor((t % 60000) / 1000);\n}"; + final long millis = new DateTime("2015-01-02T13:00:59.999Z").getMillis(); + Assert.assertEquals("Second 59" , new JavascriptExtractionFn(utcHour).apply(millis)); + } + + @Test + public void testLongs() throws Exception + { + String typeOf = "function(x) {\nreturn typeof x\n}"; + Assert.assertEquals("number", new JavascriptExtractionFn(typeOf).apply(1234L)); + } + + @Test + public void testFloats() throws Exception + { + String typeOf = "function(x) {\nreturn typeof x\n}"; + Assert.assertEquals("number", new JavascriptExtractionFn(typeOf).apply(1234.0)); + } + @Test public void testCastingAndNull() { String function = "function(x) {\n x = Number(x);\n if(isNaN(x)) return null;\n return Math.floor(x / 5) * 5;\n}"; - DimExtractionFn dimExtractionFn = new JavascriptDimExtractionFn(function); + ExtractionFn extractionFn = new JavascriptExtractionFn(function); Iterator it = Iterators.forArray("0", "5", "5", "10", null); for(String str : Lists.newArrayList("1", "5", "6", "10", "CA")) { - String res = dimExtractionFn.apply(str); + String res = extractionFn.apply(str); String expected = it.next(); Assert.assertEquals(expected, res); } @@ -70,11 +93,11 @@ public class JavascriptDimExtractionFnTest public void testJavascriptRegex() { String function = "function(str) { return str.replace(/[aeiou]/g, ''); }"; - DimExtractionFn dimExtractionFn = new JavascriptDimExtractionFn(function); + ExtractionFn extractionFn = new JavascriptExtractionFn(function); Iterator it = Iterators.forArray("Qt", "Clgry", "Tky", "Stckhlm", "Vncvr", "Prtr", "Wllngtn", "Ontr"); for (String str : testStrings) { - String res = dimExtractionFn.apply(str); + String res = extractionFn.apply(str); Assert.assertEquals(it.next(), res); } } @@ -274,13 +297,13 @@ public class JavascriptDimExtractionFnTest + "" + "}"; - DimExtractionFn dimExtractionFn = new 
JavascriptDimExtractionFn(function); + ExtractionFn extractionFn = new JavascriptExtractionFn(function); Iterator inputs = Iterators.forArray("introducing", "exploratory", "analytics", "on", "large", "datasets"); Iterator it = Iterators.forArray("introduc", "exploratori", "analyt", "on", "larg", "dataset"); while(inputs.hasNext()) { - String res = dimExtractionFn.apply(inputs.next()); + String res = extractionFn.apply(inputs.next()); Assert.assertEquals(it.next(), res); } } diff --git a/processing/src/test/java/io/druid/query/extraction/extraction/MatchingDimExtractionFnTest.java b/processing/src/test/java/io/druid/query/extraction/extraction/MatchingDimExtractionFnTest.java index f6fe5454f93..9706d044981 100644 --- a/processing/src/test/java/io/druid/query/extraction/extraction/MatchingDimExtractionFnTest.java +++ b/processing/src/test/java/io/druid/query/extraction/extraction/MatchingDimExtractionFnTest.java @@ -18,7 +18,7 @@ package io.druid.query.extraction.extraction; import com.google.common.collect.Sets; -import io.druid.query.extraction.DimExtractionFn; +import io.druid.query.extraction.ExtractionFn; import io.druid.query.extraction.MatchingDimExtractionFn; import org.junit.Assert; import org.junit.Test; @@ -47,12 +47,12 @@ public class MatchingDimExtractionFnTest public void testExtraction() { String regex = ".*[Tt][Oo].*"; - DimExtractionFn dimExtractionFn = new MatchingDimExtractionFn(regex); + ExtractionFn extractionFn = new MatchingDimExtractionFn(regex); List expected = Arrays.asList("Quito", "Tokyo", "Stockholm", "Pretoria", "Wellington"); Set extracted = Sets.newHashSet(); for (String str : testStrings) { - String res = dimExtractionFn.apply(str); + String res = extractionFn.apply(str); if (res != null) { extracted.add(res); } diff --git a/processing/src/test/java/io/druid/query/extraction/extraction/RegexDimExtractionFnTest.java b/processing/src/test/java/io/druid/query/extraction/extraction/RegexDimExtractionFnTest.java index bcbdaa0e3a0..4746ee33a60 100644 --- a/processing/src/test/java/io/druid/query/extraction/extraction/RegexDimExtractionFnTest.java +++ b/processing/src/test/java/io/druid/query/extraction/extraction/RegexDimExtractionFnTest.java @@ -18,7 +18,7 @@ package io.druid.query.extraction.extraction; import com.google.common.collect.Sets; -import io.druid.query.extraction.DimExtractionFn; +import io.druid.query.extraction.ExtractionFn; import io.druid.query.extraction.RegexDimExtractionFn; import org.junit.Assert; import org.junit.Test; @@ -53,11 +53,11 @@ public class RegexDimExtractionFnTest public void testPathExtraction() { String regex = "/([^/]+)/"; - DimExtractionFn dimExtractionFn = new RegexDimExtractionFn(regex); + ExtractionFn extractionFn = new RegexDimExtractionFn(regex); Set extracted = Sets.newHashSet(); for (String path : paths) { - extracted.add(dimExtractionFn.apply(path)); + extracted.add(extractionFn.apply(path)); } Assert.assertEquals(2, extracted.size()); @@ -69,11 +69,11 @@ public class RegexDimExtractionFnTest public void testDeeperPathExtraction() { String regex = "^/([^/]+/[^/]+)(/|$)"; - DimExtractionFn dimExtractionFn = new RegexDimExtractionFn(regex); + ExtractionFn extractionFn = new RegexDimExtractionFn(regex); Set extracted = Sets.newHashSet(); for (String path : paths) { - extracted.add(dimExtractionFn.apply(path)); + extracted.add(extractionFn.apply(path)); } Assert.assertEquals(4, extracted.size()); @@ -87,11 +87,11 @@ public class RegexDimExtractionFnTest public void testStringExtraction() { String regex = "(.)"; - 
DimExtractionFn dimExtractionFn = new RegexDimExtractionFn(regex); + ExtractionFn extractionFn = new RegexDimExtractionFn(regex); Set extracted = Sets.newHashSet(); for (String testString : testStrings) { - extracted.add(dimExtractionFn.apply(testString)); + extracted.add(extractionFn.apply(testString)); } Assert.assertEquals(3, extracted.size()); diff --git a/processing/src/test/java/io/druid/query/extraction/extraction/SearchQuerySpecDimExtractionFnTest.java b/processing/src/test/java/io/druid/query/extraction/extraction/SearchQuerySpecDimExtractionFnTest.java index 01743b45f38..9a5f50f55b6 100644 --- a/processing/src/test/java/io/druid/query/extraction/extraction/SearchQuerySpecDimExtractionFnTest.java +++ b/processing/src/test/java/io/druid/query/extraction/extraction/SearchQuerySpecDimExtractionFnTest.java @@ -18,7 +18,7 @@ package io.druid.query.extraction.extraction; import com.google.common.collect.Sets; -import io.druid.query.extraction.DimExtractionFn; +import io.druid.query.extraction.ExtractionFn; import io.druid.query.extraction.SearchQuerySpecDimExtractionFn; import io.druid.query.search.search.FragmentSearchQuerySpec; import io.druid.query.search.search.SearchQuerySpec; @@ -50,12 +50,12 @@ public class SearchQuerySpecDimExtractionFnTest SearchQuerySpec spec = new FragmentSearchQuerySpec( Arrays.asList("to", "yo") ); - DimExtractionFn dimExtractionFn = new SearchQuerySpecDimExtractionFn(spec); + ExtractionFn extractionFn = new SearchQuerySpecDimExtractionFn(spec); List expected = Arrays.asList("Kyoto", "Tokyo", "Toyokawa", "Yorktown"); Set extracted = Sets.newHashSet(); for (String str : testStrings) { - String res = dimExtractionFn.apply(str); + String res = extractionFn.apply(str); if (res != null) { extracted.add(res); } diff --git a/processing/src/test/java/io/druid/query/extraction/extraction/TimeDimExtractionFnTest.java b/processing/src/test/java/io/druid/query/extraction/extraction/TimeDimExtractionFnTest.java index 079ec022c80..d9f327d46c0 100644 --- a/processing/src/test/java/io/druid/query/extraction/extraction/TimeDimExtractionFnTest.java +++ b/processing/src/test/java/io/druid/query/extraction/extraction/TimeDimExtractionFnTest.java @@ -18,7 +18,7 @@ package io.druid.query.extraction.extraction; import com.google.common.collect.Sets; -import io.druid.query.extraction.DimExtractionFn; +import io.druid.query.extraction.ExtractionFn; import io.druid.query.extraction.TimeDimExtractionFn; import org.junit.Assert; import org.junit.Test; @@ -42,10 +42,10 @@ public class TimeDimExtractionFnTest public void testMonthExtraction() { Set months = Sets.newHashSet(); - DimExtractionFn dimExtractionFn = new TimeDimExtractionFn("MM/dd/yyyy", "MM/yyyy"); + ExtractionFn extractionFn = new TimeDimExtractionFn("MM/dd/yyyy", "MM/yyyy"); for (String dim : dims) { - months.add(dimExtractionFn.apply(dim)); + months.add(extractionFn.apply(dim)); } Assert.assertEquals(months.size(), 4); @@ -59,10 +59,10 @@ public class TimeDimExtractionFnTest public void testQuarterExtraction() { Set quarters = Sets.newHashSet(); - DimExtractionFn dimExtractionFn = new TimeDimExtractionFn("MM/dd/yyyy", "QQQ/yyyy"); + ExtractionFn extractionFn = new TimeDimExtractionFn("MM/dd/yyyy", "QQQ/yyyy"); for (String dim : dims) { - quarters.add(dimExtractionFn.apply(dim)); + quarters.add(extractionFn.apply(dim)); } Assert.assertEquals(quarters.size(), 3); diff --git a/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java 
b/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java index 67a3d37fb1b..89dec7c2e71 100644 --- a/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java +++ b/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java @@ -52,9 +52,14 @@ import io.druid.query.dimension.DefaultDimensionSpec; import io.druid.query.dimension.DimensionSpec; import io.druid.query.dimension.ExtractionDimensionSpec; import io.druid.query.extraction.DimExtractionFn; +import io.druid.query.extraction.ExtractionFn; import io.druid.query.extraction.RegexDimExtractionFn; +import io.druid.query.extraction.TimeFormatExtractionFn; +import io.druid.query.filter.DimFilter; import io.druid.query.filter.JavaScriptDimFilter; +import io.druid.query.filter.OrDimFilter; import io.druid.query.filter.RegexDimFilter; +import io.druid.query.filter.SelectorDimFilter; import io.druid.query.groupby.having.EqualToHavingSpec; import io.druid.query.groupby.having.GreaterThanHavingSpec; import io.druid.query.groupby.having.HavingSpec; @@ -64,6 +69,7 @@ import io.druid.query.groupby.orderby.LimitSpec; import io.druid.query.groupby.orderby.OrderByColumnSpec; import io.druid.query.spec.MultipleIntervalSegmentSpec; import io.druid.segment.TestHelper; +import io.druid.segment.column.Column; import org.joda.time.DateTime; import org.joda.time.DateTimeZone; import org.joda.time.Interval; @@ -322,8 +328,8 @@ public class GroupByQueryRunnerTest @Test public void testGroupByWithNullProducingDimExtractionFn() { - final DimExtractionFn fn1 = new RegexDimExtractionFn("(\\w{1})"); - final DimExtractionFn nullExtractionFn = new DimExtractionFn() + final ExtractionFn fn1 = new RegexDimExtractionFn("(\\w{1})"); + final ExtractionFn nullExtractionFn = new DimExtractionFn() { @Override public byte[] getCacheKey() @@ -354,7 +360,7 @@ public class GroupByQueryRunnerTest )) .setGranularity(QueryRunnerTestHelper.dayGran) .setDimensions(Lists.newArrayList( - new ExtractionDimensionSpec("quality", "alias", nullExtractionFn) + new ExtractionDimensionSpec("quality", "alias", nullExtractionFn, null) )) .build(); @@ -389,8 +395,8 @@ public class GroupByQueryRunnerTest */ public void testGroupByWithEmptyStringProducingDimExtractionFn() { - final DimExtractionFn fn1 = new RegexDimExtractionFn("(\\w{1})"); - final DimExtractionFn emptyStringExtractionFn = new DimExtractionFn() + final ExtractionFn fn1 = new RegexDimExtractionFn("(\\w{1})"); + final ExtractionFn emptyStringExtractionFn = new DimExtractionFn() { @Override public byte[] getCacheKey() @@ -421,7 +427,7 @@ public class GroupByQueryRunnerTest )) .setGranularity(QueryRunnerTestHelper.dayGran) .setDimensions(Lists.newArrayList( - new ExtractionDimensionSpec("quality", "alias", emptyStringExtractionFn) + new ExtractionDimensionSpec("quality", "alias", emptyStringExtractionFn, null) )) .build(); @@ -672,11 +678,13 @@ public class GroupByQueryRunnerTest final GroupByQuery allGranQuery = builder.copy().setGranularity(QueryGranularity.ALL).build(); QueryRunner mergedRunner = factory.getToolchest().mergeResults( - new QueryRunner() { + new QueryRunner() + { @Override public Sequence run( Query query, Map responseContext - ) { + ) + { // simulate two daily segments final Query query1 = query.withQuerySegmentSpec( new MultipleIntervalSegmentSpec(Lists.newArrayList(new Interval("2011-04-02/2011-04-03"))) @@ -2272,4 +2280,61 @@ public class GroupByQueryRunnerTest Iterable results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query); 
TestHelper.assertExpectedObjects(expectedResults, results, ""); } + + @Test + public void testGroupByTimeExtraction() + { + GroupByQuery query = GroupByQuery + .builder() + .setDataSource(QueryRunnerTestHelper.dataSource) + .setQuerySegmentSpec(QueryRunnerTestHelper.fullOnInterval) + .setDimensions( + Lists.newArrayList( + new DefaultDimensionSpec("market", "market"), + new ExtractionDimensionSpec( + Column.TIME_COLUMN_NAME, + "dayOfWeek", + new TimeFormatExtractionFn("EEEE", null, null), + null + ) + ) + ) + .setAggregatorSpecs( + Arrays.asList( + QueryRunnerTestHelper.rowsCount, + QueryRunnerTestHelper.indexDoubleSum + ) + ) + .setPostAggregatorSpecs(Arrays.asList(QueryRunnerTestHelper.addRowsIndexConstant)) + .setGranularity(QueryRunnerTestHelper.allGran) + .setDimFilter( + new OrDimFilter( + Arrays.asList( + new SelectorDimFilter("market", "spot"), + new SelectorDimFilter("market", "upfront") + ) + ) + ) + .build(); + + List expectedResults = Arrays.asList( + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Friday", "market", "spot", "index", 13219.574157714844, "rows", 117L, "addRowsIndexConstant", 13337.574157714844), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Monday", "market", "spot", "index", 13557.738830566406, "rows", 117L, "addRowsIndexConstant", 13675.738830566406), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Saturday", "market", "spot", "index", 13493.751281738281, "rows", 117L, "addRowsIndexConstant", 13611.751281738281), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Sunday", "market", "spot", "index", 13585.541015625, "rows", 117L, "addRowsIndexConstant", 13703.541015625), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Thursday", "market", "spot", "index", 14279.127197265625, "rows", 126L, "addRowsIndexConstant", 14406.127197265625), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Tuesday", "market", "spot", "index", 13199.471435546875, "rows", 117L, "addRowsIndexConstant", 13317.471435546875), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Wednesday", "market", "spot", "index", 14271.368591308594, "rows", 126L, "addRowsIndexConstant", 14398.368591308594), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Friday", "market", "upfront", "index", 27297.8623046875, "rows", 26L, "addRowsIndexConstant", 27324.8623046875), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Monday", "market", "upfront", "index", 27619.58447265625, "rows", 26L, "addRowsIndexConstant", 27646.58447265625), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Saturday", "market", "upfront", "index", 27820.83154296875, "rows", 26L, "addRowsIndexConstant", 27847.83154296875), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Sunday", "market", "upfront", "index", 24791.223876953125, "rows", 26L, "addRowsIndexConstant", 24818.223876953125), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Thursday", "market", "upfront", "index", 28562.748901367188, "rows", 28L, "addRowsIndexConstant", 28591.748901367188), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Tuesday", "market", "upfront", "index", 26968.280639648438, "rows", 26L, "addRowsIndexConstant", 26995.280639648438), + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", 
"dayOfWeek", "Wednesday", "market", "upfront", "index", 28985.5751953125, "rows", 28L, "addRowsIndexConstant", 29014.5751953125) + ); + + Iterable results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query); + TestHelper.assertExpectedObjects(expectedResults, results, ""); + } } diff --git a/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java b/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java index ca74163ed3c..0379a256bbb 100644 --- a/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java +++ b/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java @@ -41,13 +41,14 @@ import io.druid.query.aggregation.cardinality.CardinalityAggregatorFactory; import io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory; import io.druid.query.dimension.ExtractionDimensionSpec; import io.druid.query.extraction.DimExtractionFn; +import io.druid.query.extraction.ExtractionFn; import io.druid.query.extraction.RegexDimExtractionFn; +import io.druid.query.extraction.TimeFormatExtractionFn; import io.druid.query.filter.AndDimFilter; import io.druid.query.filter.DimFilter; import io.druid.query.spec.MultipleIntervalSegmentSpec; -import io.druid.query.timeseries.TimeseriesQuery; -import io.druid.query.timeseries.TimeseriesResultValue; import io.druid.segment.TestHelper; +import io.druid.segment.column.Column; import org.joda.time.DateTime; import org.joda.time.Interval; import org.junit.Test; @@ -1399,6 +1400,55 @@ public class TopNQueryRunnerTest TestHelper.assertExpectedResults(expectedResults, runner.run(query, context)); } + @Test + public void testTopNCollapsingDimExtraction() + { + TopNQuery query = new TopNQueryBuilder() + .dataSource(QueryRunnerTestHelper.dataSource) + .granularity(QueryRunnerTestHelper.allGran) + .dimension( + new ExtractionDimensionSpec( + QueryRunnerTestHelper.qualityDimension, QueryRunnerTestHelper.qualityDimension, + new RegexDimExtractionFn(".(.)"), null + ) + ) + .metric("index") + .threshold(2) + .intervals(QueryRunnerTestHelper.fullOnInterval) + .aggregators( + Arrays.asList( + QueryRunnerTestHelper.rowsCount, + QueryRunnerTestHelper.indexDoubleSum + ) + ) + .postAggregators(Arrays.asList(QueryRunnerTestHelper.addRowsIndexConstant)) + .build(); + + List> expectedResults = Arrays.asList( + new Result<>( + new DateTime("2011-01-12T00:00:00.000Z"), + new TopNResultValue( + Arrays.>asList( + ImmutableMap.of( + QueryRunnerTestHelper.qualityDimension, "e", + "rows", 558L, + "index", 246645.1204032898, + "addRowsIndexConstant", 247204.1204032898 + ), + ImmutableMap.of( + QueryRunnerTestHelper.qualityDimension, "r", + "rows", 372L, + "index", 222051.08961486816, + "addRowsIndexConstant", 222424.08961486816 + ) + ) + ) + ) + ); + HashMap context = new HashMap(); + TestHelper.assertExpectedResults(expectedResults, runner.run(query, context)); + } + @Test public void testTopNDimExtraction() { @@ -1407,7 +1457,7 @@ public class TopNQueryRunnerTest .granularity(QueryRunnerTestHelper.allGran) .dimension( new ExtractionDimensionSpec( - QueryRunnerTestHelper.marketDimension, QueryRunnerTestHelper.marketDimension, new RegexDimExtractionFn("(.)") + QueryRunnerTestHelper.marketDimension, QueryRunnerTestHelper.marketDimension, new RegexDimExtractionFn("(.)"), null ) ) .metric("rows") @@ -1459,7 +1509,7 @@ public class TopNQueryRunnerTest .granularity(QueryRunnerTestHelper.allGran) .dimension( new ExtractionDimensionSpec( - QueryRunnerTestHelper.marketDimension, QueryRunnerTestHelper.marketDimension, new 
RegexDimExtractionFn("(.)") + QueryRunnerTestHelper.marketDimension, QueryRunnerTestHelper.marketDimension, new RegexDimExtractionFn("(.)"), null ) ) .metric(new LexicographicTopNMetricSpec(null)) @@ -1511,7 +1561,7 @@ public class TopNQueryRunnerTest .granularity(QueryRunnerTestHelper.allGran) .dimension( new ExtractionDimensionSpec( - QueryRunnerTestHelper.marketDimension, QueryRunnerTestHelper.marketDimension, new RegexDimExtractionFn("..(.)") + QueryRunnerTestHelper.marketDimension, QueryRunnerTestHelper.marketDimension, new RegexDimExtractionFn("..(.)"), null ) ) .metric(new InvertedTopNMetricSpec(new LexicographicTopNMetricSpec(null))) @@ -1563,10 +1613,75 @@ public class TopNQueryRunnerTest .granularity(QueryRunnerTestHelper.allGran) .dimension( new ExtractionDimensionSpec( - QueryRunnerTestHelper.marketDimension, QueryRunnerTestHelper.marketDimension, new RegexDimExtractionFn("(.)") + QueryRunnerTestHelper.marketDimension, QueryRunnerTestHelper.marketDimension, new RegexDimExtractionFn("(.)"), null ) ) - .metric(new LexicographicTopNMetricSpec("spot")) + .metric(new LexicographicTopNMetricSpec("s")) + .threshold(4) + .intervals(QueryRunnerTestHelper.firstToThird) + .aggregators(QueryRunnerTestHelper.commonAggregators) + .postAggregators(Arrays.asList(QueryRunnerTestHelper.addRowsIndexConstant)) + .build(); + + List> expectedResults = Arrays.asList( + new Result( + new DateTime("2011-04-01T00:00:00.000Z"), + new TopNResultValue( + Arrays.>asList( + ImmutableMap.of( + QueryRunnerTestHelper.marketDimension, "t", + "rows", 4L, + "index", 5351.814697265625D, + "addRowsIndexConstant", 5356.814697265625D, + "uniques", QueryRunnerTestHelper.UNIQUES_2 + ), + ImmutableMap.of( + QueryRunnerTestHelper.marketDimension, "u", + "rows", 4L, + "index", 4875.669677734375D, + "addRowsIndexConstant", 4880.669677734375D, + "uniques", QueryRunnerTestHelper.UNIQUES_2 + ) + ) + ) + ) + ); + HashMap context = new HashMap(); + TestHelper.assertExpectedResults(expectedResults, runner.run(query, context)); + } + + @Test + public void testTopNLexicographicDimExtractionWithSortingPreservedAndPreviousStop() + { + TopNQuery query = new TopNQueryBuilder() + .dataSource(QueryRunnerTestHelper.dataSource) + .granularity(QueryRunnerTestHelper.allGran) + .dimension( + new ExtractionDimensionSpec( + QueryRunnerTestHelper.marketDimension, QueryRunnerTestHelper.marketDimension, + new DimExtractionFn() + { + @Override + public byte[] getCacheKey() + { + return new byte[0]; + } + + @Override + public String apply(String value) + { + return value.substring(0, 1); + } + + @Override + public boolean preservesOrdering() + { + return true; + } + }, null + ) + ) + .metric(new LexicographicTopNMetricSpec("s")) .threshold(4) .intervals(QueryRunnerTestHelper.firstToThird) .aggregators(QueryRunnerTestHelper.commonAggregators) @@ -1609,7 +1724,7 @@ public class TopNQueryRunnerTest .granularity(QueryRunnerTestHelper.allGran) .dimension( new ExtractionDimensionSpec( - QueryRunnerTestHelper.marketDimension, QueryRunnerTestHelper.marketDimension, new RegexDimExtractionFn("(.)") + QueryRunnerTestHelper.marketDimension, QueryRunnerTestHelper.marketDimension, new RegexDimExtractionFn("(.)"), null ) ) .metric(new InvertedTopNMetricSpec(new LexicographicTopNMetricSpec("u"))) @@ -1654,7 +1769,7 @@ public class TopNQueryRunnerTest .granularity(QueryRunnerTestHelper.allGran) .dimension( new ExtractionDimensionSpec( - QueryRunnerTestHelper.marketDimension, QueryRunnerTestHelper.marketDimension, new RegexDimExtractionFn("..(.)") + 
QueryRunnerTestHelper.marketDimension, QueryRunnerTestHelper.marketDimension, new RegexDimExtractionFn("..(.)"), null ) ) .metric(new InvertedTopNMetricSpec(new LexicographicTopNMetricSpec("p"))) @@ -1693,7 +1808,7 @@ public class TopNQueryRunnerTest @Test public void testTopNWithNullProducingDimExtractionFn() { - final DimExtractionFn nullStringDimExtraction = new DimExtractionFn() + final ExtractionFn nullStringDimExtraction = new DimExtractionFn() { @Override public byte[] getCacheKey() @@ -1723,7 +1838,7 @@ public class TopNQueryRunnerTest .aggregators(QueryRunnerTestHelper.commonAggregators) .postAggregators(Arrays.asList(QueryRunnerTestHelper.addRowsIndexConstant)) .dimension( - new ExtractionDimensionSpec(QueryRunnerTestHelper.marketDimension, QueryRunnerTestHelper.marketDimension, nullStringDimExtraction) + new ExtractionDimensionSpec(QueryRunnerTestHelper.marketDimension, QueryRunnerTestHelper.marketDimension, nullStringDimExtraction, null) ) .build(); @@ -1771,7 +1886,7 @@ public class TopNQueryRunnerTest */ public void testTopNWithEmptyStringProducingDimExtractionFn() { - final DimExtractionFn emptyStringDimExtraction = new DimExtractionFn() + final ExtractionFn emptyStringDimExtraction = new DimExtractionFn() { @Override public byte[] getCacheKey() @@ -1801,7 +1916,7 @@ public class TopNQueryRunnerTest .aggregators(QueryRunnerTestHelper.commonAggregators) .postAggregators(Arrays.asList(QueryRunnerTestHelper.addRowsIndexConstant)) .dimension( - new ExtractionDimensionSpec(QueryRunnerTestHelper.marketDimension, QueryRunnerTestHelper.marketDimension, emptyStringDimExtraction)) + new ExtractionDimensionSpec(QueryRunnerTestHelper.marketDimension, QueryRunnerTestHelper.marketDimension, emptyStringDimExtraction, null)) .build(); @@ -2159,4 +2274,55 @@ public class TopNQueryRunnerTest TestHelper.assertExpectedResults(expectedResults, runner.run(query, Maps.newHashMap())); } + + @Test + public void testTopNTimeExtraction() + { + TopNQuery query = new TopNQueryBuilder() + .dataSource(QueryRunnerTestHelper.dataSource) + .granularity(QueryRunnerTestHelper.allGran) + .dimension( + new ExtractionDimensionSpec( + Column.TIME_COLUMN_NAME, + "dayOfWeek", + new TimeFormatExtractionFn("EEEE", null, null), + null + ) + ) + .metric("index") + .threshold(2) + .intervals(QueryRunnerTestHelper.fullOnInterval) + .aggregators( + Arrays.asList( + QueryRunnerTestHelper.rowsCount, + QueryRunnerTestHelper.indexDoubleSum + ) + ) + .postAggregators(Arrays.asList(QueryRunnerTestHelper.addRowsIndexConstant)) + .build(); + + List> expectedResults = Arrays.asList( + new Result<>( + new DateTime("2011-01-12T00:00:00.000Z"), + new TopNResultValue( + Arrays.>asList( + ImmutableMap.of( + "dayOfWeek", "Wednesday", + "rows", 182L, + "index", 76010.28100585938, + "addRowsIndexConstant", 76193.28100585938 + ), + ImmutableMap.of( + "dayOfWeek", "Thursday", + "rows", 182L, + "index", 75203.26300811768, + "addRowsIndexConstant", 75386.26300811768 + ) + ) + ) + ) + ); + HashMap context = new HashMap(); + TestHelper.assertExpectedResults(expectedResults, runner.run(query, context)); + } } diff --git a/processing/src/test/java/io/druid/segment/filter/ExtractionDimFilterTest.java b/processing/src/test/java/io/druid/segment/filter/ExtractionDimFilterTest.java index c8045372b6f..f58436e666d 100644 --- a/processing/src/test/java/io/druid/segment/filter/ExtractionDimFilterTest.java +++ b/processing/src/test/java/io/druid/segment/filter/ExtractionDimFilterTest.java @@ -17,7 +17,6 @@ package io.druid.segment.filter; -import 
com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.metamx.collections.bitmap.BitmapFactory; import com.metamx.collections.bitmap.ConciseBitmapFactory; @@ -25,18 +24,15 @@ import com.metamx.collections.bitmap.ImmutableBitmap; import com.metamx.collections.bitmap.WrappedConciseBitmap; import com.metamx.collections.spatial.ImmutableRTree; import io.druid.query.extraction.DimExtractionFn; +import io.druid.query.extraction.ExtractionFn; import io.druid.query.filter.BitmapIndexSelector; -import io.druid.query.filter.Filter; import io.druid.segment.data.ArrayIndexed; import io.druid.segment.data.Indexed; import it.uniroma3.mat.extendedset.intset.ConciseSet; import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import java.util.Collection; import java.util.Map; /** @@ -94,7 +90,7 @@ public class ExtractionDimFilterTest return null; } }; - private static final DimExtractionFn DIM_EXTRACTION_FN = new DimExtractionFn() + private static final ExtractionFn DIM_EXTRACTION_FN = new DimExtractionFn() { @Override public byte[] getCacheKey() diff --git a/processing/src/test/java/io/druid/segment/incremental/IncrementalIndexStorageAdapterTest.java b/processing/src/test/java/io/druid/segment/incremental/IncrementalIndexStorageAdapterTest.java index d8d5dcb4265..61f3686bfe4 100644 --- a/processing/src/test/java/io/druid/segment/incremental/IncrementalIndexStorageAdapterTest.java +++ b/processing/src/test/java/io/druid/segment/incremental/IncrementalIndexStorageAdapterTest.java @@ -273,7 +273,7 @@ public class IncrementalIndexStorageAdapterTest Cursor cursor = Sequences.toList(Sequences.limit(cursorSequence, 1), Lists.newArrayList()).get(0); DimensionSelector dimSelector; - dimSelector = cursor.makeDimensionSelector("sally"); + dimSelector = cursor.makeDimensionSelector("sally", null); Assert.assertEquals("bo", dimSelector.lookupName(dimSelector.getRow().get(0))); index.add( @@ -287,7 +287,7 @@ public class IncrementalIndexStorageAdapterTest // Cursor reset should not be affected by out of order values cursor.reset(); - dimSelector = cursor.makeDimensionSelector("sally"); + dimSelector = cursor.makeDimensionSelector("sally", null); Assert.assertEquals("bo", dimSelector.lookupName(dimSelector.getRow().get(0))); }
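
For reference, here is a minimal standalone sketch of how the `TimeFormatExtractionFn` introduced in this patch can be exercised outside of a query. The constructor arguments (format, optional `DateTimeZone`, optional locale) and the `apply(long)` call mirror `TimeFormatExtractionFnTest` above; the wrapper class and `main` method are illustrative only and are not part of the patch.

```java
import io.druid.query.extraction.TimeFormatExtractionFn;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;

public class TimeFormatExtractionExample
{
  public static void main(String[] args)
  {
    // Format a __time value as a day-of-week name, as in testDayOfWeekExtraction above.
    TimeFormatExtractionFn dayOfWeek = new TimeFormatExtractionFn("EEEE", null, null);
    long millis = new DateTime("2015-01-01T23:00:00Z").getMillis();
    System.out.println(dayOfWeek.apply(millis)); // "Thursday"

    // Time zone and locale are optional; both are supplied here, as in testTimeZoneExtraction above.
    TimeFormatExtractionFn berlin =
        new TimeFormatExtractionFn("EEEE", DateTimeZone.forID("Europe/Berlin"), "de");
    System.out.println(berlin.apply(millis)); // "Freitag" -- already Friday in Berlin
  }
}
```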