mirror of https://github.com/apache/druid.git
Add ability to apply extraction functions to the time dimension
- Moves DimExtractionFn under a more generic ExtractionFn interface to support extracting dimension values other than strings
- Pushes down extractionFn from the query engine to the storage adapter
- Deprecates the 'dimExtractionFn' parameter in favor of 'extractionFn'
- Adds a TimeFormatExtractionFn, allowing the '__time' dimension to be projected
- Renames JavascriptDimExtractionFn to JavascriptExtractionFn, adding support for any dimension value type that maps directly to JavaScript
- Updates documentation for time column extraction and related changes
This commit is contained in:
parent c97fbdf616
commit d3f5bddc5c
@@ -1,14 +1,16 @@
 ---
 layout: doc_page
 ---

 # Transforming Dimension Values
-The following JSON fields can be used in a query to operate on dimension values.
+
+The following JSON fields can be used in a query to operate on dimension values.

 ## DimensionSpec

 `DimensionSpec`s define how dimension values get transformed prior to aggregation.

-### DefaultDimensionSpec
+### Default DimensionSpec

 Returns dimension values as is and optionally renames the dimension.

@@ -16,63 +18,139 @@ Returns dimension values as is and optionally renames the dimension.
 { "type" : "default", "dimension" : <dimension>, "outputName": <output_name> }
 ```

-### ExtractionDimensionSpec
+### Extraction DimensionSpec

-Returns dimension values transformed using the given [DimExtractionFn](#toc_3)
+Returns dimension values transformed using the given [extraction function](#extraction-functions).

 ```json
 {
   "type" : "extraction",
   "dimension" : <dimension>,
   "outputName" : <output_name>,
-  "dimExtractionFn" : <dim_extraction_fn>
+  "extractionFn" : <extraction_function>
 }
 ```

-## <a id="toc_3"></a>DimExtractionFn
+## Extraction Functions

-`DimExtractionFn`s define the transformation applied to each dimension value
+Extraction functions define the transformation applied to each dimension value.

-### RegexDimExtractionFn
+Transformations can be applied to both regular (string) dimensions, as well
+as the special `__time` dimension, which represents the current time bucket
+according to the query [aggregation granularity](Granularities.html).
+
-Returns the first group matched by the given regular expression. If there is no match it returns the dimension value as is.
+**Note**: for functions taking string values (such as regular expressions),
+`__time` dimension values will be formatted in [ISO-8601 format](https://en.wikipedia.org/wiki/ISO_8601)
+before getting passed to the extraction function.

+### Regular Expression Extraction Function
+
+Returns the first matching group for the given regular expression.
+If there is no match, it returns the dimension value as is.

 ```json
-{ "type" : "regex", "expr", <regular_expression> }
+{ "type" : "regex", "expr" : <regular_expression> }
 ```

-### PartialDimExtractionFn
+For example, using `"expr" : "(\\w\\w\\w).*"` will transform
+`'Monday'`, `'Tuesday'`, `'Wednesday'` into `'Mon'`, `'Tue'`, `'Wed'`.

-Returns the dimension value as is if there is a match, otherwise returns null.
+### Partial Extraction Function

+Returns the dimension value unchanged if the regular expression matches, otherwise returns null.

 ```json
-{ "type" : "partial", "expr", <regular_expression> }
+{ "type" : "partial", "expr" : <regular_expression> }
 ```

-### SearchQuerySpecDimExtractionFn
+### Search Query Extraction Function

-Returns the dimension value as is if the given [SearchQuerySpec](SearchQuerySpec.html) matches, otherwise returns null.
+Returns the dimension value unchanged if the given [`SearchQuerySpec`](SearchQuerySpec.html)
+matches, otherwise returns null.

 ```json
 { "type" : "searchQuery", "query" : <search_query_spec> }
 ```

-### TimeDimExtractionFn
+### Time Format Extraction Function

-Parses dimension values as timestamps using the given input format, and returns them formatted using the given output format. Time formats follow the [com.ibm.icu.text.SimpleDateFormat](http://icu-project.org/apiref/icu4j/com/ibm/icu/text/SimpleDateFormat.html) format
+Returns the dimension value formatted according to the given format string, time zone, and locale.
+
+For `__time` dimension values, this formats the time value bucketed by the
+[aggregation granularity](Granularities.html).
+
+For a regular dimension, it assumes the string is formatted in
+[ISO-8601 date and time format](https://en.wikipedia.org/wiki/ISO_8601).
+
+* `format` : date time format for the resulting dimension value, in [Joda Time DateTimeFormat](http://www.joda.org/joda-time/apidocs/org/joda/time/format/DateTimeFormat.html).
+* `locale` : locale (language and country) to use, given as an [IETF BCP 47 language tag](http://www.oracle.com/technetwork/java/javase/java8locales-2095355.html#util-text), e.g. `en-US`, `en-GB`, `fr-FR`, `fr-CA`, etc.
+* `timeZone` : time zone to use in [IANA tz database format](http://en.wikipedia.org/wiki/List_of_tz_database_time_zones), e.g. `Europe/Berlin` (this can possibly be different from the aggregation time zone)

 ```json
-{ "type" : "time", "timeFormat" : <input_format>, "resultFormat" : <output_format> }
+{ "type" : "timeFormat",
+  "format" : <output_format>,
+  "timeZone" : <time_zone> (optional),
+  "locale" : <locale> (optional) }
 ```

-### JavascriptDimExtractionFn
+For example, the following dimension spec returns the day of the week for Montréal in French:

-Returns the dimension value as transformed by the given JavaScript function.
+```json
+{
+  "type" : "extraction",
+  "dimension" : "__time",
+  "outputName" : "dayOfWeek",
+  "extractionFn" : {
+    "type" : "timeFormat",
+    "format" : "EEEE",
+    "timeZone" : "America/Montreal",
+    "locale" : "fr"
+  }
+}
+```

-Example
+### Time Parsing Extraction Function

+Parses dimension values as timestamps using the given input format,
+and returns them formatted using the given output format.
+
+Note: if you are working with the `__time` dimension, you should consider using the
+[time format extraction function](#time-format-extraction-function) instead,
+which operates on the time value directly, as opposed to string values.
+
+Time formats are described in the
+[SimpleDateFormat documentation](http://icu-project.org/apiref/icu4j/com/ibm/icu/text/SimpleDateFormat.html).
+
+```json
+{ "type" : "time",
+  "timeFormat" : <input_format>,
+  "resultFormat" : <output_format> }
+```
+
+### Javascript Extraction Function
+
+Returns the dimension value as transformed by the given JavaScript function.
+
+For regular dimensions, the input value is passed as a string.
+
+For the `__time` dimension, the input value is passed as a number
+representing the number of milliseconds since January 1, 1970 UTC.
+
+Example for a regular dimension:

 ```json
 {
   "type" : "javascript",
   "function" : "function(str) { return str.substr(0, 3); }"
 }
 ```

+Example for the `__time` dimension:
+
+```json
+{
+  "type" : "javascript",
+  "function" : "function(t) { return 'Second ' + Math.floor((t % 60000) / 1000); }"
+}
+```
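The regex semantics documented above ("first matching group, else the value unchanged") can be modeled outside Druid in a few lines. A minimal, hypothetical Java sketch for illustration, not code from this commit:

```java
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class RegexExtractionSketch
{
  // Mirrors the documented behavior: return the first capture group on a match,
  // otherwise return the dimension value unchanged.
  static String extract(Pattern pattern, String dimValue)
  {
    final Matcher matcher = pattern.matcher(dimValue);
    return matcher.find() ? matcher.group(1) : dimValue;
  }

  public static void main(String[] args)
  {
    final Pattern threeLetters = Pattern.compile("(\\w\\w\\w).*");
    // prints "Mon", "Tue", "Wed", matching the documentation example
    for (String day : new String[]{"Monday", "Tuesday", "Wednesday"}) {
      System.out.println(extract(threeLetters, day));
    }
  }
}
```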
@@ -1,6 +1,7 @@
 ---
 layout: doc_page
 ---
+
 # Aggregation Granularity
 The granularity field determines how data gets bucketed across the time dimension, or how it gets aggregated by hour, day, minute, etc.

@@ -256,7 +256,7 @@ public class IngestSegmentFirehoseFactory implements FirehoseFactory<InputRowPar

       final Map<String, DimensionSelector> dimSelectors = Maps.newHashMap();
       for (String dim : dims) {
-        final DimensionSelector dimSelector = cursor.makeDimensionSelector(dim);
+        final DimensionSelector dimSelector = cursor.makeDimensionSelector(dim, null);
         // dimSelector is null if the dimension is not present
         if (dimSelector != null) {
           dimSelectors.put(dim, dimSelector);
@@ -105,7 +105,7 @@ public class CardinalityAggregatorFactory implements AggregatorFactory
           @Override
           public DimensionSelector apply(@Nullable String input)
           {
-            return columnFactory.makeDimensionSelector(input);
+            return columnFactory.makeDimensionSelector(input, null);
           }
         }
     ), Predicates.notNull()
@@ -20,7 +20,7 @@ package io.druid.query.dimension;
 import com.fasterxml.jackson.annotation.JsonCreator;
 import com.fasterxml.jackson.annotation.JsonProperty;
 import com.metamx.common.StringUtils;
-import io.druid.query.extraction.DimExtractionFn;
+import io.druid.query.extraction.ExtractionFn;

 import java.nio.ByteBuffer;

@@ -59,7 +59,7 @@ public class DefaultDimensionSpec implements DimensionSpec
   }

   @Override
-  public DimExtractionFn getDimExtractionFn()
+  public ExtractionFn getExtractionFn()
   {
     return null;
   }

@@ -19,7 +19,7 @@ package io.druid.query.dimension;

 import com.fasterxml.jackson.annotation.JsonSubTypes;
 import com.fasterxml.jackson.annotation.JsonTypeInfo;
-import io.druid.query.extraction.DimExtractionFn;
+import io.druid.query.extraction.ExtractionFn;

 /**
  */
@@ -34,7 +34,7 @@ public interface DimensionSpec

   public String getOutputName();

-  public DimExtractionFn getDimExtractionFn();
+  public ExtractionFn getExtractionFn();

   public byte[] getCacheKey();

@@ -19,8 +19,9 @@ package io.druid.query.dimension;

 import com.fasterxml.jackson.annotation.JsonCreator;
 import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.common.base.Preconditions;
 import com.metamx.common.StringUtils;
-import io.druid.query.extraction.DimExtractionFn;
+import io.druid.query.extraction.ExtractionFn;

 import java.nio.ByteBuffer;

@@ -31,18 +32,23 @@ public class ExtractionDimensionSpec implements DimensionSpec
   private static final byte CACHE_TYPE_ID = 0x1;

   private final String dimension;
-  private final DimExtractionFn dimExtractionFn;
+  private final ExtractionFn extractionFn;
   private final String outputName;

   @JsonCreator
   public ExtractionDimensionSpec(
       @JsonProperty("dimension") String dimension,
       @JsonProperty("outputName") String outputName,
-      @JsonProperty("dimExtractionFn") DimExtractionFn dimExtractionFn
+      @JsonProperty("extractionFn") ExtractionFn extractionFn,
+      // for backwards compatibility
+      @Deprecated @JsonProperty("dimExtractionFn") ExtractionFn dimExtractionFn
   )
   {
+    Preconditions.checkNotNull(dimension, "dimension must not be null");
+    Preconditions.checkArgument(extractionFn != null || dimExtractionFn != null, "extractionFn must not be null");
+
     this.dimension = dimension;
-    this.dimExtractionFn = dimExtractionFn;
+    this.extractionFn = extractionFn != null ? extractionFn : dimExtractionFn;
+
     // Do null check for backwards compatibility
     this.outputName = outputName == null ? dimension : outputName;
@@ -64,16 +70,16 @@ public class ExtractionDimensionSpec implements DimensionSpec

   @Override
   @JsonProperty
-  public DimExtractionFn getDimExtractionFn()
+  public ExtractionFn getExtractionFn()
   {
-    return dimExtractionFn;
+    return extractionFn;
   }

   @Override
   public byte[] getCacheKey()
   {
     byte[] dimensionBytes = StringUtils.toUtf8(dimension);
-    byte[] dimExtractionFnBytes = dimExtractionFn.getCacheKey();
+    byte[] dimExtractionFnBytes = extractionFn.getCacheKey();

     return ByteBuffer.allocate(1 + dimensionBytes.length + dimExtractionFnBytes.length)
                      .put(CACHE_TYPE_ID)
@@ -85,7 +91,7 @@ public class ExtractionDimensionSpec implements DimensionSpec
   @Override
   public boolean preservesOrdering()
   {
-    return dimExtractionFn.preservesOrdering();
+    return extractionFn.preservesOrdering();
   }

   @Override
@@ -93,7 +99,7 @@ public class ExtractionDimensionSpec implements DimensionSpec
   {
     return "ExtractionDimensionSpec{" +
            "dimension='" + dimension + '\'' +
-           ", dimExtractionFn=" + dimExtractionFn +
+           ", extractionFn=" + extractionFn +
            ", outputName='" + outputName + '\'' +
            '}';
   }
@@ -106,7 +112,7 @@ public class ExtractionDimensionSpec implements DimensionSpec

     ExtractionDimensionSpec that = (ExtractionDimensionSpec) o;

-    if (dimExtractionFn != null ? !dimExtractionFn.equals(that.dimExtractionFn) : that.dimExtractionFn != null)
+    if (extractionFn != null ? !extractionFn.equals(that.extractionFn) : that.extractionFn != null)
       return false;
     if (dimension != null ? !dimension.equals(that.dimension) : that.dimension != null) return false;
     if (outputName != null ? !outputName.equals(that.outputName) : that.outputName != null) return false;
@@ -118,7 +124,7 @@ public class ExtractionDimensionSpec implements DimensionSpec
   public int hashCode()
   {
     int result = dimension != null ? dimension.hashCode() : 0;
-    result = 31 * result + (dimExtractionFn != null ? dimExtractionFn.hashCode() : 0);
+    result = 31 * result + (extractionFn != null ? extractionFn.hashCode() : 0);
     result = 31 * result + (outputName != null ? outputName.hashCode() : 0);
     return result;
   }
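The constructor change above is a standard Jackson pattern for renaming a property without breaking old clients: both names bind via the creator, and the new one wins when both are present. A stand-alone sketch of the same pattern, with a hypothetical class and a plain `String` standing in for the extraction function:

```java
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.ObjectMapper;

public class RenamedPropertySketch
{
  private final String fn;

  @JsonCreator
  public RenamedPropertySketch(
      @JsonProperty("extractionFn") String extractionFn,
      @Deprecated @JsonProperty("dimExtractionFn") String dimExtractionFn
  )
  {
    // prefer the new property name, fall back to the deprecated one
    this.fn = extractionFn != null ? extractionFn : dimExtractionFn;
  }

  @JsonProperty
  public String getExtractionFn()
  {
    return fn;
  }

  public static void main(String[] args) throws Exception
  {
    final ObjectMapper mapper = new ObjectMapper();
    // old clients keep working: prints "regex"
    System.out.println(mapper.readValue("{\"dimExtractionFn\":\"regex\"}", RenamedPropertySketch.class).getExtractionFn());
    // the new name takes precedence: prints "time"
    System.out.println(mapper.readValue("{\"extractionFn\":\"time\",\"dimExtractionFn\":\"regex\"}", RenamedPropertySketch.class).getExtractionFn());
  }
}
```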
@@ -1,73 +1,35 @@
 /*
- * Druid - a distributed column store.
- * Copyright 2012 - 2015 Metamarkets Group Inc.
+ * Licensed to Metamarkets Group Inc. (Metamarkets) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Metamarkets licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
  *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
  * http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
  */

 package io.druid.query.extraction;

-import com.fasterxml.jackson.annotation.JsonSubTypes;
-import com.fasterxml.jackson.annotation.JsonTypeInfo;
-
-/**
- */
-@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type")
-@JsonSubTypes(value = {
-    @JsonSubTypes.Type(name = "time", value = TimeDimExtractionFn.class),
-    @JsonSubTypes.Type(name = "regex", value = RegexDimExtractionFn.class),
-    @JsonSubTypes.Type(name = "partial", value = MatchingDimExtractionFn.class),
-    @JsonSubTypes.Type(name = "searchQuery", value = SearchQuerySpecDimExtractionFn.class),
-    @JsonSubTypes.Type(name = "javascript", value = JavascriptDimExtractionFn.class)
-})
-/**
- * A DimExtractionFn is a function that can be used to modify the values of a dimension column.
- *
- * A simple example of the type of operation this enables is the RegexDimExtractionFn which applies a
- * regular expression with a capture group. When the regular expression matches the value of a dimension,
- * the value captured by the group is used for grouping operations instead of the dimension value.
- */
-public interface DimExtractionFn
+public abstract class DimExtractionFn implements ExtractionFn
 {
-  /**
-   * Returns a byte[] unique to all concrete implementations of DimExtractionFn. This byte[] is used to
-   * generate a cache key for the specific query.
-   *
-   * @return a byte[] unique to all concrete implementations of DimExtractionFn
-   */
-  public byte[] getCacheKey();
+  @Override
+  public String apply(Object value)
+  {
+    return apply(value.toString());
+  }

-  /**
-   * The "extraction" function. This should map a dimension value into some other value.
-   *
-   * In order to maintain the "null and empty string are equivalent" semantics that Druid provides, the
-   * empty string is considered invalid output for this method and should instead return null. This is
-   * a contract on the method rather than enforced at a lower level in order to eliminate a global check
-   * for extraction functions that do not already need one.
-   *
-   * @param dimValue the original value of the dimension
-   * @return a value that should be used instead of the original
-   */
-  public String apply(String dimValue);
-
-  /**
-   * Offers information on whether the extraction will preserve the original ordering of the values.
-   *
-   * Some optimizations of queries are possible if ordering is preserved. Null values *do* count towards
-   * ordering.
-   *
-   * @return true if ordering is preserved, false otherwise
-   */
-  public boolean preservesOrdering();
+  @Override
+  public String apply(long value)
+  {
+    return apply(Long.toString(value));
+  }
 }
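With the split above, existing string-based functions implement only `apply(String)`, and the `DimExtractionFn` base class adapts `Object` and `long` inputs to it via `toString`. A minimal hypothetical subclass (not part of this commit) showing what an implementor now writes:

```java
// Assumes the DimExtractionFn / ExtractionFn definitions shown in this commit.
public class UppercaseExtractionFn extends DimExtractionFn
{
  @Override
  public String apply(String dimValue)
  {
    // Object and long inputs are routed here by the base class
    return dimValue == null ? null : dimValue.toUpperCase();
  }

  @Override
  public byte[] getCacheKey()
  {
    return new byte[]{(byte) 0x7F}; // placeholder id for this sketch only
  }

  @Override
  public boolean preservesOrdering()
  {
    return false; // case folding can reorder values, so be conservative
  }
}
```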
@@ -0,0 +1,78 @@
+/*
+ * Druid - a distributed column store.
+ * Copyright 2012 - 2015 Metamarkets Group Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.druid.query.extraction;
+
+import com.fasterxml.jackson.annotation.JsonSubTypes;
+import com.fasterxml.jackson.annotation.JsonTypeInfo;
+
+/**
+ */
+@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type")
+@JsonSubTypes(value = {
+    @JsonSubTypes.Type(name = "time", value = TimeDimExtractionFn.class),
+    @JsonSubTypes.Type(name = "regex", value = RegexDimExtractionFn.class),
+    @JsonSubTypes.Type(name = "partial", value = MatchingDimExtractionFn.class),
+    @JsonSubTypes.Type(name = "searchQuery", value = SearchQuerySpecDimExtractionFn.class),
+    @JsonSubTypes.Type(name = "javascript", value = JavascriptExtractionFn.class),
+    @JsonSubTypes.Type(name = "timeFormat", value = TimeFormatExtractionFn.class)
+})
+/**
+ * An ExtractionFn is a function that can be used to transform the values of a column (typically a dimension).
+ *
+ * A simple example of the type of operation this enables is the RegexDimExtractionFn which applies a
+ * regular expression with a capture group. When the regular expression matches the value of a dimension,
+ * the value captured by the group is used for grouping operations instead of the dimension value.
+ */
+public interface ExtractionFn
+{
+  /**
+   * Returns a byte[] unique to all concrete implementations of ExtractionFn. This byte[] is used to
+   * generate a cache key for the specific query.
+   *
+   * @return a byte[] unique to all concrete implementations of ExtractionFn
+   */
+  public byte[] getCacheKey();
+
+  /**
+   * The "extraction" function. This should map a value into some other String value.
+   *
+   * In order to maintain the "null and empty string are equivalent" semantics that Druid provides, the
+   * empty string is considered invalid output for this method and should instead return null. This is
+   * a contract on the method rather than enforced at a lower level in order to eliminate a global check
+   * for extraction functions that do not already need one.
+   *
+   * @param value the original value of the dimension
+   * @return a value that should be used instead of the original
+   */
+  public String apply(Object value);
+
+  public String apply(String value);
+
+  public String apply(long value);
+
+  /**
+   * Offers information on whether the extraction will preserve the original ordering of the values.
+   *
+   * Some optimizations of queries are possible if ordering is preserved. Null values *do* count towards
+   * ordering.
+   *
+   * @return true if ordering is preserved, false otherwise
+   */
+  public boolean preservesOrdering();
+}
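The `getCacheKey()` contract relies on the convention visible throughout this commit: a one-byte type id followed by the UTF-8 bytes of the function's parameters, so distinct functions (and distinct arguments) never collide in the query cache. A stand-alone sketch of that convention; the type id here is a placeholder, not a real Druid id:

```java
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

public class CacheKeySketch
{
  private static final byte HYPOTHETICAL_TYPE_ID = 0x7F;

  // one type byte, then the parameter bytes, mirroring e.g. ExtractionDimensionSpec.getCacheKey()
  static byte[] cacheKey(String expr)
  {
    final byte[] exprBytes = expr.getBytes(StandardCharsets.UTF_8);
    return ByteBuffer.allocate(1 + exprBytes.length)
                     .put(HYPOTHETICAL_TYPE_ID)
                     .put(exprBytes)
                     .array();
  }

  public static void main(String[] args)
  {
    // two different expressions yield two different keys
    System.out.println(cacheKey("(\\w\\w\\w).*").length);
    System.out.println(cacheKey(".*").length);
  }
}
```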
@@ -22,17 +22,15 @@ import com.fasterxml.jackson.annotation.JsonProperty;
 import com.google.common.base.Function;
-import com.google.common.base.Strings;
-import com.metamx.common.StringUtils;
+import com.google.common.base.Strings;
+import com.metamx.common.StringUtils;
 import org.mozilla.javascript.Context;
 import org.mozilla.javascript.ContextFactory;
 import org.mozilla.javascript.ScriptableObject;

 import java.nio.ByteBuffer;

-public class JavascriptDimExtractionFn implements DimExtractionFn
+public class JavascriptExtractionFn implements ExtractionFn
 {
-  private static Function<String, String> compile(String function) {
+  private static Function<Object, String> compile(String function) {
     final ContextFactory contextFactory = ContextFactory.getGlobal();
     final Context context = contextFactory.enterContext();
     context.setOptimizationLevel(9);
@@ -43,9 +41,9 @@ public class JavascriptDimExtractionFn implements DimExtractionFn
     Context.exit();


-    return new Function<String, String>()
+    return new Function<Object, String>()
     {
-      public String apply(String input)
+      public String apply(Object input)
       {
         // ideally we need a close() function to discard the context once it is not used anymore
         Context cx = Context.getCurrentContext();
@@ -53,7 +51,7 @@ public class JavascriptDimExtractionFn implements DimExtractionFn
           cx = contextFactory.enterContext();
         }

-        final Object res = fn.call(cx, scope, scope, new String[]{input});
+        final Object res = fn.call(cx, scope, scope, new Object[]{input});
         return res != null ? Context.toString(res) : null;
       }
     };
@@ -62,10 +60,10 @@ public class JavascriptDimExtractionFn implements DimExtractionFn
   private static final byte CACHE_TYPE_ID = 0x4;

   private final String function;
-  private final Function<String, String> fn;
+  private final Function<Object, String> fn;

   @JsonCreator
-  public JavascriptDimExtractionFn(
+  public JavascriptExtractionFn(
       @JsonProperty("function") String function
   )
   {
@@ -90,10 +88,21 @@ public class JavascriptDimExtractionFn implements DimExtractionFn
   }

   @Override
-  public String apply(String dimValue)
+  public String apply(Object value)
   {
-    String retVal = fn.apply(dimValue);
-    return Strings.isNullOrEmpty(retVal) ? null : retVal;
+    return Strings.emptyToNull(fn.apply(value));
   }

+  @Override
+  public String apply(String value)
+  {
+    return this.apply((Object) value);
+  }
+
+  @Override
+  public String apply(long value)
+  {
+    return this.apply((Long) value);
+  }
+
   @Override
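The widened `Function<Object, String>` means the JavaScript function now receives the raw Java value, e.g. a `Long` holding `__time` millis, instead of a pre-stringified dimension value. A stand-alone Rhino sketch of that call path, assuming the same `org.mozilla.javascript` dependency the class above uses:

```java
import org.mozilla.javascript.Context;
import org.mozilla.javascript.Function;
import org.mozilla.javascript.ScriptableObject;

public class JsExtractionSketch
{
  public static void main(String[] args)
  {
    final Context cx = Context.enter();
    try {
      final ScriptableObject scope = cx.initStandardObjects();
      // same style of function string as the documentation's __time example
      final Function fn = cx.compileFunction(
          scope,
          "function(t) { return 'Second ' + Math.floor((t % 60000) / 1000); }",
          "fn", 1, null
      );
      // pass the value as a Long, not a String
      final Object res = fn.call(cx, scope, scope, new Object[]{1425508925000L});
      System.out.println(Context.toString(res)); // prints "Second 5"
    }
    finally {
      Context.exit();
    }
  }
}
```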
@@ -27,7 +27,7 @@ import java.util.regex.Pattern;

 /**
  */
-public class MatchingDimExtractionFn implements DimExtractionFn
+public class MatchingDimExtractionFn extends DimExtractionFn
 {
   private static final byte CACHE_TYPE_ID = 0x2;

@@ -27,7 +27,7 @@ import java.util.regex.Pattern;

 /**
  */
-public class RegexDimExtractionFn implements DimExtractionFn
+public class RegexDimExtractionFn extends DimExtractionFn
 {
   private static final byte CACHE_TYPE_ID = 0x1;

@@ -25,7 +25,7 @@ import java.nio.ByteBuffer;

 /**
  */
-public class SearchQuerySpecDimExtractionFn implements DimExtractionFn
+public class SearchQuerySpecDimExtractionFn extends DimExtractionFn
 {
   private static final byte CACHE_TYPE_ID = 0x3;

@@ -28,7 +28,7 @@ import java.util.Date;

 /**
  */
-public class TimeDimExtractionFn implements DimExtractionFn
+public class TimeDimExtractionFn extends DimExtractionFn
 {
   private static final byte CACHE_TYPE_ID = 0x0;

@@ -0,0 +1,113 @@
+/*
+ * Licensed to Metamarkets Group Inc. (Metamarkets) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Metamarkets licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package io.druid.query.extraction;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.common.base.Preconditions;
+import com.metamx.common.StringUtils;
+import org.joda.time.DateTime;
+import org.joda.time.DateTimeZone;
+import org.joda.time.format.DateTimeFormat;
+import org.joda.time.format.DateTimeFormatter;
+
+import java.nio.ByteBuffer;
+import java.util.Locale;
+
+public class TimeFormatExtractionFn implements ExtractionFn
+{
+  private static final byte CACHE_TYPE_ID = 0x5;
+
+  private final DateTimeZone tz;
+  private final String pattern;
+  private final Locale locale;
+  private final DateTimeFormatter formatter;
+
+  public TimeFormatExtractionFn(
+      @JsonProperty("format") String pattern,
+      @JsonProperty("timeZone") DateTimeZone tz,
+      @JsonProperty("locale") String localeString
+  )
+  {
+    Preconditions.checkArgument(pattern != null, "format cannot be null");
+
+    this.pattern = pattern;
+    this.tz = tz;
+    this.locale = localeString == null ? null : Locale.forLanguageTag(localeString);
+    this.formatter = DateTimeFormat.forPattern(pattern)
+                                   .withZone(tz == null ? DateTimeZone.UTC : tz)
+                                   .withLocale(locale);
+  }
+
+  @JsonProperty
+  public DateTimeZone getTimeZone()
+  {
+    return tz;
+  }
+
+  @JsonProperty
+  public String getFormat()
+  {
+    return pattern;
+  }
+
+  @JsonProperty
+  public String getLocale()
+  {
+    if (locale != null) {
+      return locale.toLanguageTag();
+    } else {
+      return null;
+    }
+  }
+
+  @Override
+  public byte[] getCacheKey()
+  {
+    byte[] exprBytes = StringUtils.toUtf8(pattern + "\u0001" + tz.getID() + "\u0001" + locale.toLanguageTag());
+    return ByteBuffer.allocate(1 + exprBytes.length)
+                     .put(CACHE_TYPE_ID)
+                     .put(exprBytes)
+                     .array();
+  }
+
+  @Override
+  public String apply(long value)
+  {
+    return formatter.print(value);
+  }
+
+  @Override
+  public String apply(Object value)
+  {
+    return formatter.print(new DateTime(value));
+  }
+
+  @Override
+  public String apply(String value)
+  {
+    return apply((Object) value);
+  }
+
+  @Override
+  public boolean preservesOrdering()
+  {
+    return false;
+  }
+}
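A usage sketch for the new class above, mirroring the documentation example (day of week for Montréal in French). The time zone and locale are supplied explicitly here because `getCacheKey()` as written dereferences both:

```java
import org.joda.time.DateTimeZone;

public class TimeFormatSketch
{
  public static void main(String[] args)
  {
    final TimeFormatExtractionFn fn = new TimeFormatExtractionFn(
        "EEEE",
        DateTimeZone.forID("America/Montreal"),
        "fr"
    );
    // 1425470400000 is 2015-03-04T12:00:00Z, a Wednesday in Montreal; prints "mercredi"
    System.out.println(fn.apply(1425470400000L));
  }
}
```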
@@ -21,7 +21,7 @@ import com.fasterxml.jackson.annotation.JsonCreator;
 import com.fasterxml.jackson.annotation.JsonProperty;
 import com.google.common.base.Preconditions;
 import com.metamx.common.StringUtils;
-import io.druid.query.extraction.DimExtractionFn;
+import io.druid.query.extraction.ExtractionFn;

 import java.nio.ByteBuffer;

@@ -31,22 +31,24 @@ public class ExtractionDimFilter implements DimFilter
 {
   private final String dimension;
   private final String value;
-  private final DimExtractionFn dimExtractionFn;
+  private final ExtractionFn extractionFn;

   @JsonCreator
   public ExtractionDimFilter(
       @JsonProperty("dimension") String dimension,
       @JsonProperty("value") String value,
-      @JsonProperty("dimExtractionFn") DimExtractionFn dimExtractionFn
+      @JsonProperty("extractionFn") ExtractionFn extractionFn,
+      // for backwards compatibility
+      @Deprecated @JsonProperty("dimExtractionFn") ExtractionFn dimExtractionFn
   )
   {
     Preconditions.checkArgument(dimension != null, "dimension must not be null");
     Preconditions.checkArgument(value != null, "value must not be null");
-    Preconditions.checkArgument(dimExtractionFn != null, "extraction function must not be null");
+    Preconditions.checkArgument(extractionFn != null || dimExtractionFn != null, "extraction function must not be null");

     this.dimension = dimension;
     this.value = value;
-    this.dimExtractionFn = dimExtractionFn;
+    this.extractionFn = extractionFn != null ? extractionFn : dimExtractionFn;
   }

   @JsonProperty
@@ -62,9 +64,9 @@ public class ExtractionDimFilter implements DimFilter
   }

   @JsonProperty
-  public DimExtractionFn getDimExtractionFn()
+  public ExtractionFn getExtractionFn()
   {
-    return dimExtractionFn;
+    return extractionFn;
   }

   @Override
@@ -83,6 +85,6 @@ public class ExtractionDimFilter implements DimFilter
   @Override
   public String toString()
   {
-    return String.format("%s(%s) = %s", dimExtractionFn, dimension, value);
+    return String.format("%s(%s) = %s", extractionFn, dimension, value);
   }
 }
@@ -42,10 +42,11 @@ import io.druid.query.aggregation.AggregatorFactory;
 import io.druid.query.aggregation.BufferAggregator;
 import io.druid.query.aggregation.PostAggregator;
 import io.druid.query.dimension.DimensionSpec;
-import io.druid.query.extraction.DimExtractionFn;
+import io.druid.query.extraction.ExtractionFn;
 import io.druid.segment.Cursor;
 import io.druid.segment.DimensionSelector;
 import io.druid.segment.StorageAdapter;
+import io.druid.segment.column.Column;
 import io.druid.segment.data.IndexedInts;
 import io.druid.segment.filter.Filters;
 import org.joda.time.DateTime;
@@ -300,7 +301,7 @@ public class GroupByQueryEngine
     private List<ByteBuffer> unprocessedKeys;
     private Iterator<Row> delegate;

-    public RowIterator(GroupByQuery query, Cursor cursor, ByteBuffer metricsBuffer, GroupByQueryConfig config)
+    public RowIterator(GroupByQuery query, final Cursor cursor, ByteBuffer metricsBuffer, GroupByQueryConfig config)
     {
       this.query = query;
       this.cursor = cursor;
@@ -312,9 +313,13 @@ public class GroupByQueryEngine
       dimensionSpecs = query.getDimensions();
       dimensions = Lists.newArrayListWithExpectedSize(dimensionSpecs.size());
       dimNames = Lists.newArrayListWithExpectedSize(dimensionSpecs.size());

       for (int i = 0; i < dimensionSpecs.size(); ++i) {
         final DimensionSpec dimSpec = dimensionSpecs.get(i);
-        final DimensionSelector selector = cursor.makeDimensionSelector(dimSpec.getDimension());
+        final DimensionSelector selector = cursor.makeDimensionSelector(
+            dimSpec.getDimension(),
+            dimSpec.getExtractionFn()
+        );
         if (selector != null) {
           dimensions.add(selector);
           dimNames.add(dimSpec.getOutputName());
@@ -395,14 +400,9 @@ public class GroupByQueryEngine
           ByteBuffer keyBuffer = input.getKey().duplicate();
           for (int i = 0; i < dimensions.size(); ++i) {
             final DimensionSelector dimSelector = dimensions.get(i);
-            final DimExtractionFn fn = dimensionSpecs.get(i).getDimExtractionFn();
             final int dimVal = keyBuffer.getInt();
             if (dimSelector.getValueCardinality() != dimVal) {
-              if (fn != null) {
-                theEvent.put(dimNames.get(i), fn.apply(dimSelector.lookupName(dimVal)));
-              } else {
-                theEvent.put(dimNames.get(i), dimSelector.lookupName(dimVal));
-              }
+              theEvent.put(dimNames.get(i), dimSelector.lookupName(dimVal));
             }
           }

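The removed branch above is the essence of the push-down: the engine no longer post-processes values, because the selector returned by the storage adapter applies the extraction function inside `lookupName()` itself. A reduced sketch with hypothetical, simplified types (only `lookupName()` from Druid's `DimensionSelector` is modeled):

```java
import java.util.function.Function;

interface SimpleSelector
{
  String lookupName(int id);
}

class ExtractingSelector implements SimpleSelector
{
  private final SimpleSelector delegate;
  private final Function<String, String> extractionFn; // null means "no transform"

  ExtractingSelector(SimpleSelector delegate, Function<String, String> extractionFn)
  {
    this.delegate = delegate;
    this.extractionFn = extractionFn;
  }

  @Override
  public String lookupName(int id)
  {
    // the engine just calls lookupName(); extraction happens transparently here
    final String value = delegate.lookupName(id);
    return extractionFn == null ? value : extractionFn.apply(value);
  }
}
```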
@@ -161,7 +161,8 @@ public class SearchQueryRunner implements QueryRunner<Result<SearchResultValue>>

         Map<String, DimensionSelector> dimSelectors = Maps.newHashMap();
         for (String dim : dimsToSearch) {
-          dimSelectors.put(dim, cursor.makeDimensionSelector(dim));
+          // switching to using DimensionSpec for search would allow the use of extractionFn here.
+          dimSelectors.put(dim, cursor.makeDimensionSelector(dim, null));
         }

         while (!cursor.isDone()) {
@@ -86,7 +86,8 @@ public class SelectQueryEngine

         final Map<String, DimensionSelector> dimSelectors = Maps.newHashMap();
         for (String dim : dims) {
-          final DimensionSelector dimSelector = cursor.makeDimensionSelector(dim);
+          // switching to using DimensionSpec for select would allow the use of extractionFn here.
+          final DimensionSelector dimSelector = cursor.makeDimensionSelector(dim, null);
           dimSelectors.put(dim, dimSelector);
         }

@@ -19,6 +19,7 @@ package io.druid.query.topn;

 import com.google.common.collect.Maps;
 import io.druid.query.aggregation.Aggregator;
+import io.druid.query.extraction.ExtractionFn;
 import io.druid.segment.Capabilities;
 import io.druid.segment.Cursor;
 import io.druid.segment.DimensionSelector;
@@ -44,7 +45,8 @@ public class DimExtractionTopNAlgorithm extends BaseTopNAlgorithm<Aggregator[][]

   @Override
   public TopNParams makeInitParams(
-      final DimensionSelector dimSelector, final Cursor cursor
+      final DimensionSelector dimSelector,
+      final Cursor cursor
   )
   {
     return new TopNParams(
@@ -64,11 +66,10 @@ public class DimExtractionTopNAlgorithm extends BaseTopNAlgorithm<Aggregator[][]
         params.getCardinality()
     );

-    if (!query.getDimensionSpec().preservesOrdering()) {
-      return provider.build();
-    }
-
-    return query.getTopNMetricSpec().configureOptimizer(provider).build();
+    // Unlike regular topN we cannot rely on ordering to optimize.
+    // Optimization possibly requires a reverse lookup from value to ID, which is
+    // not possible when applying an extraction function.
+    return provider.build();
   }

   @Override
@@ -98,11 +99,11 @@ public class DimExtractionTopNAlgorithm extends BaseTopNAlgorithm<Aggregator[][]
     final IndexedInts dimValues = dimSelector.getRow();

     for (int i = 0; i < dimValues.size(); ++i) {
-      final int dimIndex = dimValues.get(i);
-
+      final int dimIndex = dimValues.get(i);
       Aggregator[] theAggregators = rowSelector[dimIndex];
       if (theAggregators == null) {
-        String key = query.getDimensionSpec().getDimExtractionFn().apply(dimSelector.lookupName(dimIndex));
+        final String key = dimSelector.lookupName(dimIndex);
        theAggregators = aggregatesStore.get(key);
         if (theAggregators == null) {
           theAggregators = makeAggregators(cursor, query.getAggregatorSpecs());
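Why the ordering optimization had to go: even a trivial extraction function can invert dictionary order, so sorted dictionary ids no longer imply sorted output values, and many inputs may map to one output, making a value-to-id reverse lookup ill-defined. A tiny stand-alone illustration:

```java
public class OrderingSketch
{
  public static void main(String[] args)
  {
    final String[] sortedDictionary = {"apple", "banana", "cherry"};
    for (String value : sortedDictionary) {
      // reversing each value yields "elppa", "ananab", "yrrehc":
      // the extracted outputs are no longer in sorted order
      System.out.println(new StringBuilder(value).reverse());
    }
  }
}
```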
@@ -0,0 +1,139 @@
+/*
+ * Licensed to Metamarkets Group Inc. (Metamarkets) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Metamarkets licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package io.druid.query.topn;
+
+import com.google.common.collect.Maps;
+import io.druid.query.aggregation.Aggregator;
+import io.druid.query.extraction.ExtractionFn;
+import io.druid.segment.Capabilities;
+import io.druid.segment.Cursor;
+import io.druid.segment.DimensionSelector;
+import io.druid.segment.LongColumnSelector;
+import io.druid.segment.column.Column;
+
+import java.util.Map;
+
+public class TimeExtractionTopNAlgorithm extends BaseTopNAlgorithm<int[], Map<String, Aggregator[]>, TopNParams>
+{
+  public static final int[] EMPTY_INTS = new int[]{};
+  private final TopNQuery query;
+
+  public TimeExtractionTopNAlgorithm(Capabilities capabilities, TopNQuery query)
+  {
+    super(capabilities);
+    this.query = query;
+  }
+
+
+  @Override
+  public TopNParams makeInitParams(DimensionSelector dimSelector, Cursor cursor)
+  {
+    return new TopNParams(
+        dimSelector,
+        cursor,
+        dimSelector.getValueCardinality(),
+        Integer.MAX_VALUE
+    );
+  }
+
+  @Override
+  protected int[] makeDimValSelector(TopNParams params, int numProcessed, int numToProcess)
+  {
+    return EMPTY_INTS;
+  }
+
+  @Override
+  protected int[] updateDimValSelector(int[] dimValSelector, int numProcessed, int numToProcess)
+  {
+    return dimValSelector;
+  }
+
+  @Override
+  protected Map<String, Aggregator[]> makeDimValAggregateStore(TopNParams params)
+  {
+    return Maps.newHashMap();
+  }
+
+  @Override
+  protected void scanAndAggregate(
+      TopNParams params, int[] dimValSelector, Map<String, Aggregator[]> aggregatesStore, int numProcessed
+  )
+  {
+    final Cursor cursor = params.getCursor();
+    final DimensionSelector dimSelector = params.getDimSelector();
+
+    while (!cursor.isDone()) {
+      final String key = dimSelector.lookupName(dimSelector.getRow().get(0));
+
+      Aggregator[] theAggregators = aggregatesStore.get(key);
+      if (theAggregators == null) {
+        theAggregators = makeAggregators(cursor, query.getAggregatorSpecs());
+        aggregatesStore.put(key, theAggregators);
+      }
+
+      for (Aggregator aggregator : theAggregators) {
+        aggregator.aggregate();
+      }
+
+      cursor.advance();
+    }
+  }
+
+  @Override
+  protected void updateResults(
+      TopNParams params,
+      int[] dimValSelector,
+      Map<String, Aggregator[]> aggregatesStore,
+      TopNResultBuilder resultBuilder
+  )
+  {
+    for (Map.Entry<String, Aggregator[]> entry : aggregatesStore.entrySet()) {
+      Aggregator[] aggs = entry.getValue();
+      if (aggs != null && aggs.length > 0) {
+        Object[] vals = new Object[aggs.length];
+        for (int i = 0; i < aggs.length; i++) {
+          vals[i] = aggs[i].get();
+        }
+
+        resultBuilder.addEntry(
+            entry.getKey(),
+            entry.getKey(),
+            vals
+        );
+      }
+    }
+  }
+
+  @Override
+  protected void closeAggregators(Map<String, Aggregator[]> stringMap)
+  {
+    for (Aggregator[] aggregators : stringMap.values()) {
+      for (Aggregator agg : aggregators) {
+        agg.close();
+      }
+    }
+  }
+
+  @Override
+  public void cleanup(TopNParams params)
+  {
+
+  }
+}
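The scan loop above, reduced to a stand-alone sketch: since the cardinality of an extracted `__time` value is unknown up front, rows are grouped in a plain hash map keyed by the already-extracted string, rather than the array indexed by dictionary id that `DimExtractionTopNAlgorithm` uses. Names here are hypothetical:

```java
import java.util.HashMap;
import java.util.Map;

public class TimeTopNSketch
{
  public static void main(String[] args)
  {
    final long[] timestamps = {1000L, 61000L, 2000L, 62000L, 121000L};
    final Map<String, Long> counts = new HashMap<>();
    for (long t : timestamps) {
      // stand-in for extractionFn.apply(t), e.g. a timeFormat on minute buckets
      final String key = "minute " + (t / 60000);
      counts.merge(key, 1L, Long::sum);
    }
    System.out.println(counts); // {minute 0=2, minute 1=2, minute 2=1}
  }
}
```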
@@ -24,7 +24,7 @@ public class TopNAlgorithmSelector
   private final int cardinality;
   private final int numBytesPerRecord;

-  private volatile boolean hasDimExtractionFn;
+  private volatile boolean hasExtractionFn;
   private volatile boolean aggregateAllMetrics;
   private volatile boolean aggregateTopNMetricFirst;

@@ -34,9 +34,9 @@ public class TopNAlgorithmSelector
     this.numBytesPerRecord = numBytesPerRecord;
   }

-  public void setHasDimExtractionFn(boolean hasDimExtractionFn)
+  public void setHasExtractionFn(boolean hasExtractionFn)
   {
-    this.hasDimExtractionFn = hasDimExtractionFn;
+    this.hasExtractionFn = hasExtractionFn;
   }

   public void setAggregateAllMetrics(boolean aggregateAllMetrics)
@@ -53,9 +53,9 @@ public class TopNAlgorithmSelector
     }
   }

-  public boolean isHasDimExtractionFn()
+  public boolean isHasExtractionFn()
   {
-    return hasDimExtractionFn;
+    return hasExtractionFn;
   }

   public boolean isAggregateAllMetrics()
@@ -41,7 +41,10 @@ public class TopNMapFn implements Function<Cursor, Result<TopNResultValue>>
   @SuppressWarnings("unchecked")
   public Result<TopNResultValue> apply(Cursor cursor)
   {
-    final DimensionSelector dimSelector = cursor.makeDimensionSelector(query.getDimensionSpec().getDimension());
+    final DimensionSelector dimSelector = cursor.makeDimensionSelector(
+        query.getDimensionSpec().getDimension(),
+        query.getDimensionSpec().getExtractionFn()
+    );
     if (dimSelector == null) {
       return null;
     }
@@ -139,8 +139,8 @@ public class TopNQuery extends BaseQuery<Result<TopNResultValue>>

   public void initTopNAlgorithmSelector(TopNAlgorithmSelector selector)
   {
-    if (dimensionSpec.getDimExtractionFn() != null) {
-      selector.setHasDimExtractionFn(true);
+    if (dimensionSpec.getExtractionFn() != null) {
+      selector.setHasExtractionFn(true);
     }
     topNMetricSpec.initTopNAlgorithmSelector(selector);
   }
@@ -32,6 +32,7 @@ import io.druid.segment.Capabilities;
 import io.druid.segment.Cursor;
 import io.druid.segment.SegmentMissingException;
 import io.druid.segment.StorageAdapter;
+import io.druid.segment.column.Column;
 import io.druid.segment.filter.Filters;
 import org.joda.time.Interval;

@@ -88,7 +89,10 @@ public class TopNQueryEngine
   private Function<Cursor, Result<TopNResultValue>> getMapFn(TopNQuery query, final StorageAdapter adapter)
   {
     final Capabilities capabilities = adapter.getCapabilities();
-    final int cardinality = adapter.getDimensionCardinality(query.getDimensionSpec().getDimension());
+    final String dimension = query.getDimensionSpec().getDimension();
+
+    final int cardinality = adapter.getDimensionCardinality(dimension);
+
     int numBytesPerRecord = 0;
     for (AggregatorFactory aggregatorFactory : query.getAggregatorSpecs()) {
       numBytesPerRecord += aggregatorFactory.getMaxIntermediateSize();
@@ -97,8 +101,18 @@ public class TopNQueryEngine
     final TopNAlgorithmSelector selector = new TopNAlgorithmSelector(cardinality, numBytesPerRecord);
     query.initTopNAlgorithmSelector(selector);

-    TopNAlgorithm topNAlgorithm = null;
-    if (selector.isHasDimExtractionFn()) {
+    final TopNAlgorithm topNAlgorithm;
+    if (
+        selector.isHasExtractionFn() &&
+        // TimeExtractionTopNAlgorithm can work on any single-value dimension of type long.
+        // Once we have arbitrary dimension types, the following check should be replaced by checking
+        // that the column is of type long and single-value.
+        dimension.equals(Column.TIME_COLUMN_NAME)
+    ) {
+      // A special TimeExtractionTopNAlgorithm is required, since DimExtractionTopNAlgorithm
+      // currently relies on the dimension cardinality to support lexicographic sorting
+      topNAlgorithm = new TimeExtractionTopNAlgorithm(capabilities, query);
+    } else if (selector.isHasExtractionFn()) {
       topNAlgorithm = new DimExtractionTopNAlgorithm(capabilities, query);
     } else if (selector.isAggregateAllMetrics()) {
       topNAlgorithm = new PooledTopNAlgorithm(capabilities, query, bufferPool);
@@ -17,12 +17,16 @@

 package io.druid.segment;

+import io.druid.query.extraction.ExtractionFn;
+
+import javax.annotation.Nullable;
+
 /**
  * Factory class for MetricSelectors
  */
 public interface ColumnSelectorFactory
 {
-  public DimensionSelector makeDimensionSelector(String dimensionName);
+  public DimensionSelector makeDimensionSelector(String dimensionName, @Nullable ExtractionFn extractionFn);
   public FloatColumnSelector makeFloatColumnSelector(String columnName);
   public LongColumnSelector makeLongColumnSelector(String columnName);
   public ObjectColumnSelector makeObjectColumnSelector(String columnName);
@@ -19,6 +19,7 @@ package io.druid.segment;

 import com.google.common.base.Function;
 import com.google.common.base.Predicates;
+import com.google.common.base.Strings;
 import com.google.common.collect.Iterators;
 import com.google.common.collect.Maps;
 import com.google.common.collect.Sets;
@@ -27,6 +28,7 @@ import com.metamx.common.guava.Sequence;
 import com.metamx.common.guava.Sequences;
 import io.druid.granularity.QueryGranularity;
 import io.druid.query.QueryInterruptedException;
+import io.druid.query.extraction.ExtractionFn;
 import io.druid.query.filter.Filter;
 import io.druid.segment.column.Column;
 import io.druid.segment.column.ColumnCapabilities;
@@ -40,6 +42,7 @@ import io.druid.segment.data.Offset;
 import org.joda.time.DateTime;
 import org.joda.time.Interval;

+import javax.annotation.Nullable;
 import java.io.Closeable;
 import java.io.IOException;
 import java.util.Iterator;
@@ -96,7 +99,7 @@ public class QueryableIndexStorageAdapter implements StorageAdapter
       return 0;
     }
     if (!column.getCapabilities().isDictionaryEncoded()) {
-      throw new UnsupportedOperationException("Only know cardinality of dictionary encoded columns.");
+      return Integer.MAX_VALUE;
     }
     return column.getDictionaryEncoding().getCardinality();
   }
@@ -272,14 +275,18 @@ public class QueryableIndexStorageAdapter implements StorageAdapter
         }

         @Override
-        public DimensionSelector makeDimensionSelector(String dimension)
+        public DimensionSelector makeDimensionSelector(final String dimension, @Nullable final ExtractionFn extractionFn)
         {
-          DictionaryEncodedColumn cachedColumn = dictionaryColumnCache.get(dimension);
           final Column columnDesc = index.getColumn(dimension);
           if (columnDesc == null) {
            return NULL_DIMENSION_SELECTOR;
           }

+          if (dimension.equals(Column.TIME_COLUMN_NAME)) {
+            return new SingleScanTimeDimSelector(makeLongColumnSelector(dimension), extractionFn);
+          }
+
+          DictionaryEncodedColumn cachedColumn = dictionaryColumnCache.get(dimension);
           if (cachedColumn == null) {
             cachedColumn = columnDesc.getDictionaryEncoding();
             dictionaryColumnCache.put(dimension, cachedColumn);
@@ -308,13 +315,18 @@ public class QueryableIndexStorageAdapter implements StorageAdapter
               @Override
               public String lookupName(int id)
               {
-                final String retVal = column.lookupName(id);
-                return retVal == null ? "" : retVal;
+                final String value = column.lookupName(id);
+                return extractionFn == null ?
+                       Strings.nullToEmpty(value) :
+                       extractionFn.apply(Strings.nullToEmpty(value));
               }

               @Override
               public int lookupId(String name)
               {
+                if (extractionFn != null) {
+                  throw new UnsupportedOperationException("cannot perform lookup when applying an extraction function");
+                }
                 return column.lookupId(name);
               }
             };
@@ -356,12 +368,16 @@ public class QueryableIndexStorageAdapter implements StorageAdapter
             @Override
             public String lookupName(int id)
             {
-              return column.lookupName(id);
+              final String value = column.lookupName(id);
+              return extractionFn == null ? value : extractionFn.apply(value);
             }

             @Override
             public int lookupId(String name)
             {
+              if (extractionFn != null) {
+                throw new UnsupportedOperationException("cannot perform lookup when applying an extraction function");
+              }
               return column.lookupId(name);
             }
           };
@@ -702,4 +718,5 @@ public class QueryableIndexStorageAdapter implements StorageAdapter
     return currentOffset;
   }
 }
+
 }
@@ -29,7 +29,16 @@ public interface StorageAdapter extends CursorFactory
   public Interval getInterval();
   public Indexed<String> getAvailableDimensions();
   public Iterable<String> getAvailableMetrics();
-  public int getDimensionCardinality(String dimension);
+
+  /**
+   * Returns the number of distinct values for the given dimension column.
+   * For dimensions of unknown cardinality, e.g. `__time`, this currently returns
+   * Integer.MAX_VALUE.
+   *
+   * @param column the name of the dimension column
+   * @return the cardinality, or Integer.MAX_VALUE if unknown
+   */
+  public int getDimensionCardinality(String column);
   public DateTime getMinTime();
   public DateTime getMaxTime();
   public Capabilities getCapabilities();
@@ -20,7 +20,7 @@ package io.druid.segment.filter;
 import com.google.common.collect.Lists;
 import com.metamx.collections.bitmap.ImmutableBitmap;
 import com.metamx.collections.bitmap.WrappedConciseBitmap;
-import io.druid.query.extraction.DimExtractionFn;
+import io.druid.query.extraction.ExtractionFn;
 import io.druid.query.filter.BitmapIndexSelector;
 import io.druid.query.filter.Filter;
 import io.druid.query.filter.ValueMatcher;
@@ -36,12 +36,12 @@ public class ExtractionFilter implements Filter
 {
   private final String dimension;
   private final String value;
-  private final DimExtractionFn fn;
+  private final ExtractionFn fn;

   public ExtractionFilter(
       String dimension,
       String value,
-      DimExtractionFn fn
+      ExtractionFn fn
   )
   {
     this.dimension = dimension;
@@ -75,7 +75,7 @@ public class Filters
       filter = new ExtractionFilter(
           extractionDimFilter.getDimension(),
           extractionDimFilter.getValue(),
-          extractionDimFilter.getDimExtractionFn()
+          extractionDimFilter.getExtractionFn()
       );
     } else if (dimFilter instanceof RegexDimFilter) {
       final RegexDimFilter regexDimFilter = (RegexDimFilter) dimFilter;
@@ -58,7 +58,7 @@ public class SelectorFilter implements Filter
   @Override
   public ValueMatcher makeMatcher(ColumnSelectorFactory columnSelectorFactory)
   {
-    final DimensionSelector dimensionSelector = columnSelectorFactory.makeDimensionSelector(dimension);
+    final DimensionSelector dimensionSelector = columnSelectorFactory.makeDimensionSelector(dimension, null);

     // Missing columns match a null or empty string value and don't match anything else
     if (dimensionSelector == null) {
@@ -34,6 +34,7 @@ import io.druid.data.input.impl.SpatialDimensionSchema;
 import io.druid.granularity.QueryGranularity;
 import io.druid.query.aggregation.AggregatorFactory;
 import io.druid.query.aggregation.PostAggregator;
+import io.druid.query.extraction.ExtractionFn;
 import io.druid.segment.ColumnSelectorFactory;
 import io.druid.segment.DimensionSelector;
 import io.druid.segment.FloatColumnSelector;
@@ -163,7 +164,7 @@ public abstract class IncrementalIndex<AggregatorType> implements Iterable<Row>,
   }

   @Override
-  public DimensionSelector makeDimensionSelector(final String dimension)
+  public DimensionSelector makeDimensionSelector(final String dimension, final ExtractionFn extractionFn)
   {
     return new DimensionSelector()
     {
@@ -209,12 +210,16 @@ public abstract class IncrementalIndex<AggregatorType> implements Iterable<Row>,
       @Override
       public String lookupName(int id)
       {
-        return in.get().getDimension(dimension).get(id);
+        final String value = in.get().getDimension(dimension).get(id);
+        return extractionFn == null ? value : extractionFn.apply(value);
       }

       @Override
       public int lookupId(String name)
       {
+        if (extractionFn != null) {
+          throw new UnsupportedOperationException("cannot perform lookup when applying an extraction function");
+        }
         return in.get().getDimension(dimension).indexOf(name);
       }
     };
@@ -28,6 +28,7 @@ import com.metamx.common.guava.Sequence;
 import com.metamx.common.guava.Sequences;
 import io.druid.granularity.QueryGranularity;
 import io.druid.query.QueryInterruptedException;
+import io.druid.query.extraction.ExtractionFn;
 import io.druid.query.filter.Filter;
 import io.druid.query.filter.ValueMatcher;
 import io.druid.query.filter.ValueMatcherFactory;

@@ -38,6 +39,7 @@ import io.druid.segment.FloatColumnSelector;
 import io.druid.segment.LongColumnSelector;
 import io.druid.segment.NullDimensionSelector;
 import io.druid.segment.ObjectColumnSelector;
+import io.druid.segment.SingleScanTimeDimSelector;
 import io.druid.segment.StorageAdapter;
 import io.druid.segment.column.Column;
 import io.druid.segment.data.Indexed;

@@ -99,6 +101,9 @@ public class IncrementalIndexStorageAdapter implements StorageAdapter
   @Override
   public int getDimensionCardinality(String dimension)
   {
+    if (dimension.equals(Column.TIME_COLUMN_NAME)) {
+      return Integer.MAX_VALUE;
+    }
     IncrementalIndex.DimDim dimDim = index.getDimension(dimension);
     if (dimDim == null) {
       return 0;

@@ -272,8 +277,12 @@ public class IncrementalIndexStorageAdapter implements StorageAdapter
   }

   @Override
-  public DimensionSelector makeDimensionSelector(String dimension)
+  public DimensionSelector makeDimensionSelector(final String dimension, @Nullable final ExtractionFn extractionFn)
   {
+    if (dimension.equals(Column.TIME_COLUMN_NAME)) {
+      return new SingleScanTimeDimSelector(makeLongColumnSelector(dimension), extractionFn);
+    }
+
     final IncrementalIndex.DimDim dimValLookup = index.getDimension(dimension);
     if (dimValLookup == null) {
       return NULL_DIMENSION_SELECTOR;

@@ -331,12 +340,17 @@ public class IncrementalIndexStorageAdapter implements StorageAdapter
       @Override
       public String lookupName(int id)
       {
-        return dimValLookup.getValue(id);
+        final String value = dimValLookup.getValue(id);
+        return extractionFn == null ? value : extractionFn.apply(value);
       }

       @Override
       public int lookupId(String name)
       {
+        if (extractionFn != null) {
+          throw new UnsupportedOperationException("cannot perform lookup when applying an extraction function");
+        }
         return dimValLookup.getId(name);
       }
     };
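The `__time` column has no value dictionary, which is why the adapter reports `Integer.MAX_VALUE` cardinality and serves the column through `SingleScanTimeDimSelector`, deriving values from the long timestamp as the cursor scans. A sketch of requesting such a selector, assuming a `Cursor` obtained from this adapter:

```java
import io.druid.query.extraction.TimeFormatExtractionFn;
import io.druid.segment.Cursor;
import io.druid.segment.DimensionSelector;
import io.druid.segment.column.Column;

class TimeSelectorSketch
{
  // Ask the cursor for the __time dimension projected to a weekday name; per
  // this patch the request is served by SingleScanTimeDimSelector rather than
  // the dictionary-backed selector.
  static DimensionSelector dayOfWeek(Cursor cursor)
  {
    return cursor.makeDimensionSelector(
        Column.TIME_COLUMN_NAME,
        new TimeFormatExtractionFn("EEEE", null, null)
    );
  }
}
```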
@@ -18,6 +18,7 @@
 package io.druid.query.aggregation;

 import com.google.common.collect.Lists;
+import io.druid.query.extraction.ExtractionFn;
 import io.druid.query.filter.AndDimFilter;
 import io.druid.query.filter.DimFilter;
 import io.druid.query.filter.NotDimFilter;

@@ -70,7 +71,7 @@ public class FilteredAggregatorTest
     return new ColumnSelectorFactory()
     {
       @Override
-      public DimensionSelector makeDimensionSelector(String dimensionName)
+      public DimensionSelector makeDimensionSelector(String dimensionName, ExtractionFn extractionFn)
       {
         if (dimensionName.equals("dim")) {
           return new DimensionSelector()
@@ -0,0 +1,98 @@
+/*
+ * Licensed to Metamarkets Group Inc. (Metamarkets) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Metamarkets licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package io.druid.query.extraction;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import io.druid.jackson.DefaultObjectMapper;
+import org.joda.time.DateTime;
+import org.joda.time.DateTimeZone;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TimeFormatExtractionFnTest
+{
+  private static final long[] timestamps = {
+      new DateTime("2015-01-01T23:00:00Z").getMillis(),
+      new DateTime("2015-01-02T23:00:00Z").getMillis(),
+      new DateTime("2015-03-03T23:00:00Z").getMillis(),
+      new DateTime("2015-03-04T23:00:00Z").getMillis(),
+      new DateTime("2015-05-02T23:00:00Z").getMillis(),
+      new DateTime("2015-12-21T23:00:00Z").getMillis()
+  };
+
+  @Test(expected = IllegalArgumentException.class)
+  public void testIAEForNullPattern() throws Exception
+  {
+    new TimeFormatExtractionFn(null, null, null);
+  }
+
+  @Test
+  public void testDayOfWeekExtraction() throws Exception
+  {
+    TimeFormatExtractionFn fn = new TimeFormatExtractionFn("EEEE", null, null);
+    Assert.assertEquals("Thursday", fn.apply(timestamps[0]));
+    Assert.assertEquals("Friday", fn.apply(timestamps[1]));
+    Assert.assertEquals("Tuesday", fn.apply(timestamps[2]));
+    Assert.assertEquals("Wednesday", fn.apply(timestamps[3]));
+    Assert.assertEquals("Saturday", fn.apply(timestamps[4]));
+    Assert.assertEquals("Monday", fn.apply(timestamps[5]));
+
+    testSerde(fn, "EEEE", null, null);
+  }
+
+  @Test
+  public void testLocalizedExtraction() throws Exception
+  {
+    TimeFormatExtractionFn fn = new TimeFormatExtractionFn("EEEE", null, "is");
+    Assert.assertEquals("fimmtudagur", fn.apply(timestamps[0]));
+    Assert.assertEquals("föstudagur", fn.apply(timestamps[1]));
+    Assert.assertEquals("þriðjudagur", fn.apply(timestamps[2]));
+    Assert.assertEquals("miðvikudagur", fn.apply(timestamps[3]));
+    Assert.assertEquals("laugardagur", fn.apply(timestamps[4]));
+    Assert.assertEquals("mánudagur", fn.apply(timestamps[5]));
+
+    testSerde(fn, "EEEE", null, "is");
+  }
+
+  @Test
+  public void testTimeZoneExtraction() throws Exception
+  {
+    TimeFormatExtractionFn fn = new TimeFormatExtractionFn("'In Berlin ist es schon 'EEEE", DateTimeZone.forID("Europe/Berlin"), "de");
+    Assert.assertEquals("In Berlin ist es schon Freitag", fn.apply(timestamps[0]));
+    Assert.assertEquals("In Berlin ist es schon Samstag", fn.apply(timestamps[1]));
+    Assert.assertEquals("In Berlin ist es schon Mittwoch", fn.apply(timestamps[2]));
+    Assert.assertEquals("In Berlin ist es schon Donnerstag", fn.apply(timestamps[3]));
+    Assert.assertEquals("In Berlin ist es schon Sonntag", fn.apply(timestamps[4]));
+    Assert.assertEquals("In Berlin ist es schon Dienstag", fn.apply(timestamps[5]));
+
+    testSerde(fn, "'In Berlin ist es schon 'EEEE", DateTimeZone.forID("Europe/Berlin"), "de");
+  }
+
+  public void testSerde(TimeFormatExtractionFn fn, String format, DateTimeZone tz, String locale) throws Exception
+  {
+    ObjectMapper objectMapper = new DefaultObjectMapper();
+    String json = objectMapper.writeValueAsString(fn);
+    TimeFormatExtractionFn deserialized = objectMapper.readValue(json, TimeFormatExtractionFn.class);
+
+    Assert.assertEquals(format, deserialized.getFormat());
+    Assert.assertEquals(tz, deserialized.getTimeZone());
+    Assert.assertEquals(locale, deserialized.getLocale());
+  }
+}
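Taken together, these tests pin down the constructor order as (format, timeZone, locale) and show that the function is applied to the raw millisecond timestamp. A standalone sketch along the same lines (the class name is mine):

```java
import io.druid.query.extraction.TimeFormatExtractionFn;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;

class TimeFormatSketch
{
  public static void main(String[] args)
  {
    // 23:00 UTC on a Thursday is already Friday in Berlin (UTC+1).
    TimeFormatExtractionFn fn =
        new TimeFormatExtractionFn("EEEE", DateTimeZone.forID("Europe/Berlin"), "de");
    System.out.println(fn.apply(new DateTime("2015-01-01T23:00:00Z").getMillis())); // Freitag
  }
}
```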
@@ -19,14 +19,15 @@ package io.druid.query.extraction.extraction;

 import com.google.common.collect.Iterators;
 import com.google.common.collect.Lists;
-import io.druid.query.extraction.DimExtractionFn;
-import io.druid.query.extraction.JavascriptDimExtractionFn;
+import io.druid.query.extraction.ExtractionFn;
+import io.druid.query.extraction.JavascriptExtractionFn;
+import org.joda.time.DateTime;
 import org.junit.Assert;
 import org.junit.Test;

 import java.util.Iterator;

-public class JavascriptDimExtractionFnTest
+public class JavascriptExtractionFnTest
 {
   private static final String[] testStrings = {
       "Quito",

@@ -43,24 +44,46 @@ public class JavascriptDimExtractionFnTest
   public void testJavascriptSubstring()
   {
     String function = "function(str) { return str.substring(0,3); }";
-    DimExtractionFn dimExtractionFn = new JavascriptDimExtractionFn(function);
+    ExtractionFn extractionFn = new JavascriptExtractionFn(function);

     for (String str : testStrings) {
-      String res = dimExtractionFn.apply(str);
+      String res = extractionFn.apply(str);
       Assert.assertEquals(str.substring(0, 3), res);
     }
   }

+  @Test
+  public void testTimeExample() throws Exception
+  {
+    String utcHour = "function(t) {\nreturn 'Second ' + Math.floor((t % 60000) / 1000);\n}";
+    final long millis = new DateTime("2015-01-02T13:00:59.999Z").getMillis();
+    Assert.assertEquals("Second 59", new JavascriptExtractionFn(utcHour).apply(millis));
+  }
+
+  @Test
+  public void testLongs() throws Exception
+  {
+    String typeOf = "function(x) {\nreturn typeof x\n}";
+    Assert.assertEquals("number", new JavascriptExtractionFn(typeOf).apply(1234L));
+  }
+
+  @Test
+  public void testFloats() throws Exception
+  {
+    String typeOf = "function(x) {\nreturn typeof x\n}";
+    Assert.assertEquals("number", new JavascriptExtractionFn(typeOf).apply(1234.0));
+  }
+
   @Test
   public void testCastingAndNull()
   {
     String function = "function(x) {\n x = Number(x);\n if(isNaN(x)) return null;\n return Math.floor(x / 5) * 5;\n}";
-    DimExtractionFn dimExtractionFn = new JavascriptDimExtractionFn(function);
+    ExtractionFn extractionFn = new JavascriptExtractionFn(function);

     Iterator<String> it = Iterators.forArray("0", "5", "5", "10", null);

     for (String str : Lists.newArrayList("1", "5", "6", "10", "CA")) {
-      String res = dimExtractionFn.apply(str);
+      String res = extractionFn.apply(str);
       String expected = it.next();
       Assert.assertEquals(expected, res);
     }

@@ -70,11 +93,11 @@ public class JavascriptDimExtractionFnTest
   public void testJavascriptRegex()
   {
     String function = "function(str) { return str.replace(/[aeiou]/g, ''); }";
-    DimExtractionFn dimExtractionFn = new JavascriptDimExtractionFn(function);
+    ExtractionFn extractionFn = new JavascriptExtractionFn(function);

     Iterator it = Iterators.forArray("Qt", "Clgry", "Tky", "Stckhlm", "Vncvr", "Prtr", "Wllngtn", "Ontr");
     for (String str : testStrings) {
-      String res = dimExtractionFn.apply(str);
+      String res = extractionFn.apply(str);
       Assert.assertEquals(it.next(), res);
     }
   }

@@ -274,13 +297,13 @@ public class JavascriptDimExtractionFnTest
         + ""
         + "}";

-    DimExtractionFn dimExtractionFn = new JavascriptDimExtractionFn(function);
+    ExtractionFn extractionFn = new JavascriptExtractionFn(function);

     Iterator<String> inputs = Iterators.forArray("introducing", "exploratory", "analytics", "on", "large", "datasets");
     Iterator<String> it = Iterators.forArray("introduc", "exploratori", "analyt", "on", "larg", "dataset");

     while (inputs.hasNext()) {
-      String res = dimExtractionFn.apply(inputs.next());
+      String res = extractionFn.apply(inputs.next());
       Assert.assertEquals(it.next(), res);
     }
   }
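Since dimension values now reach the JavaScript engine with their native types, `__time` millis can be bucketed with plain arithmetic. A variation on `testTimeExample` above (the class name and script are mine, not from the patch):

```java
import io.druid.query.extraction.JavascriptExtractionFn;
import org.joda.time.DateTime;

class JsTimeSketch
{
  public static void main(String[] args)
  {
    JavascriptExtractionFn fn = new JavascriptExtractionFn(
        "function(t) { return 'Minute ' + Math.floor((t / 60000) % 60); }"
    );
    // The __time value arrives as a number, so modular arithmetic works directly.
    System.out.println(fn.apply(new DateTime("2015-01-02T13:37:00Z").getMillis())); // Minute 37
  }
}
```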
@@ -18,7 +18,7 @@
 package io.druid.query.extraction.extraction;

 import com.google.common.collect.Sets;
-import io.druid.query.extraction.DimExtractionFn;
+import io.druid.query.extraction.ExtractionFn;
 import io.druid.query.extraction.MatchingDimExtractionFn;
 import org.junit.Assert;
 import org.junit.Test;

@@ -47,12 +47,12 @@ public class MatchingDimExtractionFnTest
   public void testExtraction()
   {
     String regex = ".*[Tt][Oo].*";
-    DimExtractionFn dimExtractionFn = new MatchingDimExtractionFn(regex);
+    ExtractionFn extractionFn = new MatchingDimExtractionFn(regex);
     List<String> expected = Arrays.asList("Quito", "Tokyo", "Stockholm", "Pretoria", "Wellington");
     Set<String> extracted = Sets.newHashSet();

     for (String str : testStrings) {
-      String res = dimExtractionFn.apply(str);
+      String res = extractionFn.apply(str);
       if (res != null) {
         extracted.add(res);
       }
@@ -18,7 +18,7 @@
 package io.druid.query.extraction.extraction;

 import com.google.common.collect.Sets;
-import io.druid.query.extraction.DimExtractionFn;
+import io.druid.query.extraction.ExtractionFn;
 import io.druid.query.extraction.RegexDimExtractionFn;
 import org.junit.Assert;
 import org.junit.Test;

@@ -53,11 +53,11 @@ public class RegexDimExtractionFnTest
   public void testPathExtraction()
   {
     String regex = "/([^/]+)/";
-    DimExtractionFn dimExtractionFn = new RegexDimExtractionFn(regex);
+    ExtractionFn extractionFn = new RegexDimExtractionFn(regex);
     Set<String> extracted = Sets.newHashSet();

     for (String path : paths) {
-      extracted.add(dimExtractionFn.apply(path));
+      extracted.add(extractionFn.apply(path));
     }

     Assert.assertEquals(2, extracted.size());

@@ -69,11 +69,11 @@ public class RegexDimExtractionFnTest
   public void testDeeperPathExtraction()
   {
     String regex = "^/([^/]+/[^/]+)(/|$)";
-    DimExtractionFn dimExtractionFn = new RegexDimExtractionFn(regex);
+    ExtractionFn extractionFn = new RegexDimExtractionFn(regex);
     Set<String> extracted = Sets.newHashSet();

     for (String path : paths) {
-      extracted.add(dimExtractionFn.apply(path));
+      extracted.add(extractionFn.apply(path));
     }

     Assert.assertEquals(4, extracted.size());

@@ -87,11 +87,11 @@ public class RegexDimExtractionFnTest
   public void testStringExtraction()
   {
     String regex = "(.)";
-    DimExtractionFn dimExtractionFn = new RegexDimExtractionFn(regex);
+    ExtractionFn extractionFn = new RegexDimExtractionFn(regex);
     Set<String> extracted = Sets.newHashSet();

     for (String testString : testStrings) {
-      extracted.add(dimExtractionFn.apply(testString));
+      extracted.add(extractionFn.apply(testString));
     }

     Assert.assertEquals(3, extracted.size());
@@ -18,7 +18,7 @@
 package io.druid.query.extraction.extraction;

 import com.google.common.collect.Sets;
-import io.druid.query.extraction.DimExtractionFn;
+import io.druid.query.extraction.ExtractionFn;
 import io.druid.query.extraction.SearchQuerySpecDimExtractionFn;
 import io.druid.query.search.search.FragmentSearchQuerySpec;
 import io.druid.query.search.search.SearchQuerySpec;

@@ -50,12 +50,12 @@ public class SearchQuerySpecDimExtractionFnTest
     SearchQuerySpec spec = new FragmentSearchQuerySpec(
         Arrays.asList("to", "yo")
     );
-    DimExtractionFn dimExtractionFn = new SearchQuerySpecDimExtractionFn(spec);
+    ExtractionFn extractionFn = new SearchQuerySpecDimExtractionFn(spec);
     List<String> expected = Arrays.asList("Kyoto", "Tokyo", "Toyokawa", "Yorktown");
     Set<String> extracted = Sets.newHashSet();

     for (String str : testStrings) {
-      String res = dimExtractionFn.apply(str);
+      String res = extractionFn.apply(str);
       if (res != null) {
         extracted.add(res);
       }
@@ -18,7 +18,7 @@
 package io.druid.query.extraction.extraction;

 import com.google.common.collect.Sets;
-import io.druid.query.extraction.DimExtractionFn;
+import io.druid.query.extraction.ExtractionFn;
 import io.druid.query.extraction.TimeDimExtractionFn;
 import org.junit.Assert;
 import org.junit.Test;

@@ -42,10 +42,10 @@ public class TimeDimExtractionFnTest
   public void testMonthExtraction()
   {
     Set<String> months = Sets.newHashSet();
-    DimExtractionFn dimExtractionFn = new TimeDimExtractionFn("MM/dd/yyyy", "MM/yyyy");
+    ExtractionFn extractionFn = new TimeDimExtractionFn("MM/dd/yyyy", "MM/yyyy");

     for (String dim : dims) {
-      months.add(dimExtractionFn.apply(dim));
+      months.add(extractionFn.apply(dim));
     }

     Assert.assertEquals(months.size(), 4);

@@ -59,10 +59,10 @@ public class TimeDimExtractionFnTest
   public void testQuarterExtraction()
   {
     Set<String> quarters = Sets.newHashSet();
-    DimExtractionFn dimExtractionFn = new TimeDimExtractionFn("MM/dd/yyyy", "QQQ/yyyy");
+    ExtractionFn extractionFn = new TimeDimExtractionFn("MM/dd/yyyy", "QQQ/yyyy");

     for (String dim : dims) {
-      quarters.add(dimExtractionFn.apply(dim));
+      quarters.add(extractionFn.apply(dim));
     }

     Assert.assertEquals(quarters.size(), 3);
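Note the contrast with `TimeFormatExtractionFn`: `TimeDimExtractionFn` reparses *string* dimension values that happen to contain dates; it does not read `__time`. A sketch using the same format pair as the quarter test (the exact rendering of the `QQQ` quarter token depends on the underlying formatter, so the output shown is indicative only):

```java
import io.druid.query.extraction.ExtractionFn;
import io.druid.query.extraction.TimeDimExtractionFn;

class TimeDimSketch
{
  public static void main(String[] args)
  {
    // Reparses the string against the first pattern, reformats with the second.
    ExtractionFn quarter = new TimeDimExtractionFn("MM/dd/yyyy", "QQQ/yyyy");
    System.out.println(quarter.apply("04/15/2015")); // along the lines of "Q2/2015"
  }
}
```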
@@ -52,9 +52,14 @@ import io.druid.query.dimension.DefaultDimensionSpec;
 import io.druid.query.dimension.DimensionSpec;
 import io.druid.query.dimension.ExtractionDimensionSpec;
 import io.druid.query.extraction.DimExtractionFn;
+import io.druid.query.extraction.ExtractionFn;
 import io.druid.query.extraction.RegexDimExtractionFn;
+import io.druid.query.extraction.TimeFormatExtractionFn;
+import io.druid.query.filter.DimFilter;
 import io.druid.query.filter.JavaScriptDimFilter;
+import io.druid.query.filter.OrDimFilter;
 import io.druid.query.filter.RegexDimFilter;
+import io.druid.query.filter.SelectorDimFilter;
 import io.druid.query.groupby.having.EqualToHavingSpec;
 import io.druid.query.groupby.having.GreaterThanHavingSpec;
 import io.druid.query.groupby.having.HavingSpec;

@@ -64,6 +69,7 @@ import io.druid.query.groupby.orderby.LimitSpec;
 import io.druid.query.groupby.orderby.OrderByColumnSpec;
 import io.druid.query.spec.MultipleIntervalSegmentSpec;
 import io.druid.segment.TestHelper;
+import io.druid.segment.column.Column;
 import org.joda.time.DateTime;
 import org.joda.time.DateTimeZone;
 import org.joda.time.Interval;

@@ -322,8 +328,8 @@ public class GroupByQueryRunnerTest
   @Test
   public void testGroupByWithNullProducingDimExtractionFn()
   {
-    final DimExtractionFn fn1 = new RegexDimExtractionFn("(\\w{1})");
-    final DimExtractionFn nullExtractionFn = new DimExtractionFn()
+    final ExtractionFn fn1 = new RegexDimExtractionFn("(\\w{1})");
+    final ExtractionFn nullExtractionFn = new DimExtractionFn()
     {
       @Override
       public byte[] getCacheKey()

@@ -354,7 +360,7 @@ public class GroupByQueryRunnerTest
         ))
         .setGranularity(QueryRunnerTestHelper.dayGran)
         .setDimensions(Lists.<DimensionSpec>newArrayList(
-            new ExtractionDimensionSpec("quality", "alias", nullExtractionFn)
+            new ExtractionDimensionSpec("quality", "alias", nullExtractionFn, null)
         ))
         .build();

@@ -389,8 +395,8 @@ public class GroupByQueryRunnerTest
   */
   public void testGroupByWithEmptyStringProducingDimExtractionFn()
   {
-    final DimExtractionFn fn1 = new RegexDimExtractionFn("(\\w{1})");
-    final DimExtractionFn emptyStringExtractionFn = new DimExtractionFn()
+    final ExtractionFn fn1 = new RegexDimExtractionFn("(\\w{1})");
+    final ExtractionFn emptyStringExtractionFn = new DimExtractionFn()
     {
       @Override
       public byte[] getCacheKey()

@@ -421,7 +427,7 @@ public class GroupByQueryRunnerTest
         ))
         .setGranularity(QueryRunnerTestHelper.dayGran)
         .setDimensions(Lists.<DimensionSpec>newArrayList(
-            new ExtractionDimensionSpec("quality", "alias", emptyStringExtractionFn)
+            new ExtractionDimensionSpec("quality", "alias", emptyStringExtractionFn, null)
         ))
         .build();

@@ -672,11 +678,13 @@ public class GroupByQueryRunnerTest
     final GroupByQuery allGranQuery = builder.copy().setGranularity(QueryGranularity.ALL).build();

     QueryRunner mergedRunner = factory.getToolchest().mergeResults(
-        new QueryRunner<Row>() {
+        new QueryRunner<Row>()
+        {
           @Override
           public Sequence<Row> run(
              Query<Row> query, Map<String, Object> responseContext
-          ) {
+          )
+          {
             // simulate two daily segments
             final Query query1 = query.withQuerySegmentSpec(
                 new MultipleIntervalSegmentSpec(Lists.newArrayList(new Interval("2011-04-02/2011-04-03")))

@@ -2272,4 +2280,61 @@ public class GroupByQueryRunnerTest
     Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
     TestHelper.assertExpectedObjects(expectedResults, results, "");
   }
+
+  @Test
+  public void testGroupByTimeExtraction()
+  {
+    GroupByQuery query = GroupByQuery
+        .builder()
+        .setDataSource(QueryRunnerTestHelper.dataSource)
+        .setQuerySegmentSpec(QueryRunnerTestHelper.fullOnInterval)
+        .setDimensions(
+            Lists.newArrayList(
+                new DefaultDimensionSpec("market", "market"),
+                new ExtractionDimensionSpec(
+                    Column.TIME_COLUMN_NAME,
+                    "dayOfWeek",
+                    new TimeFormatExtractionFn("EEEE", null, null),
+                    null
+                )
+            )
+        )
+        .setAggregatorSpecs(
+            Arrays.asList(
+                QueryRunnerTestHelper.rowsCount,
+                QueryRunnerTestHelper.indexDoubleSum
+            )
+        )
+        .setPostAggregatorSpecs(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
+        .setGranularity(QueryRunnerTestHelper.allGran)
+        .setDimFilter(
+            new OrDimFilter(
+                Arrays.<DimFilter>asList(
+                    new SelectorDimFilter("market", "spot"),
+                    new SelectorDimFilter("market", "upfront")
+                )
+            )
+        )
+        .build();
+
+    List<Row> expectedResults = Arrays.asList(
+        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Friday", "market", "spot", "index", 13219.574157714844, "rows", 117L, "addRowsIndexConstant", 13337.574157714844),
+        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Monday", "market", "spot", "index", 13557.738830566406, "rows", 117L, "addRowsIndexConstant", 13675.738830566406),
+        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Saturday", "market", "spot", "index", 13493.751281738281, "rows", 117L, "addRowsIndexConstant", 13611.751281738281),
+        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Sunday", "market", "spot", "index", 13585.541015625, "rows", 117L, "addRowsIndexConstant", 13703.541015625),
+        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Thursday", "market", "spot", "index", 14279.127197265625, "rows", 126L, "addRowsIndexConstant", 14406.127197265625),
+        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Tuesday", "market", "spot", "index", 13199.471435546875, "rows", 117L, "addRowsIndexConstant", 13317.471435546875),
+        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Wednesday", "market", "spot", "index", 14271.368591308594, "rows", 126L, "addRowsIndexConstant", 14398.368591308594),
+        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Friday", "market", "upfront", "index", 27297.8623046875, "rows", 26L, "addRowsIndexConstant", 27324.8623046875),
+        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Monday", "market", "upfront", "index", 27619.58447265625, "rows", 26L, "addRowsIndexConstant", 27646.58447265625),
+        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Saturday", "market", "upfront", "index", 27820.83154296875, "rows", 26L, "addRowsIndexConstant", 27847.83154296875),
+        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Sunday", "market", "upfront", "index", 24791.223876953125, "rows", 26L, "addRowsIndexConstant", 24818.223876953125),
+        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Thursday", "market", "upfront", "index", 28562.748901367188, "rows", 28L, "addRowsIndexConstant", 28591.748901367188),
+        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Tuesday", "market", "upfront", "index", 26968.280639648438, "rows", 26L, "addRowsIndexConstant", 26995.280639648438),
+        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Wednesday", "market", "upfront", "index", 28985.5751953125, "rows", 28L, "addRowsIndexConstant", 29014.5751953125)
+    );
+
+    Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+    TestHelper.assertExpectedObjects(expectedResults, results, "");
+  }
 }
@@ -41,13 +41,14 @@ import io.druid.query.aggregation.cardinality.CardinalityAggregatorFactory;
 import io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory;
 import io.druid.query.dimension.ExtractionDimensionSpec;
 import io.druid.query.extraction.DimExtractionFn;
+import io.druid.query.extraction.ExtractionFn;
 import io.druid.query.extraction.RegexDimExtractionFn;
+import io.druid.query.extraction.TimeFormatExtractionFn;
 import io.druid.query.filter.AndDimFilter;
 import io.druid.query.filter.DimFilter;
 import io.druid.query.spec.MultipleIntervalSegmentSpec;
 import io.druid.query.timeseries.TimeseriesQuery;
 import io.druid.query.timeseries.TimeseriesResultValue;
 import io.druid.segment.TestHelper;
+import io.druid.segment.column.Column;
 import org.joda.time.DateTime;
 import org.joda.time.Interval;
 import org.junit.Test;

@@ -1399,6 +1400,55 @@ public class TopNQueryRunnerTest
     TestHelper.assertExpectedResults(expectedResults, runner.run(query, context));
   }

+  @Test
+  public void testTopNCollapsingDimExtraction()
+  {
+    TopNQuery query = new TopNQueryBuilder()
+        .dataSource(QueryRunnerTestHelper.dataSource)
+        .granularity(QueryRunnerTestHelper.allGran)
+        .dimension(
+            new ExtractionDimensionSpec(
+                QueryRunnerTestHelper.qualityDimension, QueryRunnerTestHelper.qualityDimension,
+                new RegexDimExtractionFn(".(.)"), null
+            )
+        )
+        .metric("index")
+        .threshold(2)
+        .intervals(QueryRunnerTestHelper.fullOnInterval)
+        .aggregators(
+            Arrays.asList(
+                QueryRunnerTestHelper.rowsCount,
+                QueryRunnerTestHelper.indexDoubleSum
+            )
+        )
+        .postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
+        .build();
+
+    List<Result<TopNResultValue>> expectedResults = Arrays.asList(
+        new Result<>(
+            new DateTime("2011-01-12T00:00:00.000Z"),
+            new TopNResultValue(
+                Arrays.<Map<String, Object>>asList(
+                    ImmutableMap.<String, Object>of(
+                        QueryRunnerTestHelper.qualityDimension, "e",
+                        "rows", 558L,
+                        "index", 246645.1204032898,
+                        "addRowsIndexConstant", 247204.1204032898
+                    ),
+                    ImmutableMap.<String, Object>of(
+                        QueryRunnerTestHelper.qualityDimension, "r",
+                        "rows", 372L,
+                        "index", 222051.08961486816,
+                        "addRowsIndexConstant", 222424.08961486816
+                    )
+                )
+            )
+        )
+    );
+    HashMap<String, Object> context = new HashMap<String, Object>();
+    TestHelper.assertExpectedResults(expectedResults, runner.run(query, context));
+  }
+
   @Test
   public void testTopNDimExtraction()
   {

@@ -1407,7 +1457,7 @@ public class TopNQueryRunnerTest
         .granularity(QueryRunnerTestHelper.allGran)
         .dimension(
             new ExtractionDimensionSpec(
-                QueryRunnerTestHelper.marketDimension, QueryRunnerTestHelper.marketDimension, new RegexDimExtractionFn("(.)")
+                QueryRunnerTestHelper.marketDimension, QueryRunnerTestHelper.marketDimension, new RegexDimExtractionFn("(.)"), null
             )
         )
         .metric("rows")

@@ -1459,7 +1509,7 @@ public class TopNQueryRunnerTest
         .granularity(QueryRunnerTestHelper.allGran)
         .dimension(
             new ExtractionDimensionSpec(
-                QueryRunnerTestHelper.marketDimension, QueryRunnerTestHelper.marketDimension, new RegexDimExtractionFn("(.)")
+                QueryRunnerTestHelper.marketDimension, QueryRunnerTestHelper.marketDimension, new RegexDimExtractionFn("(.)"), null
            )
         )
         .metric(new LexicographicTopNMetricSpec(null))

@@ -1511,7 +1561,7 @@ public class TopNQueryRunnerTest
         .granularity(QueryRunnerTestHelper.allGran)
         .dimension(
             new ExtractionDimensionSpec(
-                QueryRunnerTestHelper.marketDimension, QueryRunnerTestHelper.marketDimension, new RegexDimExtractionFn("..(.)")
+                QueryRunnerTestHelper.marketDimension, QueryRunnerTestHelper.marketDimension, new RegexDimExtractionFn("..(.)"), null
            )
         )
         .metric(new InvertedTopNMetricSpec(new LexicographicTopNMetricSpec(null)))

@@ -1563,10 +1613,75 @@ public class TopNQueryRunnerTest
         .granularity(QueryRunnerTestHelper.allGran)
         .dimension(
             new ExtractionDimensionSpec(
-                QueryRunnerTestHelper.marketDimension, QueryRunnerTestHelper.marketDimension, new RegexDimExtractionFn("(.)")
+                QueryRunnerTestHelper.marketDimension, QueryRunnerTestHelper.marketDimension, new RegexDimExtractionFn("(.)"), null
            )
         )
-        .metric(new LexicographicTopNMetricSpec("spot"))
+        .metric(new LexicographicTopNMetricSpec("s"))
         .threshold(4)
         .intervals(QueryRunnerTestHelper.firstToThird)
         .aggregators(QueryRunnerTestHelper.commonAggregators)
         .postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
         .build();

     List<Result<TopNResultValue>> expectedResults = Arrays.asList(
         new Result<TopNResultValue>(
             new DateTime("2011-04-01T00:00:00.000Z"),
             new TopNResultValue(
                 Arrays.<Map<String, Object>>asList(
                     ImmutableMap.<String, Object>of(
                         QueryRunnerTestHelper.marketDimension, "t",
                         "rows", 4L,
                         "index", 5351.814697265625D,
                         "addRowsIndexConstant", 5356.814697265625D,
                         "uniques", QueryRunnerTestHelper.UNIQUES_2
                     ),
                     ImmutableMap.<String, Object>of(
                         QueryRunnerTestHelper.marketDimension, "u",
                         "rows", 4L,
                         "index", 4875.669677734375D,
                         "addRowsIndexConstant", 4880.669677734375D,
                         "uniques", QueryRunnerTestHelper.UNIQUES_2
                     )
                 )
             )
         )
     );
     HashMap<String, Object> context = new HashMap<String, Object>();
     TestHelper.assertExpectedResults(expectedResults, runner.run(query, context));
   }

+  @Test
+  public void testTopNLexicographicDimExtractionWithSortingPreservedAndPreviousStop()
+  {
+    TopNQuery query = new TopNQueryBuilder()
+        .dataSource(QueryRunnerTestHelper.dataSource)
+        .granularity(QueryRunnerTestHelper.allGran)
+        .dimension(
+            new ExtractionDimensionSpec(
+                QueryRunnerTestHelper.marketDimension, QueryRunnerTestHelper.marketDimension,
+                new DimExtractionFn()
+                {
+                  @Override
+                  public byte[] getCacheKey()
+                  {
+                    return new byte[0];
+                  }
+
+                  @Override
+                  public String apply(String value)
+                  {
+                    return value.substring(0, 1);
+                  }
+
+                  @Override
+                  public boolean preservesOrdering()
+                  {
+                    return true;
+                  }
+                }, null
+            )
+        )
+        .metric(new LexicographicTopNMetricSpec("s"))
+        .threshold(4)
+        .intervals(QueryRunnerTestHelper.firstToThird)
+        .aggregators(QueryRunnerTestHelper.commonAggregators)

@@ -1609,7 +1724,7 @@ public class TopNQueryRunnerTest
         .granularity(QueryRunnerTestHelper.allGran)
         .dimension(
             new ExtractionDimensionSpec(
-                QueryRunnerTestHelper.marketDimension, QueryRunnerTestHelper.marketDimension, new RegexDimExtractionFn("(.)")
+                QueryRunnerTestHelper.marketDimension, QueryRunnerTestHelper.marketDimension, new RegexDimExtractionFn("(.)"), null
            )
         )
         .metric(new InvertedTopNMetricSpec(new LexicographicTopNMetricSpec("u")))

@@ -1654,7 +1769,7 @@ public class TopNQueryRunnerTest
         .granularity(QueryRunnerTestHelper.allGran)
         .dimension(
             new ExtractionDimensionSpec(
-                QueryRunnerTestHelper.marketDimension, QueryRunnerTestHelper.marketDimension, new RegexDimExtractionFn("..(.)")
+                QueryRunnerTestHelper.marketDimension, QueryRunnerTestHelper.marketDimension, new RegexDimExtractionFn("..(.)"), null
            )
         )
         .metric(new InvertedTopNMetricSpec(new LexicographicTopNMetricSpec("p")))

@@ -1693,7 +1808,7 @@ public class TopNQueryRunnerTest
   @Test
   public void testTopNWithNullProducingDimExtractionFn()
   {
-    final DimExtractionFn nullStringDimExtraction = new DimExtractionFn()
+    final ExtractionFn nullStringDimExtraction = new DimExtractionFn()
     {
       @Override
       public byte[] getCacheKey()

@@ -1723,7 +1838,7 @@ public class TopNQueryRunnerTest
         .aggregators(QueryRunnerTestHelper.commonAggregators)
         .postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
         .dimension(
-            new ExtractionDimensionSpec(QueryRunnerTestHelper.marketDimension, QueryRunnerTestHelper.marketDimension, nullStringDimExtraction)
+            new ExtractionDimensionSpec(QueryRunnerTestHelper.marketDimension, QueryRunnerTestHelper.marketDimension, nullStringDimExtraction, null)
         )
         .build();

@@ -1771,7 +1886,7 @@ public class TopNQueryRunnerTest
   */
   public void testTopNWithEmptyStringProducingDimExtractionFn()
   {
-    final DimExtractionFn emptyStringDimExtraction = new DimExtractionFn()
+    final ExtractionFn emptyStringDimExtraction = new DimExtractionFn()
     {
       @Override
       public byte[] getCacheKey()

@@ -1801,7 +1916,7 @@ public class TopNQueryRunnerTest
         .aggregators(QueryRunnerTestHelper.commonAggregators)
         .postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
         .dimension(
-            new ExtractionDimensionSpec(QueryRunnerTestHelper.marketDimension, QueryRunnerTestHelper.marketDimension, emptyStringDimExtraction))
+            new ExtractionDimensionSpec(QueryRunnerTestHelper.marketDimension, QueryRunnerTestHelper.marketDimension, emptyStringDimExtraction, null))
         .build();

@@ -2159,4 +2274,55 @@ public class TopNQueryRunnerTest

     TestHelper.assertExpectedResults(expectedResults, runner.run(query, Maps.newHashMap()));
   }
+
+  @Test
+  public void testTopNTimeExtraction()
+  {
+    TopNQuery query = new TopNQueryBuilder()
+        .dataSource(QueryRunnerTestHelper.dataSource)
+        .granularity(QueryRunnerTestHelper.allGran)
+        .dimension(
+            new ExtractionDimensionSpec(
+                Column.TIME_COLUMN_NAME,
+                "dayOfWeek",
+                new TimeFormatExtractionFn("EEEE", null, null),
+                null
+            )
+        )
+        .metric("index")
+        .threshold(2)
+        .intervals(QueryRunnerTestHelper.fullOnInterval)
+        .aggregators(
+            Arrays.asList(
+                QueryRunnerTestHelper.rowsCount,
+                QueryRunnerTestHelper.indexDoubleSum
+            )
+        )
+        .postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
+        .build();
+
+    List<Result<TopNResultValue>> expectedResults = Arrays.asList(
+        new Result<>(
+            new DateTime("2011-01-12T00:00:00.000Z"),
+            new TopNResultValue(
+                Arrays.<Map<String, Object>>asList(
+                    ImmutableMap.<String, Object>of(
+                        "dayOfWeek", "Wednesday",
+                        "rows", 182L,
+                        "index", 76010.28100585938,
+                        "addRowsIndexConstant", 76193.28100585938
+                    ),
+                    ImmutableMap.<String, Object>of(
+                        "dayOfWeek", "Thursday",
+                        "rows", 182L,
+                        "index", 75203.26300811768,
+                        "addRowsIndexConstant", 75386.26300811768
+                    )
+                )
+            )
+        )
+    );
+    HashMap<String, Object> context = new HashMap<String, Object>();
+    TestHelper.assertExpectedResults(expectedResults, runner.run(query, context));
+  }
 }
@@ -17,7 +17,6 @@

 package io.druid.segment.filter;

-import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 import com.metamx.collections.bitmap.BitmapFactory;
 import com.metamx.collections.bitmap.ConciseBitmapFactory;

@@ -25,18 +24,15 @@ import com.metamx.collections.bitmap.ImmutableBitmap;
 import com.metamx.collections.bitmap.WrappedConciseBitmap;
 import com.metamx.collections.spatial.ImmutableRTree;
 import io.druid.query.extraction.DimExtractionFn;
+import io.druid.query.extraction.ExtractionFn;
 import io.druid.query.filter.BitmapIndexSelector;
 import io.druid.query.filter.Filter;
 import io.druid.segment.data.ArrayIndexed;
 import io.druid.segment.data.Indexed;
 import it.uniroma3.mat.extendedset.intset.ConciseSet;
 import org.junit.Assert;
-import org.junit.BeforeClass;
 import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;

-import java.util.Collection;
 import java.util.Map;

 /**

@@ -94,7 +90,7 @@ public class ExtractionDimFilterTest
       return null;
     }
   };
-  private static final DimExtractionFn DIM_EXTRACTION_FN = new DimExtractionFn()
+  private static final ExtractionFn DIM_EXTRACTION_FN = new DimExtractionFn()
   {
     @Override
     public byte[] getCacheKey()
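`DimExtractionFn` survives as the string-only base class, and the anonymous subclasses in these tests suggest an implementer only needs `apply(String)`, `getCacheKey()` and `preservesOrdering()`. A minimal sketch of that shape (the uppercase transform is illustrative, not from the patch):

```java
import io.druid.query.extraction.DimExtractionFn;
import io.druid.query.extraction.ExtractionFn;

class UpperCaseExtractionSketch
{
  static final ExtractionFn UPPER_CASE = new DimExtractionFn()
  {
    @Override
    public byte[] getCacheKey()
    {
      return new byte[]{(byte) 0xFF}; // toy key; real implementations need a unique key
    }

    @Override
    public String apply(String value)
    {
      return value == null ? null : value.toUpperCase();
    }

    @Override
    public boolean preservesOrdering()
    {
      return false; // case folding can reorder values relative to the dictionary
    }
  };
}
```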
@@ -273,7 +273,7 @@ public class IncrementalIndexStorageAdapterTest
     Cursor cursor = Sequences.toList(Sequences.limit(cursorSequence, 1), Lists.<Cursor>newArrayList()).get(0);
     DimensionSelector dimSelector;

-    dimSelector = cursor.makeDimensionSelector("sally");
+    dimSelector = cursor.makeDimensionSelector("sally", null);
     Assert.assertEquals("bo", dimSelector.lookupName(dimSelector.getRow().get(0)));

     index.add(

@@ -287,7 +287,7 @@ public class IncrementalIndexStorageAdapterTest
     // Cursor reset should not be affected by out of order values
     cursor.reset();

-    dimSelector = cursor.makeDimensionSelector("sally");
+    dimSelector = cursor.makeDimensionSelector("sally", null);
     Assert.assertEquals("bo", dimSelector.lookupName(dimSelector.getRow().get(0)));
   }