diff --git a/docs/content/querying/dimensionspecs.md b/docs/content/querying/dimensionspecs.md index 8879290db43..26160373a24 100644 --- a/docs/content/querying/dimensionspecs.md +++ b/docs/content/querying/dimensionspecs.md @@ -341,6 +341,16 @@ Example for chaining [regular expression extraction function](#regular-expressio It will transform dimension values with specified extraction functions in the order named. For example, `'/druid/prod/historical'` is transformed to `'the dru'` as regular expression extraction function first transforms it to `'druid'` and then, javascript extraction function transforms it to `'the druid'`, and lastly, substring extraction function transforms it to `'the dru'`. +### String Format Extraction Function + +Returns the dimension value formatted according to the given format string. + +```json +{ "type" : "stringFormat", "format" : "[%s]" } +``` + +For example, to wrap the dimension value in "[" and "]", specify "[%s]" as the format string. + ### Filtering DimensionSpecs These are only valid for multi-valued dimensions. If you have a row in druid that has a multi-valued dimension with values ["v1", "v2", "v3"] and you send a groupBy/topN query grouping by that dimension with [query filter](filter.html) for value "v1". In the response you will get 3 rows containing "v1", "v2" and "v3". This behavior might be unintuitive for some use cases. 
diff --git a/processing/src/main/java/io/druid/query/extraction/CascadeExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/CascadeExtractionFn.java index a7dc5d13f6e..b3d09805ca1 100644 --- a/processing/src/main/java/io/druid/query/extraction/CascadeExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/CascadeExtractionFn.java @@ -29,38 +29,44 @@ import java.util.Arrays; public class CascadeExtractionFn implements ExtractionFn { - private static final byte CACHE_TYPE_ID = 0x9; - private final ExtractionFn extractionFns[]; private final ChainedExtractionFn chainedExtractionFn; private final ChainedExtractionFn DEFAULT_CHAINED_EXTRACTION_FN = new ChainedExtractionFn( - new ExtractionFn() { - public byte[] getCacheKey() { + new ExtractionFn() + { + public byte[] getCacheKey() + { return new byte[0]; } - public String apply(Object value) { + public String apply(Object value) + { return null; } - public String apply(String value) { + public String apply(String value) + { return null; } - public String apply(long value) { + public String apply(long value) + { return null; } - public boolean preservesOrdering() { + public boolean preservesOrdering() + { return false; } - public ExtractionType getExtractionType() { + public ExtractionType getExtractionType() + { return ExtractionType.MANY_TO_ONE; } @Override - public String toString() { + public String toString() + { return "nullExtractionFn{}"; } }, @@ -78,48 +84,55 @@ public class CascadeExtractionFn implements ExtractionFn this.chainedExtractionFn = DEFAULT_CHAINED_EXTRACTION_FN; } else { ChainedExtractionFn root = null; - for (int idx = 0; idx < extractionFns.length; idx++) { - Preconditions.checkArgument(extractionFns[idx] != null, "empty function is not allowed"); - root = new ChainedExtractionFn(extractionFns[idx], root); + for (ExtractionFn fn : extractionFn) { + Preconditions.checkArgument(fn != null, "empty function is not allowed"); + root = new ChainedExtractionFn(fn, 
root); } this.chainedExtractionFn = root; } } @JsonProperty - public ExtractionFn[] getExtractionFns() { + public ExtractionFn[] getExtractionFns() + { return extractionFns; } @Override - public byte[] getCacheKey() { - byte[] cacheKey = new byte[] {CACHE_TYPE_ID}; + public byte[] getCacheKey() + { + byte[] cacheKey = new byte[]{ExtractionCacheHelper.CACHE_TYPE_ID_CASCADE}; return Bytes.concat(cacheKey, chainedExtractionFn.getCacheKey()); } @Override - public String apply(Object value) { + public String apply(Object value) + { return chainedExtractionFn.apply(value); } @Override - public String apply(String value){ + public String apply(String value) + { return chainedExtractionFn.apply(value); } @Override - public String apply(long value){ + public String apply(long value) + { return chainedExtractionFn.apply(value); } @Override - public boolean preservesOrdering(){ + public boolean preservesOrdering() + { return chainedExtractionFn.preservesOrdering(); } @Override - public ExtractionType getExtractionType(){ + public ExtractionType getExtractionType() + { return chainedExtractionFn.getExtractionType(); } @@ -152,44 +165,53 @@ public class CascadeExtractionFn implements ExtractionFn } @Override - public String toString() { + public String toString() + { return "CascadeExtractionFn{" + - "extractionFns=[" + chainedExtractionFn.toString() + "]}"; + "extractionFns=[" + chainedExtractionFn.toString() + "]}"; } - private class ChainedExtractionFn { + private class ChainedExtractionFn + { private final ExtractionFn fn; private final ChainedExtractionFn child; - public ChainedExtractionFn(ExtractionFn fn, ChainedExtractionFn child) { + public ChainedExtractionFn(ExtractionFn fn, ChainedExtractionFn child) + { this.fn = fn; this.child = child; } - public byte[] getCacheKey() { + public byte[] getCacheKey() + { byte[] fnCacheKey = fn.getCacheKey(); return (child != null) ? 
Bytes.concat(fnCacheKey, child.getCacheKey()) : fnCacheKey; } - public String apply(Object value) { + public String apply(Object value) + { return fn.apply((child != null) ? child.apply(value) : value); } - public String apply(String value){ + public String apply(String value) + { return fn.apply((child != null) ? child.apply(value) : value); } - public String apply(long value){ + public String apply(long value) + { return fn.apply((child != null) ? child.apply(value) : value); } - public boolean preservesOrdering(){ + public boolean preservesOrdering() + { boolean childPreservesOrdering = (child == null) || child.preservesOrdering(); return fn.preservesOrdering() && childPreservesOrdering; } - public ExtractionType getExtractionType(){ + public ExtractionType getExtractionType() + { if (child != null && child.getExtractionType() == ExtractionType.MANY_TO_ONE) { return ExtractionType.MANY_TO_ONE; } else { @@ -227,10 +249,11 @@ public class CascadeExtractionFn implements ExtractionFn return result; } - public String toString() { + public String toString() + { return (child != null) - ? Joiner.on(",").join(child.toString(), fn.toString()) - : fn.toString(); + ? Joiner.on(",").join(child.toString(), fn.toString()) + : fn.toString(); } } } diff --git a/processing/src/main/java/io/druid/query/extraction/DimExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/DimExtractionFn.java index f3127afd8ae..afb22cd567b 100644 --- a/processing/src/main/java/io/druid/query/extraction/DimExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/DimExtractionFn.java @@ -24,7 +24,7 @@ public abstract class DimExtractionFn implements ExtractionFn @Override public String apply(Object value) { - return apply(value.toString()); + return apply(value == null ? 
null : value.toString()); } @Override diff --git a/processing/src/main/java/io/druid/query/extraction/ExtractionCacheHelper.java b/processing/src/main/java/io/druid/query/extraction/ExtractionCacheHelper.java new file mode 100644 index 00000000000..3a5b8c5d88f --- /dev/null +++ b/processing/src/main/java/io/druid/query/extraction/ExtractionCacheHelper.java @@ -0,0 +1,38 @@ +/* +* Licensed to Metamarkets Group Inc. (Metamarkets) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. Metamarkets licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. 
+*/ + +package io.druid.query.extraction; + +/** + * One-byte type identifiers that ExtractionFn implementations put at the front of their cache keys; every constant below has a distinct value. + */ +public class ExtractionCacheHelper +{ + public static final byte CACHE_TYPE_ID_TIME_DIM = 0x0; + public static final byte CACHE_TYPE_ID_REGEX = 0x1; + public static final byte CACHE_TYPE_ID_MATCHING_DIM = 0x2; + public static final byte CACHE_TYPE_ID_SEARCH_QUERY = 0x3; + public static final byte CACHE_TYPE_ID_JAVASCRIPT = 0x4; + public static final byte CACHE_TYPE_ID_TIME_FORMAT = 0x5; + public static final byte CACHE_TYPE_ID_IDENTITY = 0x6; + public static final byte CACHE_TYPE_ID_LOOKUP = 0x7; + public static final byte CACHE_TYPE_ID_SUBSTRING = 0x8; + public static final byte CACHE_TYPE_ID_CASCADE = 0x9; + public static final byte CACHE_TYPE_ID_STRING_FORMAT = 0xA; +} diff --git a/processing/src/main/java/io/druid/query/extraction/ExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/ExtractionFn.java index 800bc8e4054..6361ca90bad 100644 --- a/processing/src/main/java/io/druid/query/extraction/ExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/ExtractionFn.java @@ -35,7 +35,8 @@ import com.fasterxml.jackson.annotation.JsonTypeInfo; @JsonSubTypes.Type(name = "identity", value = IdentityExtractionFn.class), @JsonSubTypes.Type(name = "lookup", value = LookupExtractionFn.class), @JsonSubTypes.Type(name = "substring", value = SubstringDimExtractionFn.class), - @JsonSubTypes.Type(name = "cascade", value = CascadeExtractionFn.class) + @JsonSubTypes.Type(name = "cascade", value = CascadeExtractionFn.class), + @JsonSubTypes.Type(name = "stringFormat", value = StringFormatExtractionFn.class) }) /** * An ExtractionFn is a function that can be used to transform the values of a column (typically a dimension) @@ -56,7 +57,7 @@ public interface ExtractionFn /** * The "extraction" function. This should map a value into some other String value. - * + *

* In order to maintain the "null and empty string are equivalent" semantics that Druid provides, the * empty string is considered invalid output for this method and should instead return null. This is * a contract on the method rather than enforced at a lower level in order to eliminate a global check @@ -74,7 +75,7 @@ public interface ExtractionFn /** * Offers information on whether the extraction will preserve the original ordering of the values. - *

+ *

* Some optimizations of queries is possible if ordering is preserved. Null values *do* count towards * ordering. * diff --git a/processing/src/main/java/io/druid/query/extraction/IdentityExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/IdentityExtractionFn.java index 76a71977084..9d9153efabd 100644 --- a/processing/src/main/java/io/druid/query/extraction/IdentityExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/IdentityExtractionFn.java @@ -23,8 +23,6 @@ import com.google.common.base.Strings; public class IdentityExtractionFn implements ExtractionFn { - private static final byte CACHE_TYPE_ID = 0x6; - private static final IdentityExtractionFn instance = new IdentityExtractionFn(); private IdentityExtractionFn() @@ -35,7 +33,7 @@ public class IdentityExtractionFn implements ExtractionFn @Override public byte[] getCacheKey() { - return new byte[]{CACHE_TYPE_ID}; + return new byte[]{ExtractionCacheHelper.CACHE_TYPE_ID_IDENTITY}; } @Override @@ -67,13 +65,13 @@ public class IdentityExtractionFn implements ExtractionFn { return ExtractionType.ONE_TO_ONE; } - + @Override public String toString() { return "Identity"; } - + @Override public boolean equals(Object o) { diff --git a/processing/src/main/java/io/druid/query/extraction/JavaScriptExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/JavaScriptExtractionFn.java index 4830abb58eb..0263ddc10aa 100644 --- a/processing/src/main/java/io/druid/query/extraction/JavaScriptExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/JavaScriptExtractionFn.java @@ -61,8 +61,6 @@ public class JavaScriptExtractionFn implements ExtractionFn }; } - private static final byte CACHE_TYPE_ID = 0x4; - private final String function; private final Function fn; private final boolean injective; @@ -97,7 +95,7 @@ public class JavaScriptExtractionFn implements ExtractionFn { byte[] bytes = StringUtils.toUtf8(function); return ByteBuffer.allocate(1 + 
bytes.length) - .put(CACHE_TYPE_ID) + .put(ExtractionCacheHelper.CACHE_TYPE_ID_JAVASCRIPT) .put(bytes) .array(); } diff --git a/processing/src/main/java/io/druid/query/extraction/LookupExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/LookupExtractionFn.java index 878ac6db2ac..d12b545116e 100644 --- a/processing/src/main/java/io/druid/query/extraction/LookupExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/LookupExtractionFn.java @@ -33,8 +33,6 @@ import java.io.IOException; public class LookupExtractionFn extends FunctionalExtraction { - private static final byte CACHE_TYPE_ID = 0x7; - private final LookupExtractor lookup; private final boolean optimize; @@ -98,7 +96,7 @@ public class LookupExtractionFn extends FunctionalExtraction { try { final ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); - outputStream.write(CACHE_TYPE_ID); + outputStream.write(ExtractionCacheHelper.CACHE_TYPE_ID_LOOKUP); outputStream.write(lookup.getCacheKey()); if (getReplaceMissingValueWith() != null) { outputStream.write(StringUtils.toUtf8(getReplaceMissingValueWith())); diff --git a/processing/src/main/java/io/druid/query/extraction/MatchingDimExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/MatchingDimExtractionFn.java index 8c32f00a54e..c69edd58c63 100644 --- a/processing/src/main/java/io/druid/query/extraction/MatchingDimExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/MatchingDimExtractionFn.java @@ -32,8 +32,6 @@ import java.util.regex.Pattern; */ public class MatchingDimExtractionFn extends DimExtractionFn { - private static final byte CACHE_TYPE_ID = 0x2; - private final String expr; private final Pattern pattern; @@ -53,7 +51,7 @@ public class MatchingDimExtractionFn extends DimExtractionFn { byte[] exprBytes = StringUtils.toUtf8(expr); return ByteBuffer.allocate(1 + exprBytes.length) - .put(CACHE_TYPE_ID) + .put(ExtractionCacheHelper.CACHE_TYPE_ID_MATCHING_DIM) 
.put(exprBytes) .array(); } diff --git a/processing/src/main/java/io/druid/query/extraction/RegexDimExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/RegexDimExtractionFn.java index b86c84a473d..e9f13e98919 100644 --- a/processing/src/main/java/io/druid/query/extraction/RegexDimExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/RegexDimExtractionFn.java @@ -33,7 +33,6 @@ import java.util.regex.Pattern; */ public class RegexDimExtractionFn extends DimExtractionFn { - private static final byte CACHE_TYPE_ID = 0x1; private static final byte CACHE_KEY_SEPARATOR = (byte) 0xFF; private final String expr; @@ -75,7 +74,7 @@ public class RegexDimExtractionFn extends DimExtractionFn totalLen += 2; // separators return ByteBuffer.allocate(totalLen) - .put(CACHE_TYPE_ID) + .put(ExtractionCacheHelper.CACHE_TYPE_ID_REGEX) .put(exprBytes) .put(CACHE_KEY_SEPARATOR) .put(replaceStrBytes) diff --git a/processing/src/main/java/io/druid/query/extraction/SearchQuerySpecDimExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/SearchQuerySpecDimExtractionFn.java index 9b925a6ae41..31f0ccabb62 100644 --- a/processing/src/main/java/io/druid/query/extraction/SearchQuerySpecDimExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/SearchQuerySpecDimExtractionFn.java @@ -30,8 +30,6 @@ import java.nio.ByteBuffer; */ public class SearchQuerySpecDimExtractionFn extends DimExtractionFn { - private static final byte CACHE_TYPE_ID = 0x3; - private final SearchQuerySpec searchQuerySpec; @JsonCreator @@ -55,7 +53,7 @@ public class SearchQuerySpecDimExtractionFn extends DimExtractionFn { byte[] specBytes = searchQuerySpec.getCacheKey(); return ByteBuffer.allocate(1 + specBytes.length) - .put(CACHE_TYPE_ID) + .put(ExtractionCacheHelper.CACHE_TYPE_ID_SEARCH_QUERY) .put(specBytes) .array(); } diff --git a/processing/src/main/java/io/druid/query/extraction/StringFormatExtractionFn.java 
b/processing/src/main/java/io/druid/query/extraction/StringFormatExtractionFn.java new file mode 100644 index 00000000000..fcdf7e03d8c --- /dev/null +++ b/processing/src/main/java/io/druid/query/extraction/StringFormatExtractionFn.java @@ -0,0 +1,101 @@ +/* +* Licensed to Metamarkets Group Inc. (Metamarkets) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. Metamarkets licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. 
+*/ + +package io.druid.query.extraction; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Preconditions; +import com.google.common.base.Strings; +import com.metamx.common.StringUtils; + +import java.nio.ByteBuffer; + +/** + * Formats each dimension value with String.format(format, value); a null value renders through %s as "null", and an empty result is returned as null because ExtractionFn treats the empty string as invalid output. + */ +public class StringFormatExtractionFn extends DimExtractionFn +{ + private final String format; + + @JsonCreator + public StringFormatExtractionFn( + @JsonProperty("format") String format + ) + { + Preconditions.checkArgument(!Strings.isNullOrEmpty(format), "format string should not be empty"); + this.format = format; + } + + @JsonProperty + public String getFormat() + { + return format; + } + + @Override + public byte[] getCacheKey() + { + byte[] bytes = StringUtils.toUtf8(format); + return ByteBuffer.allocate(1 + bytes.length) + .put(ExtractionCacheHelper.CACHE_TYPE_ID_STRING_FORMAT) + .put(bytes) + .array(); + } + + @Override + public String apply(String value) + { + return Strings.emptyToNull(String.format(format, value)); + } + + @Override + public boolean preservesOrdering() + { + return false; + } + + @Override + public ExtractionType getExtractionType() + { + return ExtractionType.MANY_TO_ONE; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + StringFormatExtractionFn that = (StringFormatExtractionFn) o; + + return format.equals(that.format); + + } + + @Override + public int hashCode() + { + return format.hashCode(); + } +} diff --git a/processing/src/main/java/io/druid/query/extraction/TimeDimExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/TimeDimExtractionFn.java index ccb757b5d6f..acf14517781 100644 --- a/processing/src/main/java/io/druid/query/extraction/TimeDimExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/TimeDimExtractionFn.java @@ -33,8 +33,6 @@ import java.util.Date; */ public class 
TimeDimExtractionFn extends DimExtractionFn { - private static final byte CACHE_TYPE_ID = 0x0; - private final String timeFormat; private final SimpleDateFormat timeFormatter; private final String resultFormat; @@ -62,7 +60,7 @@ public class TimeDimExtractionFn extends DimExtractionFn { byte[] timeFormatBytes = StringUtils.toUtf8(timeFormat); return ByteBuffer.allocate(1 + timeFormatBytes.length) - .put(CACHE_TYPE_ID) + .put(ExtractionCacheHelper.CACHE_TYPE_ID_TIME_DIM) .put(timeFormatBytes) .array(); } diff --git a/processing/src/main/java/io/druid/query/extraction/TimeFormatExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/TimeFormatExtractionFn.java index f86331f321e..2ee2011e701 100644 --- a/processing/src/main/java/io/druid/query/extraction/TimeFormatExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/TimeFormatExtractionFn.java @@ -32,8 +32,6 @@ import java.util.Locale; public class TimeFormatExtractionFn implements ExtractionFn { - private static final byte CACHE_TYPE_ID = 0x5; - private final DateTimeZone tz; private final String pattern; private final Locale locale; @@ -82,7 +80,7 @@ public class TimeFormatExtractionFn implements ExtractionFn { byte[] exprBytes = StringUtils.toUtf8(pattern + "\u0001" + tz.getID() + "\u0001" + locale.toLanguageTag()); return ByteBuffer.allocate(1 + exprBytes.length) - .put(CACHE_TYPE_ID) + .put(ExtractionCacheHelper.CACHE_TYPE_ID_TIME_FORMAT) .put(exprBytes) .array(); } @@ -102,7 +100,7 @@ public class TimeFormatExtractionFn implements ExtractionFn @Override public String apply(String value) { - return apply((Object)value); + return apply((Object) value); } @Override diff --git a/processing/src/test/java/io/druid/query/extraction/StringFormatExtractionFnTest.java b/processing/src/test/java/io/druid/query/extraction/StringFormatExtractionFnTest.java new file mode 100644 index 00000000000..866e8449d43 --- /dev/null +++ 
b/processing/src/test/java/io/druid/query/extraction/StringFormatExtractionFnTest.java @@ -0,0 +1,67 @@ +/* +* Licensed to Metamarkets Group Inc. (Metamarkets) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. Metamarkets licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +*/ + +package io.druid.query.extraction; + +import com.fasterxml.jackson.databind.ObjectMapper; +import io.druid.jackson.DefaultObjectMapper; +import org.junit.Assert; +import org.junit.Test; + +/** + * Unit tests for StringFormatExtractionFn: formatting a long input, formatting a null String input, and JSON deserialization plus round-trip equality. + */ +public class StringFormatExtractionFnTest +{ + + @Test + public void testApply() throws Exception + { + StringFormatExtractionFn fn = new StringFormatExtractionFn("[%s]"); + long test = 1000L; + Assert.assertEquals("[1000]", fn.apply(test)); + } + + @Test + public void testApplyNull() throws Exception + { + StringFormatExtractionFn fn = new StringFormatExtractionFn("[%s]"); + String test = null; + Assert.assertEquals("[null]", fn.apply(test)); + } + + @Test + public void testSerde() throws Exception + { + final ObjectMapper objectMapper = new DefaultObjectMapper(); + final String json = "{ \"type\" : \"stringFormat\", \"format\" : \"[%s]\" }"; + StringFormatExtractionFn extractionFn = (StringFormatExtractionFn) objectMapper.readValue(json, ExtractionFn.class); + + Assert.assertEquals("[%s]", extractionFn.getFormat()); + + // round trip + Assert.assertEquals( + 
extractionFn, + objectMapper.readValue( + objectMapper.writeValueAsBytes(extractionFn), + ExtractionFn.class + ) + ); + } +}