diff --git a/docs/content/querying/dimensionspecs.md b/docs/content/querying/dimensionspecs.md index 26160373a24..95e23286dc4 100644 --- a/docs/content/querying/dimensionspecs.md +++ b/docs/content/querying/dimensionspecs.md @@ -371,3 +371,23 @@ Following filtered dimension spec retains only the values matching regex. Note t ``` For more details and examples, see [multi-valued dimensions](multi-valued-dimensions.html). + +### Upper and Lower extraction functions + +These functions return the dimension values converted to all upper case or all lower case. +Optionally, the user can specify the language (locale) whose rules are used to perform the upper or lower case transformation. + +```json +{ + "type" : "upper", + "locale":"fr" +} +``` + +or without setting "locale" (in this case, the current value of the default locale for this instance of the Java Virtual Machine is used): + +```json +{ + "type" : "lower" +} +``` diff --git a/processing/src/main/java/io/druid/query/extraction/ExtractionCacheHelper.java b/processing/src/main/java/io/druid/query/extraction/ExtractionCacheHelper.java index 3a5b8c5d88f..26f58399d55 100644 --- a/processing/src/main/java/io/druid/query/extraction/ExtractionCacheHelper.java +++ b/processing/src/main/java/io/druid/query/extraction/ExtractionCacheHelper.java @@ -35,4 +35,6 @@ public class ExtractionCacheHelper public static final byte CACHE_TYPE_ID_SUBSTRING = 0x8; public static final byte CACHE_TYPE_ID_CASCADE = 0x9; public static final byte CACHE_TYPE_ID_STRING_FORMAT = 0xA; + public static final byte CACHE_TYPE_ID_UPPER = 0xB; + public static final byte CACHE_TYPE_ID_LOWER = 0xC; } diff --git a/processing/src/main/java/io/druid/query/extraction/ExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/ExtractionFn.java index 6361ca90bad..f62adf78ebd 100644 --- a/processing/src/main/java/io/druid/query/extraction/ExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/ExtractionFn.java @@ -36,7 +36,9 @@ import com.fasterxml.jackson.annotation.JsonTypeInfo; 
@JsonSubTypes.Type(name = "lookup", value = LookupExtractionFn.class), @JsonSubTypes.Type(name = "substring", value = SubstringDimExtractionFn.class), @JsonSubTypes.Type(name = "cascade", value = CascadeExtractionFn.class), - @JsonSubTypes.Type(name = "stringFormat", value = StringFormatExtractionFn.class) + @JsonSubTypes.Type(name = "stringFormat", value = StringFormatExtractionFn.class), + @JsonSubTypes.Type(name = "upper", value = UpperExtractionFn.class), + @JsonSubTypes.Type(name = "lower", value = LowerExtractionFn.class) }) /** * An ExtractionFn is a function that can be used to transform the values of a column (typically a dimension) diff --git a/processing/src/main/java/io/druid/query/extraction/LowerExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/LowerExtractionFn.java new file mode 100644 index 00000000000..4f3b87b8dbf --- /dev/null +++ b/processing/src/main/java/io/druid/query/extraction/LowerExtractionFn.java @@ -0,0 +1,97 @@ +/* + * + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ *
+ */
+
+package io.druid.query.extraction;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonTypeName;
+import com.google.common.base.Strings;
+import com.metamx.common.StringUtils;
+
+import javax.annotation.Nullable;
+import java.nio.ByteBuffer;
+import java.util.Locale;
+
+/**
+ * Extraction function that converts dimension values to lower case.
+ *
+ * The case mapping follows the locale named by the optional "locale" property (parsed with
+ * {@link Locale#forLanguageTag}); when the property is absent, the JVM default locale at
+ * construction time is used.
+ */
+@JsonTypeName("lower")
+public class LowerExtractionFn implements ExtractionFn
+{
+  private final Locale locale;
+
+  // Written out as "locale", the same property name the constructor reads, so that a
+  // serialized instance deserializes with its locale intact.
+  @JsonProperty("locale")
+  private final String localeString;
+
+  /**
+   * @param localeString language tag of the locale whose case rules apply, or null to use the JVM default locale
+   */
+  public LowerExtractionFn(@JsonProperty("locale") String localeString)
+  {
+    this.localeString = localeString;
+    this.locale = localeString == null ? Locale.getDefault() : Locale.forLanguageTag(localeString);
+  }
+
+  /**
+   * @param key string input of extraction function
+   *
+   * @return new string with all of the characters in {@code key} in lower case, or null if {@code key} is empty or null
+   */
+  @Nullable
+  @Override
+  public String apply(String key)
+  {
+    if (Strings.isNullOrEmpty(key)) {
+      return null;
+    }
+    return key.toLowerCase(locale);
+  }
+
+  /**
+   * Lower-cases the decimal string form of {@code value}.
+   */
+  @Override
+  public String apply(long value)
+  {
+    return apply(String.valueOf(value));
+  }
+
+  @Override
+  public boolean preservesOrdering()
+  {
+    return false;
+  }
+
+  @Override
+  public ExtractionType getExtractionType()
+  {
+    return ExtractionType.MANY_TO_ONE;
+  }
+
+  /**
+   * Key layout: the type id byte, a 0xFF byte, then the bytes of the configured locale string. An unset locale
+   * contributes no bytes; the JVM default locale that is applied in that case is not part of the key.
+   */
+  @Override
+  public byte[] getCacheKey()
+  {
+    byte[] localeBytes = StringUtils.toUtf8(Strings.nullToEmpty(localeString));
+    return ByteBuffer.allocate(2 + localeBytes.length)
+                     .put(ExtractionCacheHelper.CACHE_TYPE_ID_LOWER)
+                     .put((byte) 0xFF)
+                     .put(localeBytes)
+                     .array();
+  }
+
+  /**
+   * Applies the function to the string form of {@code value}. A null value is handled like a null string and
+   * yields null, instead of being converted to the literal text "null" first.
+   */
+  @Nullable
+  @Override
+  public String apply(Object value)
+  {
+    return apply(value == null ? null : value.toString());
+  }
+}
diff --git a/processing/src/main/java/io/druid/query/extraction/UpperExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/UpperExtractionFn.java
new file mode 100644
index 00000000000..7d6ded1e8a4
--- /dev/null
+++
b/processing/src/main/java/io/druid/query/extraction/UpperExtractionFn.java
@@ -0,0 +1,120 @@
+/*
+ *
+ * Licensed to Metamarkets Group Inc. (Metamarkets) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Metamarkets licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * /
+ */
+
+package io.druid.query.extraction;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonTypeName;
+import com.google.common.base.Strings;
+import com.metamx.common.StringUtils;
+
+import javax.annotation.Nullable;
+import java.nio.ByteBuffer;
+import java.util.Locale;
+
+/**
+ * Extraction function that converts dimension values to upper case.
+ *
+ * The case mapping follows the locale named by the optional "locale" property (parsed with
+ * {@link Locale#forLanguageTag}); when the property is absent, the JVM default locale at
+ * construction time is used.
+ */
+@JsonTypeName("upper")
+public class UpperExtractionFn implements ExtractionFn
+{
+  private final Locale locale;
+
+  // Written out as "locale", the same property name the constructor reads, so that a
+  // serialized instance deserializes with its locale intact.
+  @JsonProperty("locale")
+  private final String localeString;
+
+  /**
+   * @param localeString language tag of the locale whose case rules apply, or null to use the JVM default locale
+   */
+  public UpperExtractionFn(@JsonProperty("locale") String localeString)
+  {
+    this.localeString = localeString;
+    this.locale = localeString == null ? Locale.getDefault() : Locale.forLanguageTag(localeString);
+  }
+
+  /**
+   * @param key string input of extraction function
+   *
+   * @return new string with all of the characters in {@code key} in upper case, or null if {@code key} is empty or null
+   */
+  @Nullable
+  @Override
+  public String apply(String key)
+  {
+    if (Strings.isNullOrEmpty(key)) {
+      return null;
+    }
+    return key.toUpperCase(locale);
+  }
+
+  /**
+   * Upper-cases the decimal string form of {@code value}.
+   */
+  @Override
+  public String apply(long value)
+  {
+    return apply(String.valueOf(value));
+  }
+
+  @Override
+  public boolean preservesOrdering()
+  {
+    return false;
+  }
+
+  @Override
+  public ExtractionType getExtractionType()
+  {
+    return ExtractionType.MANY_TO_ONE;
+  }
+
+  /**
+   * Key layout: the type id byte, a 0xFF byte, then the bytes of the configured locale string. An unset locale
+   * contributes no bytes; the JVM default locale that is applied in that case is not part of the key.
+   */
+  @Override
+  public byte[] getCacheKey()
+  {
+    byte[] localeBytes = StringUtils.toUtf8(Strings.nullToEmpty(localeString));
+    return ByteBuffer.allocate(2 + localeBytes.length)
+                     .put(ExtractionCacheHelper.CACHE_TYPE_ID_UPPER)
+                     .put((byte) 0xFF)
+                     .put(localeBytes)
+                     .array();
+  }
+
+  /**
+   * Applies the function to the string form of {@code value}. A null value is handled like a null string and
+   * yields null, instead of being converted to the literal text "null" first.
+   */
+  @Nullable
+  @Override
+  public String apply(Object value)
+  {
+    return apply(value == null ? null : value.toString());
+  }
+}
diff --git a/processing/src/test/java/io/druid/query/extraction/LowerExtractionFnTest.java b/processing/src/test/java/io/druid/query/extraction/LowerExtractionFnTest.java
new file mode 100644
index 00000000000..a239c4347cb
--- /dev/null
+++ b/processing/src/test/java/io/druid/query/extraction/LowerExtractionFnTest.java
@@ -0,0 +1,46 @@
+/*
+ *
+ * Licensed to Metamarkets Group Inc. (Metamarkets) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Metamarkets licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * /
+ */
+
+package io.druid.query.extraction;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.Arrays;
+
+/**
+ * Unit tests for {@link LowerExtractionFn}.
+ */
+public class LowerExtractionFnTest
+{
+  // A null locale string makes the function use the JVM default locale.
+  ExtractionFn extractionFn = new LowerExtractionFn(null);
+
+  @Test
+  public void testApply()
+  {
+    Assert.assertEquals("lower 1 string", extractionFn.apply("lOwER 1 String"));
+    // Empty and null inputs both map to null.
+    Assert.assertNull(extractionFn.apply(""));
+    Assert.assertNull(extractionFn.apply(null));
+  }
+
+  @Test
+  public void testApplyWithLocale()
+  {
+    // Turkish case rules lower-case 'I' to the dotless i (U+0131), so the expected value is only
+    // produced when the configured locale is actually used for the conversion.
+    Assert.assertEquals("t\u0131tle", new LowerExtractionFn("tr").apply("TITLE"));
+  }
+
+  @Test
+  public void testGetCacheKey()
+  {
+    Assert.assertArrayEquals(extractionFn.getCacheKey(), extractionFn.getCacheKey());
+    Assert.assertFalse(Arrays.equals(extractionFn.getCacheKey(), new UpperExtractionFn(null).getCacheKey()));
+    // The configured locale string is part of the key.
+    Assert.assertFalse(Arrays.equals(extractionFn.getCacheKey(), new LowerExtractionFn("tr").getCacheKey()));
+  }
+}
diff --git a/processing/src/test/java/io/druid/query/extraction/UpperExtractionFnTest.java b/processing/src/test/java/io/druid/query/extraction/UpperExtractionFnTest.java
new file mode 100644
index 00000000000..55b82c346b7
--- /dev/null
+++ b/processing/src/test/java/io/druid/query/extraction/UpperExtractionFnTest.java
@@ -0,0 +1,46 @@
+/*
+ *
+ * Licensed to Metamarkets Group Inc. (Metamarkets) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Metamarkets licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * /
+ */
+
+package io.druid.query.extraction;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.Arrays;
+
+/**
+ * Unit tests for {@link UpperExtractionFn}.
+ */
+public class UpperExtractionFnTest
+{
+  // A null locale string makes the function use the JVM default locale.
+  ExtractionFn extractionFn = new UpperExtractionFn(null);
+
+  @Test
+  public void testApply()
+  {
+    Assert.assertEquals("UPPER", extractionFn.apply("uPpeR"));
+    // Empty and null inputs both map to null.
+    Assert.assertNull(extractionFn.apply(""));
+    Assert.assertNull(extractionFn.apply(null));
+  }
+
+  @Test
+  public void testApplyWithLocale()
+  {
+    // Turkish case rules upper-case 'i' to the dotted capital I (U+0130), so the expected value is
+    // only produced when the configured locale is actually used for the conversion.
+    Assert.assertEquals("T\u0130TLE", new UpperExtractionFn("tr").apply("title"));
+  }
+
+  @Test
+  public void testGetCacheKey()
+  {
+    Assert.assertArrayEquals(extractionFn.getCacheKey(), extractionFn.getCacheKey());
+    Assert.assertFalse(Arrays.equals(extractionFn.getCacheKey(), new LowerExtractionFn(null).getCacheKey()));
+    // The configured locale string is part of the key.
+    Assert.assertFalse(Arrays.equals(extractionFn.getCacheKey(), new UpperExtractionFn("tr").getCacheKey()));
+  }
+}