From b06ac54a5ee9176a97baad9d69ce96aa27265eae Mon Sep 17 00:00:00 2001 From: dongyifeng Date: Sat, 13 Oct 2018 08:51:09 +0800 Subject: [PATCH] add PrefixFilteredDimensionSpec for multi-value dimensions (#6307) * add PrefixFilteredDimensionSpec for multi-value dimensions * add docs for PrefixFilteredDimensionSpec * remove unnecessary null handling * add null check to the result of NullHandling --- docs/content/querying/dimensionspecs.md | 6 + .../druid/query/dimension/DimensionSpec.java | 3 +- .../PrefixFilteredDimensionSpec.java | 144 ++++++++++++++++++ .../dimension/RegexFilteredDimensionSpec.java | 3 +- .../PrefixFilteredDimensionSpecTest.java | 98 ++++++++++++ 5 files changed, 252 insertions(+), 2 deletions(-) create mode 100644 processing/src/main/java/org/apache/druid/query/dimension/PrefixFilteredDimensionSpec.java create mode 100644 processing/src/test/java/org/apache/druid/query/dimension/PrefixFilteredDimensionSpecTest.java diff --git a/docs/content/querying/dimensionspecs.md b/docs/content/querying/dimensionspecs.md index b70fa68583a..e9a41e00fd1 100644 --- a/docs/content/querying/dimensionspecs.md +++ b/docs/content/querying/dimensionspecs.md @@ -66,6 +66,12 @@ Following filtered dimension spec retains only the values matching regex. Note t { "type" : "regexFiltered", "delegate" : <dimensionSpec>, "pattern": <java regex pattern> } ``` +Following filtered dimension spec retains only the values starting with the specified prefix. + +```json +{ "type" : "prefixFiltered", "delegate" : <dimensionSpec>, "prefix": <prefix string> } +``` + For more details and examples, see [multi-value dimensions](multi-value-dimensions.html). 
### Lookup DimensionSpecs diff --git a/processing/src/main/java/org/apache/druid/query/dimension/DimensionSpec.java b/processing/src/main/java/org/apache/druid/query/dimension/DimensionSpec.java index 3adf5d507af..bbf970ed2bc 100644 --- a/processing/src/main/java/org/apache/druid/query/dimension/DimensionSpec.java +++ b/processing/src/main/java/org/apache/druid/query/dimension/DimensionSpec.java @@ -37,7 +37,8 @@ import javax.annotation.Nullable; @JsonSubTypes.Type(name = "default", value = DefaultDimensionSpec.class), @JsonSubTypes.Type(name = "extraction", value = ExtractionDimensionSpec.class), @JsonSubTypes.Type(name = "regexFiltered", value = RegexFilteredDimensionSpec.class), - @JsonSubTypes.Type(name = "listFiltered", value = ListFilteredDimensionSpec.class) + @JsonSubTypes.Type(name = "listFiltered", value = ListFilteredDimensionSpec.class), + @JsonSubTypes.Type(name = "prefixFiltered", value = PrefixFilteredDimensionSpec.class) }) public interface DimensionSpec extends Cacheable { diff --git a/processing/src/main/java/org/apache/druid/query/dimension/PrefixFilteredDimensionSpec.java b/processing/src/main/java/org/apache/druid/query/dimension/PrefixFilteredDimensionSpec.java new file mode 100644 index 00000000000..d4904b5c5c0 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/dimension/PrefixFilteredDimensionSpec.java @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.dimension;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.common.base.Preconditions;
+import com.google.common.base.Predicate;
+import org.apache.druid.common.config.NullHandling;
+import org.apache.druid.java.util.common.StringUtils;
+import org.apache.druid.query.filter.DimFilterUtils;
+import org.apache.druid.segment.DimensionSelector;
+import it.unimi.dsi.fastutil.ints.Int2IntMap;
+import it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap;
+import java.nio.ByteBuffer;
+import javax.annotation.Nullable;
+
+/**
+ * A filtered {@link DimensionSpec} that retains only the delegate's values starting with a given prefix.
+ */
+public class PrefixFilteredDimensionSpec extends BaseFilteredDimensionSpec
+{
+  private static final byte CACHE_TYPE_ID = 0x4;
+
+  private final String prefix;
+
+  public PrefixFilteredDimensionSpec(
+      @JsonProperty("delegate") DimensionSpec delegate,
+      @JsonProperty("prefix") String prefix // values not starting with the prefix are filtered out
+  )
+  {
+    super(delegate);
+    this.prefix = Preconditions.checkNotNull(prefix, "prefix must not be null");
+  }
+
+  @JsonProperty
+  public String getPrefix()
+  {
+    return prefix;
+  }
+
+  @Override
+  public DimensionSelector decorate(final DimensionSelector selector)
+  {
+    if (selector == null) {
+      return null;
+    }
+    // A negative cardinality or no advance name lookup rules out the id scan below; filter by predicate instead.
+    final int selectorCardinality = selector.getValueCardinality();
+    if (selectorCardinality < 0 || !selector.nameLookupPossibleInAdvance()) {
+      return new PredicateFilteredDimensionSelector(
+          selector,
+          new Predicate<String>()
+          {
+            @Override
+            public boolean apply(@Nullable String input)
+            {
+              String val = NullHandling.nullToEmptyIfNeeded(input);
+              return val != null && val.startsWith(prefix); // a null value never matches
+            }
+          }
+      );
+    }
+    // Otherwise scan every id once and give the ids whose value has the prefix new consecutive ids from 0.
+    int count = 0;
+    final Int2IntOpenHashMap forwardMapping = new Int2IntOpenHashMap();
+    forwardMapping.defaultReturnValue(-1);
+    for (int i = 0; i < selectorCardinality; i++) {
+      String val = NullHandling.nullToEmptyIfNeeded(selector.lookupName(i));
+      if (val != null && val.startsWith(prefix)) {
+        forwardMapping.put(i, count++);
+      }
+    }
+    // reverseMapping inverts forwardMapping: new id -> id in the wrapped selector.
+    final int[] reverseMapping = new int[forwardMapping.size()];
+    for (Int2IntMap.Entry e : forwardMapping.int2IntEntrySet()) {
+      reverseMapping[e.getIntValue()] = e.getIntKey();
+    }
+    return new ForwardingFilteredDimensionSelector(selector, forwardMapping, reverseMapping);
+  }
+
+  @Override
+  public byte[] getCacheKey()
+  {
+    byte[] delegateCacheKey = delegate.getCacheKey();
+    byte[] prefixBytes = StringUtils.toUtf8(prefix);
+    return ByteBuffer.allocate(2 + delegateCacheKey.length + prefixBytes.length) // 2: type id + separator
+                     .put(CACHE_TYPE_ID)
+                     .put(delegateCacheKey)
+                     .put(DimFilterUtils.STRING_SEPARATOR)
+                     .put(prefixBytes)
+                     .array();
+  }
+
+  @Override
+  public boolean equals(Object o)
+  {
+    if (this == o) {
+      return true;
+    }
+    if (o == null || getClass() != o.getClass()) {
+      return false;
+    }
+
+    PrefixFilteredDimensionSpec that = (PrefixFilteredDimensionSpec) o;
+
+    if (!delegate.equals(that.delegate)) {
+      return false;
+    }
+    return prefix.equals(that.prefix);
+  }
+
+  @Override
+  public int hashCode()
+  {
+    int result = delegate.hashCode();
+    result = 31 * result + prefix.hashCode();
+    return result;
+  }
+
+  @Override
+  public String toString()
+  {
+    return "PrefixFilteredDimensionSpec{" +
+           "Prefix='" + prefix + '\'' +
+           '}';
+  }
+}
diff --git a/processing/src/main/java/org/apache/druid/query/dimension/RegexFilteredDimensionSpec.java b/processing/src/main/java/org/apache/druid/query/dimension/RegexFilteredDimensionSpec.java
index 0ad457d5744..84883c6d403 100644
--- 
a/processing/src/main/java/org/apache/druid/query/dimension/RegexFilteredDimensionSpec.java +++ b/processing/src/main/java/org/apache/druid/query/dimension/RegexFilteredDimensionSpec.java @@ -76,7 +76,8 @@ public class RegexFilteredDimensionSpec extends BaseFilteredDimensionSpec @Override public boolean apply(@Nullable String input) { - return compiledRegex.matcher(NullHandling.nullToEmptyIfNeeded(input)).matches(); + String val = NullHandling.nullToEmptyIfNeeded(input); + return val == null ? false : compiledRegex.matcher(val).matches(); } } ); diff --git a/processing/src/test/java/org/apache/druid/query/dimension/PrefixFilteredDimensionSpecTest.java b/processing/src/test/java/org/apache/druid/query/dimension/PrefixFilteredDimensionSpecTest.java new file mode 100644 index 00000000000..2dc8fecb025 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/query/dimension/PrefixFilteredDimensionSpecTest.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+package org.apache.druid.query.dimension;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import org.apache.druid.segment.DimensionSelector;
+import org.apache.druid.segment.TestHelper;
+import org.apache.druid.segment.data.IndexedInts;
+import java.util.Arrays;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Exercises JSON serde, cache keys and selector decoration of {@link PrefixFilteredDimensionSpec}.
+ */
+public class PrefixFilteredDimensionSpecTest
+{
+  @Test
+  public void testSerde() throws Exception
+  {
+    final ObjectMapper jsonMapper = TestHelper.makeJsonMapper();
+
+    final String json = "{\n"
+                        + " \"type\": \"prefixFiltered\",\n"
+                        + " \"delegate\": {\n"
+                        + " \"type\": \"default\",\n"
+                        + " \"dimension\": \"foo\",\n"
+                        + " \"outputName\": \"bar\"\n"
+                        + " },\n"
+                        + " \"prefix\": \"xxx\"\n"
+                        + "}";
+
+    // Round trip: JSON -> spec -> JSON -> spec.
+    final DimensionSpec decoded = jsonMapper.readValue(json, DimensionSpec.class);
+    final String reencoded = jsonMapper.writeValueAsString(decoded);
+    final PrefixFilteredDimensionSpec actual =
+        (PrefixFilteredDimensionSpec) jsonMapper.readValue(reencoded, DimensionSpec.class);
+
+    final DimensionSpec delegate = new DefaultDimensionSpec("foo", "bar");
+    final PrefixFilteredDimensionSpec expected = new PrefixFilteredDimensionSpec(delegate, "xxx");
+
+    Assert.assertEquals(expected, actual);
+  }
+
+  @Test
+  public void testGetCacheKey()
+  {
+    // Two specs over equal delegates; only the prefix differs.
+    final PrefixFilteredDimensionSpec xxxSpec =
+        new PrefixFilteredDimensionSpec(new DefaultDimensionSpec("foo", "bar"), "xxx");
+    final PrefixFilteredDimensionSpec xyzSpec =
+        new PrefixFilteredDimensionSpec(new DefaultDimensionSpec("foo", "bar"), "xyz");
+
+    final byte[] xxxKey = xxxSpec.getCacheKey();
+    final byte[] xyzKey = xyzSpec.getCacheKey();
+
+    // The differing prefix is expected to show up as differing cache keys.
+    Assert.assertFalse(Arrays.equals(xxxKey, xyzKey));
+  }
+
+  @Test
+  public void testDecorator()
+  {
+    final DimensionSpec delegate = new DefaultDimensionSpec("foo", "far");
+    final PrefixFilteredDimensionSpec spec = new PrefixFilteredDimensionSpec(delegate, "c");
+
+    final DimensionSelector filtered = spec.decorate(TestDimensionSelector.instance);
+
+    // The filtered selector is expected to expose a single value.
+    Assert.assertEquals(1, filtered.getValueCardinality());
+
+    // The current row is expected to hold just that value, under id 0.
+    final IndexedInts currentRow = filtered.getRow();
+    Assert.assertEquals(1, currentRow.size());
+    Assert.assertEquals(0, currentRow.get(0));
+
+    // Id 0 and the name "c" are expected to map to each other.
+    Assert.assertEquals("c", filtered.lookupName(0));
+    Assert.assertEquals(0, filtered.idLookup().lookupId("c"));
+  }
+}