add PrefixFilteredDimensionSpec for multi-value dimensions (#6307)

* add PrefixFilteredDimensionSpec for multi-value dimensions

* add docs for PrefixFilteredDimensionSpec

* remove unnecessary null handling

* add null check to the result of NullHandling
This commit is contained in:
dongyifeng 2018-10-13 08:51:09 +08:00 committed by Jonathan Wei
parent 0f4f5f2877
commit b06ac54a5e
5 changed files with 252 additions and 2 deletions

View File

@ -66,6 +66,12 @@ Following filtered dimension spec retains only the values matching regex. Note t
{ "type" : "regexFiltered", "delegate" : <dimensionSpec>, "pattern": <java regex pattern> }
```
The following filtered dimension spec retains only the values that start with the given prefix.
```json
{ "type" : "prefixFiltered", "delegate" : <dimensionSpec>, "prefix": <prefix string> }
```
For more details and examples, see [multi-value dimensions](multi-value-dimensions.html).
### Lookup DimensionSpecs

View File

@ -37,7 +37,8 @@ import javax.annotation.Nullable;
@JsonSubTypes.Type(name = "default", value = DefaultDimensionSpec.class),
@JsonSubTypes.Type(name = "extraction", value = ExtractionDimensionSpec.class),
@JsonSubTypes.Type(name = "regexFiltered", value = RegexFilteredDimensionSpec.class),
@JsonSubTypes.Type(name = "listFiltered", value = ListFilteredDimensionSpec.class)
@JsonSubTypes.Type(name = "listFiltered", value = ListFilteredDimensionSpec.class),
@JsonSubTypes.Type(name = "prefixFiltered", value = PrefixFilteredDimensionSpec.class)
})
public interface DimensionSpec extends Cacheable
{

View File

@ -0,0 +1,144 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.query.dimension;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.query.filter.DimFilterUtils;
import org.apache.druid.segment.DimensionSelector;
import it.unimi.dsi.fastutil.ints.Int2IntMap;
import it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap;
import java.nio.ByteBuffer;
import javax.annotation.Nullable;
/**
 * Filtered dimension spec (JSON type {@code "prefixFiltered"}) that retains only the values of the
 * delegate dimension which start with a configured prefix. Intended for multi-value dimensions.
 */
public class PrefixFilteredDimensionSpec extends BaseFilteredDimensionSpec
{
  // First byte of the cache key produced by getCacheKey().
  // NOTE(review): presumably this must be unique among DimensionSpec implementations -- confirm
  // that no other spec already uses 0x4.
  private static final byte CACHE_TYPE_ID = 0x4;

  private final String prefix;

  /**
   * @param delegate the dimension spec whose values are filtered
   * @param prefix   values not starting with this prefix are discarded; must not be null
   *
   * @throws NullPointerException if {@code prefix} is null
   */
  public PrefixFilteredDimensionSpec(
      @JsonProperty("delegate") DimensionSpec delegate,
      @JsonProperty("prefix") String prefix
  )
  {
    super(delegate);
    this.prefix = Preconditions.checkNotNull(prefix, "prefix must not be null");
  }

  /**
   * @return the prefix a value must start with in order to be retained
   */
  @JsonProperty
  public String getPrefix()
  {
    return prefix;
  }

  /**
   * Wraps the given selector so that only prefix-matching values are let through.
   *
   * @param selector the selector to wrap; a null selector is passed through as null
   *
   * @return a predicate-based wrapper when the value dictionary cannot be enumerated up front,
   * otherwise a wrapper driven by a precomputed id mapping
   */
  @Override
  public DimensionSelector decorate(final DimensionSelector selector)
  {
    if (selector == null) {
      return null;
    }

    final int cardinality = selector.getValueCardinality();
    final boolean dictionaryEnumerable = cardinality >= 0 && selector.nameLookupPossibleInAdvance();

    if (!dictionaryEnumerable) {
      // The ids cannot be walked in advance, so every value is tested as it is encountered.
      final Predicate<String> prefixPredicate = new Predicate<String>()
      {
        @Override
        public boolean apply(@Nullable String input)
        {
          return matchesPrefix(input);
        }
      };
      return new PredicateFilteredDimensionSelector(selector, prefixPredicate);
    }

    // forwardMapping: original id -> compacted id, or -1 when the value does not match.
    final Int2IntOpenHashMap forwardMapping = new Int2IntOpenHashMap();
    forwardMapping.defaultReturnValue(-1);
    int nextId = 0;
    for (int id = 0; id < cardinality; id++) {
      if (matchesPrefix(selector.lookupName(id))) {
        forwardMapping.put(id, nextId);
        nextId++;
      }
    }

    // reverseMapping: compacted id -> original id.
    final int[] reverseMapping = new int[forwardMapping.size()];
    for (Int2IntMap.Entry entry : forwardMapping.int2IntEntrySet()) {
      reverseMapping[entry.getIntValue()] = entry.getIntKey();
    }

    return new ForwardingFilteredDimensionSelector(selector, forwardMapping, reverseMapping);
  }

  /**
   * Builds the cache key as: type id byte, delegate cache key, separator byte, UTF-8 bytes of the prefix.
   */
  @Override
  public byte[] getCacheKey()
  {
    final byte[] delegateKey = delegate.getCacheKey();
    final byte[] prefixUtf8 = StringUtils.toUtf8(prefix);

    // The two extra bytes hold the type id and the separator.
    final ByteBuffer key = ByteBuffer.allocate(2 + delegateKey.length + prefixUtf8.length);
    key.put(CACHE_TYPE_ID);
    key.put(delegateKey);
    key.put(DimFilterUtils.STRING_SEPARATOR);
    key.put(prefixUtf8);
    return key.array();
  }

  /**
   * Two specs are equal when they are of the same class and both delegate and prefix are equal.
   */
  @Override
  public boolean equals(Object o)
  {
    if (o == this) {
      return true;
    }
    if (o == null || o.getClass() != getClass()) {
      return false;
    }

    final PrefixFilteredDimensionSpec other = (PrefixFilteredDimensionSpec) o;
    return delegate.equals(other.delegate) && prefix.equals(other.prefix);
  }

  @Override
  public int hashCode()
  {
    return 31 * delegate.hashCode() + prefix.hashCode();
  }

  @Override
  public String toString()
  {
    return "PrefixFilteredDimensionSpec{Prefix='" + prefix + "'}";
  }

  /**
   * Tests a single dimension value against the prefix. The value is first passed through
   * {@link NullHandling#nullToEmptyIfNeeded(String)}; a result that is still null never matches.
   */
  private boolean matchesPrefix(@Nullable String value)
  {
    final String normalized = NullHandling.nullToEmptyIfNeeded(value);
    return normalized != null && normalized.startsWith(prefix);
  }
}

View File

@ -76,7 +76,8 @@ public class RegexFilteredDimensionSpec extends BaseFilteredDimensionSpec
@Override
public boolean apply(@Nullable String input)
{
return compiledRegex.matcher(NullHandling.nullToEmptyIfNeeded(input)).matches();
String val = NullHandling.nullToEmptyIfNeeded(input);
return val == null ? false : compiledRegex.matcher(val).matches();
}
}
);

View File

@ -0,0 +1,98 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.query.dimension;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.druid.segment.DimensionSelector;
import org.apache.druid.segment.TestHelper;
import org.apache.druid.segment.data.IndexedInts;
import java.util.Arrays;
import org.junit.Assert;
import org.junit.Test;
/**
 * Unit tests for {@link PrefixFilteredDimensionSpec}: JSON round-trip, cache key and selector decoration.
 */
public class PrefixFilteredDimensionSpecTest
{
  /**
   * A spec parsed from JSON, written back out and parsed again must equal the hand-built spec.
   */
  @Test
  public void testSerde() throws Exception
  {
    final ObjectMapper jsonMapper = TestHelper.makeJsonMapper();

    final String jsonStr = "{\n"
                           + " \"type\": \"prefixFiltered\",\n"
                           + " \"delegate\": {\n"
                           + " \"type\": \"default\",\n"
                           + " \"dimension\": \"foo\",\n"
                           + " \"outputName\": \"bar\"\n"
                           + " },\n"
                           + " \"prefix\": \"xxx\"\n"
                           + "}";

    final PrefixFilteredDimensionSpec expected = new PrefixFilteredDimensionSpec(
        new DefaultDimensionSpec("foo", "bar"),
        "xxx"
    );

    // Round trip: JSON -> spec -> JSON -> spec.
    final DimensionSpec parsed = jsonMapper.readValue(jsonStr, DimensionSpec.class);
    final String reserialized = jsonMapper.writeValueAsString(parsed);
    final PrefixFilteredDimensionSpec actual =
        (PrefixFilteredDimensionSpec) jsonMapper.readValue(reserialized, DimensionSpec.class);

    Assert.assertEquals(expected, actual);
  }

  /**
   * Specs that differ only in their prefix must produce different cache keys.
   */
  @Test
  public void testGetCacheKey()
  {
    final byte[] keyForXxx = new PrefixFilteredDimensionSpec(
        new DefaultDimensionSpec("foo", "bar"),
        "xxx"
    ).getCacheKey();

    final byte[] keyForXyz = new PrefixFilteredDimensionSpec(
        new DefaultDimensionSpec("foo", "bar"),
        "xyz"
    ).getCacheKey();

    Assert.assertFalse(Arrays.equals(keyForXxx, keyForXyz));
  }

  /**
   * Decorating the shared test selector with prefix "c" must leave exactly one value, "c", at id 0.
   * NOTE(review): relies on the contents of TestDimensionSelector.instance, which are defined
   * elsewhere -- presumably only one of its values starts with "c".
   */
  @Test
  public void testDecorator()
  {
    final PrefixFilteredDimensionSpec spec = new PrefixFilteredDimensionSpec(
        new DefaultDimensionSpec("foo", "far"),
        "c"
    );

    final DimensionSelector decorated = spec.decorate(TestDimensionSelector.instance);
    Assert.assertEquals(1, decorated.getValueCardinality());

    final IndexedInts currentRow = decorated.getRow();
    Assert.assertEquals(1, currentRow.size());
    Assert.assertEquals(0, currentRow.get(0));

    Assert.assertEquals("c", decorated.lookupName(0));
    Assert.assertEquals(0, decorated.idLookup().lookupId("c"));
  }
}