Merge pull request #1986 from metamx/substring

fixes #1874 adding a substring extraction function, tests, and documentation
This commit is contained in:
Himanshu 2015-12-02 23:45:47 -06:00
commit 00c6027777
4 changed files with 284 additions and 1 deletions

View File

@ -72,6 +72,24 @@ matches, otherwise returns null.
{ "type" : "searchQuery", "query" : <search_query_spec> }
```
### Substring Extraction Function
Returns a substring of the dimension value starting from the supplied zero-based index and of the desired length, which
must be strictly positive. If the desired length exceeds the length of the dimension value, the remainder of the string
starting at index will be returned. If index is greater than or equal to the length of the dimension value, null will be returned.
```json
{ "type" : "substring", "index" : 1, "length" : 4 }
```
The length may be omitted, in which case substring returns the remainder of the dimension value starting from index,
or null if index is greater than or equal to the length of the dimension value.
```json
{ "type" : "substring", "index" : 3 }
```
### Time Format Extraction Function
Returns the dimension value formatted according to the given format string, time zone, and locale.

View File

@ -31,7 +31,8 @@ import com.fasterxml.jackson.annotation.JsonTypeInfo;
@JsonSubTypes.Type(name = "javascript", value = JavascriptExtractionFn.class),
@JsonSubTypes.Type(name = "timeFormat", value = TimeFormatExtractionFn.class),
@JsonSubTypes.Type(name = "identity", value = IdentityExtractionFn.class),
@JsonSubTypes.Type(name = "lookup", value = LookupExtractionFn.class)
@JsonSubTypes.Type(name = "lookup", value = LookupExtractionFn.class),
@JsonSubTypes.Type(name = "substring", value = SubstringDimExtractionFn.class)
})
/**
* An ExtractionFn is a function that can be used to transform the values of a column (typically a dimension)

View File

@ -0,0 +1,133 @@
/*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package io.druid.query.extraction;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import javax.annotation.Nullable;
import java.nio.ByteBuffer;
/**
 * Extraction function that returns a substring of the dimension value, beginning at a zero-based
 * {@code index} and spanning at most {@code length} characters.
 *
 * <p>Null or empty input, and any input whose length does not exceed {@code index}, maps to null.
 * When no length is configured, the remainder of the value starting at {@code index} is returned.
 */
public class SubstringDimExtractionFn extends DimExtractionFn
{
  private static final byte CACHE_TYPE_ID = 0x8;

  // Sentinel stored in "end" when no length was supplied.
  private static final int NO_END = -1;

  private final int index;

  // Exclusive end offset (index + length), or NO_END when the substring runs to the end of the value.
  private final int end;

  /**
   * @param index  zero-based offset of the first character to keep; must be non-negative
   * @param length maximum number of characters to keep, or null to keep everything from {@code index} on;
   *               must be strictly positive when present
   *
   * @throws IllegalArgumentException if {@code index} is negative or {@code length} is not strictly positive
   */
  @JsonCreator
  public SubstringDimExtractionFn(
      @JsonProperty("index") int index,
      @Nullable
      @JsonProperty("length") Integer length
  )
  {
    // A negative index would otherwise only surface later, as a StringIndexOutOfBoundsException in apply().
    Preconditions.checkArgument(index >= 0, "index must be non-negative");
    Preconditions.checkArgument(length == null || length > 0, "length must be strictly positive");

    this.index = index;
    if (length == null) {
      this.end = NO_END;
    } else {
      // Add in long arithmetic and saturate: an int overflow would wrap negative and silently
      // turn a configured length into "no length".
      this.end = (int) Math.min((long) index + length, (long) Integer.MAX_VALUE);
    }
  }

  /**
   * Builds the cache key: the type id byte followed by {@code index} and {@code end} as 4-byte ints.
   */
  @Override
  public byte[] getCacheKey()
  {
    return ByteBuffer.allocate(1 + 8)
                     .put(CACHE_TYPE_ID)
                     .putInt(this.index)
                     .putInt(this.end)
                     .array();
  }

  /**
   * @param dimValue the dimension value, may be null
   *
   * @return the configured substring, or null when {@code dimValue} is null, empty, or not longer
   * than {@code index}
   */
  @Override
  public String apply(String dimValue)
  {
    if (Strings.isNullOrEmpty(dimValue)) {
      return null;
    }

    final int len = dimValue.length();
    if (index >= len) {
      // Nothing left to extract at or beyond the end of the value.
      return null;
    }

    if (hasEnd()) {
      // Clamp so a length reaching past the value yields the remainder instead of throwing.
      return dimValue.substring(index, Math.min(end, len));
    }
    return dimValue.substring(index);
  }

  @JsonProperty
  public int getIndex()
  {
    return index;
  }

  /**
   * @return the configured length, or null when none was supplied
   */
  @JsonProperty
  public Integer getLength()
  {
    return hasEnd() ? end - index : null;
  }

  /**
   * Reports true only when the substring starts at offset 0.
   */
  @Override
  public boolean preservesOrdering()
  {
    return index == 0;
  }

  @Override
  public ExtractionType getExtractionType()
  {
    return ExtractionType.MANY_TO_ONE;
  }

  @Override
  public boolean equals(Object o)
  {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }

    SubstringDimExtractionFn that = (SubstringDimExtractionFn) o;
    return index == that.index && end == that.end;
  }

  @Override
  public int hashCode()
  {
    int result = index;
    result = 31 * result + end;
    return result;
  }

  @Override
  public String toString()
  {
    return "SubstringDimExtractionFn{index=" + index + ", length=" + getLength() + "}";
  }

  // True when a length was configured, i.e. "end" holds a real exclusive end offset.
  private boolean hasEnd()
  {
    return end != NO_END;
  }
}

View File

@ -0,0 +1,131 @@
/*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package io.druid.query.extraction;
import com.google.common.collect.Sets;
import com.fasterxml.jackson.databind.ObjectMapper;
import io.druid.jackson.DefaultObjectMapper;
import org.junit.Assert;
import org.junit.Test;
import java.util.Arrays;
import java.util.Set;
/**
 * Unit tests for {@link SubstringDimExtractionFn}: substring extraction, argument validation,
 * cache keys, hash codes, null/empty handling, and JSON round-tripping.
 */
public class SubstringDimExtractionFnTest
{
  @Test
  public void testSubstrings()
  {
    ExtractionFn extractionFn = new SubstringDimExtractionFn(1, 3);

    Assert.assertEquals("ppl", extractionFn.apply("apple"));
    // requested length runs past the end of the value: the remainder is returned
    Assert.assertEquals("e", extractionFn.apply("be"));
    Assert.assertEquals("ool", extractionFn.apply("cool"));
    // index is at the end of the value: nothing to extract
    Assert.assertNull(extractionFn.apply("a"));
  }

  @Test(expected = IllegalArgumentException.class)
  public void testZeroLength()
  {
    // a length of zero is rejected at construction time
    new SubstringDimExtractionFn(1, 0);
  }

  @Test
  public void testNoLength()
  {
    ExtractionFn extractionFnNoLength = new SubstringDimExtractionFn(1, null);

    // omitted length returns the remainder of the value
    Assert.assertEquals("abcdef", extractionFnNoLength.apply("/abcdef"));

    // omitted length with nothing after index yields null
    Assert.assertNull(extractionFnNoLength.apply("/"));
  }

  @Test
  public void testGetCacheKey()
  {
    ExtractionFn extractionFn1 = new SubstringDimExtractionFn(2, 4);
    ExtractionFn extractionFn2 = new SubstringDimExtractionFn(2, 4);
    ExtractionFn extractionFn3 = new SubstringDimExtractionFn(1, 4);

    Assert.assertArrayEquals(extractionFn1.getCacheKey(), extractionFn2.getCacheKey());
    Assert.assertFalse(Arrays.equals(extractionFn1.getCacheKey(), extractionFn3.getCacheKey()));
  }

  @Test
  public void testHashCode()
  {
    ExtractionFn extractionFn1 = new SubstringDimExtractionFn(2, 4);
    ExtractionFn extractionFn2 = new SubstringDimExtractionFn(2, 4);
    ExtractionFn extractionFn3 = new SubstringDimExtractionFn(1, 4);

    Assert.assertEquals(extractionFn1.hashCode(), extractionFn2.hashCode());
    Assert.assertNotEquals(extractionFn1.hashCode(), extractionFn3.hashCode());
  }

  @Test
  public void testNullAndEmpty()
  {
    ExtractionFn extractionFn = new SubstringDimExtractionFn(2, 4);

    // no match, map empty input value to null
    Assert.assertNull(extractionFn.apply(""));

    // null value, returns null
    Assert.assertNull(extractionFn.apply(null));

    // empty match, map empty result to null
    Assert.assertNull(extractionFn.apply("/a"));
  }

  @Test
  public void testSerde() throws Exception
  {
    final ObjectMapper objectMapper = new DefaultObjectMapper();

    final String json = "{ \"type\" : \"substring\", \"index\" : 1, \"length\" : 3 }";
    final String jsonNoLength = "{ \"type\" : \"substring\", \"index\" : 1 }";

    SubstringDimExtractionFn extractionFn =
        (SubstringDimExtractionFn) objectMapper.readValue(json, ExtractionFn.class);
    SubstringDimExtractionFn extractionFnNoLength =
        (SubstringDimExtractionFn) objectMapper.readValue(jsonNoLength, ExtractionFn.class);

    Assert.assertEquals(1, extractionFn.getIndex());
    // Integer.valueOf replaces the deprecated boxing constructor
    Assert.assertEquals(Integer.valueOf(3), extractionFn.getLength());

    Assert.assertEquals(1, extractionFnNoLength.getIndex());
    Assert.assertNull(extractionFnNoLength.getLength());

    // round trip
    Assert.assertEquals(
        extractionFn,
        objectMapper.readValue(
            objectMapper.writeValueAsBytes(extractionFn),
            ExtractionFn.class
        )
    );

    Assert.assertEquals(
        extractionFnNoLength,
        objectMapper.readValue(
            objectMapper.writeValueAsBytes(extractionFnNoLength),
            ExtractionFn.class
        )
    );
  }
}