mirror of https://github.com/apache/druid.git
Merge pull request #1986 from metamx/substring
fixes #1874 adding a substring extraction function, tests, and documentation
This commit is contained in:
commit
00c6027777
|
@ -72,6 +72,24 @@ matches, otherwise returns null.
|
||||||
{ "type" : "searchQuery", "query" : <search_query_spec> }
|
{ "type" : "searchQuery", "query" : <search_query_spec> }
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Substring Extraction Function
|
||||||
|
|
||||||
|
Returns a substring of the dimension value starting from the supplied index and of the desired length. If the desired
|
||||||
|
length exceeds the length of the dimension value, the remainder of the string starting at index will be returned.
|
||||||
|
If index is greater than or equal to the length of the dimension value, null will be returned.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{ "type" : "substring", "index" : 1, "length" : 4 }
|
||||||
|
```
|
||||||
|
|
||||||
|
The length may be omitted for substring to return the remainder of the dimension value starting from index,
|
||||||
|
or null if index is greater than or equal to the length of the dimension value.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{ "type" : "substring", "index" : 3 }
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
### Time Format Extraction Function
|
### Time Format Extraction Function
|
||||||
|
|
||||||
Returns the dimension value formatted according to the given format string, time zone, and locale.
|
Returns the dimension value formatted according to the given format string, time zone, and locale.
|
||||||
|
|
|
@ -31,7 +31,8 @@ import com.fasterxml.jackson.annotation.JsonTypeInfo;
|
||||||
@JsonSubTypes.Type(name = "javascript", value = JavascriptExtractionFn.class),
|
@JsonSubTypes.Type(name = "javascript", value = JavascriptExtractionFn.class),
|
||||||
@JsonSubTypes.Type(name = "timeFormat", value = TimeFormatExtractionFn.class),
|
@JsonSubTypes.Type(name = "timeFormat", value = TimeFormatExtractionFn.class),
|
||||||
@JsonSubTypes.Type(name = "identity", value = IdentityExtractionFn.class),
|
@JsonSubTypes.Type(name = "identity", value = IdentityExtractionFn.class),
|
||||||
@JsonSubTypes.Type(name = "lookup", value = LookupExtractionFn.class)
|
@JsonSubTypes.Type(name = "lookup", value = LookupExtractionFn.class),
|
||||||
|
@JsonSubTypes.Type(name = "substring", value = SubstringDimExtractionFn.class)
|
||||||
})
|
})
|
||||||
/**
|
/**
|
||||||
* An ExtractionFn is a function that can be used to transform the values of a column (typically a dimension)
|
* An ExtractionFn is a function that can be used to transform the values of a column (typically a dimension)
|
||||||
|
|
|
@ -0,0 +1,133 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. Metamarkets licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package io.druid.query.extraction;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||||
|
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||||
|
import com.google.common.base.Preconditions;
|
||||||
|
import com.google.common.base.Strings;
|
||||||
|
|
||||||
|
import javax.annotation.Nullable;
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
|
||||||
|
/**
|
||||||
|
*/
|
||||||
|
public class SubstringDimExtractionFn extends DimExtractionFn
|
||||||
|
{
|
||||||
|
private static final byte CACHE_TYPE_ID = 0x8;
|
||||||
|
|
||||||
|
private final int index;
|
||||||
|
private final int end;
|
||||||
|
|
||||||
|
@JsonCreator
|
||||||
|
public SubstringDimExtractionFn(
|
||||||
|
@JsonProperty("index") int index,
|
||||||
|
@Nullable
|
||||||
|
@JsonProperty("length") Integer length
|
||||||
|
)
|
||||||
|
{
|
||||||
|
|
||||||
|
Preconditions.checkArgument(length == null || length > 0, "length must be strictly positive");
|
||||||
|
|
||||||
|
this.index = index;
|
||||||
|
this.end = length != null ? index + length : -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public byte[] getCacheKey()
|
||||||
|
{
|
||||||
|
return ByteBuffer.allocate(1 + 8)
|
||||||
|
.put(CACHE_TYPE_ID)
|
||||||
|
.putInt(this.index)
|
||||||
|
.putInt(this.end)
|
||||||
|
.array();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String apply(String dimValue)
|
||||||
|
{
|
||||||
|
if (Strings.isNullOrEmpty(dimValue)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
int len = dimValue.length();
|
||||||
|
|
||||||
|
if (index < len) {
|
||||||
|
if (end > 0) {
|
||||||
|
return dimValue.substring(index, Math.min(end, len));
|
||||||
|
} else {
|
||||||
|
return dimValue.substring(index);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@JsonProperty
|
||||||
|
public int getIndex()
|
||||||
|
{
|
||||||
|
return index;
|
||||||
|
}
|
||||||
|
|
||||||
|
@JsonProperty
|
||||||
|
public Integer getLength()
|
||||||
|
{
|
||||||
|
return end > -1 ? end - index : null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean preservesOrdering()
|
||||||
|
{
|
||||||
|
return index == 0 ? true : false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ExtractionType getExtractionType()
|
||||||
|
{
|
||||||
|
return ExtractionType.MANY_TO_ONE;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object o)
|
||||||
|
{
|
||||||
|
if (this == o) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (o == null || getClass() != o.getClass()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
SubstringDimExtractionFn that = (SubstringDimExtractionFn) o;
|
||||||
|
|
||||||
|
if (index != that.index) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return end == that.end;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode()
|
||||||
|
{
|
||||||
|
int result = index;
|
||||||
|
result = 31 * result + end;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,131 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. Metamarkets licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package io.druid.query.extraction;
|
||||||
|
import com.google.common.collect.Sets;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import io.druid.jackson.DefaultObjectMapper;
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
/**
|
||||||
|
*/
|
||||||
|
public class SubstringDimExtractionFnTest
|
||||||
|
{
|
||||||
|
@Test
|
||||||
|
public void testSubstrings()
|
||||||
|
{
|
||||||
|
ExtractionFn extractionFn = new SubstringDimExtractionFn(1, 3);
|
||||||
|
|
||||||
|
Assert.assertEquals("ppl", extractionFn.apply("apple"));
|
||||||
|
Assert.assertEquals("e", extractionFn.apply("be"));
|
||||||
|
Assert.assertEquals("ool", extractionFn.apply("cool"));
|
||||||
|
Assert.assertEquals(null, extractionFn.apply("a"));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test (expected = IllegalArgumentException.class)
|
||||||
|
public void testZeroLength() {
|
||||||
|
ExtractionFn extractionFnNoLength = new SubstringDimExtractionFn(1,0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNoLength()
|
||||||
|
{
|
||||||
|
ExtractionFn extractionFnNoLength = new SubstringDimExtractionFn(1,null);
|
||||||
|
|
||||||
|
// 0 length substring returns remainder
|
||||||
|
Assert.assertEquals("abcdef", extractionFnNoLength.apply("/abcdef"));
|
||||||
|
|
||||||
|
// 0 length substring empty result is null
|
||||||
|
Assert.assertEquals(null, extractionFnNoLength.apply("/"));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetCacheKey()
|
||||||
|
{
|
||||||
|
ExtractionFn extractionFn1 = new SubstringDimExtractionFn(2,4);
|
||||||
|
ExtractionFn extractionFn2 = new SubstringDimExtractionFn(2,4);
|
||||||
|
ExtractionFn extractionFn3 = new SubstringDimExtractionFn(1,4);
|
||||||
|
|
||||||
|
Assert.assertArrayEquals(extractionFn1.getCacheKey(), extractionFn2.getCacheKey());
|
||||||
|
|
||||||
|
Assert.assertFalse(Arrays.equals(extractionFn1.getCacheKey(), extractionFn3.getCacheKey()));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testHashCode()
|
||||||
|
{
|
||||||
|
ExtractionFn extractionFn1 = new SubstringDimExtractionFn(2,4);
|
||||||
|
ExtractionFn extractionFn2 = new SubstringDimExtractionFn(2,4);
|
||||||
|
ExtractionFn extractionFn3 = new SubstringDimExtractionFn(1,4);
|
||||||
|
|
||||||
|
Assert.assertEquals(extractionFn1.hashCode(), extractionFn2.hashCode());
|
||||||
|
|
||||||
|
Assert.assertNotEquals(extractionFn1.hashCode(), extractionFn3.hashCode());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNullAndEmpty()
|
||||||
|
{
|
||||||
|
ExtractionFn extractionFn = new SubstringDimExtractionFn(2,4);
|
||||||
|
// no match, map empty input value to null
|
||||||
|
Assert.assertEquals(null, extractionFn.apply(""));
|
||||||
|
// null value, returns null
|
||||||
|
Assert.assertEquals(null, extractionFn.apply(null));
|
||||||
|
// empty match, map empty result to null
|
||||||
|
Assert.assertEquals(null, extractionFn.apply("/a"));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSerde() throws Exception
|
||||||
|
{
|
||||||
|
final ObjectMapper objectMapper = new DefaultObjectMapper();
|
||||||
|
|
||||||
|
final String json = "{ \"type\" : \"substring\", \"index\" : 1, \"length\" : 3 }";
|
||||||
|
final String jsonNoLength = "{ \"type\" : \"substring\", \"index\" : 1 }";
|
||||||
|
|
||||||
|
SubstringDimExtractionFn extractionFn = (SubstringDimExtractionFn) objectMapper.readValue(json, ExtractionFn.class);
|
||||||
|
SubstringDimExtractionFn extractionFnNoLength = (SubstringDimExtractionFn) objectMapper.readValue(jsonNoLength, ExtractionFn.class);
|
||||||
|
|
||||||
|
Assert.assertEquals(1, extractionFn.getIndex());
|
||||||
|
Assert.assertEquals(new Integer(3), extractionFn.getLength());
|
||||||
|
Assert.assertEquals(1, extractionFnNoLength.getIndex());
|
||||||
|
Assert.assertEquals(null, extractionFnNoLength.getLength());
|
||||||
|
|
||||||
|
// round trip
|
||||||
|
Assert.assertEquals(
|
||||||
|
extractionFn,
|
||||||
|
objectMapper.readValue(
|
||||||
|
objectMapper.writeValueAsBytes(extractionFn),
|
||||||
|
ExtractionFn.class
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
|
Assert.assertEquals(
|
||||||
|
extractionFnNoLength,
|
||||||
|
objectMapper.readValue(
|
||||||
|
objectMapper.writeValueAsBytes(extractionFnNoLength),
|
||||||
|
ExtractionFn.class
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue