mirror of https://github.com/apache/druid.git
Merge pull request #1986 from metamx/substring
fixes #1874 adding a substring extraction function, tests, and documentation
This commit is contained in:
commit
00c6027777
|
@ -72,6 +72,24 @@ matches, otherwise returns null.
|
|||
{ "type" : "searchQuery", "query" : <search_query_spec> }
|
||||
```
|
||||
|
||||
### Substring Extraction Function
|
||||
|
||||
Returns a substring of the dimension value starting from the supplied zero-based index and of the desired length. If the desired
|
||||
length exceeds the length of the dimension value, the remainder of the string starting at index will be returned.
|
||||
If index is greater than or equal to the length of the dimension value, null will be returned.
|
||||
|
||||
```json
|
||||
{ "type" : "substring", "index" : 1, "length" : 4 }
|
||||
```
|
||||
|
||||
The length may be omitted for substring to return the remainder of the dimension value starting from index,
|
||||
or null if index is greater than or equal to the length of the dimension value.
|
||||
|
||||
```json
|
||||
{ "type" : "substring", "index" : 3 }
|
||||
```
|
||||
|
||||
|
||||
### Time Format Extraction Function
|
||||
|
||||
Returns the dimension value formatted according to the given format string, time zone, and locale.
|
||||
|
|
|
@ -31,7 +31,8 @@ import com.fasterxml.jackson.annotation.JsonTypeInfo;
|
|||
@JsonSubTypes.Type(name = "javascript", value = JavascriptExtractionFn.class),
|
||||
@JsonSubTypes.Type(name = "timeFormat", value = TimeFormatExtractionFn.class),
|
||||
@JsonSubTypes.Type(name = "identity", value = IdentityExtractionFn.class),
|
||||
@JsonSubTypes.Type(name = "lookup", value = LookupExtractionFn.class)
|
||||
@JsonSubTypes.Type(name = "lookup", value = LookupExtractionFn.class),
|
||||
@JsonSubTypes.Type(name = "substring", value = SubstringDimExtractionFn.class)
|
||||
})
|
||||
/**
|
||||
* An ExtractionFn is a function that can be used to transform the values of a column (typically a dimension)
|
||||
|
|
|
@ -0,0 +1,133 @@
|
|||
/*
|
||||
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Metamarkets licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package io.druid.query.extraction;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.base.Strings;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.nio.ByteBuffer;
|
||||
|
||||
/**
|
||||
*/
|
||||
public class SubstringDimExtractionFn extends DimExtractionFn
|
||||
{
|
||||
private static final byte CACHE_TYPE_ID = 0x8;
|
||||
|
||||
private final int index;
|
||||
private final int end;
|
||||
|
||||
@JsonCreator
|
||||
public SubstringDimExtractionFn(
|
||||
@JsonProperty("index") int index,
|
||||
@Nullable
|
||||
@JsonProperty("length") Integer length
|
||||
)
|
||||
{
|
||||
|
||||
Preconditions.checkArgument(length == null || length > 0, "length must be strictly positive");
|
||||
|
||||
this.index = index;
|
||||
this.end = length != null ? index + length : -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getCacheKey()
|
||||
{
|
||||
return ByteBuffer.allocate(1 + 8)
|
||||
.put(CACHE_TYPE_ID)
|
||||
.putInt(this.index)
|
||||
.putInt(this.end)
|
||||
.array();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String apply(String dimValue)
|
||||
{
|
||||
if (Strings.isNullOrEmpty(dimValue)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
int len = dimValue.length();
|
||||
|
||||
if (index < len) {
|
||||
if (end > 0) {
|
||||
return dimValue.substring(index, Math.min(end, len));
|
||||
} else {
|
||||
return dimValue.substring(index);
|
||||
}
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public int getIndex()
|
||||
{
|
||||
return index;
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public Integer getLength()
|
||||
{
|
||||
return end > -1 ? end - index : null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean preservesOrdering()
|
||||
{
|
||||
return index == 0 ? true : false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ExtractionType getExtractionType()
|
||||
{
|
||||
return ExtractionType.MANY_TO_ONE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o)
|
||||
{
|
||||
if (this == o) {
|
||||
return true;
|
||||
}
|
||||
if (o == null || getClass() != o.getClass()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
SubstringDimExtractionFn that = (SubstringDimExtractionFn) o;
|
||||
|
||||
if (index != that.index) {
|
||||
return false;
|
||||
}
|
||||
return end == that.end;
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode()
|
||||
{
|
||||
int result = index;
|
||||
result = 31 * result + end;
|
||||
return result;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,131 @@
|
|||
/*
|
||||
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Metamarkets licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package io.druid.query.extraction;
|
||||
import com.google.common.collect.Sets;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import io.druid.jackson.DefaultObjectMapper;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
*/
|
||||
public class SubstringDimExtractionFnTest
|
||||
{
|
||||
@Test
|
||||
public void testSubstrings()
|
||||
{
|
||||
ExtractionFn extractionFn = new SubstringDimExtractionFn(1, 3);
|
||||
|
||||
Assert.assertEquals("ppl", extractionFn.apply("apple"));
|
||||
Assert.assertEquals("e", extractionFn.apply("be"));
|
||||
Assert.assertEquals("ool", extractionFn.apply("cool"));
|
||||
Assert.assertEquals(null, extractionFn.apply("a"));
|
||||
}
|
||||
|
||||
@Test (expected = IllegalArgumentException.class)
|
||||
public void testZeroLength() {
|
||||
ExtractionFn extractionFnNoLength = new SubstringDimExtractionFn(1,0);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoLength()
|
||||
{
|
||||
ExtractionFn extractionFnNoLength = new SubstringDimExtractionFn(1,null);
|
||||
|
||||
// 0 length substring returns remainder
|
||||
Assert.assertEquals("abcdef", extractionFnNoLength.apply("/abcdef"));
|
||||
|
||||
// 0 length substring empty result is null
|
||||
Assert.assertEquals(null, extractionFnNoLength.apply("/"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetCacheKey()
|
||||
{
|
||||
ExtractionFn extractionFn1 = new SubstringDimExtractionFn(2,4);
|
||||
ExtractionFn extractionFn2 = new SubstringDimExtractionFn(2,4);
|
||||
ExtractionFn extractionFn3 = new SubstringDimExtractionFn(1,4);
|
||||
|
||||
Assert.assertArrayEquals(extractionFn1.getCacheKey(), extractionFn2.getCacheKey());
|
||||
|
||||
Assert.assertFalse(Arrays.equals(extractionFn1.getCacheKey(), extractionFn3.getCacheKey()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHashCode()
|
||||
{
|
||||
ExtractionFn extractionFn1 = new SubstringDimExtractionFn(2,4);
|
||||
ExtractionFn extractionFn2 = new SubstringDimExtractionFn(2,4);
|
||||
ExtractionFn extractionFn3 = new SubstringDimExtractionFn(1,4);
|
||||
|
||||
Assert.assertEquals(extractionFn1.hashCode(), extractionFn2.hashCode());
|
||||
|
||||
Assert.assertNotEquals(extractionFn1.hashCode(), extractionFn3.hashCode());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNullAndEmpty()
|
||||
{
|
||||
ExtractionFn extractionFn = new SubstringDimExtractionFn(2,4);
|
||||
// no match, map empty input value to null
|
||||
Assert.assertEquals(null, extractionFn.apply(""));
|
||||
// null value, returns null
|
||||
Assert.assertEquals(null, extractionFn.apply(null));
|
||||
// empty match, map empty result to null
|
||||
Assert.assertEquals(null, extractionFn.apply("/a"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSerde() throws Exception
|
||||
{
|
||||
final ObjectMapper objectMapper = new DefaultObjectMapper();
|
||||
|
||||
final String json = "{ \"type\" : \"substring\", \"index\" : 1, \"length\" : 3 }";
|
||||
final String jsonNoLength = "{ \"type\" : \"substring\", \"index\" : 1 }";
|
||||
|
||||
SubstringDimExtractionFn extractionFn = (SubstringDimExtractionFn) objectMapper.readValue(json, ExtractionFn.class);
|
||||
SubstringDimExtractionFn extractionFnNoLength = (SubstringDimExtractionFn) objectMapper.readValue(jsonNoLength, ExtractionFn.class);
|
||||
|
||||
Assert.assertEquals(1, extractionFn.getIndex());
|
||||
Assert.assertEquals(new Integer(3), extractionFn.getLength());
|
||||
Assert.assertEquals(1, extractionFnNoLength.getIndex());
|
||||
Assert.assertEquals(null, extractionFnNoLength.getLength());
|
||||
|
||||
// round trip
|
||||
Assert.assertEquals(
|
||||
extractionFn,
|
||||
objectMapper.readValue(
|
||||
objectMapper.writeValueAsBytes(extractionFn),
|
||||
ExtractionFn.class
|
||||
)
|
||||
);
|
||||
|
||||
Assert.assertEquals(
|
||||
extractionFnNoLength,
|
||||
objectMapper.readValue(
|
||||
objectMapper.writeValueAsBytes(extractionFnNoLength),
|
||||
ExtractionFn.class
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue