Add "strlen" extractionFn. (#3731)

This commit is contained in:
Gian Merlino 2016-12-02 12:08:51 -08:00 committed by Slim
parent 4c5d10f8a3
commit 102375d9bb
5 changed files with 147 additions and 4 deletions

View File

@ -150,9 +150,13 @@ matches, otherwise returns null.
### Substring Extraction Function
Returns a substring of the dimension value starting from the supplied index and of the desired length. If the desired
length exceeds the length of the dimension value, the remainder of the string starting at index will be returned.
If index is greater than the length of the dimension value, null will be returned.
Returns a substring of the dimension value starting from the supplied index and of the desired length. Both index
and length are measured in the number of Unicode code units present in the string as if it were encoded in UTF-16.
Note that some Unicode characters may be represented by two code units. This is the same behavior as the Java String
class's "substring" method.
If the desired length exceeds the length of the dimension value, the remainder of the string starting at index will
be returned. If index is greater than the length of the dimension value, null will be returned.
```json
{ "type" : "substring", "index" : 1, "length" : 4 }
@ -165,6 +169,17 @@ or null if index greater than the length of the dimension value.
{ "type" : "substring", "index" : 3 }
```
### Strlen Extraction Function
Returns the length of dimension values, as measured in the number of Unicode code units present in the string as if it
were encoded in UTF-16. Note that some Unicode characters may be represented by two code units. This is the same
behavior as the Java String class's "length" method.
Null strings are treated as having zero length.
```json
{ "type" : "strlen" }
```
### Time Format Extraction Function

View File

@ -38,4 +38,5 @@ public class ExtractionCacheHelper
public static final byte CACHE_TYPE_ID_UPPER = 0xB;
public static final byte CACHE_TYPE_ID_LOWER = 0xC;
public static final byte CACHE_TYPE_ID_BUCKET = 0xD;
public static final byte CACHE_TYPE_ID_STRLEN = 0xE;
}

View File

@ -42,7 +42,8 @@ import io.druid.query.lookup.RegisteredLookupExtractionFn;
@JsonSubTypes.Type(name = "stringFormat", value = StringFormatExtractionFn.class),
@JsonSubTypes.Type(name = "upper", value = UpperExtractionFn.class),
@JsonSubTypes.Type(name = "lower", value = LowerExtractionFn.class),
@JsonSubTypes.Type(name = "bucket", value = BucketExtractionFn.class)
@JsonSubTypes.Type(name = "bucket", value = BucketExtractionFn.class),
@JsonSubTypes.Type(name = "strlen", value = StrlenExtractionFn.class)
})
/**
* An ExtractionFn is a function that can be used to transform the values of a column (typically a dimension)

View File

@ -0,0 +1,61 @@
/*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package io.druid.query.extraction;
import com.fasterxml.jackson.annotation.JsonCreator;
/**
 * Extraction function that replaces a dimension value with its length, rendered as a decimal string.
 *
 * The length is whatever {@link String#length()} reports, i.e. the number of UTF-16 code units, so a
 * character outside the Basic Multilingual Plane counts as two. A null input is reported as "0".
 *
 * The function has no parameters and keeps no state, so a single shared instance is handed out by
 * {@link #instance()}, which is also the Jackson creator.
 */
public class StrlenExtractionFn extends DimExtractionFn
{
  private static final StrlenExtractionFn INSTANCE = new StrlenExtractionFn();

  private StrlenExtractionFn()
  {
    // Not instantiable from outside; use instance().
  }

  /**
   * Returns the shared instance. Annotated as the JSON creator so that deserialization yields this same object.
   */
  @JsonCreator
  public static StrlenExtractionFn instance()
  {
    return INSTANCE;
  }

  /**
   * @param value dimension value, may be null
   *
   * @return the length of {@code value} in UTF-16 code units as a decimal string, or "0" when it is null
   */
  @Override
  public String apply(String value)
  {
    int codeUnits = 0;
    if (value != null) {
      codeUnits = value.length();
    }
    return Integer.toString(codeUnits);
  }

  /**
   * Always false: the output is not declared to keep the ordering of the input values.
   */
  @Override
  public boolean preservesOrdering()
  {
    return false;
  }

  /**
   * Always {@code MANY_TO_ONE}: distinct inputs of equal length map to the same output.
   */
  @Override
  public ExtractionType getExtractionType()
  {
    return ExtractionType.MANY_TO_ONE;
  }

  /**
   * @return a fresh one-byte array holding only the strlen cache type id; there are no parameters to encode
   */
  @Override
  public byte[] getCacheKey()
  {
    final byte[] cacheKey = new byte[1];
    cacheKey[0] = ExtractionCacheHelper.CACHE_TYPE_ID_STRLEN;
    return cacheKey;
  }
}

View File

@ -0,0 +1,65 @@
/*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package io.druid.query.extraction;
import com.fasterxml.jackson.databind.ObjectMapper;
import io.druid.jackson.DefaultObjectMapper;
import org.junit.Assert;
import org.junit.Test;
/**
 * Unit tests for {@link StrlenExtractionFn}: length computation, cache key contents and JSON round-tripping.
 */
public class StrlenExtractionFnTest
{
  /**
   * Lengths are counted in UTF-16 code units, and null counts as zero.
   */
  @Test
  public void testApply()
  {
    Assert.assertEquals("0", StrlenExtractionFn.instance().apply(null));
    Assert.assertEquals("0", StrlenExtractionFn.instance().apply(""));
    Assert.assertEquals("1", StrlenExtractionFn.instance().apply("x"));
    Assert.assertEquals("3", StrlenExtractionFn.instance().apply("foo"));
    // A non-ASCII character inside the BMP is still a single code unit.
    Assert.assertEquals("3", StrlenExtractionFn.instance().apply("föo"));
    // A surrogate pair (one code point) counts as two code units.
    Assert.assertEquals("2", StrlenExtractionFn.instance().apply("\uD83D\uDE02"));
    // Numeric arguments: the expected lengths match the decimal text "1" and "-1".
    // NOTE(review): presumably handled by an overload inherited from DimExtractionFn -- confirm.
    Assert.assertEquals("1", StrlenExtractionFn.instance().apply(1));
    Assert.assertEquals("2", StrlenExtractionFn.instance().apply(-1));
  }

  /**
   * The cache key must be exactly the strlen type id, and must be stable across calls.
   */
  @Test
  public void testGetCacheKey()
  {
    // Pin the actual contents: comparing the key only with itself would pass for any deterministic value.
    Assert.assertArrayEquals(
        new byte[]{ExtractionCacheHelper.CACHE_TYPE_ID_STRLEN},
        StrlenExtractionFn.instance().getCacheKey()
    );
    Assert.assertArrayEquals(StrlenExtractionFn.instance().getCacheKey(), StrlenExtractionFn.instance().getCacheKey());
  }

  /**
   * Deserializing the "strlen" type, directly or after a write/read round trip, yields the shared instance.
   */
  @Test
  public void testSerde() throws Exception
  {
    final ObjectMapper objectMapper = new DefaultObjectMapper();
    final String json = "{ \"type\" : \"strlen\" }";

    StrlenExtractionFn extractionFn = (StrlenExtractionFn) objectMapper.readValue(json, ExtractionFn.class);
    StrlenExtractionFn extractionFnRoundTrip = (StrlenExtractionFn) objectMapper.readValue(
        objectMapper.writeValueAsString(extractionFn),
        ExtractionFn.class
    );

    // Should all actually be the same instance.
    Assert.assertSame(extractionFn, extractionFnRoundTrip);
    Assert.assertSame(StrlenExtractionFn.instance(), extractionFn);
  }
}