Support alphanumeric sort in search query (#2593)

* support alphanumeric sort in search query

* address a comment about handling equals() and hashCode()

* address comments

* add unit tests for string comparators

* address a comment about space indentations.
This commit is contained in:
jaehong choi 2016-06-29 07:06:18 +09:00 committed by Fangjin Yang
parent 1d40df4bb7
commit efbcbf5315
10 changed files with 409 additions and 30 deletions

View File

@ -23,7 +23,8 @@ OrderByColumnSpecs indicate how to do order by operations. Each order-by conditi
```json
{
"dimension" : "<Any dimension or metric name>",
"direction" : <"ascending"|"descending">
"direction" : <"ascending"|"descending">,
"dimensionOrder" : <"lexicographic" (default)|"alphanumeric"|"strlen">
}
```

View File

@ -38,7 +38,7 @@ There are several main parts to a search query:
|intervals|A JSON Object representing ISO-8601 Intervals. This defines the time ranges to run the query over.|yes|
|searchDimensions|The dimensions to run the search over. Excluding this means the search is run over all dimensions.|no|
|query|See [SearchQuerySpec](../querying/searchqueryspec.html).|yes|
|sort|An object specifying how the results of the search should be sorted. Two possible types here are "lexicographic" (the default sort) and "strlen".|no|
|sort|An object specifying how the results of the search should be sorted. Possible types here are "lexicographic" (the default sort), "alphanumeric" and "strlen".|no|
|context|See [Context](../querying/query-context.html)|no|
The format of the result is:

View File

@ -25,6 +25,8 @@ import com.fasterxml.jackson.annotation.JsonSubTypes;
import com.fasterxml.jackson.annotation.JsonTypeInfo;
import com.fasterxml.jackson.annotation.JsonTypeInfo.As;
import com.fasterxml.jackson.annotation.JsonTypeInfo.Id;
import com.google.common.collect.Ordering;
import com.google.common.primitives.Ints;
import com.google.common.primitives.UnsignedBytes;
import com.metamx.common.IAE;
import com.metamx.common.StringUtils;
@ -34,14 +36,17 @@ public class StringComparators
{
public static final String LEXICOGRAPHIC_NAME = "lexicographic";
public static final String ALPHANUMERIC_NAME = "alphanumeric";
public static final String STRLEN_NAME = "strlen";
public static final LexicographicComparator LEXICOGRAPHIC = new LexicographicComparator();
public static final AlphanumericComparator ALPHANUMERIC = new AlphanumericComparator();
public static final StrlenComparator STRLEN = new StrlenComparator();
@JsonTypeInfo(use=Id.NAME, include=As.PROPERTY, property="type", defaultImpl = LexicographicComparator.class)
@JsonSubTypes(value = {
@JsonSubTypes.Type(name = StringComparators.LEXICOGRAPHIC_NAME, value = LexicographicComparator.class),
@JsonSubTypes.Type(name = StringComparators.ALPHANUMERIC_NAME, value = AlphanumericComparator.class)
@JsonSubTypes.Type(name = StringComparators.ALPHANUMERIC_NAME, value = AlphanumericComparator.class),
@JsonSubTypes.Type(name = StringComparators.STRLEN_NAME, value = StrlenComparator.class)
})
public static interface StringComparator extends Comparator<String>
{
@ -49,6 +54,16 @@ public class StringComparators
public static class LexicographicComparator implements StringComparator
{
private static final Ordering<String> ORDERING = Ordering.from(new Comparator<String>()
{
@Override
public int compare(String s, String s2)
{
return UnsignedBytes.lexicographicalComparator().compare(
StringUtils.toUtf8(s), StringUtils.toUtf8(s2));
}
}).nullsFirst();
@Override
public int compare(String s, String s2)
{
@ -56,18 +71,8 @@ public class StringComparators
if(s == s2){
return 0;
}
// null first
if (s == null) {
return -1;
}
if (s2 == null) {
return 1;
}
return UnsignedBytes.lexicographicalComparator().compare(
StringUtils.toUtf8(s),
StringUtils.toUtf8(s2)
);
return ORDERING.compare(s, s2);
}
@Override
@ -101,6 +106,9 @@ public class StringComparators
if (str1 == null)
{
if (str2 == null) {
return 0;
}
return -1;
} else if (str2 == null)
{
@ -272,7 +280,7 @@ public class StringComparators
// compare the substrings
return String.CASE_INSENSITIVE_ORDER.compare(str0.substring(start0, pos[0]), str1.substring(start1, pos[1]));
}
@Override
public boolean equals(Object o)
{
@ -282,10 +290,10 @@ public class StringComparators
if (o == null || getClass() != o.getClass()) {
return false;
}
return true;
}
@Override
public String toString()
{
@ -293,12 +301,55 @@ public class StringComparators
}
}
/**
 * Orders strings by length, shortest first. Strings of equal length are ordered by their natural
 * {@link String} ordering, and {@code null} sorts before any non-null string.
 *
 * All instances are interchangeable: {@link #equals(Object)} is true for any other instance of this exact class,
 * and {@link #hashCode()} is defined to match.
 */
public static class StrlenComparator implements StringComparator
{
  // Length comparison wrapped so that nulls sort first; ties on length fall through to natural String order.
  private static final Ordering<String> ORDERING = Ordering.from(new Comparator<String>()
  {
    @Override
    public int compare(String s, String s2)
    {
      return Ints.compare(s.length(), s2.length());
    }
  }).nullsFirst().compound(Ordering.natural());

  /**
   * @param s  left operand, may be null
   * @param s2 right operand, may be null
   *
   * @return negative, zero or positive as {@code s} sorts before, equal to or after {@code s2}
   */
  @Override
  public int compare(String s, String s2)
  {
    // Identity short-circuit. It must stay: it is also what answers compare(null, null). Without it a pair of
    // nulls would tie in the null-safe length stage and then reach the natural-order tie-breaker, which does
    // not accept nulls.
    if (s == s2) {
      return 0;
    }

    return ORDERING.compare(s, s2);
  }

  @Override
  public boolean equals(Object o)
  {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }

    return true;
  }

  /**
   * Equality is decided purely by runtime class, so the hash is derived from the class as well; this keeps
   * equal comparators in the same bucket of hash-based collections.
   */
  @Override
  public int hashCode()
  {
    return getClass().hashCode();
  }

  @Override
  public String toString()
  {
    return StringComparators.STRLEN_NAME;
  }
}
public static StringComparator makeComparator(String type)
{
if (type.equals(StringComparators.LEXICOGRAPHIC_NAME)) {
return LEXICOGRAPHIC;
} else if (type.equals(StringComparators.ALPHANUMERIC_NAME)) {
return ALPHANUMERIC;
} else if (type.equals(StringComparators.STRLEN_NAME)) {
return STRLEN;
} else {
throw new IAE("Unknown string comparator[%s]", type);
}

View File

@ -0,0 +1,78 @@
/*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package io.druid.query.search.search;
import com.fasterxml.jackson.annotation.JsonCreator;
import io.druid.query.ordering.StringComparators;
import java.nio.charset.StandardCharsets;
import java.util.Comparator;
/**
 * Search sort spec that orders hits by value using {@link StringComparators#ALPHANUMERIC} and breaks ties on the
 * dimension name using {@link StringComparators#LEXICOGRAPHIC}.
 *
 * Every instance is equal to every other instance of this type.
 */
public class AlphanumericSearchSortSpec implements SearchSortSpec
{
  // The comparator holds no state, so a single shared instance serves every getComparator() call.
  private static final Comparator<SearchHit> COMPARATOR = new Comparator<SearchHit>()
  {
    @Override
    public int compare(SearchHit searchHit1, SearchHit searchHit2)
    {
      int retVal = StringComparators.ALPHANUMERIC.compare(
          searchHit1.getValue(), searchHit2.getValue());
      if (retVal == 0) {
        // Same value: fall back to the dimension name so the ordering stays deterministic.
        retVal = StringComparators.LEXICOGRAPHIC.compare(
            searchHit1.getDimension(), searchHit2.getDimension());
      }
      return retVal;
    }
  };

  @JsonCreator
  public AlphanumericSearchSortSpec()
  {
  }

  /**
   * @return comparator ordering hits by value (alphanumeric), then by dimension (lexicographic)
   */
  @Override
  public Comparator<SearchHit> getComparator()
  {
    return COMPARATOR;
  }

  @Override
  public String toString()
  {
    return "alphanumericSort";
  }

  @Override
  public boolean equals(Object other)
  {
    return this == other || other instanceof AlphanumericSearchSortSpec;
  }

  /**
   * Constant, matching an {@code equals} that treats all instances of this type as equal.
   */
  @Override
  public int hashCode()
  {
    return 0;
  }

  /**
   * @return the bytes of {@link #toString()}, encoded as UTF-8
   */
  @Override
  public byte[] getCacheKey()
  {
    // Encode with an explicit charset: the no-arg getBytes() uses the JVM's platform default, which would let
    // the key differ between hosts configured with different default encodings.
    return toString().getBytes(StandardCharsets.UTF_8);
  }
}

View File

@ -21,6 +21,8 @@ package io.druid.query.search.search;
import com.fasterxml.jackson.annotation.JsonCreator;
import io.druid.query.ordering.StringComparators;
import java.util.Comparator;
/**
@ -41,9 +43,12 @@ public class LexicographicSearchSortSpec implements SearchSortSpec
@Override
public int compare(SearchHit searchHit, SearchHit searchHit1)
{
int retVal = searchHit.getValue().compareTo(searchHit1.getValue());
int retVal = StringComparators.LEXICOGRAPHIC.compare(
searchHit.getValue(), searchHit1.getValue());
if (retVal == 0) {
retVal = searchHit.getDimension().compareTo(searchHit1.getDimension());
retVal = StringComparators.LEXICOGRAPHIC.compare(
searchHit.getDimension(), searchHit1.getDimension());
}
return retVal;
}
@ -63,6 +68,12 @@ public class LexicographicSearchSortSpec implements SearchSortSpec
@Override
public boolean equals(Object other) {
return (other instanceof LexicographicSearchSortSpec);
return this == other || other instanceof LexicographicSearchSortSpec;
}
@Override
public int hashCode()
{
return 0;
}
}

View File

@ -29,6 +29,7 @@ import java.util.Comparator;
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type", defaultImpl = LexicographicSearchSortSpec.class)
@JsonSubTypes(value = {
@JsonSubTypes.Type(name = "lexicographic", value = LexicographicSearchSortSpec.class),
@JsonSubTypes.Type(name = "alphanumeric", value = AlphanumericSearchSortSpec.class),
@JsonSubTypes.Type(name = "strlen", value = StrlenSearchSortSpec.class)
})
public interface SearchSortSpec

View File

@ -19,7 +19,7 @@
package io.druid.query.search.search;
import com.google.common.primitives.Ints;
import io.druid.query.ordering.StringComparators;
import java.util.Comparator;
@ -38,14 +38,10 @@ public class StrlenSearchSortSpec implements SearchSortSpec
@Override
public int compare(SearchHit s, SearchHit s1)
{
final String v1 = s.getValue();
final String v2 = s1.getValue();
int res = Ints.compare(v1.length(), v2.length());
int res = StringComparators.STRLEN.compare(s.getValue(), s1.getValue());
if (res == 0) {
res = v1.compareTo(v2);
}
if (res == 0) {
res = s.getDimension().compareTo(s1.getDimension());
res = StringComparators.LEXICOGRAPHIC.compare(
s.getDimension(), s1.getDimension());
}
return res;
}
@ -62,4 +58,15 @@ public class StrlenSearchSortSpec implements SearchSortSpec
{
return "stringLengthSort";
}
@Override
public boolean equals(Object other)
{
  // Equality depends only on the type: any StrlenSearchSortSpec equals any other.
  if (this == other) {
    return true;
  }
  return other instanceof StrlenSearchSortSpec;
}
// Returns the same hash for every instance, which keeps hashCode() consistent with an equals()
// that accepts any object of this type.
@Override
public int hashCode()
{
return 0;
}
}

View File

@ -0,0 +1,157 @@
/*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package io.druid.query.ordering;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import org.junit.Assert;
import org.junit.Test;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import io.druid.jackson.DefaultObjectMapper;
import io.druid.query.ordering.StringComparators.StringComparator;
/**
 * Exercises the comparators published by {@link StringComparators}: their ordering rules and their JSON form.
 */
public class StringComparatorsTest
{
  /** Asserts that the comparator reports the two inputs as equal in order. */
  private static void assertTied(StringComparator comparator, String left, String right)
  {
    Assert.assertTrue(comparator.compare(left, right) == 0);
  }

  /** Asserts that the comparator places {@code left} strictly ahead of {@code right}. */
  private static void assertBefore(StringComparator comparator, String left, String right)
  {
    Assert.assertTrue(comparator.compare(left, right) < 0);
  }

  /** Asserts that the comparator places {@code left} strictly behind {@code right}. */
  private static void assertAfter(StringComparator comparator, String left, String right)
  {
    Assert.assertTrue(comparator.compare(left, right) > 0);
  }

  /**
   * Serializes the comparator, checks the JSON text, then reads that JSON back as {@code type} and checks the
   * result equals the comparator that was written.
   */
  private static void assertSerde(
      StringComparator comparator,
      String expectedJson,
      Class<? extends StringComparator> type
  ) throws IOException
  {
    ObjectMapper jsonMapper = new DefaultObjectMapper();

    String actualJson = jsonMapper.writeValueAsString(comparator);
    Assert.assertEquals(expectedJson, actualJson);
    Assert.assertEquals(comparator, jsonMapper.readValue(expectedJson, type));
  }

  /** Checks applied to every comparator under test. */
  private void commonTest(StringComparator comparator)
  {
    // identical inputs tie, including a pair of nulls
    assertTied(comparator, null, null);
    assertTied(comparator, "", "");
    assertTied(comparator, "123", "123");
    assertTied(comparator, "abc123", "abc123");

    // the empty string sorts ahead of a non-empty one
    assertBefore(comparator, "", "abc");
    assertAfter(comparator, "abc", "");

    // null sorts ahead of a real value
    assertBefore(comparator, null, "apple");
  }

  @Test
  public void testLexicographicComparator()
  {
    final StringComparator lexicographic = StringComparators.LEXICOGRAPHIC;
    commonTest(lexicographic);

    assertBefore(lexicographic, "apple", "banana");
    assertTied(lexicographic, "banana", "banana");
  }

  @Test
  public void testAlphanumericComparator()
  {
    final StringComparator alphanumeric = StringComparators.ALPHANUMERIC;
    commonTest(alphanumeric);

    // digits sort ahead of letters
    assertBefore(alphanumeric, "123", "abc");
    assertAfter(alphanumeric, "abc", "123");

    // digit runs are compared by numeric value
    assertBefore(alphanumeric, "2", "11");
    assertBefore(alphanumeric, "a2", "a11");

    // leading zeros
    assertBefore(alphanumeric, "02", "11");
    assertBefore(alphanumeric, "02", "002");

    // decimal points: fine when the fractions have the same width ...
    assertBefore(alphanumeric, "1.3", "1.5");
    // ... but not a true decimal comparison: here 1.3 is placed ahead of 1.15
    assertBefore(alphanumeric, "1.3", "1.15");

    // ranges, however, sort the way a reader would expect
    List<String> ranges = Lists.newArrayList("1-5", "11-15", "16-20", "21-25", "26-30", "6-10", "Other");
    Collections.sort(ranges, alphanumeric);
    Assert.assertEquals(
        ImmutableList.of("1-5", "6-10", "11-15", "16-20", "21-25", "26-30", "Other"),
        ranges
    );

    List<String> fixedDecimalRanges = Lists.newArrayList(
        "Other", "[0.00-0.05)", "[0.05-0.10)", "[0.10-0.50)", "[0.50-1.00)",
        "[1.00-5.00)", "[5.00-10.00)", "[10.00-20.00)"
    );
    Collections.sort(fixedDecimalRanges, alphanumeric);
    Assert.assertEquals(
        ImmutableList.of(
            "[0.00-0.05)", "[0.05-0.10)", "[0.10-0.50)", "[0.50-1.00)",
            "[1.00-5.00)", "[5.00-10.00)", "[10.00-20.00)", "Other"
        ),
        fixedDecimalRanges
    );
  }

  @Test
  public void testStrlenComparator()
  {
    final StringComparator strlen = StringComparators.STRLEN;
    commonTest(strlen);

    // shorter first, whatever the content
    assertBefore(strlen, "a", "apple");
    assertBefore(strlen, "a", "elppa");
    // same length: "apple" is placed ahead of "elppa"
    assertBefore(strlen, "apple", "elppa");
  }

  @Test
  public void testLexicographicComparatorSerdeTest() throws IOException
  {
    assertSerde(
        StringComparators.LEXICOGRAPHIC,
        "{\"type\":\"lexicographic\"}",
        StringComparators.LexicographicComparator.class
    );
  }

  @Test
  public void testAlphanumericComparatorSerdeTest() throws IOException
  {
    assertSerde(
        StringComparators.ALPHANUMERIC,
        "{\"type\":\"alphanumeric\"}",
        StringComparators.AlphanumericComparator.class
    );
  }

  @Test
  public void testStrlenComparatorSerdeTest() throws IOException
  {
    assertSerde(
        StringComparators.STRLEN,
        "{\"type\":\"strlen\"}",
        StringComparators.StrlenComparator.class
    );
  }
}

View File

@ -0,0 +1,45 @@
/*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package io.druid.query.search;
import io.druid.query.search.search.AlphanumericSearchSortSpec;
import io.druid.query.search.search.SearchHit;
import io.druid.query.search.search.SearchSortSpec;
import org.junit.Assert;
import org.junit.Test;
/**
 * Checks the hit ordering produced by {@link AlphanumericSearchSortSpec}.
 */
public class AlphanumericSearchSortSpecTest
{
  /** Compares two hits with the comparator supplied by {@code spec}. */
  private static int compare(SearchSortSpec spec, SearchHit left, SearchHit right)
  {
    return spec.getComparator().compare(left, right);
  }

  @Test
  public void testComparator()
  {
    final SearchSortSpec spec = new AlphanumericSearchSortSpec();

    final SearchHit a100 = new SearchHit("test", "a100");
    final SearchHit a9 = new SearchHit("test", "a9");
    final SearchHit b0 = new SearchHit("test", "b0");

    // "a100" sorts after "a9"
    Assert.assertTrue(compare(spec, a100, a9) > 0);
    // "b0" sorts after both "a…" values
    Assert.assertTrue(compare(spec, b0, a100) > 0);
    Assert.assertTrue(compare(spec, b0, a9) > 0);
  }
}

View File

@ -22,6 +22,7 @@ package io.druid.query.search;
import com.google.common.collect.ImmutableList;
import io.druid.granularity.QueryGranularities;
import io.druid.query.Result;
import io.druid.query.search.search.AlphanumericSearchSortSpec;
import io.druid.query.search.search.LexicographicSearchSortSpec;
import io.druid.query.search.search.SearchHit;
import io.druid.query.search.search.StrlenSearchSortSpec;
@ -274,6 +275,33 @@ public class SearchBinaryFnTest
assertSearchMergeResult(expected.getValue(), actual.getValue());
}
@Test
public void testAlphanumericMerge()
{
AlphanumericSearchSortSpec searchSortSpec = new AlphanumericSearchSortSpec();
Comparator<SearchHit> c = searchSortSpec.getComparator();
Result<SearchResultValue> r1 = new Result<SearchResultValue>(
currTime,
new SearchResultValue(toHits(c, "blah:a100", "blah:a9", "alah:a100"))
);
Result<SearchResultValue> r2 = new Result<SearchResultValue>(
currTime,
new SearchResultValue(toHits(c, "blah:b0", "alah:c3"))
);
Result<SearchResultValue> expected = new Result<SearchResultValue>(
currTime,
new SearchResultValue(toHits(c, "blah:a9", "alah:a100", "blah:a100", "blah:b0", "alah:c3"))
);
Result<SearchResultValue> actual = new SearchBinaryFn(
searchSortSpec, QueryGranularity.ALL, Integer.MAX_VALUE).apply(r1, r2);
Assert.assertEquals(expected.getTimestamp(), actual.getTimestamp());
assertSearchMergeResult(expected.getValue(), actual.getValue());
}
// merge function expects input to be sorted as per comparator
private List<SearchHit> toHits(Comparator<SearchHit> comparator, String... hits) {
List<SearchHit> result = new ArrayList<>();