mirror of https://github.com/apache/druid.git
support alphanumeric sorting for dimensional columns in groupby
This commit is contained in:
parent
1430bc2c88
commit
2f2e2ff5b9
|
@ -36,6 +36,7 @@ import io.druid.data.input.Row;
|
|||
import io.druid.query.aggregation.AggregatorFactory;
|
||||
import io.druid.query.aggregation.PostAggregator;
|
||||
import io.druid.query.dimension.DimensionSpec;
|
||||
import io.druid.query.ordering.StringComparators.StringComparator;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.nio.ByteBuffer;
|
||||
|
@ -139,7 +140,7 @@ public class DefaultLimitSpec implements LimitSpec
|
|||
} else if (aggregatorsMap.containsKey(columnName)) {
|
||||
nextOrdering = metricOrdering(columnName, aggregatorsMap.get(columnName).getComparator());
|
||||
} else if (dimensionsMap.containsKey(columnName)) {
|
||||
nextOrdering = dimensionOrdering(columnName);
|
||||
nextOrdering = dimensionOrdering(columnName, columnSpec.getDimensionComparator());
|
||||
}
|
||||
|
||||
if (nextOrdering == null) {
|
||||
|
@ -170,9 +171,9 @@ public class DefaultLimitSpec implements LimitSpec
|
|||
};
|
||||
}
|
||||
|
||||
private Ordering<Row> dimensionOrdering(final String dimension)
|
||||
private Ordering<Row> dimensionOrdering(final String dimension, final StringComparator comparator)
|
||||
{
|
||||
return Ordering.natural()
|
||||
return Ordering.from(comparator)
|
||||
.nullsFirst()
|
||||
.onResultOf(
|
||||
new Function<Row, String>()
|
||||
|
|
|
@ -29,6 +29,9 @@ import com.metamx.common.IAE;
|
|||
import com.metamx.common.ISE;
|
||||
import com.metamx.common.StringUtils;
|
||||
|
||||
import io.druid.query.ordering.StringComparators;
|
||||
import io.druid.query.ordering.StringComparators.StringComparator;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.Arrays;
|
||||
|
@ -45,6 +48,8 @@ public class OrderByColumnSpec
|
|||
DESCENDING
|
||||
}
|
||||
|
||||
public static final StringComparator DEFAULT_DIMENSION_ORDER = StringComparators.LEXICOGRAPHIC;
|
||||
|
||||
/**
|
||||
* Maintain a map of the enum values so that we can just do a lookup and get a null if it doesn't exist instead
|
||||
* of an exception thrown.
|
||||
|
@ -61,6 +66,7 @@ public class OrderByColumnSpec
|
|||
|
||||
private final String dimension;
|
||||
private final Direction direction;
|
||||
private final StringComparator dimensionComparator;
|
||||
|
||||
@JsonCreator
|
||||
public static OrderByColumnSpec create(Object obj)
|
||||
|
@ -68,14 +74,15 @@ public class OrderByColumnSpec
|
|||
Preconditions.checkNotNull(obj, "Cannot build an OrderByColumnSpec from a null object.");
|
||||
|
||||
if (obj instanceof String) {
|
||||
return new OrderByColumnSpec(obj.toString(), null);
|
||||
return new OrderByColumnSpec(obj.toString(), null, null);
|
||||
} else if (obj instanceof Map) {
|
||||
final Map map = (Map) obj;
|
||||
|
||||
final String dimension = map.get("dimension").toString();
|
||||
final Direction direction = determineDirection(map.get("direction"));
|
||||
final StringComparator dimensionComparator = determinDimensionComparator(map.get("dimensionOrder"));
|
||||
|
||||
return new OrderByColumnSpec(dimension, direction);
|
||||
return new OrderByColumnSpec(dimension, direction, dimensionComparator);
|
||||
} else {
|
||||
throw new ISE("Cannot build an OrderByColumnSpec from a %s", obj.getClass());
|
||||
}
|
||||
|
@ -83,7 +90,7 @@ public class OrderByColumnSpec
|
|||
|
||||
public static OrderByColumnSpec asc(String dimension)
|
||||
{
|
||||
return new OrderByColumnSpec(dimension, Direction.ASCENDING);
|
||||
return new OrderByColumnSpec(dimension, Direction.ASCENDING, null);
|
||||
}
|
||||
|
||||
public static List<OrderByColumnSpec> ascending(String... dimension)
|
||||
|
@ -103,7 +110,7 @@ public class OrderByColumnSpec
|
|||
|
||||
public static OrderByColumnSpec desc(String dimension)
|
||||
{
|
||||
return new OrderByColumnSpec(dimension, Direction.DESCENDING);
|
||||
return new OrderByColumnSpec(dimension, Direction.DESCENDING, null);
|
||||
}
|
||||
|
||||
public static List<OrderByColumnSpec> descending(String... dimension)
|
||||
|
@ -125,9 +132,19 @@ public class OrderByColumnSpec
|
|||
String dimension,
|
||||
Direction direction
|
||||
)
|
||||
{
|
||||
this(dimension, direction, null);
|
||||
}
|
||||
|
||||
/**
 * Creates an order-by spec for a single output column.
 *
 * @param dimension           name of the column to order by
 * @param direction           sort direction; {@code null} falls back to {@link Direction#ASCENDING}
 * @param dimensionComparator comparator applied to dimension values; {@code null} falls back to
 *                            {@link #DEFAULT_DIMENSION_ORDER}
 */
public OrderByColumnSpec(
    String dimension,
    Direction direction,
    StringComparator dimensionComparator
)
{
  this.dimension = dimension;

  if (direction != null) {
    this.direction = direction;
  } else {
    this.direction = Direction.ASCENDING;
  }

  if (dimensionComparator != null) {
    this.dimensionComparator = dimensionComparator;
  } else {
    this.dimensionComparator = DEFAULT_DIMENSION_ORDER;
  }
}
|
||||
|
||||
@JsonProperty
|
||||
|
@ -142,6 +159,12 @@ public class OrderByColumnSpec
|
|||
return direction;
|
||||
}
|
||||
|
||||
/**
 * Returns the comparator used to order values of this column; the constructor guarantees it is never null
 * by substituting DEFAULT_DIMENSION_ORDER.
 *
 * NOTE(review): create(Object) reads this setting from the "dimensionOrder" map key, while a bare
 * JsonProperty annotation on this getter presumably serializes it under "dimensionComparator" -- confirm
 * that a serialize/deserialize round trip preserves a non-default comparator.
 */
@JsonProperty
|
||||
public StringComparator getDimensionComparator()
|
||||
{
|
||||
return dimensionComparator;
|
||||
}
|
||||
|
||||
public static Direction determineDirection(Object directionObj)
|
||||
{
|
||||
if (directionObj == null) {
|
||||
|
@ -172,6 +195,16 @@ public class OrderByColumnSpec
|
|||
return direction;
|
||||
}
|
||||
|
||||
private static StringComparator determinDimensionComparator(Object dimensionOrderObj)
|
||||
{
|
||||
if (dimensionOrderObj == null) {
|
||||
return DEFAULT_DIMENSION_ORDER;
|
||||
}
|
||||
|
||||
String dimensionOrderString = dimensionOrderObj.toString().toLowerCase();
|
||||
return StringComparators.makeComparator(dimensionOrderString);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o)
|
||||
{
|
||||
|
@ -187,6 +220,9 @@ public class OrderByColumnSpec
|
|||
if (!dimension.equals(that.dimension)) {
|
||||
return false;
|
||||
}
|
||||
if (!dimensionComparator.equals(that.dimensionComparator)) {
|
||||
return false;
|
||||
}
|
||||
return direction == that.direction;
|
||||
|
||||
}
|
||||
|
@ -196,6 +232,7 @@ public class OrderByColumnSpec
|
|||
{
|
||||
int result = dimension.hashCode();
|
||||
result = 31 * result + direction.hashCode();
|
||||
result = 31 * result + dimensionComparator.hashCode();
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -204,7 +241,8 @@ public class OrderByColumnSpec
|
|||
{
|
||||
return "OrderByColumnSpec{" +
|
||||
"dimension='" + dimension + '\'' +
|
||||
", direction=" + direction +
|
||||
", direction=" + direction + '\'' +
|
||||
", dimensionComparator='" + dimensionComparator + '\'' +
|
||||
'}';
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,306 @@
|
|||
/*
|
||||
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Metamarkets licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package io.druid.query.ordering;
|
||||
|
||||
import java.util.Comparator;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonSubTypes;
|
||||
import com.fasterxml.jackson.annotation.JsonTypeInfo;
|
||||
import com.fasterxml.jackson.annotation.JsonTypeInfo.As;
|
||||
import com.fasterxml.jackson.annotation.JsonTypeInfo.Id;
|
||||
import com.google.common.primitives.UnsignedBytes;
|
||||
import com.metamx.common.IAE;
|
||||
import com.metamx.common.StringUtils;
|
||||
|
||||
|
||||
public class StringComparators
|
||||
{
|
||||
public static final String LEXICOGRAPHIC_NAME = "lexicographic";
|
||||
public static final String ALPHANUMERIC_NAME = "alphanumeric";
|
||||
|
||||
public static final LexicographicComparator LEXICOGRAPHIC = new LexicographicComparator();
|
||||
public static final AlphanumericComparator ALPHANUMERIC = new AlphanumericComparator();
|
||||
|
||||
@JsonTypeInfo(use=Id.NAME, include=As.PROPERTY, property="type", defaultImpl = LexicographicComparator.class)
|
||||
@JsonSubTypes(value = {
|
||||
@JsonSubTypes.Type(name = StringComparators.LEXICOGRAPHIC_NAME, value = LexicographicComparator.class),
|
||||
@JsonSubTypes.Type(name = StringComparators.ALPHANUMERIC_NAME, value = AlphanumericComparator.class)
|
||||
})
|
||||
public static interface StringComparator extends Comparator<String>
|
||||
{
|
||||
}
|
||||
|
||||
public static class LexicographicComparator implements StringComparator
|
||||
{
|
||||
@Override
|
||||
public int compare(String s, String s2)
|
||||
{
|
||||
// Avoid conversion to bytes for equal references
|
||||
if(s == s2){
|
||||
return 0;
|
||||
}
|
||||
// null first
|
||||
if (s == null) {
|
||||
return -1;
|
||||
}
|
||||
if (s2 == null) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
return UnsignedBytes.lexicographicalComparator().compare(
|
||||
StringUtils.toUtf8(s),
|
||||
StringUtils.toUtf8(s2)
|
||||
);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o)
|
||||
{
|
||||
if (this == o) {
|
||||
return true;
|
||||
}
|
||||
if (o == null || getClass() != o.getClass()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString()
|
||||
{
|
||||
return StringComparators.LEXICOGRAPHIC_NAME;
|
||||
}
|
||||
}
|
||||
|
||||
public static class AlphanumericComparator implements StringComparator
|
||||
{
|
||||
// This code is based on https://github.com/amjjd/java-alphanum, see
|
||||
// NOTICE file for more information
|
||||
public int compare(String str1, String str2)
|
||||
{
|
||||
int[] pos =
|
||||
{ 0, 0 };
|
||||
|
||||
if (str1 == null)
|
||||
{
|
||||
return -1;
|
||||
} else if (str2 == null)
|
||||
{
|
||||
return 1;
|
||||
} else if (str1.length() == 0)
|
||||
{
|
||||
return str2.length() == 0 ? 0 : -1;
|
||||
} else if (str2.length() == 0)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
while (pos[0] < str1.length() && pos[1] < str2.length())
|
||||
{
|
||||
int ch1 = str1.codePointAt(pos[0]);
|
||||
int ch2 = str2.codePointAt(pos[1]);
|
||||
|
||||
int result = 0;
|
||||
|
||||
if (isDigit(ch1))
|
||||
{
|
||||
result = isDigit(ch2) ? compareNumbers(str1, str2, pos) : -1;
|
||||
} else
|
||||
{
|
||||
result = isDigit(ch2) ? 1 : compareNonNumeric(str1, str2, pos);
|
||||
}
|
||||
|
||||
if (result != 0)
|
||||
{
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
return str1.length() - str2.length();
|
||||
}
|
||||
|
||||
private int compareNumbers(String str0, String str1, int[] pos)
|
||||
{
|
||||
int delta = 0;
|
||||
int zeroes0 = 0, zeroes1 = 0;
|
||||
int ch0 = -1, ch1 = -1;
|
||||
|
||||
// Skip leading zeroes, but keep a count of them.
|
||||
while (pos[0] < str0.length() && isZero(ch0 = str0.codePointAt(pos[0])))
|
||||
{
|
||||
zeroes0++;
|
||||
pos[0] += Character.charCount(ch0);
|
||||
}
|
||||
while (pos[1] < str1.length() && isZero(ch1 = str1.codePointAt(pos[1])))
|
||||
{
|
||||
zeroes1++;
|
||||
pos[1] += Character.charCount(ch1);
|
||||
}
|
||||
|
||||
// If one sequence contains more significant digits than the
|
||||
// other, it's a larger number. In case they turn out to have
|
||||
// equal lengths, we compare digits at each position; the first
|
||||
// unequal pair determines which is the bigger number.
|
||||
while (true)
|
||||
{
|
||||
boolean noMoreDigits0 = (ch0 < 0) || !isDigit(ch0);
|
||||
boolean noMoreDigits1 = (ch1 < 0) || !isDigit(ch1);
|
||||
|
||||
if (noMoreDigits0 && noMoreDigits1)
|
||||
{
|
||||
return delta != 0 ? delta : zeroes0 - zeroes1;
|
||||
} else if (noMoreDigits0)
|
||||
{
|
||||
return -1;
|
||||
} else if (noMoreDigits1)
|
||||
{
|
||||
return 1;
|
||||
} else if (delta == 0 && ch0 != ch1)
|
||||
{
|
||||
delta = valueOf(ch0) - valueOf(ch1);
|
||||
}
|
||||
|
||||
if (pos[0] < str0.length())
|
||||
{
|
||||
ch0 = str0.codePointAt(pos[0]);
|
||||
if (isDigit(ch0))
|
||||
{
|
||||
pos[0] += Character.charCount(ch0);
|
||||
} else
|
||||
{
|
||||
ch0 = -1;
|
||||
}
|
||||
} else
|
||||
{
|
||||
ch0 = -1;
|
||||
}
|
||||
|
||||
if (pos[1] < str1.length())
|
||||
{
|
||||
ch1 = str1.codePointAt(pos[1]);
|
||||
if (isDigit(ch1))
|
||||
{
|
||||
pos[1] += Character.charCount(ch1);
|
||||
} else
|
||||
{
|
||||
ch1 = -1;
|
||||
}
|
||||
} else
|
||||
{
|
||||
ch1 = -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private boolean isDigit(int ch)
|
||||
{
|
||||
return (ch >= '0' && ch <= '9') ||
|
||||
(ch >= '\u0660' && ch <= '\u0669') ||
|
||||
(ch >= '\u06F0' && ch <= '\u06F9') ||
|
||||
(ch >= '\u0966' && ch <= '\u096F') ||
|
||||
(ch >= '\uFF10' && ch <= '\uFF19');
|
||||
}
|
||||
|
||||
private boolean isZero(int ch)
|
||||
{
|
||||
return ch == '0' || ch == '\u0660' || ch == '\u06F0' || ch == '\u0966' || ch == '\uFF10';
|
||||
}
|
||||
|
||||
private int valueOf(int digit)
|
||||
{
|
||||
if (digit <= '9')
|
||||
{
|
||||
return digit - '0';
|
||||
}
|
||||
if (digit <= '\u0669')
|
||||
{
|
||||
return digit - '\u0660';
|
||||
}
|
||||
if (digit <= '\u06F9')
|
||||
{
|
||||
return digit - '\u06F0';
|
||||
}
|
||||
if (digit <= '\u096F')
|
||||
{
|
||||
return digit - '\u0966';
|
||||
}
|
||||
if (digit <= '\uFF19')
|
||||
{
|
||||
return digit - '\uFF10';
|
||||
}
|
||||
|
||||
return digit;
|
||||
}
|
||||
|
||||
private int compareNonNumeric(String str0, String str1, int[] pos)
|
||||
{
|
||||
// find the end of both non-numeric substrings
|
||||
int start0 = pos[0];
|
||||
int ch0 = str0.codePointAt(pos[0]);
|
||||
pos[0] += Character.charCount(ch0);
|
||||
while (pos[0] < str0.length() && !isDigit(ch0 = str0.codePointAt(pos[0])))
|
||||
{
|
||||
pos[0] += Character.charCount(ch0);
|
||||
}
|
||||
|
||||
int start1 = pos[1];
|
||||
int ch1 = str1.codePointAt(pos[1]);
|
||||
pos[1] += Character.charCount(ch1);
|
||||
while (pos[1] < str1.length() && !isDigit(ch1 = str1.codePointAt(pos[1])))
|
||||
{
|
||||
pos[1] += Character.charCount(ch1);
|
||||
}
|
||||
|
||||
// compare the substrings
|
||||
return String.CASE_INSENSITIVE_ORDER.compare(str0.substring(start0, pos[0]), str1.substring(start1, pos[1]));
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o)
|
||||
{
|
||||
if (this == o) {
|
||||
return true;
|
||||
}
|
||||
if (o == null || getClass() != o.getClass()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString()
|
||||
{
|
||||
return StringComparators.ALPHANUMERIC_NAME;
|
||||
}
|
||||
}
|
||||
|
||||
public static StringComparator makeComparator(String type)
|
||||
{
|
||||
if (type.equals(StringComparators.LEXICOGRAPHIC_NAME)) {
|
||||
return LEXICOGRAPHIC;
|
||||
} else if (type.equals(StringComparators.ALPHANUMERIC_NAME)) {
|
||||
return ALPHANUMERIC;
|
||||
} else {
|
||||
throw new IAE("Unknown string comparator[%s]", type);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -24,6 +24,7 @@ import com.fasterxml.jackson.annotation.JsonProperty;
|
|||
import com.metamx.common.StringUtils;
|
||||
import io.druid.query.aggregation.AggregatorFactory;
|
||||
import io.druid.query.aggregation.PostAggregator;
|
||||
import io.druid.query.ordering.StringComparators;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.Comparator;
|
||||
|
@ -33,157 +34,7 @@ public class AlphaNumericTopNMetricSpec extends LexicographicTopNMetricSpec
|
|||
{
|
||||
private static final byte CACHE_TYPE_ID = 0x2;
|
||||
|
||||
protected static Comparator<String> comparator = new Comparator<String>()
|
||||
{
|
||||
// This code is based on https://github.com/amjjd/java-alphanum, see NOTICE file for more information
|
||||
public int compare(String str1, String str2)
|
||||
{
|
||||
int[] pos = {0, 0};
|
||||
|
||||
if (str1 == null) {
|
||||
return -1;
|
||||
} else if (str2 == null) {
|
||||
return 1;
|
||||
} else if (str1.length() == 0) {
|
||||
return str2.length() == 0 ? 0 : -1;
|
||||
} else if (str2.length() == 0) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
while (pos[0] < str1.length() && pos[1] < str2.length()) {
|
||||
int ch1 = str1.codePointAt(pos[0]);
|
||||
int ch2 = str2.codePointAt(pos[1]);
|
||||
|
||||
int result = 0;
|
||||
|
||||
if (isDigit(ch1)) {
|
||||
result = isDigit(ch2) ? compareNumbers(str1, str2, pos) : -1;
|
||||
} else {
|
||||
result = isDigit(ch2) ? 1 : compareNonNumeric(str1, str2, pos);
|
||||
}
|
||||
|
||||
if (result != 0) {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
return str1.length() - str2.length();
|
||||
}
|
||||
|
||||
private int compareNumbers(String str0, String str1, int[] pos)
|
||||
{
|
||||
int delta = 0;
|
||||
int zeroes0 = 0, zeroes1 = 0;
|
||||
int ch0 = -1, ch1 = -1;
|
||||
|
||||
// Skip leading zeroes, but keep a count of them.
|
||||
while (pos[0] < str0.length() && isZero(ch0 = str0.codePointAt(pos[0]))) {
|
||||
zeroes0++;
|
||||
pos[0] += Character.charCount(ch0);
|
||||
}
|
||||
while (pos[1] < str1.length() && isZero(ch1 = str1.codePointAt(pos[1]))) {
|
||||
zeroes1++;
|
||||
pos[1] += Character.charCount(ch1);
|
||||
}
|
||||
|
||||
// If one sequence contains more significant digits than the
|
||||
// other, it's a larger number. In case they turn out to have
|
||||
// equal lengths, we compare digits at each position; the first
|
||||
// unequal pair determines which is the bigger number.
|
||||
while (true) {
|
||||
boolean noMoreDigits0 = (ch0 < 0) || !isDigit(ch0);
|
||||
boolean noMoreDigits1 = (ch1 < 0) || !isDigit(ch1);
|
||||
|
||||
if (noMoreDigits0 && noMoreDigits1) {
|
||||
return delta != 0 ? delta : zeroes0 - zeroes1;
|
||||
} else if (noMoreDigits0) {
|
||||
return -1;
|
||||
} else if (noMoreDigits1) {
|
||||
return 1;
|
||||
} else if (delta == 0 && ch0 != ch1) {
|
||||
delta = valueOf(ch0) - valueOf(ch1);
|
||||
}
|
||||
|
||||
if (pos[0] < str0.length()) {
|
||||
ch0 = str0.codePointAt(pos[0]);
|
||||
if (isDigit(ch0)) {
|
||||
pos[0] += Character.charCount(ch0);
|
||||
} else {
|
||||
ch0 = -1;
|
||||
}
|
||||
} else {
|
||||
ch0 = -1;
|
||||
}
|
||||
|
||||
if (pos[1] < str1.length()) {
|
||||
ch1 = str1.codePointAt(pos[1]);
|
||||
if (isDigit(ch1)) {
|
||||
pos[1] += Character.charCount(ch1);
|
||||
} else {
|
||||
ch1 = -1;
|
||||
}
|
||||
} else {
|
||||
ch1 = -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private boolean isDigit(int ch)
|
||||
{
|
||||
return (ch >= '0' && ch <= '9') ||
|
||||
(ch >= '\u0660' && ch <= '\u0669') ||
|
||||
(ch >= '\u06F0' && ch <= '\u06F9') ||
|
||||
(ch >= '\u0966' && ch <= '\u096F') ||
|
||||
(ch >= '\uFF10' && ch <= '\uFF19');
|
||||
}
|
||||
|
||||
private boolean isZero(int ch)
|
||||
{
|
||||
return ch == '0' || ch == '\u0660' || ch == '\u06F0' || ch == '\u0966' || ch == '\uFF10';
|
||||
}
|
||||
|
||||
private int valueOf(int digit)
|
||||
{
|
||||
if (digit <= '9') {
|
||||
return digit - '0';
|
||||
}
|
||||
if (digit <= '\u0669') {
|
||||
return digit - '\u0660';
|
||||
}
|
||||
if (digit <= '\u06F9') {
|
||||
return digit - '\u06F0';
|
||||
}
|
||||
if (digit <= '\u096F') {
|
||||
return digit - '\u0966';
|
||||
}
|
||||
if (digit <= '\uFF19') {
|
||||
return digit - '\uFF10';
|
||||
}
|
||||
|
||||
return digit;
|
||||
}
|
||||
|
||||
private int compareNonNumeric(String str0, String str1, int[] pos)
|
||||
{
|
||||
// find the end of both non-numeric substrings
|
||||
int start0 = pos[0];
|
||||
int ch0 = str0.codePointAt(pos[0]);
|
||||
pos[0] += Character.charCount(ch0);
|
||||
while (pos[0] < str0.length() && !isDigit(ch0 = str0.codePointAt(pos[0]))) {
|
||||
pos[0] += Character.charCount(ch0);
|
||||
}
|
||||
|
||||
int start1 = pos[1];
|
||||
int ch1 = str1.codePointAt(pos[1]);
|
||||
pos[1] += Character.charCount(ch1);
|
||||
while (pos[1] < str1.length() && !isDigit(ch1 = str1.codePointAt(pos[1]))) {
|
||||
pos[1] += Character.charCount(ch1);
|
||||
}
|
||||
|
||||
// compare the substrings
|
||||
return String.CASE_INSENSITIVE_ORDER.compare(str0.substring(start0, pos[0]), str1.substring(start1, pos[1]));
|
||||
}
|
||||
};
|
||||
protected static Comparator<String> comparator = StringComparators.ALPHANUMERIC;
|
||||
|
||||
@JsonCreator
|
||||
public AlphaNumericTopNMetricSpec(
|
||||
|
|
|
@ -26,6 +26,8 @@ import com.metamx.common.StringUtils;
|
|||
import io.druid.query.aggregation.AggregatorFactory;
|
||||
import io.druid.query.aggregation.PostAggregator;
|
||||
import io.druid.query.dimension.DimensionSpec;
|
||||
import io.druid.query.ordering.StringComparators;
|
||||
|
||||
import org.joda.time.DateTime;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
|
@ -38,29 +40,7 @@ public class LexicographicTopNMetricSpec implements TopNMetricSpec
|
|||
{
|
||||
private static final byte CACHE_TYPE_ID = 0x1;
|
||||
|
||||
private static Comparator<String> comparator = new Comparator<String>()
|
||||
{
|
||||
@Override
|
||||
public int compare(String s, String s2)
|
||||
{
|
||||
// Avoid conversion to bytes for equal references
|
||||
if(s == s2){
|
||||
return 0;
|
||||
}
|
||||
// null first
|
||||
if (s == null) {
|
||||
return -1;
|
||||
}
|
||||
if (s2 == null) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
return UnsignedBytes.lexicographicalComparator().compare(
|
||||
StringUtils.toUtf8(s),
|
||||
StringUtils.toUtf8(s2)
|
||||
);
|
||||
}
|
||||
};
|
||||
private static Comparator<String> comparator = StringComparators.LEXICOGRAPHIC;
|
||||
|
||||
private final String previousStop;
|
||||
|
||||
|
|
|
@ -79,6 +79,7 @@ import io.druid.query.groupby.having.OrHavingSpec;
|
|||
import io.druid.query.groupby.orderby.DefaultLimitSpec;
|
||||
import io.druid.query.groupby.orderby.LimitSpec;
|
||||
import io.druid.query.groupby.orderby.OrderByColumnSpec;
|
||||
import io.druid.query.ordering.StringComparators;
|
||||
import io.druid.query.spec.MultipleIntervalSegmentSpec;
|
||||
import io.druid.segment.TestHelper;
|
||||
import io.druid.segment.column.Column;
|
||||
|
@ -1649,6 +1650,67 @@ public class GroupByQueryRunnerTest
|
|||
TestHelper.assertExpectedObjects(expectedResults, results, "order-limit");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGroupByWithAlphaNumericDimensionOrder()
|
||||
{
|
||||
Map<String, String> map = new HashMap<>();
|
||||
map.put("automotive", "health105");
|
||||
map.put("business", "health20");
|
||||
map.put("entertainment", "travel47");
|
||||
map.put("health", "health55");
|
||||
map.put("mezzanine", "health09");
|
||||
map.put("news", "health0000");
|
||||
map.put("premium", "health999");
|
||||
map.put("technology", "travel123");
|
||||
map.put("travel", "travel555");
|
||||
|
||||
GroupByQuery query = GroupByQuery
|
||||
.builder()
|
||||
.setDataSource(QueryRunnerTestHelper.dataSource)
|
||||
.setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
|
||||
.setDimensions(
|
||||
Lists.<DimensionSpec>newArrayList(
|
||||
new ExtractionDimensionSpec(
|
||||
"quality", "alias", new LookupExtractionFn(new MapLookupExtractor(map), false, null, false, false), null
|
||||
)
|
||||
)
|
||||
)
|
||||
.setAggregatorSpecs(
|
||||
Arrays.asList(
|
||||
QueryRunnerTestHelper.rowsCount,
|
||||
new LongSumAggregatorFactory("idx", "index")
|
||||
)
|
||||
)
|
||||
.setLimitSpec(new DefaultLimitSpec(Lists.<OrderByColumnSpec>newArrayList(
|
||||
new OrderByColumnSpec("alias", null, StringComparators.ALPHANUMERIC)), null))
|
||||
.setGranularity(QueryRunnerTestHelper.dayGran)
|
||||
.build();
|
||||
|
||||
List<Row> expectedResults = Arrays.asList(
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "health0000", "rows", 1L, "idx", 121L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "health09", "rows", 3L, "idx", 2870L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "health20", "rows", 1L, "idx", 118L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "health55", "rows", 1L, "idx", 120L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "health105", "rows", 1L, "idx", 135L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "health999", "rows", 3L, "idx", 2900L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "travel47", "rows", 1L, "idx", 158L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "travel123", "rows", 1L, "idx", 78L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "travel555", "rows", 1L, "idx", 119L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "health0000", "rows", 1L, "idx", 114L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "health09", "rows", 3L, "idx", 2447L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "health20", "rows", 1L, "idx", 112L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "health55", "rows", 1L, "idx", 113L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "health105", "rows", 1L, "idx", 147L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "health999", "rows", 3L, "idx", 2505L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "travel47", "rows", 1L, "idx", 166L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "travel123", "rows", 1L, "idx", 97L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "travel555", "rows", 1L, "idx", 126L)
|
||||
);
|
||||
|
||||
Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
|
||||
TestHelper.assertExpectedObjects(expectedResults, results, "");
|
||||
}
|
||||
|
||||
@Ignore
|
||||
@Test
|
||||
// This is a test to verify per limit groupings, but Druid currently does not support this functionality. At a point
|
||||
|
|
|
@ -33,6 +33,8 @@ import io.druid.query.dimension.DefaultDimensionSpec;
|
|||
import io.druid.query.dimension.DimensionSpec;
|
||||
import io.druid.query.groupby.orderby.DefaultLimitSpec;
|
||||
import io.druid.query.groupby.orderby.OrderByColumnSpec;
|
||||
import io.druid.query.ordering.StringComparators;
|
||||
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
|
@ -61,7 +63,7 @@ public class GroupByQueryTest
|
|||
.setPostAggregatorSpecs(ImmutableList.<PostAggregator>of(new FieldAccessPostAggregator("x", "idx")))
|
||||
.setLimitSpec(
|
||||
new DefaultLimitSpec(
|
||||
ImmutableList.of(new OrderByColumnSpec("alias", OrderByColumnSpec.Direction.ASCENDING)),
|
||||
ImmutableList.of(new OrderByColumnSpec("alias", OrderByColumnSpec.Direction.ASCENDING, StringComparators.LEXICOGRAPHIC)),
|
||||
100
|
||||
)
|
||||
)
|
||||
|
@ -69,6 +71,8 @@ public class GroupByQueryTest
|
|||
|
||||
String json = jsonMapper.writeValueAsString(query);
|
||||
Query serdeQuery = jsonMapper.readValue(json, Query.class);
|
||||
|
||||
System.out.println(json);
|
||||
|
||||
Assert.assertEquals(query, serdeQuery);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue