mirror of https://github.com/apache/druid.git
support alphanumeric sorting for dimensional columns in groupby
This commit is contained in:
parent
1430bc2c88
commit
2f2e2ff5b9
|
@ -36,6 +36,7 @@ import io.druid.data.input.Row;
|
||||||
import io.druid.query.aggregation.AggregatorFactory;
|
import io.druid.query.aggregation.AggregatorFactory;
|
||||||
import io.druid.query.aggregation.PostAggregator;
|
import io.druid.query.aggregation.PostAggregator;
|
||||||
import io.druid.query.dimension.DimensionSpec;
|
import io.druid.query.dimension.DimensionSpec;
|
||||||
|
import io.druid.query.ordering.StringComparators.StringComparator;
|
||||||
|
|
||||||
import javax.annotation.Nullable;
|
import javax.annotation.Nullable;
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
|
@ -139,7 +140,7 @@ public class DefaultLimitSpec implements LimitSpec
|
||||||
} else if (aggregatorsMap.containsKey(columnName)) {
|
} else if (aggregatorsMap.containsKey(columnName)) {
|
||||||
nextOrdering = metricOrdering(columnName, aggregatorsMap.get(columnName).getComparator());
|
nextOrdering = metricOrdering(columnName, aggregatorsMap.get(columnName).getComparator());
|
||||||
} else if (dimensionsMap.containsKey(columnName)) {
|
} else if (dimensionsMap.containsKey(columnName)) {
|
||||||
nextOrdering = dimensionOrdering(columnName);
|
nextOrdering = dimensionOrdering(columnName, columnSpec.getDimensionComparator());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nextOrdering == null) {
|
if (nextOrdering == null) {
|
||||||
|
@ -170,9 +171,9 @@ public class DefaultLimitSpec implements LimitSpec
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
private Ordering<Row> dimensionOrdering(final String dimension)
|
private Ordering<Row> dimensionOrdering(final String dimension, final StringComparator comparator)
|
||||||
{
|
{
|
||||||
return Ordering.natural()
|
return Ordering.from(comparator)
|
||||||
.nullsFirst()
|
.nullsFirst()
|
||||||
.onResultOf(
|
.onResultOf(
|
||||||
new Function<Row, String>()
|
new Function<Row, String>()
|
||||||
|
|
|
@ -29,6 +29,9 @@ import com.metamx.common.IAE;
|
||||||
import com.metamx.common.ISE;
|
import com.metamx.common.ISE;
|
||||||
import com.metamx.common.StringUtils;
|
import com.metamx.common.StringUtils;
|
||||||
|
|
||||||
|
import io.druid.query.ordering.StringComparators;
|
||||||
|
import io.druid.query.ordering.StringComparators.StringComparator;
|
||||||
|
|
||||||
import javax.annotation.Nullable;
|
import javax.annotation.Nullable;
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
@ -45,6 +48,8 @@ public class OrderByColumnSpec
|
||||||
DESCENDING
|
DESCENDING
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static final StringComparator DEFAULT_DIMENSION_ORDER = StringComparators.LEXICOGRAPHIC;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Maintain a map of the enum values so that we can just do a lookup and get a null if it doesn't exist instead
|
* Maintain a map of the enum values so that we can just do a lookup and get a null if it doesn't exist instead
|
||||||
* of an exception thrown.
|
* of an exception thrown.
|
||||||
|
@ -61,6 +66,7 @@ public class OrderByColumnSpec
|
||||||
|
|
||||||
private final String dimension;
|
private final String dimension;
|
||||||
private final Direction direction;
|
private final Direction direction;
|
||||||
|
private final StringComparator dimensionComparator;
|
||||||
|
|
||||||
@JsonCreator
|
@JsonCreator
|
||||||
public static OrderByColumnSpec create(Object obj)
|
public static OrderByColumnSpec create(Object obj)
|
||||||
|
@ -68,14 +74,15 @@ public class OrderByColumnSpec
|
||||||
Preconditions.checkNotNull(obj, "Cannot build an OrderByColumnSpec from a null object.");
|
Preconditions.checkNotNull(obj, "Cannot build an OrderByColumnSpec from a null object.");
|
||||||
|
|
||||||
if (obj instanceof String) {
|
if (obj instanceof String) {
|
||||||
return new OrderByColumnSpec(obj.toString(), null);
|
return new OrderByColumnSpec(obj.toString(), null, null);
|
||||||
} else if (obj instanceof Map) {
|
} else if (obj instanceof Map) {
|
||||||
final Map map = (Map) obj;
|
final Map map = (Map) obj;
|
||||||
|
|
||||||
final String dimension = map.get("dimension").toString();
|
final String dimension = map.get("dimension").toString();
|
||||||
final Direction direction = determineDirection(map.get("direction"));
|
final Direction direction = determineDirection(map.get("direction"));
|
||||||
|
final StringComparator dimensionComparator = determinDimensionComparator(map.get("dimensionOrder"));
|
||||||
|
|
||||||
return new OrderByColumnSpec(dimension, direction);
|
return new OrderByColumnSpec(dimension, direction, dimensionComparator);
|
||||||
} else {
|
} else {
|
||||||
throw new ISE("Cannot build an OrderByColumnSpec from a %s", obj.getClass());
|
throw new ISE("Cannot build an OrderByColumnSpec from a %s", obj.getClass());
|
||||||
}
|
}
|
||||||
|
@ -83,7 +90,7 @@ public class OrderByColumnSpec
|
||||||
|
|
||||||
public static OrderByColumnSpec asc(String dimension)
|
public static OrderByColumnSpec asc(String dimension)
|
||||||
{
|
{
|
||||||
return new OrderByColumnSpec(dimension, Direction.ASCENDING);
|
return new OrderByColumnSpec(dimension, Direction.ASCENDING, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static List<OrderByColumnSpec> ascending(String... dimension)
|
public static List<OrderByColumnSpec> ascending(String... dimension)
|
||||||
|
@ -103,7 +110,7 @@ public class OrderByColumnSpec
|
||||||
|
|
||||||
public static OrderByColumnSpec desc(String dimension)
|
public static OrderByColumnSpec desc(String dimension)
|
||||||
{
|
{
|
||||||
return new OrderByColumnSpec(dimension, Direction.DESCENDING);
|
return new OrderByColumnSpec(dimension, Direction.DESCENDING, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static List<OrderByColumnSpec> descending(String... dimension)
|
public static List<OrderByColumnSpec> descending(String... dimension)
|
||||||
|
@ -125,9 +132,19 @@ public class OrderByColumnSpec
|
||||||
String dimension,
|
String dimension,
|
||||||
Direction direction
|
Direction direction
|
||||||
)
|
)
|
||||||
|
{
|
||||||
|
this(dimension, direction, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
public OrderByColumnSpec(
|
||||||
|
String dimension,
|
||||||
|
Direction direction,
|
||||||
|
StringComparator dimensionComparator
|
||||||
|
)
|
||||||
{
|
{
|
||||||
this.dimension = dimension;
|
this.dimension = dimension;
|
||||||
this.direction = direction == null ? Direction.ASCENDING : direction;
|
this.direction = direction == null ? Direction.ASCENDING : direction;
|
||||||
|
this.dimensionComparator = dimensionComparator == null ? DEFAULT_DIMENSION_ORDER : dimensionComparator;
|
||||||
}
|
}
|
||||||
|
|
||||||
@JsonProperty
|
@JsonProperty
|
||||||
|
@ -142,6 +159,12 @@ public class OrderByColumnSpec
|
||||||
return direction;
|
return direction;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonProperty
|
||||||
|
public StringComparator getDimensionComparator()
|
||||||
|
{
|
||||||
|
return dimensionComparator;
|
||||||
|
}
|
||||||
|
|
||||||
public static Direction determineDirection(Object directionObj)
|
public static Direction determineDirection(Object directionObj)
|
||||||
{
|
{
|
||||||
if (directionObj == null) {
|
if (directionObj == null) {
|
||||||
|
@ -172,6 +195,16 @@ public class OrderByColumnSpec
|
||||||
return direction;
|
return direction;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static StringComparator determinDimensionComparator(Object dimensionOrderObj)
|
||||||
|
{
|
||||||
|
if (dimensionOrderObj == null) {
|
||||||
|
return DEFAULT_DIMENSION_ORDER;
|
||||||
|
}
|
||||||
|
|
||||||
|
String dimensionOrderString = dimensionOrderObj.toString().toLowerCase();
|
||||||
|
return StringComparators.makeComparator(dimensionOrderString);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean equals(Object o)
|
public boolean equals(Object o)
|
||||||
{
|
{
|
||||||
|
@ -187,6 +220,9 @@ public class OrderByColumnSpec
|
||||||
if (!dimension.equals(that.dimension)) {
|
if (!dimension.equals(that.dimension)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
if (!dimensionComparator.equals(that.dimensionComparator)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
return direction == that.direction;
|
return direction == that.direction;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -196,6 +232,7 @@ public class OrderByColumnSpec
|
||||||
{
|
{
|
||||||
int result = dimension.hashCode();
|
int result = dimension.hashCode();
|
||||||
result = 31 * result + direction.hashCode();
|
result = 31 * result + direction.hashCode();
|
||||||
|
result = 31 * result + dimensionComparator.hashCode();
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -204,7 +241,8 @@ public class OrderByColumnSpec
|
||||||
{
|
{
|
||||||
return "OrderByColumnSpec{" +
|
return "OrderByColumnSpec{" +
|
||||||
"dimension='" + dimension + '\'' +
|
"dimension='" + dimension + '\'' +
|
||||||
", direction=" + direction +
|
", direction=" + direction + '\'' +
|
||||||
|
", dimensionComparator='" + dimensionComparator + '\'' +
|
||||||
'}';
|
'}';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,306 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. Metamarkets licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package io.druid.query.ordering;
|
||||||
|
|
||||||
|
import java.util.Comparator;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.annotation.JsonSubTypes;
|
||||||
|
import com.fasterxml.jackson.annotation.JsonTypeInfo;
|
||||||
|
import com.fasterxml.jackson.annotation.JsonTypeInfo.As;
|
||||||
|
import com.fasterxml.jackson.annotation.JsonTypeInfo.Id;
|
||||||
|
import com.google.common.primitives.UnsignedBytes;
|
||||||
|
import com.metamx.common.IAE;
|
||||||
|
import com.metamx.common.StringUtils;
|
||||||
|
|
||||||
|
|
||||||
|
public class StringComparators
|
||||||
|
{
|
||||||
|
public static final String LEXICOGRAPHIC_NAME = "lexicographic";
|
||||||
|
public static final String ALPHANUMERIC_NAME = "alphanumeric";
|
||||||
|
|
||||||
|
public static final LexicographicComparator LEXICOGRAPHIC = new LexicographicComparator();
|
||||||
|
public static final AlphanumericComparator ALPHANUMERIC = new AlphanumericComparator();
|
||||||
|
|
||||||
|
@JsonTypeInfo(use=Id.NAME, include=As.PROPERTY, property="type", defaultImpl = LexicographicComparator.class)
|
||||||
|
@JsonSubTypes(value = {
|
||||||
|
@JsonSubTypes.Type(name = StringComparators.LEXICOGRAPHIC_NAME, value = LexicographicComparator.class),
|
||||||
|
@JsonSubTypes.Type(name = StringComparators.ALPHANUMERIC_NAME, value = AlphanumericComparator.class)
|
||||||
|
})
|
||||||
|
public static interface StringComparator extends Comparator<String>
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class LexicographicComparator implements StringComparator
|
||||||
|
{
|
||||||
|
@Override
|
||||||
|
public int compare(String s, String s2)
|
||||||
|
{
|
||||||
|
// Avoid conversion to bytes for equal references
|
||||||
|
if(s == s2){
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
// null first
|
||||||
|
if (s == null) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (s2 == null) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return UnsignedBytes.lexicographicalComparator().compare(
|
||||||
|
StringUtils.toUtf8(s),
|
||||||
|
StringUtils.toUtf8(s2)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object o)
|
||||||
|
{
|
||||||
|
if (this == o) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (o == null || getClass() != o.getClass()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString()
|
||||||
|
{
|
||||||
|
return StringComparators.LEXICOGRAPHIC_NAME;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class AlphanumericComparator implements StringComparator
|
||||||
|
{
|
||||||
|
// This code is based on https://github.com/amjjd/java-alphanum, see
|
||||||
|
// NOTICE file for more information
|
||||||
|
public int compare(String str1, String str2)
|
||||||
|
{
|
||||||
|
int[] pos =
|
||||||
|
{ 0, 0 };
|
||||||
|
|
||||||
|
if (str1 == null)
|
||||||
|
{
|
||||||
|
return -1;
|
||||||
|
} else if (str2 == null)
|
||||||
|
{
|
||||||
|
return 1;
|
||||||
|
} else if (str1.length() == 0)
|
||||||
|
{
|
||||||
|
return str2.length() == 0 ? 0 : -1;
|
||||||
|
} else if (str2.length() == 0)
|
||||||
|
{
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (pos[0] < str1.length() && pos[1] < str2.length())
|
||||||
|
{
|
||||||
|
int ch1 = str1.codePointAt(pos[0]);
|
||||||
|
int ch2 = str2.codePointAt(pos[1]);
|
||||||
|
|
||||||
|
int result = 0;
|
||||||
|
|
||||||
|
if (isDigit(ch1))
|
||||||
|
{
|
||||||
|
result = isDigit(ch2) ? compareNumbers(str1, str2, pos) : -1;
|
||||||
|
} else
|
||||||
|
{
|
||||||
|
result = isDigit(ch2) ? 1 : compareNonNumeric(str1, str2, pos);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (result != 0)
|
||||||
|
{
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return str1.length() - str2.length();
|
||||||
|
}
|
||||||
|
|
||||||
|
private int compareNumbers(String str0, String str1, int[] pos)
|
||||||
|
{
|
||||||
|
int delta = 0;
|
||||||
|
int zeroes0 = 0, zeroes1 = 0;
|
||||||
|
int ch0 = -1, ch1 = -1;
|
||||||
|
|
||||||
|
// Skip leading zeroes, but keep a count of them.
|
||||||
|
while (pos[0] < str0.length() && isZero(ch0 = str0.codePointAt(pos[0])))
|
||||||
|
{
|
||||||
|
zeroes0++;
|
||||||
|
pos[0] += Character.charCount(ch0);
|
||||||
|
}
|
||||||
|
while (pos[1] < str1.length() && isZero(ch1 = str1.codePointAt(pos[1])))
|
||||||
|
{
|
||||||
|
zeroes1++;
|
||||||
|
pos[1] += Character.charCount(ch1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// If one sequence contains more significant digits than the
|
||||||
|
// other, it's a larger number. In case they turn out to have
|
||||||
|
// equal lengths, we compare digits at each position; the first
|
||||||
|
// unequal pair determines which is the bigger number.
|
||||||
|
while (true)
|
||||||
|
{
|
||||||
|
boolean noMoreDigits0 = (ch0 < 0) || !isDigit(ch0);
|
||||||
|
boolean noMoreDigits1 = (ch1 < 0) || !isDigit(ch1);
|
||||||
|
|
||||||
|
if (noMoreDigits0 && noMoreDigits1)
|
||||||
|
{
|
||||||
|
return delta != 0 ? delta : zeroes0 - zeroes1;
|
||||||
|
} else if (noMoreDigits0)
|
||||||
|
{
|
||||||
|
return -1;
|
||||||
|
} else if (noMoreDigits1)
|
||||||
|
{
|
||||||
|
return 1;
|
||||||
|
} else if (delta == 0 && ch0 != ch1)
|
||||||
|
{
|
||||||
|
delta = valueOf(ch0) - valueOf(ch1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pos[0] < str0.length())
|
||||||
|
{
|
||||||
|
ch0 = str0.codePointAt(pos[0]);
|
||||||
|
if (isDigit(ch0))
|
||||||
|
{
|
||||||
|
pos[0] += Character.charCount(ch0);
|
||||||
|
} else
|
||||||
|
{
|
||||||
|
ch0 = -1;
|
||||||
|
}
|
||||||
|
} else
|
||||||
|
{
|
||||||
|
ch0 = -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pos[1] < str1.length())
|
||||||
|
{
|
||||||
|
ch1 = str1.codePointAt(pos[1]);
|
||||||
|
if (isDigit(ch1))
|
||||||
|
{
|
||||||
|
pos[1] += Character.charCount(ch1);
|
||||||
|
} else
|
||||||
|
{
|
||||||
|
ch1 = -1;
|
||||||
|
}
|
||||||
|
} else
|
||||||
|
{
|
||||||
|
ch1 = -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean isDigit(int ch)
|
||||||
|
{
|
||||||
|
return (ch >= '0' && ch <= '9') ||
|
||||||
|
(ch >= '\u0660' && ch <= '\u0669') ||
|
||||||
|
(ch >= '\u06F0' && ch <= '\u06F9') ||
|
||||||
|
(ch >= '\u0966' && ch <= '\u096F') ||
|
||||||
|
(ch >= '\uFF10' && ch <= '\uFF19');
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean isZero(int ch)
|
||||||
|
{
|
||||||
|
return ch == '0' || ch == '\u0660' || ch == '\u06F0' || ch == '\u0966' || ch == '\uFF10';
|
||||||
|
}
|
||||||
|
|
||||||
|
private int valueOf(int digit)
|
||||||
|
{
|
||||||
|
if (digit <= '9')
|
||||||
|
{
|
||||||
|
return digit - '0';
|
||||||
|
}
|
||||||
|
if (digit <= '\u0669')
|
||||||
|
{
|
||||||
|
return digit - '\u0660';
|
||||||
|
}
|
||||||
|
if (digit <= '\u06F9')
|
||||||
|
{
|
||||||
|
return digit - '\u06F0';
|
||||||
|
}
|
||||||
|
if (digit <= '\u096F')
|
||||||
|
{
|
||||||
|
return digit - '\u0966';
|
||||||
|
}
|
||||||
|
if (digit <= '\uFF19')
|
||||||
|
{
|
||||||
|
return digit - '\uFF10';
|
||||||
|
}
|
||||||
|
|
||||||
|
return digit;
|
||||||
|
}
|
||||||
|
|
||||||
|
private int compareNonNumeric(String str0, String str1, int[] pos)
|
||||||
|
{
|
||||||
|
// find the end of both non-numeric substrings
|
||||||
|
int start0 = pos[0];
|
||||||
|
int ch0 = str0.codePointAt(pos[0]);
|
||||||
|
pos[0] += Character.charCount(ch0);
|
||||||
|
while (pos[0] < str0.length() && !isDigit(ch0 = str0.codePointAt(pos[0])))
|
||||||
|
{
|
||||||
|
pos[0] += Character.charCount(ch0);
|
||||||
|
}
|
||||||
|
|
||||||
|
int start1 = pos[1];
|
||||||
|
int ch1 = str1.codePointAt(pos[1]);
|
||||||
|
pos[1] += Character.charCount(ch1);
|
||||||
|
while (pos[1] < str1.length() && !isDigit(ch1 = str1.codePointAt(pos[1])))
|
||||||
|
{
|
||||||
|
pos[1] += Character.charCount(ch1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// compare the substrings
|
||||||
|
return String.CASE_INSENSITIVE_ORDER.compare(str0.substring(start0, pos[0]), str1.substring(start1, pos[1]));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object o)
|
||||||
|
{
|
||||||
|
if (this == o) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (o == null || getClass() != o.getClass()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString()
|
||||||
|
{
|
||||||
|
return StringComparators.ALPHANUMERIC_NAME;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static StringComparator makeComparator(String type)
|
||||||
|
{
|
||||||
|
if (type.equals(StringComparators.LEXICOGRAPHIC_NAME)) {
|
||||||
|
return LEXICOGRAPHIC;
|
||||||
|
} else if (type.equals(StringComparators.ALPHANUMERIC_NAME)) {
|
||||||
|
return ALPHANUMERIC;
|
||||||
|
} else {
|
||||||
|
throw new IAE("Unknown string comparator[%s]", type);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -24,6 +24,7 @@ import com.fasterxml.jackson.annotation.JsonProperty;
|
||||||
import com.metamx.common.StringUtils;
|
import com.metamx.common.StringUtils;
|
||||||
import io.druid.query.aggregation.AggregatorFactory;
|
import io.druid.query.aggregation.AggregatorFactory;
|
||||||
import io.druid.query.aggregation.PostAggregator;
|
import io.druid.query.aggregation.PostAggregator;
|
||||||
|
import io.druid.query.ordering.StringComparators;
|
||||||
|
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
|
@ -33,157 +34,7 @@ public class AlphaNumericTopNMetricSpec extends LexicographicTopNMetricSpec
|
||||||
{
|
{
|
||||||
private static final byte CACHE_TYPE_ID = 0x2;
|
private static final byte CACHE_TYPE_ID = 0x2;
|
||||||
|
|
||||||
protected static Comparator<String> comparator = new Comparator<String>()
|
protected static Comparator<String> comparator = StringComparators.ALPHANUMERIC;
|
||||||
{
|
|
||||||
// This code is based on https://github.com/amjjd/java-alphanum, see NOTICE file for more information
|
|
||||||
public int compare(String str1, String str2)
|
|
||||||
{
|
|
||||||
int[] pos = {0, 0};
|
|
||||||
|
|
||||||
if (str1 == null) {
|
|
||||||
return -1;
|
|
||||||
} else if (str2 == null) {
|
|
||||||
return 1;
|
|
||||||
} else if (str1.length() == 0) {
|
|
||||||
return str2.length() == 0 ? 0 : -1;
|
|
||||||
} else if (str2.length() == 0) {
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
while (pos[0] < str1.length() && pos[1] < str2.length()) {
|
|
||||||
int ch1 = str1.codePointAt(pos[0]);
|
|
||||||
int ch2 = str2.codePointAt(pos[1]);
|
|
||||||
|
|
||||||
int result = 0;
|
|
||||||
|
|
||||||
if (isDigit(ch1)) {
|
|
||||||
result = isDigit(ch2) ? compareNumbers(str1, str2, pos) : -1;
|
|
||||||
} else {
|
|
||||||
result = isDigit(ch2) ? 1 : compareNonNumeric(str1, str2, pos);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (result != 0) {
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return str1.length() - str2.length();
|
|
||||||
}
|
|
||||||
|
|
||||||
private int compareNumbers(String str0, String str1, int[] pos)
|
|
||||||
{
|
|
||||||
int delta = 0;
|
|
||||||
int zeroes0 = 0, zeroes1 = 0;
|
|
||||||
int ch0 = -1, ch1 = -1;
|
|
||||||
|
|
||||||
// Skip leading zeroes, but keep a count of them.
|
|
||||||
while (pos[0] < str0.length() && isZero(ch0 = str0.codePointAt(pos[0]))) {
|
|
||||||
zeroes0++;
|
|
||||||
pos[0] += Character.charCount(ch0);
|
|
||||||
}
|
|
||||||
while (pos[1] < str1.length() && isZero(ch1 = str1.codePointAt(pos[1]))) {
|
|
||||||
zeroes1++;
|
|
||||||
pos[1] += Character.charCount(ch1);
|
|
||||||
}
|
|
||||||
|
|
||||||
// If one sequence contains more significant digits than the
|
|
||||||
// other, it's a larger number. In case they turn out to have
|
|
||||||
// equal lengths, we compare digits at each position; the first
|
|
||||||
// unequal pair determines which is the bigger number.
|
|
||||||
while (true) {
|
|
||||||
boolean noMoreDigits0 = (ch0 < 0) || !isDigit(ch0);
|
|
||||||
boolean noMoreDigits1 = (ch1 < 0) || !isDigit(ch1);
|
|
||||||
|
|
||||||
if (noMoreDigits0 && noMoreDigits1) {
|
|
||||||
return delta != 0 ? delta : zeroes0 - zeroes1;
|
|
||||||
} else if (noMoreDigits0) {
|
|
||||||
return -1;
|
|
||||||
} else if (noMoreDigits1) {
|
|
||||||
return 1;
|
|
||||||
} else if (delta == 0 && ch0 != ch1) {
|
|
||||||
delta = valueOf(ch0) - valueOf(ch1);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (pos[0] < str0.length()) {
|
|
||||||
ch0 = str0.codePointAt(pos[0]);
|
|
||||||
if (isDigit(ch0)) {
|
|
||||||
pos[0] += Character.charCount(ch0);
|
|
||||||
} else {
|
|
||||||
ch0 = -1;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
ch0 = -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (pos[1] < str1.length()) {
|
|
||||||
ch1 = str1.codePointAt(pos[1]);
|
|
||||||
if (isDigit(ch1)) {
|
|
||||||
pos[1] += Character.charCount(ch1);
|
|
||||||
} else {
|
|
||||||
ch1 = -1;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
ch1 = -1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private boolean isDigit(int ch)
|
|
||||||
{
|
|
||||||
return (ch >= '0' && ch <= '9') ||
|
|
||||||
(ch >= '\u0660' && ch <= '\u0669') ||
|
|
||||||
(ch >= '\u06F0' && ch <= '\u06F9') ||
|
|
||||||
(ch >= '\u0966' && ch <= '\u096F') ||
|
|
||||||
(ch >= '\uFF10' && ch <= '\uFF19');
|
|
||||||
}
|
|
||||||
|
|
||||||
private boolean isZero(int ch)
|
|
||||||
{
|
|
||||||
return ch == '0' || ch == '\u0660' || ch == '\u06F0' || ch == '\u0966' || ch == '\uFF10';
|
|
||||||
}
|
|
||||||
|
|
||||||
private int valueOf(int digit)
|
|
||||||
{
|
|
||||||
if (digit <= '9') {
|
|
||||||
return digit - '0';
|
|
||||||
}
|
|
||||||
if (digit <= '\u0669') {
|
|
||||||
return digit - '\u0660';
|
|
||||||
}
|
|
||||||
if (digit <= '\u06F9') {
|
|
||||||
return digit - '\u06F0';
|
|
||||||
}
|
|
||||||
if (digit <= '\u096F') {
|
|
||||||
return digit - '\u0966';
|
|
||||||
}
|
|
||||||
if (digit <= '\uFF19') {
|
|
||||||
return digit - '\uFF10';
|
|
||||||
}
|
|
||||||
|
|
||||||
return digit;
|
|
||||||
}
|
|
||||||
|
|
||||||
private int compareNonNumeric(String str0, String str1, int[] pos)
|
|
||||||
{
|
|
||||||
// find the end of both non-numeric substrings
|
|
||||||
int start0 = pos[0];
|
|
||||||
int ch0 = str0.codePointAt(pos[0]);
|
|
||||||
pos[0] += Character.charCount(ch0);
|
|
||||||
while (pos[0] < str0.length() && !isDigit(ch0 = str0.codePointAt(pos[0]))) {
|
|
||||||
pos[0] += Character.charCount(ch0);
|
|
||||||
}
|
|
||||||
|
|
||||||
int start1 = pos[1];
|
|
||||||
int ch1 = str1.codePointAt(pos[1]);
|
|
||||||
pos[1] += Character.charCount(ch1);
|
|
||||||
while (pos[1] < str1.length() && !isDigit(ch1 = str1.codePointAt(pos[1]))) {
|
|
||||||
pos[1] += Character.charCount(ch1);
|
|
||||||
}
|
|
||||||
|
|
||||||
// compare the substrings
|
|
||||||
return String.CASE_INSENSITIVE_ORDER.compare(str0.substring(start0, pos[0]), str1.substring(start1, pos[1]));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
@JsonCreator
|
@JsonCreator
|
||||||
public AlphaNumericTopNMetricSpec(
|
public AlphaNumericTopNMetricSpec(
|
||||||
|
|
|
@ -26,6 +26,8 @@ import com.metamx.common.StringUtils;
|
||||||
import io.druid.query.aggregation.AggregatorFactory;
|
import io.druid.query.aggregation.AggregatorFactory;
|
||||||
import io.druid.query.aggregation.PostAggregator;
|
import io.druid.query.aggregation.PostAggregator;
|
||||||
import io.druid.query.dimension.DimensionSpec;
|
import io.druid.query.dimension.DimensionSpec;
|
||||||
|
import io.druid.query.ordering.StringComparators;
|
||||||
|
|
||||||
import org.joda.time.DateTime;
|
import org.joda.time.DateTime;
|
||||||
|
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
|
@ -38,29 +40,7 @@ public class LexicographicTopNMetricSpec implements TopNMetricSpec
|
||||||
{
|
{
|
||||||
private static final byte CACHE_TYPE_ID = 0x1;
|
private static final byte CACHE_TYPE_ID = 0x1;
|
||||||
|
|
||||||
private static Comparator<String> comparator = new Comparator<String>()
|
private static Comparator<String> comparator = StringComparators.LEXICOGRAPHIC;
|
||||||
{
|
|
||||||
@Override
|
|
||||||
public int compare(String s, String s2)
|
|
||||||
{
|
|
||||||
// Avoid conversion to bytes for equal references
|
|
||||||
if(s == s2){
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
// null first
|
|
||||||
if (s == null) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
if (s2 == null) {
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
return UnsignedBytes.lexicographicalComparator().compare(
|
|
||||||
StringUtils.toUtf8(s),
|
|
||||||
StringUtils.toUtf8(s2)
|
|
||||||
);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
private final String previousStop;
|
private final String previousStop;
|
||||||
|
|
||||||
|
|
|
@ -79,6 +79,7 @@ import io.druid.query.groupby.having.OrHavingSpec;
|
||||||
import io.druid.query.groupby.orderby.DefaultLimitSpec;
|
import io.druid.query.groupby.orderby.DefaultLimitSpec;
|
||||||
import io.druid.query.groupby.orderby.LimitSpec;
|
import io.druid.query.groupby.orderby.LimitSpec;
|
||||||
import io.druid.query.groupby.orderby.OrderByColumnSpec;
|
import io.druid.query.groupby.orderby.OrderByColumnSpec;
|
||||||
|
import io.druid.query.ordering.StringComparators;
|
||||||
import io.druid.query.spec.MultipleIntervalSegmentSpec;
|
import io.druid.query.spec.MultipleIntervalSegmentSpec;
|
||||||
import io.druid.segment.TestHelper;
|
import io.druid.segment.TestHelper;
|
||||||
import io.druid.segment.column.Column;
|
import io.druid.segment.column.Column;
|
||||||
|
@ -1649,6 +1650,67 @@ public class GroupByQueryRunnerTest
|
||||||
TestHelper.assertExpectedObjects(expectedResults, results, "order-limit");
|
TestHelper.assertExpectedObjects(expectedResults, results, "order-limit");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGroupByWithAlphaNumericDimensionOrder()
|
||||||
|
{
|
||||||
|
Map<String, String> map = new HashMap<>();
|
||||||
|
map.put("automotive", "health105");
|
||||||
|
map.put("business", "health20");
|
||||||
|
map.put("entertainment", "travel47");
|
||||||
|
map.put("health", "health55");
|
||||||
|
map.put("mezzanine", "health09");
|
||||||
|
map.put("news", "health0000");
|
||||||
|
map.put("premium", "health999");
|
||||||
|
map.put("technology", "travel123");
|
||||||
|
map.put("travel", "travel555");
|
||||||
|
|
||||||
|
GroupByQuery query = GroupByQuery
|
||||||
|
.builder()
|
||||||
|
.setDataSource(QueryRunnerTestHelper.dataSource)
|
||||||
|
.setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
|
||||||
|
.setDimensions(
|
||||||
|
Lists.<DimensionSpec>newArrayList(
|
||||||
|
new ExtractionDimensionSpec(
|
||||||
|
"quality", "alias", new LookupExtractionFn(new MapLookupExtractor(map), false, null, false, false), null
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
.setAggregatorSpecs(
|
||||||
|
Arrays.asList(
|
||||||
|
QueryRunnerTestHelper.rowsCount,
|
||||||
|
new LongSumAggregatorFactory("idx", "index")
|
||||||
|
)
|
||||||
|
)
|
||||||
|
.setLimitSpec(new DefaultLimitSpec(Lists.<OrderByColumnSpec>newArrayList(
|
||||||
|
new OrderByColumnSpec("alias", null, StringComparators.ALPHANUMERIC)), null))
|
||||||
|
.setGranularity(QueryRunnerTestHelper.dayGran)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
List<Row> expectedResults = Arrays.asList(
|
||||||
|
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "health0000", "rows", 1L, "idx", 121L),
|
||||||
|
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "health09", "rows", 3L, "idx", 2870L),
|
||||||
|
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "health20", "rows", 1L, "idx", 118L),
|
||||||
|
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "health55", "rows", 1L, "idx", 120L),
|
||||||
|
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "health105", "rows", 1L, "idx", 135L),
|
||||||
|
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "health999", "rows", 3L, "idx", 2900L),
|
||||||
|
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "travel47", "rows", 1L, "idx", 158L),
|
||||||
|
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "travel123", "rows", 1L, "idx", 78L),
|
||||||
|
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "travel555", "rows", 1L, "idx", 119L),
|
||||||
|
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "health0000", "rows", 1L, "idx", 114L),
|
||||||
|
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "health09", "rows", 3L, "idx", 2447L),
|
||||||
|
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "health20", "rows", 1L, "idx", 112L),
|
||||||
|
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "health55", "rows", 1L, "idx", 113L),
|
||||||
|
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "health105", "rows", 1L, "idx", 147L),
|
||||||
|
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "health999", "rows", 3L, "idx", 2505L),
|
||||||
|
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "travel47", "rows", 1L, "idx", 166L),
|
||||||
|
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "travel123", "rows", 1L, "idx", 97L),
|
||||||
|
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "travel555", "rows", 1L, "idx", 126L)
|
||||||
|
);
|
||||||
|
|
||||||
|
Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
|
||||||
|
TestHelper.assertExpectedObjects(expectedResults, results, "");
|
||||||
|
}
|
||||||
|
|
||||||
@Ignore
|
@Ignore
|
||||||
@Test
|
@Test
|
||||||
// This is a test to verify per limit groupings, but Druid currently does not support this functionality. At a point
|
// This is a test to verify per limit groupings, but Druid currently does not support this functionality. At a point
|
||||||
|
|
|
@ -33,6 +33,8 @@ import io.druid.query.dimension.DefaultDimensionSpec;
|
||||||
import io.druid.query.dimension.DimensionSpec;
|
import io.druid.query.dimension.DimensionSpec;
|
||||||
import io.druid.query.groupby.orderby.DefaultLimitSpec;
|
import io.druid.query.groupby.orderby.DefaultLimitSpec;
|
||||||
import io.druid.query.groupby.orderby.OrderByColumnSpec;
|
import io.druid.query.groupby.orderby.OrderByColumnSpec;
|
||||||
|
import io.druid.query.ordering.StringComparators;
|
||||||
|
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
|
@ -61,7 +63,7 @@ public class GroupByQueryTest
|
||||||
.setPostAggregatorSpecs(ImmutableList.<PostAggregator>of(new FieldAccessPostAggregator("x", "idx")))
|
.setPostAggregatorSpecs(ImmutableList.<PostAggregator>of(new FieldAccessPostAggregator("x", "idx")))
|
||||||
.setLimitSpec(
|
.setLimitSpec(
|
||||||
new DefaultLimitSpec(
|
new DefaultLimitSpec(
|
||||||
ImmutableList.of(new OrderByColumnSpec("alias", OrderByColumnSpec.Direction.ASCENDING)),
|
ImmutableList.of(new OrderByColumnSpec("alias", OrderByColumnSpec.Direction.ASCENDING, StringComparators.LEXICOGRAPHIC)),
|
||||||
100
|
100
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
@ -70,6 +72,8 @@ public class GroupByQueryTest
|
||||||
String json = jsonMapper.writeValueAsString(query);
|
String json = jsonMapper.writeValueAsString(query);
|
||||||
Query serdeQuery = jsonMapper.readValue(json, Query.class);
|
Query serdeQuery = jsonMapper.readValue(json, Query.class);
|
||||||
|
|
||||||
|
System.out.println(json);
|
||||||
|
|
||||||
Assert.assertEquals(query, serdeQuery);
|
Assert.assertEquals(query, serdeQuery);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue