diff --git a/docs/content/TopNMetricSpec.md b/docs/content/TopNMetricSpec.md index 672f5d352f6..299f8a358ff 100644 --- a/docs/content/TopNMetricSpec.md +++ b/docs/content/TopNMetricSpec.md @@ -43,3 +43,20 @@ The grammar for dimension values sorted lexicographically is as follows: |--------|-----------|---------| |type|this indicates a lexicographic sort|yes| |previousStop|the starting point of the lexicographic sort. For example, if a previousStop value is 'b', all values before 'b' are discarded. This field can be used to paginate through all the dimension values.|no| + +## AlphaNumeric TopNMetricSpec + +Sort dimension values in alpha-numeric order, i.e., treating numbers differently from other characters when sorting the values (for example, 'file2' sorts before 'file10'). +See [http://www.davekoelle.com/alphanum.html](http://www.davekoelle.com/alphanum.html) for details on how the algorithm works. + +```json +"metric": { + "type": "alphaNumeric", + "previousStop": "" +} +``` + +|property|description|required?| +|--------|-----------|---------| +|type|this indicates an alpha-numeric sort|yes| +|previousStop|the starting point of the alpha-numeric sort. For example, if a previousStop value is 'b', all values before 'b' are discarded. 
This field can be used to paginate through all the dimension values.|no| diff --git a/pom.xml b/pom.xml index 5b61d83682b..de8a4487880 100644 --- a/pom.xml +++ b/pom.xml @@ -406,6 +406,11 @@ <artifactId>httpcore</artifactId> <version>4.2</version> </dependency> + <dependency> + <groupId>com.davekoelle</groupId> + <artifactId>alphanum</artifactId> + <version>1.0.3</version> + </dependency> <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-client</artifactId> diff --git a/processing/pom.xml b/processing/pom.xml index b2979cbfea4..838f24ea07a 100644 --- a/processing/pom.xml +++ b/processing/pom.xml @@ -78,6 +78,10 @@ <artifactId>rhino</artifactId> <version>1.7R4</version> </dependency> + <dependency> + <groupId>com.davekoelle</groupId> + <artifactId>alphanum</artifactId> + </dependency> diff --git a/processing/src/main/java/io/druid/query/topn/AlphaNumericTopNMetricSpec.java b/processing/src/main/java/io/druid/query/topn/AlphaNumericTopNMetricSpec.java new file mode 100644 index 00000000000..008b8dccd76 --- /dev/null +++ b/processing/src/main/java/io/druid/query/topn/AlphaNumericTopNMetricSpec.java @@ -0,0 +1,78 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012, 2013, 2014 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +package io.druid.query.topn; + +import com.davekoelle.alphanum.AlphanumComparator; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Charsets; +import io.druid.query.aggregation.AggregatorFactory; +import io.druid.query.aggregation.PostAggregator; + +import java.nio.ByteBuffer; +import java.util.Comparator; +import java.util.List; + +/** TopN metric spec that extends LexicographicTopNMetricSpec and supplies an AlphanumComparator as its ordering. */ public class AlphaNumericTopNMetricSpec extends LexicographicTopNMetricSpec +{ + /** Leading byte of the cache key built by getCacheKey. */ private static final byte CACHE_TYPE_ID = 0x2; + + /** Constructor argument with null replaced by the empty string; read by getCacheKey and toString. */ private final String previousStop; + + /** @param previousStop forwarded unchanged to the superclass constructor; a null value is stored in this class as the empty string */ @JsonCreator + public AlphaNumericTopNMetricSpec( + @JsonProperty("previousStop") String previousStop + ) + { + super(previousStop); + this.previousStop = (previousStop == null) ? "" : previousStop; + } + + /** Returns a new AlphanumComparator on every call; neither argument is read. NOTE(review): the List parameters are raw here while the AggregatorFactory and PostAggregator imports are otherwise unused, so type arguments may be missing; confirm against TopNMetricSpec. */ @Override + public Comparator getComparator(List aggregatorSpecs, List postAggregatorSpecs) + { + return new AlphanumComparator(); + } + + /** Builds the cache key: the single CACHE_TYPE_ID byte followed by the UTF-8 bytes of previousStop. */ @Override + public byte[] getCacheKey() + { + byte[] previousStopBytes = previousStop.getBytes(Charsets.UTF_8); + + return ByteBuffer.allocate(1 + previousStopBytes.length) + .put(CACHE_TYPE_ID) + .put(previousStopBytes) + .array(); + } + + /** Returns the given builder unchanged. */ @Override + public TopNMetricSpecBuilder configureOptimizer(TopNMetricSpecBuilder builder) + { + return builder; + } + + /** Renders the class name together with the null-normalized previousStop value in single quotes. */ @Override + public String toString() + { + return "AlphaNumericTopNMetricSpec{" + + "previousStop='" + previousStop + '\'' + + '}'; + } +} diff --git a/processing/src/main/java/io/druid/query/topn/TopNMetricSpec.java b/processing/src/main/java/io/druid/query/topn/TopNMetricSpec.java index 267f2f278dd..6e934e32dd6 100644 --- a/processing/src/main/java/io/druid/query/topn/TopNMetricSpec.java +++ b/processing/src/main/java/io/druid/query/topn/TopNMetricSpec.java @@ -35,6 +35,7 @@ import java.util.List; @JsonSubTypes(value = { @JsonSubTypes.Type(name = "numeric", value = NumericTopNMetricSpec.class), @JsonSubTypes.Type(name = "lexicographic", value = 
LexicographicTopNMetricSpec.class), + @JsonSubTypes.Type(name = "alphaNumeric", value = AlphaNumericTopNMetricSpec.class), @JsonSubTypes.Type(name = "inverted", value = InvertedTopNMetricSpec.class) }) public interface TopNMetricSpec