Merge pull request #516 from metamx/alphanumeric-topn

Alpha-numeric sorting for topN
This commit is contained in:
fjy 2014-05-05 17:09:24 -06:00
commit 8af927e2a7
5 changed files with 105 additions and 0 deletions

View File

@ -43,3 +43,20 @@ The grammar for dimension values sorted lexicographically is as follows:
|--------|-----------|---------|
|type|this indicates a lexicographic sort|yes|
|previousStop|the starting point of the lexicographic sort. For example, if a previousStop value is 'b', all values before 'b' are discarded. This field can be used to paginate through all the dimension values.|no|
## AlphaNumeric TopNMetricSpec
Sort dimension values in alpha-numeric order, i.e., treating numbers differently from other characters when sorting the values (for example, `file2` sorts before `file10`).
See [http://www.davekoelle.com/alphanum.html](http://www.davekoelle.com/alphanum.html) for details on how the algorithm works.
```json
"metric": {
"type": "alphaNumeric",
"previousStop": "<previousStop_value>"
}
```
|property|description|required?|
|--------|-----------|---------|
|type|this indicates an alpha-numeric sort|yes|
|previousStop|the starting point of the alpha-numeric sort. For example, if a previousStop value is 'b', all values before 'b' are discarded. This field can be used to paginate through all the dimension values.|no|

View File

@ -406,6 +406,11 @@
<artifactId>httpcore</artifactId>
<version>4.2</version>
</dependency>
<!-- Alphanum comparator library; supplies AlphanumComparator used by AlphaNumericTopNMetricSpec -->
<dependency>
<groupId>com.davekoelle</groupId>
<artifactId>alphanum</artifactId>
<version>1.0.3</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>

View File

@ -78,6 +78,10 @@
<artifactId>rhino</artifactId>
<version>1.7R4</version>
</dependency>
<!-- Alphanum comparator used by AlphaNumericTopNMetricSpec. -->
<!-- NOTE(review): no version declared here; presumably managed by the parent POM - confirm. -->
<dependency>
<groupId>com.davekoelle</groupId>
<artifactId>alphanum</artifactId>
</dependency>
<!-- Tests -->

View File

@ -0,0 +1,78 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013, 2014 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.query.topn;
import com.davekoelle.alphanum.AlphanumComparator;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Charsets;
import io.druid.query.aggregation.AggregatorFactory;
import io.druid.query.aggregation.PostAggregator;
import java.nio.ByteBuffer;
import java.util.Comparator;
import java.util.List;
/**
 * TopN metric spec that orders dimension values with an {@link AlphanumComparator}
 * instead of the ordering supplied by {@link LexicographicTopNMetricSpec}.
 *
 * <p>The optional {@code previousStop} value is forwarded to the parent class and is
 * also kept locally (with {@code null} normalized to the empty string) for use in the
 * cache key and in {@link #toString()}.
 */
public class AlphaNumericTopNMetricSpec extends LexicographicTopNMetricSpec
{
  /** Leading byte written into every cache key produced by this spec. */
  private static final byte CACHE_TYPE_ID = 0x2;

  /** Never {@code null}; a missing value is stored as the empty string. */
  private final String previousStop;

  /**
   * @param previousStop value of the {@code previousStop} JSON property; may be
   *                     {@code null}, in which case the empty string is stored locally
   */
  @JsonCreator
  public AlphaNumericTopNMetricSpec(
      @JsonProperty("previousStop") String previousStop
  )
  {
    // The parent receives the raw (possibly null) value; only the local copy is normalized.
    super(previousStop);
    if (previousStop == null) {
      this.previousStop = "";
    } else {
      this.previousStop = previousStop;
    }
  }

  /**
   * Supplies the comparator used to order dimension values.
   *
   * @param aggregatorSpecs     not consulted by this implementation
   * @param postAggregatorSpecs not consulted by this implementation
   * @return a newly created {@link AlphanumComparator}
   */
  @Override
  public Comparator getComparator(List<AggregatorFactory> aggregatorSpecs, List<PostAggregator> postAggregatorSpecs)
  {
    return new AlphanumComparator();
  }

  /**
   * Builds the cache key for this spec.
   *
   * @return {@link #CACHE_TYPE_ID} followed by the UTF-8 bytes of {@code previousStop}
   */
  @Override
  public byte[] getCacheKey()
  {
    final byte[] stopBytes = previousStop.getBytes(Charsets.UTF_8);
    final ByteBuffer key = ByteBuffer.allocate(1 + stopBytes.length);
    key.put(CACHE_TYPE_ID);
    key.put(stopBytes);
    return key.array();
  }

  /**
   * Hands the builder back without configuring anything on it.
   *
   * <p>NOTE(review): presumably this overrides optimizations in the parent class that
   * only hold for a plain lexicographic ordering - confirm against
   * LexicographicTopNMetricSpec.
   *
   * @param builder the builder passed in by the caller
   * @return the same {@code builder} instance, untouched
   */
  @Override
  public <T> TopNMetricSpecBuilder<T> configureOptimizer(TopNMetricSpecBuilder<T> builder)
  {
    return builder;
  }

  @Override
  public String toString()
  {
    return "AlphaNumericTopNMetricSpec{previousStop='" + previousStop + "'}";
  }
}

View File

@ -35,6 +35,7 @@ import java.util.List;
@JsonSubTypes(value = {
@JsonSubTypes.Type(name = "numeric", value = NumericTopNMetricSpec.class),
@JsonSubTypes.Type(name = "lexicographic", value = LexicographicTopNMetricSpec.class),
@JsonSubTypes.Type(name = "alphaNumeric", value = AlphaNumericTopNMetricSpec.class),
@JsonSubTypes.Type(name = "inverted", value = InvertedTopNMetricSpec.class)
})
public interface TopNMetricSpec