Better handle dimension-extraction regex cases, including null values in topN ordering

This commit is contained in:
fjy 2014-05-28 10:27:00 -07:00
parent 8bcf4bc60e
commit 2ee1defce9
9 changed files with 134 additions and 11 deletions

View File

@ -96,6 +96,6 @@ public class JavascriptDimExtractionFn implements DimExtractionFn
@Override
public boolean preservesOrdering()
{
return true;
return false;
}
}

View File

@ -70,7 +70,7 @@ public class PartialDimExtractionFn implements DimExtractionFn
@Override
public boolean preservesOrdering()
{
return false;
return true;
}
@Override

View File

@ -70,7 +70,7 @@ public class RegexDimExtractionFn implements DimExtractionFn
@Override
public boolean preservesOrdering()
{
return true;
return false;
}
@Override

View File

@ -66,7 +66,7 @@ public class SearchQuerySpecDimExtractionFn implements DimExtractionFn
@Override
public boolean preservesOrdering()
{
return false;
return true;
}
@Override

View File

@ -90,7 +90,7 @@ public class TimeDimExtractionFn implements DimExtractionFn
@Override
public boolean preservesOrdering()
{
return true;
return false;
}
@Override

View File

@ -65,9 +65,11 @@ public class DimExtractionTopNAlgorithm extends BaseTopNAlgorithm<Aggregator[][]
query,
params.getCardinality()
);
if (query.getDimensionSpec().preservesOrdering()) {
if (!query.getDimensionSpec().preservesOrdering()) {
return provider.build();
}
return query.getTopNMetricSpec().configureOptimizer(provider).build();
}

View File

@ -21,6 +21,7 @@ package io.druid.query.topn;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.collect.Ordering;
import com.metamx.common.guava.Comparators;
import io.druid.query.aggregation.AggregatorFactory;
import io.druid.query.aggregation.PostAggregator;
@ -63,11 +64,27 @@ public class InvertedTopNMetricSpec implements TopNMetricSpec
@Override
public Comparator getComparator(
List<AggregatorFactory> aggregatorSpecs,
List<PostAggregator> postAggregatorSpecs
final List<AggregatorFactory> aggregatorSpecs,
final List<PostAggregator> postAggregatorSpecs
)
{
return Comparators.inverse(delegate.getComparator(aggregatorSpecs, postAggregatorSpecs));
return Comparators.inverse(
new Comparator()
{
@Override
public int compare(Object o1, Object o2)
{
// nulls last
if (o1 == null) {
return 1;
}
if (o2 == null) {
return -1;
}
return delegate.getComparator(aggregatorSpecs, postAggregatorSpecs).compare(o1, o2);
}
}
);
}
@Override

View File

@ -43,6 +43,13 @@ public class LexicographicTopNMetricSpec implements TopNMetricSpec
/**
 * Orders strings by the unsigned lexicographic order of their UTF-8 bytes,
 * placing {@code null} before every non-null value.
 *
 * @param s  left operand, may be null
 * @param s2 right operand, may be null
 * @return a negative value, zero, or a positive value as {@code s} sorts before,
 *         equal to, or after {@code s2}
 */
@Override
public int compare(String s, String s2)
{
  // null first; two nulls must compare as equal so that the Comparator contract
  // (sgn(compare(a, b)) == -sgn(compare(b, a))) holds for null operands as well
  if (s == null) {
    return (s2 == null) ? 0 : -1;
  }
  if (s2 == null) {
    return 1;
  }
  return UnsignedBytes.lexicographicalComparator().compare(s.getBytes(Charsets.UTF_8), s2.getBytes(Charsets.UTF_8));
}
};
@ -54,7 +61,7 @@ public class LexicographicTopNMetricSpec implements TopNMetricSpec
@JsonProperty("previousStop") String previousStop
)
{
this.previousStop = (previousStop == null) ? "" : previousStop;
this.previousStop = previousStop;
}
@Override
@ -98,7 +105,7 @@ public class LexicographicTopNMetricSpec implements TopNMetricSpec
@Override
public byte[] getCacheKey()
{
byte[] previousStopBytes = previousStop.getBytes(Charsets.UTF_8);
byte[] previousStopBytes = previousStop == null ? new byte[]{} : previousStop.getBytes(Charsets.UTF_8);
return ByteBuffer.allocate(1 + previousStopBytes.length)
.put(CACHE_TYPE_ID)

View File

@ -1263,6 +1263,58 @@ public class TopNQueryRunnerTest
TestHelper.assertExpectedResults(expectedResults, runner.run(query));
}
/**
 * Inverted lexicographic topN over a regex-extracted dimension with no previous stop.
 * The regex {@code "..(.)"} has a single capture group on the third character
 * (presumably what the extraction fn keeps -- confirm against RegexDimExtractionFn);
 * the expected rows come back in descending order: "t", "o", "f".
 */
@Test
public void testInvertedTopNLexicographicDimExtraction2()
{
  final ExtractionDimensionSpec extractedProvider = new ExtractionDimensionSpec(
      providerDimension,
      providerDimension,
      new RegexDimExtractionFn("..(.)")
  );

  final TopNQuery query = new TopNQueryBuilder()
      .dataSource(QueryRunnerTestHelper.dataSource)
      .granularity(QueryRunnerTestHelper.allGran)
      .dimension(extractedProvider)
      .metric(new InvertedTopNMetricSpec(new LexicographicTopNMetricSpec(null)))
      .threshold(4)
      .intervals(QueryRunnerTestHelper.firstToThird)
      .aggregators(QueryRunnerTestHelper.commonAggregators)
      .postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
      .build();

  // One expected row per extracted value, listed in the order the query should return them.
  final Map<String, Object> rowT = ImmutableMap.<String, Object>of(
      providerDimension, "t",
      "rows", 4L,
      "index", 5351.814697265625D,
      "addRowsIndexConstant", 5356.814697265625D,
      "uniques", QueryRunnerTestHelper.UNIQUES_2
  );
  final Map<String, Object> rowO = ImmutableMap.<String, Object>of(
      providerDimension, "o",
      "rows", 18L,
      "index", 2231.8768157958984D,
      "addRowsIndexConstant", 2250.8768157958984D,
      "uniques", QueryRunnerTestHelper.UNIQUES_9
  );
  final Map<String, Object> rowF = ImmutableMap.<String, Object>of(
      providerDimension, "f",
      "rows", 4L,
      "index", 4875.669677734375D,
      "addRowsIndexConstant", 4880.669677734375D,
      "uniques", QueryRunnerTestHelper.UNIQUES_2
  );

  final List<Result<TopNResultValue>> expectedResults = Arrays.asList(
      new Result<TopNResultValue>(
          new DateTime("2011-04-01T00:00:00.000Z"),
          new TopNResultValue(Arrays.<Map<String, Object>>asList(rowT, rowO, rowF))
      )
  );

  TestHelper.assertExpectedResults(expectedResults, runner.run(query));
}
@Test
public void testTopNLexicographicDimExtractionWithPreviousStop()
{
@ -1354,6 +1406,51 @@ public class TopNQueryRunnerTest
TestHelper.assertExpectedResults(expectedResults, runner.run(query));
}
/**
 * Inverted lexicographic topN over a regex-extracted dimension, resuming from
 * previous stop "p". Only the rows for "o" and "f" are expected, in that order.
 */
@Test
public void testInvertedTopNLexicographicDimExtractionWithPreviousStop2()
{
  final ExtractionDimensionSpec extractedProvider = new ExtractionDimensionSpec(
      providerDimension,
      providerDimension,
      new RegexDimExtractionFn("..(.)")
  );

  final TopNQuery query = new TopNQueryBuilder()
      .dataSource(QueryRunnerTestHelper.dataSource)
      .granularity(QueryRunnerTestHelper.allGran)
      .dimension(extractedProvider)
      .metric(new InvertedTopNMetricSpec(new LexicographicTopNMetricSpec("p")))
      .threshold(4)
      .intervals(QueryRunnerTestHelper.firstToThird)
      .aggregators(QueryRunnerTestHelper.commonAggregators)
      .postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
      .build();

  // Expected rows, listed in the order the query should return them.
  final Map<String, Object> rowO = ImmutableMap.<String, Object>of(
      providerDimension, "o",
      "rows", 18L,
      "index", 2231.8768157958984D,
      "addRowsIndexConstant", 2250.8768157958984D,
      "uniques", QueryRunnerTestHelper.UNIQUES_9
  );
  final Map<String, Object> rowF = ImmutableMap.<String, Object>of(
      providerDimension, "f",
      "rows", 4L,
      "index", 4875.669677734375D,
      "addRowsIndexConstant", 4880.669677734375D,
      "uniques", QueryRunnerTestHelper.UNIQUES_2
  );

  final List<Result<TopNResultValue>> expectedResults = Arrays.asList(
      new Result<TopNResultValue>(
          new DateTime("2011-04-01T00:00:00.000Z"),
          new TopNResultValue(Arrays.<Map<String, Object>>asList(rowO, rowF))
      )
  );

  TestHelper.assertExpectedResults(expectedResults, runner.run(query));
}
@Test
public void testInvertedTopNQuery()
{