mirror of https://github.com/apache/druid.git
better handle dim extraction regex cases
This commit is contained in:
parent
8bcf4bc60e
commit
2ee1defce9
|
@ -96,6 +96,6 @@ public class JavascriptDimExtractionFn implements DimExtractionFn
|
|||
@Override
|
||||
public boolean preservesOrdering()
|
||||
{
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -70,7 +70,7 @@ public class PartialDimExtractionFn implements DimExtractionFn
|
|||
@Override
|
||||
public boolean preservesOrdering()
|
||||
{
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -70,7 +70,7 @@ public class RegexDimExtractionFn implements DimExtractionFn
|
|||
@Override
|
||||
public boolean preservesOrdering()
|
||||
{
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -66,7 +66,7 @@ public class SearchQuerySpecDimExtractionFn implements DimExtractionFn
|
|||
@Override
|
||||
public boolean preservesOrdering()
|
||||
{
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -90,7 +90,7 @@ public class TimeDimExtractionFn implements DimExtractionFn
|
|||
@Override
|
||||
public boolean preservesOrdering()
|
||||
{
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -65,9 +65,11 @@ public class DimExtractionTopNAlgorithm extends BaseTopNAlgorithm<Aggregator[][]
|
|||
query,
|
||||
params.getCardinality()
|
||||
);
|
||||
if (query.getDimensionSpec().preservesOrdering()) {
|
||||
|
||||
if (!query.getDimensionSpec().preservesOrdering()) {
|
||||
return provider.build();
|
||||
}
|
||||
|
||||
return query.getTopNMetricSpec().configureOptimizer(provider).build();
|
||||
}
|
||||
|
||||
|
|
|
@ -21,6 +21,7 @@ package io.druid.query.topn;
|
|||
|
||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.google.common.collect.Ordering;
|
||||
import com.metamx.common.guava.Comparators;
|
||||
import io.druid.query.aggregation.AggregatorFactory;
|
||||
import io.druid.query.aggregation.PostAggregator;
|
||||
|
@ -63,11 +64,27 @@ public class InvertedTopNMetricSpec implements TopNMetricSpec
|
|||
|
||||
@Override
|
||||
public Comparator getComparator(
|
||||
List<AggregatorFactory> aggregatorSpecs,
|
||||
List<PostAggregator> postAggregatorSpecs
|
||||
final List<AggregatorFactory> aggregatorSpecs,
|
||||
final List<PostAggregator> postAggregatorSpecs
|
||||
)
|
||||
{
|
||||
return Comparators.inverse(delegate.getComparator(aggregatorSpecs, postAggregatorSpecs));
|
||||
return Comparators.inverse(
|
||||
new Comparator()
|
||||
{
|
||||
@Override
|
||||
public int compare(Object o1, Object o2)
|
||||
{
|
||||
// nulls last
|
||||
if (o1 == null) {
|
||||
return 1;
|
||||
}
|
||||
if (o2 == null) {
|
||||
return -1;
|
||||
}
|
||||
return delegate.getComparator(aggregatorSpecs, postAggregatorSpecs).compare(o1, o2);
|
||||
}
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -43,6 +43,13 @@ public class LexicographicTopNMetricSpec implements TopNMetricSpec
|
|||
@Override
|
||||
public int compare(String s, String s2)
|
||||
{
|
||||
// null first
|
||||
if (s == null) {
|
||||
return -1;
|
||||
}
|
||||
if (s2 == null) {
|
||||
return 1;
|
||||
}
|
||||
return UnsignedBytes.lexicographicalComparator().compare(s.getBytes(Charsets.UTF_8), s2.getBytes(Charsets.UTF_8));
|
||||
}
|
||||
};
|
||||
|
@ -54,7 +61,7 @@ public class LexicographicTopNMetricSpec implements TopNMetricSpec
|
|||
@JsonProperty("previousStop") String previousStop
|
||||
)
|
||||
{
|
||||
this.previousStop = (previousStop == null) ? "" : previousStop;
|
||||
this.previousStop = previousStop;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -98,7 +105,7 @@ public class LexicographicTopNMetricSpec implements TopNMetricSpec
|
|||
@Override
|
||||
public byte[] getCacheKey()
|
||||
{
|
||||
byte[] previousStopBytes = previousStop.getBytes(Charsets.UTF_8);
|
||||
byte[] previousStopBytes = previousStop == null ? new byte[]{} : previousStop.getBytes(Charsets.UTF_8);
|
||||
|
||||
return ByteBuffer.allocate(1 + previousStopBytes.length)
|
||||
.put(CACHE_TYPE_ID)
|
||||
|
|
|
@ -1263,6 +1263,58 @@ public class TopNQueryRunnerTest
|
|||
TestHelper.assertExpectedResults(expectedResults, runner.run(query));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testInvertedTopNLexicographicDimExtraction2()
|
||||
{
|
||||
TopNQuery query = new TopNQueryBuilder()
|
||||
.dataSource(QueryRunnerTestHelper.dataSource)
|
||||
.granularity(QueryRunnerTestHelper.allGran)
|
||||
.dimension(
|
||||
new ExtractionDimensionSpec(
|
||||
providerDimension, providerDimension, new RegexDimExtractionFn("..(.)")
|
||||
)
|
||||
)
|
||||
.metric(new InvertedTopNMetricSpec(new LexicographicTopNMetricSpec(null)))
|
||||
.threshold(4)
|
||||
.intervals(QueryRunnerTestHelper.firstToThird)
|
||||
.aggregators(QueryRunnerTestHelper.commonAggregators)
|
||||
.postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
|
||||
.build();
|
||||
|
||||
List<Result<TopNResultValue>> expectedResults = Arrays.asList(
|
||||
new Result<TopNResultValue>(
|
||||
new DateTime("2011-04-01T00:00:00.000Z"),
|
||||
new TopNResultValue(
|
||||
Arrays.<Map<String, Object>>asList(
|
||||
ImmutableMap.<String, Object>of(
|
||||
providerDimension, "t",
|
||||
"rows", 4L,
|
||||
"index", 5351.814697265625D,
|
||||
"addRowsIndexConstant", 5356.814697265625D,
|
||||
"uniques", QueryRunnerTestHelper.UNIQUES_2
|
||||
),
|
||||
ImmutableMap.<String, Object>of(
|
||||
providerDimension, "o",
|
||||
"rows", 18L,
|
||||
"index", 2231.8768157958984D,
|
||||
"addRowsIndexConstant", 2250.8768157958984D,
|
||||
"uniques", QueryRunnerTestHelper.UNIQUES_9
|
||||
),
|
||||
ImmutableMap.<String, Object>of(
|
||||
providerDimension, "f",
|
||||
"rows", 4L,
|
||||
"index", 4875.669677734375D,
|
||||
"addRowsIndexConstant", 4880.669677734375D,
|
||||
"uniques", QueryRunnerTestHelper.UNIQUES_2
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
);
|
||||
|
||||
TestHelper.assertExpectedResults(expectedResults, runner.run(query));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTopNLexicographicDimExtractionWithPreviousStop()
|
||||
{
|
||||
|
@ -1354,6 +1406,51 @@ public class TopNQueryRunnerTest
|
|||
TestHelper.assertExpectedResults(expectedResults, runner.run(query));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testInvertedTopNLexicographicDimExtractionWithPreviousStop2()
|
||||
{
|
||||
TopNQuery query = new TopNQueryBuilder()
|
||||
.dataSource(QueryRunnerTestHelper.dataSource)
|
||||
.granularity(QueryRunnerTestHelper.allGran)
|
||||
.dimension(
|
||||
new ExtractionDimensionSpec(
|
||||
providerDimension, providerDimension, new RegexDimExtractionFn("..(.)")
|
||||
)
|
||||
)
|
||||
.metric(new InvertedTopNMetricSpec(new LexicographicTopNMetricSpec("p")))
|
||||
.threshold(4)
|
||||
.intervals(QueryRunnerTestHelper.firstToThird)
|
||||
.aggregators(QueryRunnerTestHelper.commonAggregators)
|
||||
.postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
|
||||
.build();
|
||||
|
||||
List<Result<TopNResultValue>> expectedResults = Arrays.asList(
|
||||
new Result<TopNResultValue>(
|
||||
new DateTime("2011-04-01T00:00:00.000Z"),
|
||||
new TopNResultValue(
|
||||
Arrays.<Map<String, Object>>asList(
|
||||
ImmutableMap.<String, Object>of(
|
||||
providerDimension, "o",
|
||||
"rows", 18L,
|
||||
"index", 2231.8768157958984D,
|
||||
"addRowsIndexConstant", 2250.8768157958984D,
|
||||
"uniques", QueryRunnerTestHelper.UNIQUES_9
|
||||
),
|
||||
ImmutableMap.<String, Object>of(
|
||||
providerDimension, "f",
|
||||
"rows", 4L,
|
||||
"index", 4875.669677734375D,
|
||||
"addRowsIndexConstant", 4880.669677734375D,
|
||||
"uniques", QueryRunnerTestHelper.UNIQUES_2
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
);
|
||||
|
||||
TestHelper.assertExpectedResults(expectedResults, runner.run(query));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testInvertedTopNQuery()
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue