From be341bf4e31427bccc5a62f2ea87760bd873b90f Mon Sep 17 00:00:00 2001 From: "navis.ryu" Date: Wed, 13 Jan 2016 21:43:22 +0900 Subject: [PATCH] Support cardinality for search query (Fix for #2260) --- docs/content/querying/searchquery.md | 1 + .../src/main/java/io/druid/query/Druids.java | 10 +- .../io/druid/query/search/SearchBinaryFn.java | 64 +++- .../search/SearchQueryQueryToolChest.java | 8 +- .../druid/query/search/SearchQueryRunner.java | 62 ++-- .../search/LexicographicSearchSortSpec.java | 6 + .../druid/query/search/search/SearchHit.java | 17 +- .../query/search/search/SearchSortSpec.java | 4 +- .../search/search/StrlenSearchSortSpec.java | 6 + .../io/druid/query/QueryRunnerTestHelper.java | 2 + .../query/search/SearchBinaryFnTest.java | 29 +- .../query/search/SearchQueryRunnerTest.java | 334 ++++++++++++------ 12 files changed, 380 insertions(+), 163 deletions(-) diff --git a/docs/content/querying/searchquery.md b/docs/content/querying/searchquery.md index c11243d7483..e7d84d1f556 100644 --- a/docs/content/querying/searchquery.md +++ b/docs/content/querying/searchquery.md @@ -39,6 +39,7 @@ There are several main parts to a search query: |searchDimensions|The dimensions to run the search over. Excluding this means the search is run over all dimensions.|no| |query|See [SearchQuerySpec](../querying/searchqueryspec.html).|yes| |sort|An object specifying how the results of the search should be sorted. Two possible types here are "lexicographic" (the default sort) and "strlen".|no| +|computeCount|Include appearance count of each value in result. False by default.|no| |context|See [Context](../querying/query-context.html)|no| The format of the result is: diff --git a/processing/src/main/java/io/druid/query/Druids.java b/processing/src/main/java/io/druid/query/Druids.java index fe6c096160e..920c8da8635 100644 --- a/processing/src/main/java/io/druid/query/Druids.java +++ b/processing/src/main/java/io/druid/query/Druids.java @@ -44,6 +44,7 @@ import io.druid.query.search.search.FragmentSearchQuerySpec; import io.druid.query.search.search.InsensitiveContainsSearchQuerySpec; import io.druid.query.search.search.SearchQuery; import io.druid.query.search.search.SearchQuerySpec; +import io.druid.query.search.search.SearchSortSpec; import io.druid.query.select.PagingSpec; import io.druid.query.select.SelectQuery; import io.druid.query.spec.LegacySegmentSpec; @@ -547,6 +548,7 @@ public class Druids private QuerySegmentSpec querySegmentSpec; private List dimensions; private SearchQuerySpec querySpec; + private SearchSortSpec sortSpec; private Map context; public SearchQueryBuilder() @@ -571,7 +573,7 @@ public class Druids querySegmentSpec, dimensions, querySpec, - null, + sortSpec, context ); } @@ -735,6 +737,12 @@ public class Druids return fragments(q, false); } + public SearchQueryBuilder sortSpec(SearchSortSpec sortSpec) + { + this.sortSpec = sortSpec; + return this; + } + public SearchQueryBuilder fragments(List q, boolean caseSensitive) { Preconditions.checkNotNull(q, "no value"); diff --git a/processing/src/main/java/io/druid/query/search/SearchBinaryFn.java b/processing/src/main/java/io/druid/query/search/SearchBinaryFn.java index 1e3ae2c4357..beca47bbeed 100644 --- a/processing/src/main/java/io/druid/query/search/SearchBinaryFn.java +++ b/processing/src/main/java/io/druid/query/search/SearchBinaryFn.java @@ -21,15 +21,16 @@ package io.druid.query.search; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; -import com.google.common.collect.Sets; import 
com.metamx.common.guava.nary.BinaryFn; import io.druid.granularity.AllGranularity; import io.druid.granularity.QueryGranularity; import io.druid.query.Result; import io.druid.query.search.search.SearchHit; import io.druid.query.search.search.SearchSortSpec; +import org.joda.time.DateTime; -import java.util.TreeSet; +import java.util.Arrays; +import java.util.List; /** */ @@ -62,24 +63,53 @@ public class SearchBinaryFn return arg1; } + final int limit = gran instanceof AllGranularity ? this.limit : -1; + SearchResultValue arg1Vals = arg1.getValue(); SearchResultValue arg2Vals = arg2.getValue(); - TreeSet results = Sets.newTreeSet(searchSortSpec.getComparator()); - results.addAll(Lists.newArrayList(arg1Vals)); - results.addAll(Lists.newArrayList(arg2Vals)); + Iterable merged = Iterables.mergeSorted( + Arrays.asList(arg1Vals, arg2Vals), + searchSortSpec.getComparator() + ); - return (gran instanceof AllGranularity) - ? new Result( - arg1.getTimestamp(), new SearchResultValue( - Lists.newArrayList( - Iterables.limit(results, limit) - ) - ) - ) - : new Result( - gran.toDateTime(gran.truncate(arg1.getTimestamp().getMillis())), - new SearchResultValue(Lists.newArrayList(results)) - ); + int maxSize = arg1Vals.getValue().size() + arg2Vals.getValue().size(); + if (limit > 0) { + maxSize = Math.min(limit, maxSize); + } + List results = Lists.newArrayListWithExpectedSize(maxSize); + + SearchHit prev = null; + for (SearchHit searchHit : merged) { + if (prev == null) { + prev = searchHit; + continue; + } + if (prev.equals(searchHit)) { + if (prev.getCount() != null) { + prev = new SearchHit( + prev.getDimension(), + prev.getValue(), + prev.getCount() + searchHit.getCount() + ); + } + } else { + results.add(prev); + prev = searchHit; + if (limit > 0 && results.size() >= limit) { + break; + } + } + } + + if (prev != null && (limit < 0 || results.size() < limit)) { + results.add(prev); + } + + final DateTime timestamp = gran instanceof AllGranularity + ? arg1.getTimestamp() + : gran.toDateTime(gran.truncate(arg1.getTimestamp().getMillis())); + + return new Result(timestamp, new SearchResultValue(results)); } } diff --git a/processing/src/main/java/io/druid/query/search/SearchQueryQueryToolChest.java b/processing/src/main/java/io/druid/query/search/SearchQueryQueryToolChest.java index 86a86a3a1bf..a196c085232 100644 --- a/processing/src/main/java/io/druid/query/search/SearchQueryQueryToolChest.java +++ b/processing/src/main/java/io/druid/query/search/SearchQueryQueryToolChest.java @@ -157,16 +157,20 @@ public class SearchQueryQueryToolChest extends QueryToolChest> final QueryableIndex index = segment.asQueryableIndex(); if (index != null) { - final TreeSet retVal = Sets.newTreeSet(query.getSort().getComparator()); + final TreeMap retVal = Maps.newTreeMap(query.getSort().getComparator()); Iterable dimsToSearch; if (dimensions == null || dimensions.isEmpty()) { @@ -105,13 +106,8 @@ public class SearchQueryRunner implements QueryRunner> final BitmapFactory bitmapFactory = index.getBitmapFactoryForDimensions(); - final ImmutableBitmap baseFilter; - if (filter == null) { - baseFilter = bitmapFactory.complement(bitmapFactory.makeEmptyImmutableBitmap(), index.getNumRows()); - } else { - final ColumnSelectorBitmapIndexSelector selector = new ColumnSelectorBitmapIndexSelector(bitmapFactory, index); - baseFilter = filter.getBitmapIndex(selector); - } + final ImmutableBitmap baseFilter = + filter == null ? 
null : filter.getBitmapIndex(new ColumnSelectorBitmapIndexSelector(bitmapFactory, index)); for (DimensionSpec dimension : dimsToSearch) { final Column column = index.getColumn(dimension.getDimension()); @@ -127,9 +123,19 @@ public class SearchQueryRunner implements QueryRunner> if (bitmapIndex != null) { for (int i = 0; i < bitmapIndex.getCardinality(); ++i) { String dimVal = Strings.nullToEmpty(extractionFn.apply(bitmapIndex.getValue(i))); - if (searchQuerySpec.accept(dimVal) && - bitmapFactory.intersection(Arrays.asList(baseFilter, bitmapIndex.getBitmap(i))).size() > 0) { - retVal.add(new SearchHit(dimension.getOutputName(), dimVal)); + if (!searchQuerySpec.accept(dimVal)) { + continue; + } + ImmutableBitmap bitmap = bitmapIndex.getBitmap(i); + if (baseFilter != null) { + bitmap = bitmapFactory.intersection(Arrays.asList(baseFilter, bitmap)); + } + if (bitmap.size() > 0) { + MutableInt counter = new MutableInt(bitmap.size()); + MutableInt prev = retVal.put(new SearchHit(dimension.getOutputName(), dimVal), counter); + if (prev != null) { + counter.add(prev.intValue()); + } if (retVal.size() >= limit) { return makeReturnResult(limit, retVal); } @@ -161,12 +167,12 @@ public class SearchQueryRunner implements QueryRunner> final Sequence cursors = adapter.makeCursors(filter, segment.getDataInterval(), QueryGranularity.ALL, descending); - final TreeSet retVal = cursors.accumulate( - Sets.newTreeSet(query.getSort().getComparator()), - new Accumulator, Cursor>() + final TreeMap retVal = cursors.accumulate( + Maps.newTreeMap(query.getSort().getComparator()), + new Accumulator, Cursor>() { @Override - public TreeSet accumulate(TreeSet set, Cursor cursor) + public TreeMap accumulate(TreeMap set, Cursor cursor) { if (set.size() >= limit) { return set; @@ -189,7 +195,11 @@ public class SearchQueryRunner implements QueryRunner> for (int i = 0; i < vals.size(); ++i) { final String dimVal = selector.lookupName(vals.get(i)); if (searchQuerySpec.accept(dimVal)) { - set.add(new SearchHit(entry.getKey(), dimVal)); + MutableInt counter = new MutableInt(1); + MutableInt prev = set.put(new SearchHit(entry.getKey(), dimVal), counter); + if (prev != null) { + counter.add(prev.intValue()); + } if (set.size() >= limit) { return set; } @@ -209,14 +219,26 @@ public class SearchQueryRunner implements QueryRunner> return makeReturnResult(limit, retVal); } - private Sequence> makeReturnResult(int limit, TreeSet retVal) + private Sequence> makeReturnResult( + int limit, TreeMap retVal) { + Iterable source = Iterables.transform( + retVal.entrySet(), new Function, SearchHit>() + { + @Override + public SearchHit apply(Map.Entry input) + { + SearchHit hit = input.getKey(); + return new SearchHit(hit.getDimension(), hit.getValue(), input.getValue().intValue()); + } + } + ); return Sequences.simple( ImmutableList.of( new Result( segment.getDataInterval().getStart(), new SearchResultValue( - Lists.newArrayList(new FunctionalIterable(retVal).limit(limit)) + Lists.newArrayList(new FunctionalIterable(source).limit(limit)) ) ) ) diff --git a/processing/src/main/java/io/druid/query/search/search/LexicographicSearchSortSpec.java b/processing/src/main/java/io/druid/query/search/search/LexicographicSearchSortSpec.java index bde4bddbc95..7fa6b3aecf9 100644 --- a/processing/src/main/java/io/druid/query/search/search/LexicographicSearchSortSpec.java +++ b/processing/src/main/java/io/druid/query/search/search/LexicographicSearchSortSpec.java @@ -50,6 +50,12 @@ public class LexicographicSearchSortSpec implements SearchSortSpec }; } + 
@Override + public byte[] getCacheKey() + { + return toString().getBytes(); + } + public String toString() { return "lexicographicSort"; diff --git a/processing/src/main/java/io/druid/query/search/search/SearchHit.java b/processing/src/main/java/io/druid/query/search/search/SearchHit.java index 4904c6dd107..faf4c18b9a2 100644 --- a/processing/src/main/java/io/druid/query/search/search/SearchHit.java +++ b/processing/src/main/java/io/druid/query/search/search/SearchHit.java @@ -30,15 +30,23 @@ public class SearchHit implements Comparable { private final String dimension; private final String value; + private final Integer count; @JsonCreator public SearchHit( @JsonProperty("dimension") String dimension, - @JsonProperty("value") String value + @JsonProperty("value") String value, + @JsonProperty("count") Integer count ) { this.dimension = checkNotNull(dimension); this.value = checkNotNull(value); + this.count = count; + } + + public SearchHit(String dimension, String value) + { + this(dimension, value, null); } @JsonProperty @@ -53,6 +61,12 @@ public class SearchHit implements Comparable return value; } + @JsonProperty + public Integer getCount() + { + return count; + } + @Override public int compareTo(SearchHit o) { @@ -99,6 +113,7 @@ public class SearchHit implements Comparable return "Hit{" + "dimension='" + dimension + '\'' + ", value='" + value + '\'' + + (count != null ? ", count='" + count + '\'' : "") + '}'; } } diff --git a/processing/src/main/java/io/druid/query/search/search/SearchSortSpec.java b/processing/src/main/java/io/druid/query/search/search/SearchSortSpec.java index c23fc1cf5d6..c5f426c177c 100644 --- a/processing/src/main/java/io/druid/query/search/search/SearchSortSpec.java +++ b/processing/src/main/java/io/druid/query/search/search/SearchSortSpec.java @@ -33,5 +33,7 @@ import java.util.Comparator; }) public interface SearchSortSpec { - public Comparator getComparator(); + Comparator getComparator(); + + byte[] getCacheKey(); } diff --git a/processing/src/main/java/io/druid/query/search/search/StrlenSearchSortSpec.java b/processing/src/main/java/io/druid/query/search/search/StrlenSearchSortSpec.java index 84d132a6f7c..4126b46cabf 100644 --- a/processing/src/main/java/io/druid/query/search/search/StrlenSearchSortSpec.java +++ b/processing/src/main/java/io/druid/query/search/search/StrlenSearchSortSpec.java @@ -52,6 +52,12 @@ public class StrlenSearchSortSpec implements SearchSortSpec }; } + @Override + public byte[] getCacheKey() + { + return toString().getBytes(); + } + public String toString() { return "stringLengthSort"; diff --git a/processing/src/test/java/io/druid/query/QueryRunnerTestHelper.java b/processing/src/test/java/io/druid/query/QueryRunnerTestHelper.java index 8f1d70f49fa..53f0d2c7a9d 100644 --- a/processing/src/test/java/io/druid/query/QueryRunnerTestHelper.java +++ b/processing/src/test/java/io/druid/query/QueryRunnerTestHelper.java @@ -102,6 +102,8 @@ public class QueryRunnerTestHelper public static final String qualityDimension = "quality"; public static final String placementDimension = "placement"; public static final String placementishDimension = "placementish"; + public static final String partialNullDimension = "partial_null_column"; + public static final List dimensions = Lists.newArrayList( marketDimension, qualityDimension, diff --git a/processing/src/test/java/io/druid/query/search/SearchBinaryFnTest.java b/processing/src/test/java/io/druid/query/search/SearchBinaryFnTest.java index bb8d262c532..605bf763a00 100644 --- 
a/processing/src/test/java/io/druid/query/search/SearchBinaryFnTest.java +++ b/processing/src/test/java/io/druid/query/search/SearchBinaryFnTest.java @@ -30,6 +30,8 @@ import org.junit.Assert; import org.junit.Test; import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; import java.util.Iterator; import java.util.List; @@ -220,22 +222,25 @@ public class SearchBinaryFnTest @Test public void testStrlenMerge() { + StrlenSearchSortSpec searchSortSpec = new StrlenSearchSortSpec(); + Comparator c = searchSortSpec.getComparator(); + Result r1 = new Result( currTime, - new SearchResultValue(toHits("blah:thisislong")) + new SearchResultValue(toHits(c, "blah:thisislong")) ); Result r2 = new Result( currTime, - new SearchResultValue(toHits("blah:short")) + new SearchResultValue(toHits(c, "blah:short")) ); Result expected = new Result( currTime, - new SearchResultValue(toHits("blah:short", "blah:thisislong")) + new SearchResultValue(toHits(c, "blah:short", "blah:thisislong")) ); - Result actual = new SearchBinaryFn(new StrlenSearchSortSpec(), QueryGranularity.ALL, Integer.MAX_VALUE).apply(r1, r2); + Result actual = new SearchBinaryFn(searchSortSpec, QueryGranularity.ALL, Integer.MAX_VALUE).apply(r1, r2); Assert.assertEquals(expected.getTimestamp(), actual.getTimestamp()); assertSearchMergeResult(expected.getValue(), actual.getValue()); } @@ -243,33 +248,37 @@ public class SearchBinaryFnTest @Test public void testStrlenMerge2() { + StrlenSearchSortSpec searchSortSpec = new StrlenSearchSortSpec(); + Comparator c = searchSortSpec.getComparator(); + Result r1 = new Result( currTime, - new SearchResultValue(toHits("blah:thisislong", "blah:short", "blah2:thisislong")) + new SearchResultValue(toHits(c, "blah:short", "blah:thisislong", "blah2:thisislong")) ); Result r2 = new Result( currTime, - new SearchResultValue(toHits("blah:short", "blah2:thisislong")) + new SearchResultValue(toHits(c, "blah:short", "blah2:thisislong")) ); Result expected = new Result( currTime, - new SearchResultValue(toHits("blah:short", "blah:thisislong", "blah2:thisislong")) + new SearchResultValue(toHits(c, "blah:short", "blah:thisislong", "blah2:thisislong")) ); - Result actual = new SearchBinaryFn(new StrlenSearchSortSpec(), QueryGranularity.ALL, Integer.MAX_VALUE).apply(r1, r2); + Result actual = new SearchBinaryFn(searchSortSpec, QueryGranularity.ALL, Integer.MAX_VALUE).apply(r1, r2); Assert.assertEquals(expected.getTimestamp(), actual.getTimestamp()); - System.out.println("[SearchBinaryFnTest/testStrlenMerge2] " + actual.getValue()); assertSearchMergeResult(expected.getValue(), actual.getValue()); } - private List toHits(String... hits) { + // merge function expects input to be sorted as per comparator + private List toHits(Comparator comparator, String... 
hits) { List result = new ArrayList<>(); for (String hit : hits) { int index = hit.indexOf(':'); result.add(new SearchHit(hit.substring(0, index), hit.substring(index + 1))); } + Collections.sort(result, comparator); return result; } diff --git a/processing/src/test/java/io/druid/query/search/SearchQueryRunnerTest.java b/processing/src/test/java/io/druid/query/search/SearchQueryRunnerTest.java index 23da1561577..78af49e4723 100644 --- a/processing/src/test/java/io/druid/query/search/SearchQueryRunnerTest.java +++ b/processing/src/test/java/io/druid/query/search/SearchQueryRunnerTest.java @@ -19,25 +19,34 @@ package io.druid.query.search; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.common.collect.Sets; +import com.metamx.common.guava.Sequence; import com.metamx.common.guava.Sequences; +import com.metamx.common.logger.Logger; import io.druid.query.Druids; +import io.druid.query.Query; import io.druid.query.QueryRunner; import io.druid.query.QueryRunnerTestHelper; import io.druid.query.Result; import io.druid.query.dimension.ExtractionDimensionSpec; import io.druid.query.extraction.LookupExtractionFn; import io.druid.query.extraction.MapLookupExtractor; +import io.druid.query.filter.AndDimFilter; import io.druid.query.filter.DimFilter; import io.druid.query.filter.ExtractionDimFilter; +import io.druid.query.filter.RegexDimFilter; +import io.druid.query.filter.SelectorDimFilter; import io.druid.query.search.search.FragmentSearchQuerySpec; import io.druid.query.search.search.SearchHit; import io.druid.query.search.search.SearchQuery; import io.druid.query.search.search.SearchQueryConfig; +import io.druid.query.search.search.StrlenSearchSortSpec; +import io.druid.query.spec.MultipleIntervalSegmentSpec; +import io.druid.segment.TestHelper; import org.joda.time.DateTime; +import org.joda.time.Interval; import org.junit.Assert; import org.junit.Test; import org.junit.runner.RunWith; @@ -45,27 +54,27 @@ import org.junit.runners.Parameterized; import java.io.IOException; import java.util.Arrays; -import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Map; -import java.util.Set; /** */ @RunWith(Parameterized.class) public class SearchQueryRunnerTest { + private static final Logger LOG = new Logger(SearchQueryRunnerTest.class); + private static final SearchQueryQueryToolChest toolChest = new SearchQueryQueryToolChest( + new SearchQueryConfig(), + QueryRunnerTestHelper.NoopIntervalChunkingQueryRunnerDecorator() + ); + @Parameterized.Parameters public static Iterable constructorFeeder() throws IOException { return QueryRunnerTestHelper.transformToConstructionFeeder( QueryRunnerTestHelper.makeQueryRunners( new SearchQueryRunnerFactory( - new SearchQueryQueryToolChest( - new SearchQueryConfig(), - QueryRunnerTestHelper.NoopIntervalChunkingQueryRunnerDecorator() - ), + toolChest, QueryRunnerTestHelper.NOOP_QUERYWATCHER ) ) @@ -81,6 +90,23 @@ public class SearchQueryRunnerTest this.runner = runner; } + @Test + public void testSearchHitSerDe() throws Exception + { + for (SearchHit hit : Arrays.asList(new SearchHit("dim1", "val1"), new SearchHit("dim2", "val2", 3))) { + SearchHit read = TestHelper.JSON_MAPPER.readValue( + TestHelper.JSON_MAPPER.writeValueAsString(hit), + SearchHit.class + ); + Assert.assertEquals(hit, read); + if (hit.getCount() == null) { + Assert.assertNull(read.getCount()); + } else { + 
Assert.assertEquals(hit.getCount(), read.getCount()); + } + } + } + @Test public void testSearch() { @@ -91,15 +117,60 @@ public class SearchQueryRunnerTest .query("a") .build(); - Map> expectedResults = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); - expectedResults.put( - QueryRunnerTestHelper.qualityDimension, - Sets.newHashSet("automotive", "mezzanine", "travel", "health", "entertainment") + List expectedHits = Lists.newLinkedList(); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "automotive", 93)); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "mezzanine", 279)); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "travel", 93)); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "health", 93)); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "entertainment", 93)); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.marketDimension, "total_market", 186)); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.placementishDimension, "a", 93)); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.partialNullDimension, "value", 186)); + + checkSearchQuery(searchQuery, expectedHits); + } + + @Test + public void testSearchWithCardinality() + { + final SearchQuery searchQuery = Druids.newSearchQueryBuilder() + .dataSource(QueryRunnerTestHelper.dataSource) + .granularity(QueryRunnerTestHelper.allGran) + .intervals(QueryRunnerTestHelper.fullOnInterval) + .query("a") + .build(); + + // double the value + QueryRunner mergedRunner = toolChest.mergeResults( + new QueryRunner>() + { + @Override + public Sequence> run( + Query> query, Map responseContext + ) + { + final Query> query1 = searchQuery.withQuerySegmentSpec( + new MultipleIntervalSegmentSpec(Lists.newArrayList(new Interval("2011-01-12/2011-02-28"))) + ); + final Query> query2 = searchQuery.withQuerySegmentSpec( + new MultipleIntervalSegmentSpec(Lists.newArrayList(new Interval("2011-03-01/2011-04-15"))) + ); + return Sequences.concat(runner.run(query1, responseContext), runner.run(query2, responseContext)); + } + } ); - expectedResults.put(QueryRunnerTestHelper.marketDimension, Sets.newHashSet("total_market")); - expectedResults.put(QueryRunnerTestHelper.placementishDimension, Sets.newHashSet("a")); - expectedResults.put("partial_null_column", Sets.newHashSet("value")); - checkSearchQuery(searchQuery, expectedResults); + + List expectedHits = Lists.newLinkedList(); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "automotive", 186)); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "mezzanine", 558)); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "travel", 186)); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "health", 186)); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "entertainment", 186)); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.marketDimension, "total_market", 372)); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.placementishDimension, "a", 186)); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.partialNullDimension, "value", 372)); + + checkSearchQuery(searchQuery, mergedRunner, expectedHits); } @Test @@ -118,11 +189,37 @@ public class SearchQueryRunnerTest .query("e") .build(); - Map> expectedResults = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); - expectedResults.put(QueryRunnerTestHelper.placementDimension, Sets.newHashSet("preferred")); - 
expectedResults.put(QueryRunnerTestHelper.placementishDimension, Sets.newHashSet("e", "preferred")); + List expectedHits = Lists.newLinkedList(); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.placementDimension, "preferred", 1209)); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.placementishDimension, "e", 93)); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.placementishDimension, "preferred", 1209)); - checkSearchQuery(searchQuery, expectedResults); + checkSearchQuery(searchQuery, expectedHits); + } + + @Test + public void testSearchSameValueInMultiDims2() + { + SearchQuery searchQuery = Druids.newSearchQueryBuilder() + .dataSource(QueryRunnerTestHelper.dataSource) + .granularity(QueryRunnerTestHelper.allGran) + .intervals(QueryRunnerTestHelper.fullOnInterval) + .dimensions( + Arrays.asList( + QueryRunnerTestHelper.placementDimension, + QueryRunnerTestHelper.placementishDimension + ) + ) + .sortSpec(new StrlenSearchSortSpec()) + .query("e") + .build(); + + List expectedHits = Lists.newLinkedList(); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.placementishDimension, "e", 93)); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.placementDimension, "preferred", 1209)); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.placementishDimension, "preferred", 1209)); + + checkSearchQuery(searchQuery, expectedHits); } @Test @@ -135,23 +232,21 @@ public class SearchQueryRunnerTest .query(new FragmentSearchQuerySpec(Arrays.asList("auto", "ve"))) .build(); - Map> expectedResults = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); - expectedResults.put(QueryRunnerTestHelper.qualityDimension, Sets.newHashSet("automotive")); + List expectedHits = Lists.newLinkedList(); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "automotive", 93)); - checkSearchQuery(searchQuery, expectedResults); + checkSearchQuery(searchQuery, expectedHits); } @Test public void testSearchWithDimensionQuality() { - Map> expectedResults = new HashMap>(); - expectedResults.put( - QueryRunnerTestHelper.qualityDimension, new HashSet( - Arrays.asList( - "automotive", "mezzanine", "travel", "health", "entertainment" - ) - ) - ); + List expectedHits = Lists.newLinkedList(); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "automotive", 93)); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "mezzanine", 279)); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "travel", 93)); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "health", 93)); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "entertainment", 93)); checkSearchQuery( Druids.newSearchQueryBuilder() @@ -161,15 +256,15 @@ public class SearchQueryRunnerTest .intervals(QueryRunnerTestHelper.fullOnInterval) .query("a") .build(), - expectedResults + expectedHits ); } @Test public void testSearchWithDimensionProvider() { - Map> expectedResults = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); - expectedResults.put(QueryRunnerTestHelper.marketDimension, new HashSet(Arrays.asList("total_market"))); + List expectedHits = Lists.newLinkedList(); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.marketDimension, "total_market", 186)); checkSearchQuery( Druids.newSearchQueryBuilder() @@ -179,28 +274,20 @@ public class SearchQueryRunnerTest .intervals(QueryRunnerTestHelper.fullOnInterval) .query("a") .build(), - expectedResults + expectedHits ); } @Test public void testSearchWithDimensionsQualityAndProvider() 
{ - Map> expectedResults = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); - expectedResults.putAll( - ImmutableMap.>of( - QueryRunnerTestHelper.qualityDimension, - new HashSet( - Arrays.asList( - "automotive", "mezzanine", "travel", "health", "entertainment" - ) - ), - QueryRunnerTestHelper.marketDimension, - new HashSet( - Arrays.asList("total_market") - ) - ) - ); + List expectedHits = Lists.newLinkedList(); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "automotive", 93)); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "mezzanine", 279)); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "travel", 93)); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "health", 93)); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "entertainment", 93)); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.marketDimension, "total_market", 186)); checkSearchQuery( Druids.newSearchQueryBuilder() @@ -215,15 +302,15 @@ public class SearchQueryRunnerTest .intervals(QueryRunnerTestHelper.fullOnInterval) .query("a") .build(), - expectedResults + expectedHits ); } @Test public void testSearchWithDimensionsPlacementAndProvider() { - Map> expectedResults = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); - expectedResults.put(QueryRunnerTestHelper.marketDimension, new HashSet(Arrays.asList("total_market"))); + List expectedHits = Lists.newLinkedList(); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.marketDimension, "total_market", 186)); checkSearchQuery( Druids.newSearchQueryBuilder() @@ -238,7 +325,7 @@ public class SearchQueryRunnerTest .intervals(QueryRunnerTestHelper.fullOnInterval) .query("mark") .build(), - expectedResults + expectedHits ); } @@ -247,25 +334,29 @@ public class SearchQueryRunnerTest public void testSearchWithExtractionFilter1() { final String automotiveSnowman = "automotive☃"; - Map> expectedResults = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); - expectedResults.put( - QueryRunnerTestHelper.qualityDimension, new HashSet(Arrays.asList(automotiveSnowman)) - ); - + List expectedHits = Lists.newLinkedList(); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, automotiveSnowman, 93)); final LookupExtractionFn lookupExtractionFn = new LookupExtractionFn( - new MapLookupExtractor(ImmutableMap.of("automotive", automotiveSnowman), false), - true, - null, - true, - false + new MapLookupExtractor(ImmutableMap.of("automotive", automotiveSnowman), false), + true, + null, + true, + false ); checkSearchQuery( Druids.newSearchQueryBuilder() .dataSource(QueryRunnerTestHelper.dataSource) .granularity(QueryRunnerTestHelper.allGran) - .filters(new ExtractionDimFilter(QueryRunnerTestHelper.qualityDimension, automotiveSnowman, lookupExtractionFn, null)) + .filters( + new ExtractionDimFilter( + QueryRunnerTestHelper.qualityDimension, + automotiveSnowman, + lookupExtractionFn, + null + ) + ) .intervals(QueryRunnerTestHelper.fullOnInterval) .dimensions( new ExtractionDimensionSpec( @@ -277,36 +368,38 @@ public class SearchQueryRunnerTest ) .query("☃") .build(), - expectedResults + expectedHits ); } @Test public void testSearchWithSingleFilter1() { - Map> expectedResults = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); - expectedResults.put( - QueryRunnerTestHelper.qualityDimension, new HashSet(Arrays.asList("automotive")) - ); + List expectedHits = Lists.newLinkedList(); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "mezzanine", 
93)); checkSearchQuery( Druids.newSearchQueryBuilder() .dataSource(QueryRunnerTestHelper.dataSource) .granularity(QueryRunnerTestHelper.allGran) - .filters(QueryRunnerTestHelper.qualityDimension, "automotive") + .filters( + new AndDimFilter( + Arrays.asList( + new SelectorDimFilter(QueryRunnerTestHelper.marketDimension, "total_market"), + new SelectorDimFilter(QueryRunnerTestHelper.qualityDimension, "mezzanine")))) .intervals(QueryRunnerTestHelper.fullOnInterval) .dimensions(QueryRunnerTestHelper.qualityDimension) .query("a") .build(), - expectedResults + expectedHits ); } @Test public void testSearchWithSingleFilter2() { - Map> expectedResults = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); - expectedResults.put(QueryRunnerTestHelper.marketDimension, new HashSet(Arrays.asList("total_market"))); + List expectedHits = Lists.newLinkedList(); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.marketDimension, "total_market", 186)); checkSearchQuery( Druids.newSearchQueryBuilder() @@ -317,15 +410,15 @@ public class SearchQueryRunnerTest .dimensions(QueryRunnerTestHelper.marketDimension) .query("a") .build(), - expectedResults + expectedHits ); } @Test public void testSearchMultiAndFilter() { - Map> expectedResults = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); - expectedResults.put(QueryRunnerTestHelper.qualityDimension, new HashSet(Arrays.asList("automotive"))); + List expectedHits = Lists.newLinkedList(); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "automotive", 93)); DimFilter filter = Druids.newAndDimFilterBuilder() .fields( @@ -351,15 +444,15 @@ public class SearchQueryRunnerTest .intervals(QueryRunnerTestHelper.fullOnInterval) .query("a") .build(), - expectedResults + expectedHits ); } @Test public void testSearchWithMultiOrFilter() { - Map> expectedResults = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); - expectedResults.put(QueryRunnerTestHelper.qualityDimension, new HashSet(Arrays.asList("automotive"))); + List expectedHits = Lists.newLinkedList(); + expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "automotive", 93)); DimFilter filter = Druids.newOrDimFilterBuilder() .fields( @@ -385,14 +478,14 @@ public class SearchQueryRunnerTest .intervals(QueryRunnerTestHelper.fullOnInterval) .query("a") .build(), - expectedResults + expectedHits ); } @Test public void testSearchWithEmptyResults() { - Map> expectedResults = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); + List expectedHits = Lists.newLinkedList(); checkSearchQuery( Druids.newSearchQueryBuilder() @@ -401,14 +494,14 @@ public class SearchQueryRunnerTest .intervals(QueryRunnerTestHelper.fullOnInterval) .query("abcd123") .build(), - expectedResults + expectedHits ); } @Test public void testSearchWithFilterEmptyResults() { - Map> expectedResults = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); + List expectedHits = Lists.newLinkedList(); DimFilter filter = Druids.newAndDimFilterBuilder() .fields( @@ -433,7 +526,7 @@ public class SearchQueryRunnerTest .intervals(QueryRunnerTestHelper.fullOnInterval) .query("a") .build(), - expectedResults + expectedHits ); } @@ -441,7 +534,7 @@ public class SearchQueryRunnerTest @Test public void testSearchNonExistingDimension() { - Map> expectedResults = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); + List expectedHits = Lists.newLinkedList(); checkSearchQuery( Druids.newSearchQueryBuilder() @@ -451,45 +544,64 @@ public class SearchQueryRunnerTest .dimensions("does_not_exist") .query("a") .build(), - expectedResults + expectedHits ); } - 
private void checkSearchQuery(SearchQuery searchQuery, Map> expectedResults) + private void checkSearchQuery(Query searchQuery, List expectedResults) + { + checkSearchQuery(searchQuery, runner, expectedResults); + } + + private void checkSearchQuery(Query searchQuery, QueryRunner runner, List expectedResults) { - HashMap context = new HashMap(); Iterable> results = Sequences.toList( - runner.run(searchQuery, context), + runner.run(searchQuery, ImmutableMap.of()), Lists.>newArrayList() ); - + List copy = ImmutableList.copyOf(expectedResults); for (Result result : results) { Assert.assertEquals(new DateTime("2011-01-12T00:00:00.000Z"), result.getTimestamp()); Assert.assertTrue(result.getValue() instanceof Iterable); Iterable resultValues = result.getValue(); for (SearchHit resultValue : resultValues) { - String dimension = resultValue.getDimension(); - String theValue = resultValue.getValue(); - Assert.assertTrue( - String.format("Result had unknown dimension[%s]", dimension), - expectedResults.containsKey(dimension) - ); - - Set expectedSet = expectedResults.get(dimension); - Assert.assertTrue( - String.format("Couldn't remove dim[%s], value[%s]", dimension, theValue), expectedSet.remove(theValue) - ); + int index = expectedResults.indexOf(resultValue); + if (index < 0) { + fail( + copy, results, + "No result found containing " + resultValue.getDimension() + " and " + resultValue.getValue() + ); + } + SearchHit expected = expectedResults.remove(index); + if (!resultValue.toString().equals(expected.toString())) { + fail( + copy, results, + "Invalid count for " + resultValue + ".. which was expected to be " + expected.getCount() + ); + } } } - - for (Map.Entry> entry : expectedResults.entrySet()) { - Assert.assertTrue( - String.format( - "Dimension[%s] should have had everything removed, still has[%s]", entry.getKey(), entry.getValue() - ), - entry.getValue().isEmpty() - ); + if (!expectedResults.isEmpty()) { + fail(copy, results, "Some expected results are not shown: " + expectedResults); } } + + private void fail( + List expectedResults, + Iterable> results, String errorMsg + ) + { + LOG.info("Expected.."); + for (SearchHit expected : expectedResults) { + LOG.info(expected.toString()); + } + LOG.info("Result.."); + for (Result r : results) { + for (SearchHit v : r.getValue()) { + LOG.info(v.toString()); + } + } + Assert.fail(errorMsg); + } }
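
Reviewer sketch (not part of the patch, for discussion only): the core of this change is the count-summing merge that SearchBinaryFn.apply now performs over two per-segment result lists that are already sorted by the query's SearchSortSpec. Below is a minimal, self-contained Java (16+) approximation of that loop; the Hit record, the collapse() helper and the class name CountMergeSketch are illustrative stand-ins rather than Druid classes, and the plain list sort stands in for Guava's Iterables.mergeSorted. The counts (93, 186, 279) follow the expectations used in SearchQueryRunnerTest.

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

public class CountMergeSketch
{
  // Illustrative stand-in for SearchHit: dimension/value plus an optional appearance count.
  record Hit(String dimension, String value, Integer count) {}

  /**
   * Collapses a stream of hits that is already sorted by the query's sort spec
   * (e.g. the output of a sorted merge over two per-segment results), summing the
   * counts of equal hits and honouring an optional limit (limit <= 0 means
   * unlimited). This mirrors the shape of the new loop in SearchBinaryFn.apply.
   */
  static List<Hit> collapse(Iterable<Hit> sortedHits, int limit)
  {
    List<Hit> results = new ArrayList<>();
    Hit prev = null;
    for (Hit hit : sortedHits) {
      if (prev == null) {
        prev = hit;
        continue;
      }
      if (prev.dimension().equals(hit.dimension()) && prev.value().equals(hit.value())) {
        // same dimension/value arrived from both sides of the merge: add the counts together
        if (prev.count() != null && hit.count() != null) {
          prev = new Hit(prev.dimension(), prev.value(), prev.count() + hit.count());
        }
      } else {
        results.add(prev);
        prev = hit;
        if (limit > 0 && results.size() >= limit) {
          return results;
        }
      }
    }
    if (prev != null && (limit <= 0 || results.size() < limit)) {
      results.add(prev);
    }
    return results;
  }

  public static void main(String[] args)
  {
    Comparator<Hit> lexicographic =
        Comparator.comparing(Hit::dimension).thenComparing(Hit::value);

    // two per-segment results, concatenated and then sorted (stands in for Iterables.mergeSorted)
    List<Hit> merged = new ArrayList<>();
    merged.addAll(List.of(new Hit("market", "total_market", 186), new Hit("quality", "mezzanine", 279)));
    merged.addAll(List.of(new Hit("quality", "automotive", 93), new Hit("quality", "mezzanine", 279)));
    merged.sort(lexicographic);

    // expected: total_market(186), automotive(93), and the two mezzanine hits summed to 558
    System.out.println(collapse(merged, -1));
  }
}

If the counts are null (counting disabled), the duplicate hit is simply collapsed without summing, roughly matching the prev.getCount() != null guard in the patch.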