Merge pull request #2260 from navis/cardinality-for-searchquery

Support cardinality for search query
This commit is contained in:
Fangjin Yang 2016-03-14 13:24:40 -07:00
commit dbdbacaa18
12 changed files with 380 additions and 163 deletions

View File

@ -39,6 +39,7 @@ There are several main parts to a search query:
|searchDimensions|The dimensions to run the search over. Excluding this means the search is run over all dimensions.|no|
|query|See [SearchQuerySpec](../querying/searchqueryspec.html).|yes|
|sort|An object specifying how the results of the search should be sorted. Two possible types here are "lexicographic" (the default sort) and "strlen".|no|
|computeCount|Whether to include the appearance count of each matched value in the result. False by default.|no|
|context|See [Context](../querying/query-context.html)|no|
The format of the result is:

View File

@ -44,6 +44,7 @@ import io.druid.query.search.search.FragmentSearchQuerySpec;
import io.druid.query.search.search.InsensitiveContainsSearchQuerySpec;
import io.druid.query.search.search.SearchQuery;
import io.druid.query.search.search.SearchQuerySpec;
import io.druid.query.search.search.SearchSortSpec;
import io.druid.query.select.PagingSpec;
import io.druid.query.select.SelectQuery;
import io.druid.query.spec.LegacySegmentSpec;
@ -547,6 +548,7 @@ public class Druids
private QuerySegmentSpec querySegmentSpec;
private List<DimensionSpec> dimensions;
private SearchQuerySpec querySpec;
private SearchSortSpec sortSpec;
private Map<String, Object> context;
public SearchQueryBuilder()
@ -571,7 +573,7 @@ public class Druids
querySegmentSpec,
dimensions,
querySpec,
null,
sortSpec,
context
);
}
@ -735,6 +737,12 @@ public class Druids
return fragments(q, false);
}
public SearchQueryBuilder sortSpec(SearchSortSpec sortSpec)
{
this.sortSpec = sortSpec;
return this;
}
public SearchQueryBuilder fragments(List<String> q, boolean caseSensitive)
{
Preconditions.checkNotNull(q, "no value");

View File

@ -21,15 +21,16 @@ package io.druid.query.search;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.metamx.common.guava.nary.BinaryFn;
import io.druid.granularity.AllGranularity;
import io.druid.granularity.QueryGranularity;
import io.druid.query.Result;
import io.druid.query.search.search.SearchHit;
import io.druid.query.search.search.SearchSortSpec;
import org.joda.time.DateTime;
import java.util.TreeSet;
import java.util.Arrays;
import java.util.List;
/**
*/
@ -62,24 +63,53 @@ public class SearchBinaryFn
return arg1;
}
final int limit = gran instanceof AllGranularity ? this.limit : -1;
SearchResultValue arg1Vals = arg1.getValue();
SearchResultValue arg2Vals = arg2.getValue();
TreeSet<SearchHit> results = Sets.newTreeSet(searchSortSpec.getComparator());
results.addAll(Lists.newArrayList(arg1Vals));
results.addAll(Lists.newArrayList(arg2Vals));
Iterable<SearchHit> merged = Iterables.mergeSorted(
Arrays.asList(arg1Vals, arg2Vals),
searchSortSpec.getComparator()
);
return (gran instanceof AllGranularity)
? new Result<SearchResultValue>(
arg1.getTimestamp(), new SearchResultValue(
Lists.newArrayList(
Iterables.limit(results, limit)
)
)
)
: new Result<SearchResultValue>(
gran.toDateTime(gran.truncate(arg1.getTimestamp().getMillis())),
new SearchResultValue(Lists.newArrayList(results))
);
int maxSize = arg1Vals.getValue().size() + arg2Vals.getValue().size();
if (limit > 0) {
maxSize = Math.min(limit, maxSize);
}
List<SearchHit> results = Lists.newArrayListWithExpectedSize(maxSize);
SearchHit prev = null;
for (SearchHit searchHit : merged) {
if (prev == null) {
prev = searchHit;
continue;
}
if (prev.equals(searchHit)) {
if (prev.getCount() != null) {
prev = new SearchHit(
prev.getDimension(),
prev.getValue(),
prev.getCount() + searchHit.getCount()
);
}
} else {
results.add(prev);
prev = searchHit;
if (limit > 0 && results.size() >= limit) {
break;
}
}
}
if (prev != null && (limit < 0 || results.size() < limit)) {
results.add(prev);
}
final DateTime timestamp = gran instanceof AllGranularity
? arg1.getTimestamp()
: gran.toDateTime(gran.truncate(arg1.getTimestamp().getMillis()));
return new Result<SearchResultValue>(timestamp, new SearchResultValue(results));
}
}

View File

@ -157,16 +157,20 @@ public class SearchQueryQueryToolChest extends QueryToolChest<Result<SearchResul
++index;
}
final byte[] sortSpecBytes = query.getSort().getCacheKey();
final ByteBuffer queryCacheKey = ByteBuffer
.allocate(
1 + 4 + granularityBytes.length + filterBytes.length +
querySpecBytes.length + dimensionsBytesSize
querySpecBytes.length + dimensionsBytesSize + sortSpecBytes.length
)
.put(SEARCH_QUERY)
.put(Ints.toByteArray(query.getLimit()))
.put(granularityBytes)
.put(filterBytes)
.put(querySpecBytes);
.put(querySpecBytes)
.put(sortSpecBytes)
;
for (byte[] bytes : dimensionsBytes) {
queryCacheKey.put(bytes);

View File

@ -19,12 +19,12 @@
package io.druid.query.search;
import com.google.common.base.Function;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.metamx.collections.bitmap.BitmapFactory;
import com.metamx.collections.bitmap.ImmutableBitmap;
import com.metamx.common.ISE;
@ -55,11 +55,12 @@ import io.druid.segment.column.BitmapIndex;
import io.druid.segment.column.Column;
import io.druid.segment.data.IndexedInts;
import io.druid.segment.filter.Filters;
import org.apache.commons.lang.mutable.MutableInt;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.TreeSet;
import java.util.TreeMap;
/**
*/
@ -94,7 +95,7 @@ public class SearchQueryRunner implements QueryRunner<Result<SearchResultValue>>
final QueryableIndex index = segment.asQueryableIndex();
if (index != null) {
final TreeSet<SearchHit> retVal = Sets.newTreeSet(query.getSort().getComparator());
final TreeMap<SearchHit, MutableInt> retVal = Maps.newTreeMap(query.getSort().getComparator());
Iterable<DimensionSpec> dimsToSearch;
if (dimensions == null || dimensions.isEmpty()) {
@ -105,13 +106,8 @@ public class SearchQueryRunner implements QueryRunner<Result<SearchResultValue>>
final BitmapFactory bitmapFactory = index.getBitmapFactoryForDimensions();
final ImmutableBitmap baseFilter;
if (filter == null) {
baseFilter = bitmapFactory.complement(bitmapFactory.makeEmptyImmutableBitmap(), index.getNumRows());
} else {
final ColumnSelectorBitmapIndexSelector selector = new ColumnSelectorBitmapIndexSelector(bitmapFactory, index);
baseFilter = filter.getBitmapIndex(selector);
}
final ImmutableBitmap baseFilter =
filter == null ? null : filter.getBitmapIndex(new ColumnSelectorBitmapIndexSelector(bitmapFactory, index));
for (DimensionSpec dimension : dimsToSearch) {
final Column column = index.getColumn(dimension.getDimension());
@ -127,9 +123,19 @@ public class SearchQueryRunner implements QueryRunner<Result<SearchResultValue>>
if (bitmapIndex != null) {
for (int i = 0; i < bitmapIndex.getCardinality(); ++i) {
String dimVal = Strings.nullToEmpty(extractionFn.apply(bitmapIndex.getValue(i)));
if (searchQuerySpec.accept(dimVal) &&
bitmapFactory.intersection(Arrays.asList(baseFilter, bitmapIndex.getBitmap(i))).size() > 0) {
retVal.add(new SearchHit(dimension.getOutputName(), dimVal));
if (!searchQuerySpec.accept(dimVal)) {
continue;
}
ImmutableBitmap bitmap = bitmapIndex.getBitmap(i);
if (baseFilter != null) {
bitmap = bitmapFactory.intersection(Arrays.asList(baseFilter, bitmap));
}
if (bitmap.size() > 0) {
MutableInt counter = new MutableInt(bitmap.size());
MutableInt prev = retVal.put(new SearchHit(dimension.getOutputName(), dimVal), counter);
if (prev != null) {
counter.add(prev.intValue());
}
if (retVal.size() >= limit) {
return makeReturnResult(limit, retVal);
}
@ -161,12 +167,12 @@ public class SearchQueryRunner implements QueryRunner<Result<SearchResultValue>>
final Sequence<Cursor> cursors = adapter.makeCursors(filter, segment.getDataInterval(), QueryGranularity.ALL, descending);
final TreeSet<SearchHit> retVal = cursors.accumulate(
Sets.newTreeSet(query.getSort().getComparator()),
new Accumulator<TreeSet<SearchHit>, Cursor>()
final TreeMap<SearchHit, MutableInt> retVal = cursors.accumulate(
Maps.<SearchHit, SearchHit, MutableInt>newTreeMap(query.getSort().getComparator()),
new Accumulator<TreeMap<SearchHit, MutableInt>, Cursor>()
{
@Override
public TreeSet<SearchHit> accumulate(TreeSet<SearchHit> set, Cursor cursor)
public TreeMap<SearchHit, MutableInt> accumulate(TreeMap<SearchHit, MutableInt> set, Cursor cursor)
{
if (set.size() >= limit) {
return set;
@ -189,7 +195,11 @@ public class SearchQueryRunner implements QueryRunner<Result<SearchResultValue>>
for (int i = 0; i < vals.size(); ++i) {
final String dimVal = selector.lookupName(vals.get(i));
if (searchQuerySpec.accept(dimVal)) {
set.add(new SearchHit(entry.getKey(), dimVal));
MutableInt counter = new MutableInt(1);
MutableInt prev = set.put(new SearchHit(entry.getKey(), dimVal), counter);
if (prev != null) {
counter.add(prev.intValue());
}
if (set.size() >= limit) {
return set;
}
@ -209,14 +219,26 @@ public class SearchQueryRunner implements QueryRunner<Result<SearchResultValue>>
return makeReturnResult(limit, retVal);
}
private Sequence<Result<SearchResultValue>> makeReturnResult(int limit, TreeSet<SearchHit> retVal)
private Sequence<Result<SearchResultValue>> makeReturnResult(
int limit, TreeMap<SearchHit, MutableInt> retVal)
{
Iterable<SearchHit> source = Iterables.transform(
retVal.entrySet(), new Function<Map.Entry<SearchHit, MutableInt>, SearchHit>()
{
@Override
public SearchHit apply(Map.Entry<SearchHit, MutableInt> input)
{
SearchHit hit = input.getKey();
return new SearchHit(hit.getDimension(), hit.getValue(), input.getValue().intValue());
}
}
);
return Sequences.simple(
ImmutableList.of(
new Result<SearchResultValue>(
segment.getDataInterval().getStart(),
new SearchResultValue(
Lists.newArrayList(new FunctionalIterable<SearchHit>(retVal).limit(limit))
Lists.newArrayList(new FunctionalIterable<SearchHit>(source).limit(limit))
)
)
)

View File

@ -50,6 +50,12 @@ public class LexicographicSearchSortSpec implements SearchSortSpec
};
}
@Override
public byte[] getCacheKey()
{
return toString().getBytes();
}
public String toString()
{
return "lexicographicSort";

View File

@ -30,15 +30,23 @@ public class SearchHit implements Comparable<SearchHit>
{
private final String dimension;
private final String value;
private final Integer count;
@JsonCreator
public SearchHit(
@JsonProperty("dimension") String dimension,
@JsonProperty("value") String value
@JsonProperty("value") String value,
@JsonProperty("count") Integer count
)
{
this.dimension = checkNotNull(dimension);
this.value = checkNotNull(value);
this.count = count;
}
public SearchHit(String dimension, String value)
{
this(dimension, value, null);
}
@JsonProperty
@ -53,6 +61,12 @@ public class SearchHit implements Comparable<SearchHit>
return value;
}
@JsonProperty
public Integer getCount()
{
return count;
}
@Override
public int compareTo(SearchHit o)
{
@ -99,6 +113,7 @@ public class SearchHit implements Comparable<SearchHit>
return "Hit{" +
"dimension='" + dimension + '\'' +
", value='" + value + '\'' +
(count != null ? ", count='" + count + '\'' : "") +
'}';
}
}

View File

@ -33,5 +33,7 @@ import java.util.Comparator;
})
public interface SearchSortSpec
{
public Comparator<SearchHit> getComparator();
Comparator<SearchHit> getComparator();
byte[] getCacheKey();
}

View File

@ -52,6 +52,12 @@ public class StrlenSearchSortSpec implements SearchSortSpec
};
}
@Override
public byte[] getCacheKey()
{
return toString().getBytes();
}
public String toString()
{
return "stringLengthSort";

View File

@ -102,6 +102,8 @@ public class QueryRunnerTestHelper
public static final String qualityDimension = "quality";
public static final String placementDimension = "placement";
public static final String placementishDimension = "placementish";
public static final String partialNullDimension = "partial_null_column";
public static final List<String> dimensions = Lists.newArrayList(
marketDimension,
qualityDimension,

View File

@ -30,6 +30,8 @@ import org.junit.Assert;
import org.junit.Test;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
@ -220,22 +222,25 @@ public class SearchBinaryFnTest
@Test
public void testStrlenMerge()
{
StrlenSearchSortSpec searchSortSpec = new StrlenSearchSortSpec();
Comparator<SearchHit> c = searchSortSpec.getComparator();
Result<SearchResultValue> r1 = new Result<SearchResultValue>(
currTime,
new SearchResultValue(toHits("blah:thisislong"))
new SearchResultValue(toHits(c, "blah:thisislong"))
);
Result<SearchResultValue> r2 = new Result<SearchResultValue>(
currTime,
new SearchResultValue(toHits("blah:short"))
new SearchResultValue(toHits(c, "blah:short"))
);
Result<SearchResultValue> expected = new Result<SearchResultValue>(
currTime,
new SearchResultValue(toHits("blah:short", "blah:thisislong"))
new SearchResultValue(toHits(c, "blah:short", "blah:thisislong"))
);
Result<SearchResultValue> actual = new SearchBinaryFn(new StrlenSearchSortSpec(), QueryGranularity.ALL, Integer.MAX_VALUE).apply(r1, r2);
Result<SearchResultValue> actual = new SearchBinaryFn(searchSortSpec, QueryGranularity.ALL, Integer.MAX_VALUE).apply(r1, r2);
Assert.assertEquals(expected.getTimestamp(), actual.getTimestamp());
assertSearchMergeResult(expected.getValue(), actual.getValue());
}
@ -243,33 +248,37 @@ public class SearchBinaryFnTest
@Test
public void testStrlenMerge2()
{
StrlenSearchSortSpec searchSortSpec = new StrlenSearchSortSpec();
Comparator<SearchHit> c = searchSortSpec.getComparator();
Result<SearchResultValue> r1 = new Result<SearchResultValue>(
currTime,
new SearchResultValue(toHits("blah:thisislong", "blah:short", "blah2:thisislong"))
new SearchResultValue(toHits(c, "blah:short", "blah:thisislong", "blah2:thisislong"))
);
Result<SearchResultValue> r2 = new Result<SearchResultValue>(
currTime,
new SearchResultValue(toHits("blah:short", "blah2:thisislong"))
new SearchResultValue(toHits(c, "blah:short", "blah2:thisislong"))
);
Result<SearchResultValue> expected = new Result<SearchResultValue>(
currTime,
new SearchResultValue(toHits("blah:short", "blah:thisislong", "blah2:thisislong"))
new SearchResultValue(toHits(c, "blah:short", "blah:thisislong", "blah2:thisislong"))
);
Result<SearchResultValue> actual = new SearchBinaryFn(new StrlenSearchSortSpec(), QueryGranularity.ALL, Integer.MAX_VALUE).apply(r1, r2);
Result<SearchResultValue> actual = new SearchBinaryFn(searchSortSpec, QueryGranularity.ALL, Integer.MAX_VALUE).apply(r1, r2);
Assert.assertEquals(expected.getTimestamp(), actual.getTimestamp());
System.out.println("[SearchBinaryFnTest/testStrlenMerge2] " + actual.getValue());
assertSearchMergeResult(expected.getValue(), actual.getValue());
}
private List<SearchHit> toHits(String... hits) {
// merge function expects input to be sorted as per comparator
private List<SearchHit> toHits(Comparator<SearchHit> comparator, String... hits) {
List<SearchHit> result = new ArrayList<>();
for (String hit : hits) {
int index = hit.indexOf(':');
result.add(new SearchHit(hit.substring(0, index), hit.substring(index + 1)));
}
Collections.sort(result, comparator);
return result;
}

View File

@ -19,25 +19,34 @@
package io.druid.query.search;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.metamx.common.guava.Sequence;
import com.metamx.common.guava.Sequences;
import com.metamx.common.logger.Logger;
import io.druid.query.Druids;
import io.druid.query.Query;
import io.druid.query.QueryRunner;
import io.druid.query.QueryRunnerTestHelper;
import io.druid.query.Result;
import io.druid.query.dimension.ExtractionDimensionSpec;
import io.druid.query.extraction.LookupExtractionFn;
import io.druid.query.extraction.MapLookupExtractor;
import io.druid.query.filter.AndDimFilter;
import io.druid.query.filter.DimFilter;
import io.druid.query.filter.ExtractionDimFilter;
import io.druid.query.filter.RegexDimFilter;
import io.druid.query.filter.SelectorDimFilter;
import io.druid.query.search.search.FragmentSearchQuerySpec;
import io.druid.query.search.search.SearchHit;
import io.druid.query.search.search.SearchQuery;
import io.druid.query.search.search.SearchQueryConfig;
import io.druid.query.search.search.StrlenSearchSortSpec;
import io.druid.query.spec.MultipleIntervalSegmentSpec;
import io.druid.segment.TestHelper;
import org.joda.time.DateTime;
import org.joda.time.Interval;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
@ -45,27 +54,27 @@ import org.junit.runners.Parameterized;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
*/
@RunWith(Parameterized.class)
public class SearchQueryRunnerTest
{
private static final Logger LOG = new Logger(SearchQueryRunnerTest.class);
private static final SearchQueryQueryToolChest toolChest = new SearchQueryQueryToolChest(
new SearchQueryConfig(),
QueryRunnerTestHelper.NoopIntervalChunkingQueryRunnerDecorator()
);
@Parameterized.Parameters
public static Iterable<Object[]> constructorFeeder() throws IOException
{
return QueryRunnerTestHelper.transformToConstructionFeeder(
QueryRunnerTestHelper.makeQueryRunners(
new SearchQueryRunnerFactory(
new SearchQueryQueryToolChest(
new SearchQueryConfig(),
QueryRunnerTestHelper.NoopIntervalChunkingQueryRunnerDecorator()
),
toolChest,
QueryRunnerTestHelper.NOOP_QUERYWATCHER
)
)
@ -81,6 +90,23 @@ public class SearchQueryRunnerTest
this.runner = runner;
}
@Test
public void testSearchHitSerDe() throws Exception
{
for (SearchHit hit : Arrays.asList(new SearchHit("dim1", "val1"), new SearchHit("dim2", "val2", 3))) {
SearchHit read = TestHelper.JSON_MAPPER.readValue(
TestHelper.JSON_MAPPER.writeValueAsString(hit),
SearchHit.class
);
Assert.assertEquals(hit, read);
if (hit.getCount() == null) {
Assert.assertNull(read.getCount());
} else {
Assert.assertEquals(hit.getCount(), read.getCount());
}
}
}
@Test
public void testSearch()
{
@ -91,15 +117,60 @@ public class SearchQueryRunnerTest
.query("a")
.build();
Map<String, Set<String>> expectedResults = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER);
expectedResults.put(
QueryRunnerTestHelper.qualityDimension,
Sets.newHashSet("automotive", "mezzanine", "travel", "health", "entertainment")
List<SearchHit> expectedHits = Lists.newLinkedList();
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "automotive", 93));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "mezzanine", 279));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "travel", 93));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "health", 93));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "entertainment", 93));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.marketDimension, "total_market", 186));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.placementishDimension, "a", 93));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.partialNullDimension, "value", 186));
checkSearchQuery(searchQuery, expectedHits);
}
@Test
public void testSearchWithCardinality()
{
final SearchQuery searchQuery = Druids.newSearchQueryBuilder()
.dataSource(QueryRunnerTestHelper.dataSource)
.granularity(QueryRunnerTestHelper.allGran)
.intervals(QueryRunnerTestHelper.fullOnInterval)
.query("a")
.build();
// double the value
QueryRunner mergedRunner = toolChest.mergeResults(
new QueryRunner<Result<SearchResultValue>>()
{
@Override
public Sequence<Result<SearchResultValue>> run(
Query<Result<SearchResultValue>> query, Map<String, Object> responseContext
)
{
final Query<Result<SearchResultValue>> query1 = searchQuery.withQuerySegmentSpec(
new MultipleIntervalSegmentSpec(Lists.newArrayList(new Interval("2011-01-12/2011-02-28")))
);
final Query<Result<SearchResultValue>> query2 = searchQuery.withQuerySegmentSpec(
new MultipleIntervalSegmentSpec(Lists.newArrayList(new Interval("2011-03-01/2011-04-15")))
);
return Sequences.concat(runner.run(query1, responseContext), runner.run(query2, responseContext));
}
}
);
expectedResults.put(QueryRunnerTestHelper.marketDimension, Sets.newHashSet("total_market"));
expectedResults.put(QueryRunnerTestHelper.placementishDimension, Sets.newHashSet("a"));
expectedResults.put("partial_null_column", Sets.newHashSet("value"));
checkSearchQuery(searchQuery, expectedResults);
List<SearchHit> expectedHits = Lists.newLinkedList();
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "automotive", 186));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "mezzanine", 558));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "travel", 186));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "health", 186));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "entertainment", 186));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.marketDimension, "total_market", 372));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.placementishDimension, "a", 186));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.partialNullDimension, "value", 372));
checkSearchQuery(searchQuery, mergedRunner, expectedHits);
}
@Test
@ -118,11 +189,37 @@ public class SearchQueryRunnerTest
.query("e")
.build();
Map<String, Set<String>> expectedResults = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER);
expectedResults.put(QueryRunnerTestHelper.placementDimension, Sets.newHashSet("preferred"));
expectedResults.put(QueryRunnerTestHelper.placementishDimension, Sets.newHashSet("e", "preferred"));
List<SearchHit> expectedHits = Lists.newLinkedList();
expectedHits.add(new SearchHit(QueryRunnerTestHelper.placementDimension, "preferred", 1209));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.placementishDimension, "e", 93));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.placementishDimension, "preferred", 1209));
checkSearchQuery(searchQuery, expectedResults);
checkSearchQuery(searchQuery, expectedHits);
}
@Test
public void testSearchSameValueInMultiDims2()
{
SearchQuery searchQuery = Druids.newSearchQueryBuilder()
.dataSource(QueryRunnerTestHelper.dataSource)
.granularity(QueryRunnerTestHelper.allGran)
.intervals(QueryRunnerTestHelper.fullOnInterval)
.dimensions(
Arrays.asList(
QueryRunnerTestHelper.placementDimension,
QueryRunnerTestHelper.placementishDimension
)
)
.sortSpec(new StrlenSearchSortSpec())
.query("e")
.build();
List<SearchHit> expectedHits = Lists.newLinkedList();
expectedHits.add(new SearchHit(QueryRunnerTestHelper.placementishDimension, "e", 93));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.placementDimension, "preferred", 1209));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.placementishDimension, "preferred", 1209));
checkSearchQuery(searchQuery, expectedHits);
}
@Test
@ -135,23 +232,21 @@ public class SearchQueryRunnerTest
.query(new FragmentSearchQuerySpec(Arrays.asList("auto", "ve")))
.build();
Map<String, Set<String>> expectedResults = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER);
expectedResults.put(QueryRunnerTestHelper.qualityDimension, Sets.newHashSet("automotive"));
List<SearchHit> expectedHits = Lists.newLinkedList();
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "automotive", 93));
checkSearchQuery(searchQuery, expectedResults);
checkSearchQuery(searchQuery, expectedHits);
}
@Test
public void testSearchWithDimensionQuality()
{
Map<String, Set<String>> expectedResults = new HashMap<String, Set<String>>();
expectedResults.put(
QueryRunnerTestHelper.qualityDimension, new HashSet<String>(
Arrays.asList(
"automotive", "mezzanine", "travel", "health", "entertainment"
)
)
);
List<SearchHit> expectedHits = Lists.newLinkedList();
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "automotive", 93));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "mezzanine", 279));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "travel", 93));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "health", 93));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "entertainment", 93));
checkSearchQuery(
Druids.newSearchQueryBuilder()
@ -161,15 +256,15 @@ public class SearchQueryRunnerTest
.intervals(QueryRunnerTestHelper.fullOnInterval)
.query("a")
.build(),
expectedResults
expectedHits
);
}
@Test
public void testSearchWithDimensionProvider()
{
Map<String, Set<String>> expectedResults = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER);
expectedResults.put(QueryRunnerTestHelper.marketDimension, new HashSet<String>(Arrays.asList("total_market")));
List<SearchHit> expectedHits = Lists.newLinkedList();
expectedHits.add(new SearchHit(QueryRunnerTestHelper.marketDimension, "total_market", 186));
checkSearchQuery(
Druids.newSearchQueryBuilder()
@ -179,28 +274,20 @@ public class SearchQueryRunnerTest
.intervals(QueryRunnerTestHelper.fullOnInterval)
.query("a")
.build(),
expectedResults
expectedHits
);
}
@Test
public void testSearchWithDimensionsQualityAndProvider()
{
Map<String, Set<String>> expectedResults = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER);
expectedResults.putAll(
ImmutableMap.<String, Set<String>>of(
QueryRunnerTestHelper.qualityDimension,
new HashSet<String>(
Arrays.asList(
"automotive", "mezzanine", "travel", "health", "entertainment"
)
),
QueryRunnerTestHelper.marketDimension,
new HashSet<String>(
Arrays.asList("total_market")
)
)
);
List<SearchHit> expectedHits = Lists.newLinkedList();
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "automotive", 93));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "mezzanine", 279));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "travel", 93));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "health", 93));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "entertainment", 93));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.marketDimension, "total_market", 186));
checkSearchQuery(
Druids.newSearchQueryBuilder()
@ -215,15 +302,15 @@ public class SearchQueryRunnerTest
.intervals(QueryRunnerTestHelper.fullOnInterval)
.query("a")
.build(),
expectedResults
expectedHits
);
}
@Test
public void testSearchWithDimensionsPlacementAndProvider()
{
Map<String, Set<String>> expectedResults = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER);
expectedResults.put(QueryRunnerTestHelper.marketDimension, new HashSet<String>(Arrays.asList("total_market")));
List<SearchHit> expectedHits = Lists.newLinkedList();
expectedHits.add(new SearchHit(QueryRunnerTestHelper.marketDimension, "total_market", 186));
checkSearchQuery(
Druids.newSearchQueryBuilder()
@ -238,7 +325,7 @@ public class SearchQueryRunnerTest
.intervals(QueryRunnerTestHelper.fullOnInterval)
.query("mark")
.build(),
expectedResults
expectedHits
);
}
@ -247,25 +334,29 @@ public class SearchQueryRunnerTest
public void testSearchWithExtractionFilter1()
{
final String automotiveSnowman = "automotive☃";
Map<String, Set<String>> expectedResults = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER);
expectedResults.put(
QueryRunnerTestHelper.qualityDimension, new HashSet<String>(Arrays.asList(automotiveSnowman))
);
List<SearchHit> expectedHits = Lists.newLinkedList();
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, automotiveSnowman, 93));
final LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(
new MapLookupExtractor(ImmutableMap.of("automotive", automotiveSnowman), false),
true,
null,
true,
false
new MapLookupExtractor(ImmutableMap.of("automotive", automotiveSnowman), false),
true,
null,
true,
false
);
checkSearchQuery(
Druids.newSearchQueryBuilder()
.dataSource(QueryRunnerTestHelper.dataSource)
.granularity(QueryRunnerTestHelper.allGran)
.filters(new ExtractionDimFilter(QueryRunnerTestHelper.qualityDimension, automotiveSnowman, lookupExtractionFn, null))
.filters(
new ExtractionDimFilter(
QueryRunnerTestHelper.qualityDimension,
automotiveSnowman,
lookupExtractionFn,
null
)
)
.intervals(QueryRunnerTestHelper.fullOnInterval)
.dimensions(
new ExtractionDimensionSpec(
@ -277,36 +368,38 @@ public class SearchQueryRunnerTest
)
.query("")
.build(),
expectedResults
expectedHits
);
}
@Test
public void testSearchWithSingleFilter1()
{
Map<String, Set<String>> expectedResults = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER);
expectedResults.put(
QueryRunnerTestHelper.qualityDimension, new HashSet<String>(Arrays.asList("automotive"))
);
List<SearchHit> expectedHits = Lists.newLinkedList();
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "mezzanine", 93));
checkSearchQuery(
Druids.newSearchQueryBuilder()
.dataSource(QueryRunnerTestHelper.dataSource)
.granularity(QueryRunnerTestHelper.allGran)
.filters(QueryRunnerTestHelper.qualityDimension, "automotive")
.filters(
new AndDimFilter(
Arrays.<DimFilter>asList(
new SelectorDimFilter(QueryRunnerTestHelper.marketDimension, "total_market"),
new SelectorDimFilter(QueryRunnerTestHelper.qualityDimension, "mezzanine"))))
.intervals(QueryRunnerTestHelper.fullOnInterval)
.dimensions(QueryRunnerTestHelper.qualityDimension)
.query("a")
.build(),
expectedResults
expectedHits
);
}
@Test
public void testSearchWithSingleFilter2()
{
Map<String, Set<String>> expectedResults = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER);
expectedResults.put(QueryRunnerTestHelper.marketDimension, new HashSet<String>(Arrays.asList("total_market")));
List<SearchHit> expectedHits = Lists.newLinkedList();
expectedHits.add(new SearchHit(QueryRunnerTestHelper.marketDimension, "total_market", 186));
checkSearchQuery(
Druids.newSearchQueryBuilder()
@ -317,15 +410,15 @@ public class SearchQueryRunnerTest
.dimensions(QueryRunnerTestHelper.marketDimension)
.query("a")
.build(),
expectedResults
expectedHits
);
}
@Test
public void testSearchMultiAndFilter()
{
Map<String, Set<String>> expectedResults = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER);
expectedResults.put(QueryRunnerTestHelper.qualityDimension, new HashSet<String>(Arrays.asList("automotive")));
List<SearchHit> expectedHits = Lists.newLinkedList();
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "automotive", 93));
DimFilter filter = Druids.newAndDimFilterBuilder()
.fields(
@ -351,15 +444,15 @@ public class SearchQueryRunnerTest
.intervals(QueryRunnerTestHelper.fullOnInterval)
.query("a")
.build(),
expectedResults
expectedHits
);
}
@Test
public void testSearchWithMultiOrFilter()
{
Map<String, Set<String>> expectedResults = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER);
expectedResults.put(QueryRunnerTestHelper.qualityDimension, new HashSet<String>(Arrays.asList("automotive")));
List<SearchHit> expectedHits = Lists.newLinkedList();
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "automotive", 93));
DimFilter filter = Druids.newOrDimFilterBuilder()
.fields(
@ -385,14 +478,14 @@ public class SearchQueryRunnerTest
.intervals(QueryRunnerTestHelper.fullOnInterval)
.query("a")
.build(),
expectedResults
expectedHits
);
}
@Test
public void testSearchWithEmptyResults()
{
Map<String, Set<String>> expectedResults = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER);
List<SearchHit> expectedHits = Lists.newLinkedList();
checkSearchQuery(
Druids.newSearchQueryBuilder()
@ -401,14 +494,14 @@ public class SearchQueryRunnerTest
.intervals(QueryRunnerTestHelper.fullOnInterval)
.query("abcd123")
.build(),
expectedResults
expectedHits
);
}
@Test
public void testSearchWithFilterEmptyResults()
{
Map<String, Set<String>> expectedResults = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER);
List<SearchHit> expectedHits = Lists.newLinkedList();
DimFilter filter = Druids.newAndDimFilterBuilder()
.fields(
@ -433,7 +526,7 @@ public class SearchQueryRunnerTest
.intervals(QueryRunnerTestHelper.fullOnInterval)
.query("a")
.build(),
expectedResults
expectedHits
);
}
@ -441,7 +534,7 @@ public class SearchQueryRunnerTest
@Test
public void testSearchNonExistingDimension()
{
Map<String, Set<String>> expectedResults = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER);
List<SearchHit> expectedHits = Lists.newLinkedList();
checkSearchQuery(
Druids.newSearchQueryBuilder()
@ -451,45 +544,64 @@ public class SearchQueryRunnerTest
.dimensions("does_not_exist")
.query("a")
.build(),
expectedResults
expectedHits
);
}
private void checkSearchQuery(SearchQuery searchQuery, Map<String, Set<String>> expectedResults)
private void checkSearchQuery(Query searchQuery, List<SearchHit> expectedResults)
{
checkSearchQuery(searchQuery, runner, expectedResults);
}
private void checkSearchQuery(Query searchQuery, QueryRunner runner, List<SearchHit> expectedResults)
{
HashMap<String,List> context = new HashMap<String, List>();
Iterable<Result<SearchResultValue>> results = Sequences.toList(
runner.run(searchQuery, context),
runner.run(searchQuery, ImmutableMap.of()),
Lists.<Result<SearchResultValue>>newArrayList()
);
List<SearchHit> copy = ImmutableList.copyOf(expectedResults);
for (Result<SearchResultValue> result : results) {
Assert.assertEquals(new DateTime("2011-01-12T00:00:00.000Z"), result.getTimestamp());
Assert.assertTrue(result.getValue() instanceof Iterable);
Iterable<SearchHit> resultValues = result.getValue();
for (SearchHit resultValue : resultValues) {
String dimension = resultValue.getDimension();
String theValue = resultValue.getValue();
Assert.assertTrue(
String.format("Result had unknown dimension[%s]", dimension),
expectedResults.containsKey(dimension)
);
Set<String> expectedSet = expectedResults.get(dimension);
Assert.assertTrue(
String.format("Couldn't remove dim[%s], value[%s]", dimension, theValue), expectedSet.remove(theValue)
);
int index = expectedResults.indexOf(resultValue);
if (index < 0) {
fail(
copy, results,
"No result found containing " + resultValue.getDimension() + " and " + resultValue.getValue()
);
}
SearchHit expected = expectedResults.remove(index);
if (!resultValue.toString().equals(expected.toString())) {
fail(
copy, results,
"Invalid count for " + resultValue + ".. which was expected to be " + expected.getCount()
);
}
}
}
for (Map.Entry<String, Set<String>> entry : expectedResults.entrySet()) {
Assert.assertTrue(
String.format(
"Dimension[%s] should have had everything removed, still has[%s]", entry.getKey(), entry.getValue()
),
entry.getValue().isEmpty()
);
if (!expectedResults.isEmpty()) {
fail(copy, results, "Some expected results are not shown: " + expectedResults);
}
}
private void fail(
List<SearchHit> expectedResults,
Iterable<Result<SearchResultValue>> results, String errorMsg
)
{
LOG.info("Expected..");
for (SearchHit expected : expectedResults) {
LOG.info(expected.toString());
}
LOG.info("Result..");
for (Result<SearchResultValue> r : results) {
for (SearchHit v : r.getValue()) {
LOG.info(v.toString());
}
}
Assert.fail(errorMsg);
}
}