Interval bug fix for search query (#2903)

* support query granularity and interval for search query

* skip unnecessary bitmap calculation when the query interval contains the whole data interval of the given segments.

* use binary search to find start and end index for the given interval

* fix based on comment

* bug fix based on the review comments and add unit tests
This commit is contained in:
Keuntae Park 2016-09-01 00:22:44 +09:00 committed by Nishant
parent 48dce88aab
commit 0076b5fc1a
3 changed files with 100 additions and 12 deletions

View File

@ -27,13 +27,14 @@ import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.metamx.collections.bitmap.BitmapFactory;
import com.metamx.collections.bitmap.ImmutableBitmap;
import com.metamx.collections.bitmap.MutableBitmap;
import com.metamx.common.IAE;
import com.metamx.common.ISE;
import com.metamx.common.guava.Accumulator;
import com.metamx.common.guava.FunctionalIterable;
import com.metamx.common.guava.Sequence;
import com.metamx.common.guava.Sequences;
import com.metamx.emitter.EmittingLogger;
import io.druid.granularity.QueryGranularities;
import io.druid.query.Druids;
import io.druid.query.Query;
import io.druid.query.QueryRunner;
@ -53,9 +54,11 @@ import io.druid.segment.Segment;
import io.druid.segment.StorageAdapter;
import io.druid.segment.column.BitmapIndex;
import io.druid.segment.column.Column;
import io.druid.segment.column.GenericColumn;
import io.druid.segment.data.IndexedInts;
import io.druid.segment.filter.Filters;
import org.apache.commons.lang.mutable.MutableInt;
import org.joda.time.Interval;
import java.util.Arrays;
import java.util.List;
@ -90,6 +93,11 @@ public class SearchQueryRunner implements QueryRunner<Result<SearchResultValue>>
final SearchQuerySpec searchQuerySpec = query.getQuery();
final int limit = query.getLimit();
final boolean descending = query.isDescending();
final List<Interval> intervals = query.getQuerySegmentSpec().getIntervals();
if (intervals.size() != 1) {
throw new IAE("Should only have one interval, got[%s]", intervals);
}
final Interval interval = intervals.get(0);
// Closing this will cause segfaults in unit tests.
final QueryableIndex index = segment.asQueryableIndex();
@ -109,6 +117,26 @@ public class SearchQueryRunner implements QueryRunner<Result<SearchResultValue>>
final ImmutableBitmap baseFilter =
filter == null ? null : filter.getBitmapIndex(new ColumnSelectorBitmapIndexSelector(bitmapFactory, index));
ImmutableBitmap timeFilteredBitmap;
if (!interval.contains(segment.getDataInterval())) {
MutableBitmap timeBitmap = bitmapFactory.makeEmptyMutableBitmap();
final Column timeColumn = index.getColumn(Column.TIME_COLUMN_NAME);
final GenericColumn timeValues = timeColumn.getGenericColumn();
int startIndex = Math.max(0, getStartIndexOfTime(timeValues, interval.getStartMillis(), true));
int endIndex = Math.min(timeValues.length() - 1, getStartIndexOfTime(timeValues, interval.getEndMillis(), false));
for (int i = startIndex; i <= endIndex; i++) {
timeBitmap.add(i);
}
final ImmutableBitmap finalTimeBitmap = bitmapFactory.makeImmutableBitmap(timeBitmap);
timeFilteredBitmap =
(baseFilter == null) ? finalTimeBitmap : finalTimeBitmap.intersection(baseFilter);
} else {
timeFilteredBitmap = baseFilter;
}
for (DimensionSpec dimension : dimsToSearch) {
final Column column = index.getColumn(dimension.getDimension());
if (column == null) {
@ -127,8 +155,8 @@ public class SearchQueryRunner implements QueryRunner<Result<SearchResultValue>>
continue;
}
ImmutableBitmap bitmap = bitmapIndex.getBitmap(i);
if (baseFilter != null) {
bitmap = bitmapFactory.intersection(Arrays.asList(baseFilter, bitmap));
if (timeFilteredBitmap != null) {
bitmap = bitmapFactory.intersection(Arrays.asList(timeFilteredBitmap, bitmap));
}
if (bitmap.size() > 0) {
MutableInt counter = new MutableInt(bitmap.size());
@ -165,7 +193,7 @@ public class SearchQueryRunner implements QueryRunner<Result<SearchResultValue>>
dimsToSearch = dimensions;
}
final Sequence<Cursor> cursors = adapter.makeCursors(filter, segment.getDataInterval(), QueryGranularities.ALL, descending);
final Sequence<Cursor> cursors = adapter.makeCursors(filter, interval, query.getGranularity(), descending);
final TreeMap<SearchHit, MutableInt> retVal = cursors.accumulate(
Maps.<SearchHit, SearchHit, MutableInt>newTreeMap(query.getSort().getComparator()),
@ -219,6 +247,36 @@ public class SearchQueryRunner implements QueryRunner<Result<SearchResultValue>>
return makeReturnResult(limit, retVal);
}
/**
 * Binary-searches the time column for the row boundary that corresponds to {@code time}.
 *
 * <p>The search finds the lower bound of {@code time}: the index of the first row whose value is
 * greater than or equal to {@code time}, or {@code timeValues.length()} when every row is smaller.
 * A single lower-bound search is used instead of "find any match, then walk back over duplicates"
 * so the cost stays logarithmic even when many rows share the same timestamp.
 *
 * <p>The result is not clamped: it can be {@code timeValues.length()} when {@code inclusive} is
 * true and {@code -1} when it is false, so callers must bound it to the valid row range.
 *
 * @param timeValues time column to search; assumed to hold values in ascending order, which the
 *                   binary search relies on
 * @param time       timestamp to locate, in the same unit as the column values
 * @param inclusive  {@code true} to get the index of the first row with value {@code >= time}
 *                   (usable as an inclusive start index); {@code false} to get the index of the
 *                   last row with value {@code < time} (usable as the last row before an
 *                   exclusive end)
 *
 * @return the lower-bound index when {@code inclusive} is true, otherwise the lower-bound index
 * minus one
 */
protected int getStartIndexOfTime(GenericColumn timeValues, long time, boolean inclusive)
{
  int low = 0;
  int high = timeValues.length();

  // Invariant: rows before 'low' are < time, rows from 'high' onwards are >= time.
  while (low < high) {
    final int mid = (low + high) >>> 1;
    if (timeValues.getLongSingleValueRow(mid) < time) {
      low = mid + 1;
    } else {
      high = mid;
    }
  }

  // 'low' is now the first row with value >= time (or length() if there is none).
  return inclusive ? low : low - 1;
}
private Sequence<Result<SearchResultValue>> makeReturnResult(
int limit, TreeMap<SearchHit, MutableInt> retVal)
{

View File

@ -163,14 +163,14 @@ public class SearchQueryRunnerTest
);
List<SearchHit> expectedHits = Lists.newLinkedList();
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "automotive", 186));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "mezzanine", 558));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "travel", 186));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "health", 186));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "entertainment", 186));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.marketDimension, "total_market", 372));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.placementishDimension, "a", 186));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.partialNullDimension, "value", 372));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "automotive", 91));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "mezzanine", 273));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "travel", 91));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "health", 91));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "entertainment", 91));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.marketDimension, "total_market", 182));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.placementishDimension, "a", 91));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.partialNullDimension, "value", 182));
checkSearchQuery(searchQuery, mergedRunner, expectedHits);
}

View File

@ -19,6 +19,7 @@
package io.druid.query.search;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
@ -36,6 +37,7 @@ import io.druid.segment.QueryableIndexSegment;
import io.druid.segment.TestIndex;
import io.druid.segment.incremental.IncrementalIndex;
import org.joda.time.DateTime;
import org.joda.time.Interval;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
@ -155,6 +157,34 @@ public class SearchQueryRunnerWithCaseTest
checkSearchQuery(searchQuery, expectedResults);
}
/**
 * Searches the quality dimension for "otive" with the query restricted to the interval
 * 2011-01-12/2011-01-13 and expects "AutoMotive" to be the only hit.
 */
@Test
public void testSearchIntervals()
{
  final SearchQuery query = testBuilder()
      .dimensions(Arrays.asList(qualityDimension))
      .intervals("2011-01-12T00:00:00.000Z/2011-01-13T00:00:00.000Z")
      .query("otive")
      .build();

  // Dimension names are compared case-insensitively in the expected-results map.
  final Map<String, Set<String>> expected = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER);
  expected.put(qualityDimension, Sets.newHashSet("AutoMotive"));

  checkSearchQuery(query, expected);
}
/**
 * Searches the quality dimension for "business" with the query restricted to the interval
 * 2011-01-10/2011-01-11 and expects no hits for that dimension.
 *
 * NOTE(review): judging by the test name, this interval presumably does not overlap the test
 * data's time range -- confirm against the test index.
 */
@Test
public void testSearchNoOverrappingIntervals()
{
  final SearchQuery query = testBuilder()
      .dimensions(Arrays.asList(qualityDimension))
      .intervals("2011-01-10T00:00:00.000Z/2011-01-11T00:00:00.000Z")
      .query("business")
      .build();

  // The dimension is still expected as a key, mapped to an empty set of values.
  final Map<String, Set<String>> expected = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER);
  expected.put(qualityDimension, Sets.<String>newHashSet());

  checkSearchQuery(query, expected);
}
@Test
public void testFragmentSearch()
{