Interval bug fix for search query (#2903)

* support query granularity and interval for search query

* skip unnecessary bitmap calculation when the query interval contains the whole data interval of the given segment.

* use binary search to find start and end index for the given interval

* fix based on comment

* bug fix based on the review comments and add unit tests
This commit is contained in:
Keuntae Park 2016-09-01 00:22:44 +09:00 committed by Nishant
parent 48dce88aab
commit 0076b5fc1a
3 changed files with 100 additions and 12 deletions

View File

@ -27,13 +27,14 @@ import com.google.common.collect.Lists;
import com.google.common.collect.Maps; import com.google.common.collect.Maps;
import com.metamx.collections.bitmap.BitmapFactory; import com.metamx.collections.bitmap.BitmapFactory;
import com.metamx.collections.bitmap.ImmutableBitmap; import com.metamx.collections.bitmap.ImmutableBitmap;
import com.metamx.collections.bitmap.MutableBitmap;
import com.metamx.common.IAE;
import com.metamx.common.ISE; import com.metamx.common.ISE;
import com.metamx.common.guava.Accumulator; import com.metamx.common.guava.Accumulator;
import com.metamx.common.guava.FunctionalIterable; import com.metamx.common.guava.FunctionalIterable;
import com.metamx.common.guava.Sequence; import com.metamx.common.guava.Sequence;
import com.metamx.common.guava.Sequences; import com.metamx.common.guava.Sequences;
import com.metamx.emitter.EmittingLogger; import com.metamx.emitter.EmittingLogger;
import io.druid.granularity.QueryGranularities;
import io.druid.query.Druids; import io.druid.query.Druids;
import io.druid.query.Query; import io.druid.query.Query;
import io.druid.query.QueryRunner; import io.druid.query.QueryRunner;
@ -53,9 +54,11 @@ import io.druid.segment.Segment;
import io.druid.segment.StorageAdapter; import io.druid.segment.StorageAdapter;
import io.druid.segment.column.BitmapIndex; import io.druid.segment.column.BitmapIndex;
import io.druid.segment.column.Column; import io.druid.segment.column.Column;
import io.druid.segment.column.GenericColumn;
import io.druid.segment.data.IndexedInts; import io.druid.segment.data.IndexedInts;
import io.druid.segment.filter.Filters; import io.druid.segment.filter.Filters;
import org.apache.commons.lang.mutable.MutableInt; import org.apache.commons.lang.mutable.MutableInt;
import org.joda.time.Interval;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
@ -90,6 +93,11 @@ public class SearchQueryRunner implements QueryRunner<Result<SearchResultValue>>
final SearchQuerySpec searchQuerySpec = query.getQuery(); final SearchQuerySpec searchQuerySpec = query.getQuery();
final int limit = query.getLimit(); final int limit = query.getLimit();
final boolean descending = query.isDescending(); final boolean descending = query.isDescending();
final List<Interval> intervals = query.getQuerySegmentSpec().getIntervals();
if (intervals.size() != 1) {
throw new IAE("Should only have one interval, got[%s]", intervals);
}
final Interval interval = intervals.get(0);
// Closing this will cause segfaults in unit tests. // Closing this will cause segfaults in unit tests.
final QueryableIndex index = segment.asQueryableIndex(); final QueryableIndex index = segment.asQueryableIndex();
@ -109,6 +117,26 @@ public class SearchQueryRunner implements QueryRunner<Result<SearchResultValue>>
final ImmutableBitmap baseFilter = final ImmutableBitmap baseFilter =
filter == null ? null : filter.getBitmapIndex(new ColumnSelectorBitmapIndexSelector(bitmapFactory, index)); filter == null ? null : filter.getBitmapIndex(new ColumnSelectorBitmapIndexSelector(bitmapFactory, index));
ImmutableBitmap timeFilteredBitmap;
if (!interval.contains(segment.getDataInterval())) {
MutableBitmap timeBitmap = bitmapFactory.makeEmptyMutableBitmap();
final Column timeColumn = index.getColumn(Column.TIME_COLUMN_NAME);
final GenericColumn timeValues = timeColumn.getGenericColumn();
int startIndex = Math.max(0, getStartIndexOfTime(timeValues, interval.getStartMillis(), true));
int endIndex = Math.min(timeValues.length() - 1, getStartIndexOfTime(timeValues, interval.getEndMillis(), false));
for (int i = startIndex; i <= endIndex; i++) {
timeBitmap.add(i);
}
final ImmutableBitmap finalTimeBitmap = bitmapFactory.makeImmutableBitmap(timeBitmap);
timeFilteredBitmap =
(baseFilter == null) ? finalTimeBitmap : finalTimeBitmap.intersection(baseFilter);
} else {
timeFilteredBitmap = baseFilter;
}
for (DimensionSpec dimension : dimsToSearch) { for (DimensionSpec dimension : dimsToSearch) {
final Column column = index.getColumn(dimension.getDimension()); final Column column = index.getColumn(dimension.getDimension());
if (column == null) { if (column == null) {
@ -127,8 +155,8 @@ public class SearchQueryRunner implements QueryRunner<Result<SearchResultValue>>
continue; continue;
} }
ImmutableBitmap bitmap = bitmapIndex.getBitmap(i); ImmutableBitmap bitmap = bitmapIndex.getBitmap(i);
if (baseFilter != null) { if (timeFilteredBitmap != null) {
bitmap = bitmapFactory.intersection(Arrays.asList(baseFilter, bitmap)); bitmap = bitmapFactory.intersection(Arrays.asList(timeFilteredBitmap, bitmap));
} }
if (bitmap.size() > 0) { if (bitmap.size() > 0) {
MutableInt counter = new MutableInt(bitmap.size()); MutableInt counter = new MutableInt(bitmap.size());
@ -165,7 +193,7 @@ public class SearchQueryRunner implements QueryRunner<Result<SearchResultValue>>
dimsToSearch = dimensions; dimsToSearch = dimensions;
} }
final Sequence<Cursor> cursors = adapter.makeCursors(filter, segment.getDataInterval(), QueryGranularities.ALL, descending); final Sequence<Cursor> cursors = adapter.makeCursors(filter, interval, query.getGranularity(), descending);
final TreeMap<SearchHit, MutableInt> retVal = cursors.accumulate( final TreeMap<SearchHit, MutableInt> retVal = cursors.accumulate(
Maps.<SearchHit, SearchHit, MutableInt>newTreeMap(query.getSort().getComparator()), Maps.<SearchHit, SearchHit, MutableInt>newTreeMap(query.getSort().getComparator()),
@ -219,6 +247,36 @@ public class SearchQueryRunner implements QueryRunner<Result<SearchResultValue>>
return makeReturnResult(limit, retVal); return makeReturnResult(limit, retVal);
} }
/**
 * Finds the row-index boundary for {@code time} in the time column using a binary search.
 *
 * <p>The search presupposes that {@code timeValues} is ordered by ascending timestamp; this
 * method does not verify that.
 *
 * <p>Unlike a plain binary search followed by a backwards scan over equal values, this is a
 * lower-bound search, so it stays logarithmic even when many rows share the same timestamp.
 *
 * @param timeValues column holding one long timestamp per row
 * @param time       timestamp (in the same unit as the column values) to locate
 * @param inclusive  when {@code true}, return the index of the first row whose timestamp is
 *                   greater than or equal to {@code time}; when {@code false}, return the index
 *                   of the last row whose timestamp is strictly less than {@code time}
 * @return the boundary index; ranges over {@code [0, length]} when {@code inclusive} is true and
 *         over {@code [-1, length - 1]} otherwise, so callers must clamp it before use
 */
protected int getStartIndexOfTime(GenericColumn timeValues, long time, boolean inclusive)
{
  // Invariant: every row before `low` is < time, every row at or after `high` is >= time.
  int low = 0;
  int high = timeValues.length();
  while (low < high) {
    // Unsigned shift keeps the midpoint correct even if low + high overflows int.
    final int mid = (low + high) >>> 1;
    if (timeValues.getLongSingleValueRow(mid) < time) {
      low = mid + 1;
    } else {
      high = mid;
    }
  }
  // `low` is now the first index whose value is >= time (or length if there is none).
  return inclusive ? low : low - 1;
}
private Sequence<Result<SearchResultValue>> makeReturnResult( private Sequence<Result<SearchResultValue>> makeReturnResult(
int limit, TreeMap<SearchHit, MutableInt> retVal) int limit, TreeMap<SearchHit, MutableInt> retVal)
{ {

View File

@ -163,14 +163,14 @@ public class SearchQueryRunnerTest
); );
List<SearchHit> expectedHits = Lists.newLinkedList(); List<SearchHit> expectedHits = Lists.newLinkedList();
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "automotive", 186)); expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "automotive", 91));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "mezzanine", 558)); expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "mezzanine", 273));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "travel", 186)); expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "travel", 91));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "health", 186)); expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "health", 91));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "entertainment", 186)); expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "entertainment", 91));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.marketDimension, "total_market", 372)); expectedHits.add(new SearchHit(QueryRunnerTestHelper.marketDimension, "total_market", 182));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.placementishDimension, "a", 186)); expectedHits.add(new SearchHit(QueryRunnerTestHelper.placementishDimension, "a", 91));
expectedHits.add(new SearchHit(QueryRunnerTestHelper.partialNullDimension, "value", 372)); expectedHits.add(new SearchHit(QueryRunnerTestHelper.partialNullDimension, "value", 182));
checkSearchQuery(searchQuery, mergedRunner, expectedHits); checkSearchQuery(searchQuery, mergedRunner, expectedHits);
} }

View File

@ -19,6 +19,7 @@
package io.druid.query.search; package io.druid.query.search;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import com.google.common.collect.Maps; import com.google.common.collect.Maps;
import com.google.common.collect.Sets; import com.google.common.collect.Sets;
@ -36,6 +37,7 @@ import io.druid.segment.QueryableIndexSegment;
import io.druid.segment.TestIndex; import io.druid.segment.TestIndex;
import io.druid.segment.incremental.IncrementalIndex; import io.druid.segment.incremental.IncrementalIndex;
import org.joda.time.DateTime; import org.joda.time.DateTime;
import org.joda.time.Interval;
import org.junit.Assert; import org.junit.Assert;
import org.junit.Test; import org.junit.Test;
import org.junit.runner.RunWith; import org.junit.runner.RunWith;
@ -155,6 +157,34 @@ public class SearchQueryRunnerWithCaseTest
checkSearchQuery(searchQuery, expectedResults); checkSearchQuery(searchQuery, expectedResults);
} }
/**
 * Searches the quality dimension for "otive" with the query interval restricted to
 * 2011-01-12/2011-01-13 and expects the single hit "AutoMotive".
 */
@Test
public void testSearchIntervals()
{
  final Map<String, Set<String>> expectedResults = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER);
  expectedResults.put(qualityDimension, Sets.newHashSet("AutoMotive"));

  final SearchQuery searchQuery = testBuilder()
      .dimensions(Arrays.asList(qualityDimension))
      .intervals("2011-01-12T00:00:00.000Z/2011-01-13T00:00:00.000Z")
      .query("otive")
      .build();

  checkSearchQuery(searchQuery, expectedResults);
}
/**
 * Searches the quality dimension for "business" with the query interval set to
 * 2011-01-10/2011-01-11 and expects no hits for that dimension.
 * NOTE(review): presumably this interval does not overlap the test data, as the
 * method name suggests -- confirm against the test index.
 */
@Test
public void testSearchNoOverrappingIntervals()
{
  final Map<String, Set<String>> expectedResults = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER);
  expectedResults.put(qualityDimension, Sets.<String>newHashSet());

  final SearchQuery searchQuery = testBuilder()
      .dimensions(Arrays.asList(qualityDimension))
      .intervals("2011-01-10T00:00:00.000Z/2011-01-11T00:00:00.000Z")
      .query("business")
      .build();

  checkSearchQuery(searchQuery, expectedResults);
}
@Test @Test
public void testFragmentSearch() public void testFragmentSearch()
{ {