mirror of https://github.com/apache/druid.git
Interval bug fix for search query (#2903)
* support query granularity and interval for search query * skip unnecessary bitmap calculation when the query interval contains the whole data interval of the given segments. * use binary search to find start and end index for the given interval * fix based on comment * bug fix based on the review comments and add unit tests
This commit is contained in:
parent
48dce88aab
commit
0076b5fc1a
|
@ -27,13 +27,14 @@ import com.google.common.collect.Lists;
|
|||
import com.google.common.collect.Maps;
|
||||
import com.metamx.collections.bitmap.BitmapFactory;
|
||||
import com.metamx.collections.bitmap.ImmutableBitmap;
|
||||
import com.metamx.collections.bitmap.MutableBitmap;
|
||||
import com.metamx.common.IAE;
|
||||
import com.metamx.common.ISE;
|
||||
import com.metamx.common.guava.Accumulator;
|
||||
import com.metamx.common.guava.FunctionalIterable;
|
||||
import com.metamx.common.guava.Sequence;
|
||||
import com.metamx.common.guava.Sequences;
|
||||
import com.metamx.emitter.EmittingLogger;
|
||||
import io.druid.granularity.QueryGranularities;
|
||||
import io.druid.query.Druids;
|
||||
import io.druid.query.Query;
|
||||
import io.druid.query.QueryRunner;
|
||||
|
@ -53,9 +54,11 @@ import io.druid.segment.Segment;
|
|||
import io.druid.segment.StorageAdapter;
|
||||
import io.druid.segment.column.BitmapIndex;
|
||||
import io.druid.segment.column.Column;
|
||||
import io.druid.segment.column.GenericColumn;
|
||||
import io.druid.segment.data.IndexedInts;
|
||||
import io.druid.segment.filter.Filters;
|
||||
import org.apache.commons.lang.mutable.MutableInt;
|
||||
import org.joda.time.Interval;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
@ -90,6 +93,11 @@ public class SearchQueryRunner implements QueryRunner<Result<SearchResultValue>>
|
|||
final SearchQuerySpec searchQuerySpec = query.getQuery();
|
||||
final int limit = query.getLimit();
|
||||
final boolean descending = query.isDescending();
|
||||
final List<Interval> intervals = query.getQuerySegmentSpec().getIntervals();
|
||||
if (intervals.size() != 1) {
|
||||
throw new IAE("Should only have one interval, got[%s]", intervals);
|
||||
}
|
||||
final Interval interval = intervals.get(0);
|
||||
|
||||
// Closing this will cause segfaults in unit tests.
|
||||
final QueryableIndex index = segment.asQueryableIndex();
|
||||
|
@ -109,6 +117,26 @@ public class SearchQueryRunner implements QueryRunner<Result<SearchResultValue>>
|
|||
final ImmutableBitmap baseFilter =
|
||||
filter == null ? null : filter.getBitmapIndex(new ColumnSelectorBitmapIndexSelector(bitmapFactory, index));
|
||||
|
||||
ImmutableBitmap timeFilteredBitmap;
|
||||
if (!interval.contains(segment.getDataInterval())) {
|
||||
MutableBitmap timeBitmap = bitmapFactory.makeEmptyMutableBitmap();
|
||||
final Column timeColumn = index.getColumn(Column.TIME_COLUMN_NAME);
|
||||
final GenericColumn timeValues = timeColumn.getGenericColumn();
|
||||
|
||||
int startIndex = Math.max(0, getStartIndexOfTime(timeValues, interval.getStartMillis(), true));
|
||||
int endIndex = Math.min(timeValues.length() - 1, getStartIndexOfTime(timeValues, interval.getEndMillis(), false));
|
||||
|
||||
for (int i = startIndex; i <= endIndex; i++) {
|
||||
timeBitmap.add(i);
|
||||
}
|
||||
|
||||
final ImmutableBitmap finalTimeBitmap = bitmapFactory.makeImmutableBitmap(timeBitmap);
|
||||
timeFilteredBitmap =
|
||||
(baseFilter == null) ? finalTimeBitmap : finalTimeBitmap.intersection(baseFilter);
|
||||
} else {
|
||||
timeFilteredBitmap = baseFilter;
|
||||
}
|
||||
|
||||
for (DimensionSpec dimension : dimsToSearch) {
|
||||
final Column column = index.getColumn(dimension.getDimension());
|
||||
if (column == null) {
|
||||
|
@ -127,8 +155,8 @@ public class SearchQueryRunner implements QueryRunner<Result<SearchResultValue>>
|
|||
continue;
|
||||
}
|
||||
ImmutableBitmap bitmap = bitmapIndex.getBitmap(i);
|
||||
if (baseFilter != null) {
|
||||
bitmap = bitmapFactory.intersection(Arrays.asList(baseFilter, bitmap));
|
||||
if (timeFilteredBitmap != null) {
|
||||
bitmap = bitmapFactory.intersection(Arrays.asList(timeFilteredBitmap, bitmap));
|
||||
}
|
||||
if (bitmap.size() > 0) {
|
||||
MutableInt counter = new MutableInt(bitmap.size());
|
||||
|
@ -165,7 +193,7 @@ public class SearchQueryRunner implements QueryRunner<Result<SearchResultValue>>
|
|||
dimsToSearch = dimensions;
|
||||
}
|
||||
|
||||
final Sequence<Cursor> cursors = adapter.makeCursors(filter, segment.getDataInterval(), QueryGranularities.ALL, descending);
|
||||
final Sequence<Cursor> cursors = adapter.makeCursors(filter, interval, query.getGranularity(), descending);
|
||||
|
||||
final TreeMap<SearchHit, MutableInt> retVal = cursors.accumulate(
|
||||
Maps.<SearchHit, SearchHit, MutableInt>newTreeMap(query.getSort().getComparator()),
|
||||
|
@ -219,6 +247,36 @@ public class SearchQueryRunner implements QueryRunner<Result<SearchResultValue>>
|
|||
return makeReturnResult(limit, retVal);
|
||||
}
|
||||
|
||||
protected int getStartIndexOfTime(GenericColumn timeValues, long time, boolean inclusive)
|
||||
{
|
||||
int low = 0;
|
||||
int high = timeValues.length() - 1;
|
||||
|
||||
while (low <= high) {
|
||||
int mid = (low + high) >>> 1;
|
||||
long midVal = timeValues.getLongSingleValueRow(mid);
|
||||
|
||||
if (midVal < time)
|
||||
low = mid + 1;
|
||||
else if (midVal > time)
|
||||
high = mid - 1;
|
||||
else { // key found
|
||||
int i;
|
||||
// rewind the index of the same time values
|
||||
for (i = mid - 1; i >= 0; i--) {
|
||||
long prev = timeValues.getLongSingleValueRow(i);
|
||||
if (time != prev) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return inclusive ? i + 1 : i;
|
||||
}
|
||||
}
|
||||
// key not found.
|
||||
// return insert index
|
||||
return inclusive ? low : low - 1;
|
||||
}
|
||||
|
||||
private Sequence<Result<SearchResultValue>> makeReturnResult(
|
||||
int limit, TreeMap<SearchHit, MutableInt> retVal)
|
||||
{
|
||||
|
|
|
@ -163,14 +163,14 @@ public class SearchQueryRunnerTest
|
|||
);
|
||||
|
||||
List<SearchHit> expectedHits = Lists.newLinkedList();
|
||||
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "automotive", 186));
|
||||
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "mezzanine", 558));
|
||||
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "travel", 186));
|
||||
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "health", 186));
|
||||
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "entertainment", 186));
|
||||
expectedHits.add(new SearchHit(QueryRunnerTestHelper.marketDimension, "total_market", 372));
|
||||
expectedHits.add(new SearchHit(QueryRunnerTestHelper.placementishDimension, "a", 186));
|
||||
expectedHits.add(new SearchHit(QueryRunnerTestHelper.partialNullDimension, "value", 372));
|
||||
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "automotive", 91));
|
||||
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "mezzanine", 273));
|
||||
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "travel", 91));
|
||||
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "health", 91));
|
||||
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "entertainment", 91));
|
||||
expectedHits.add(new SearchHit(QueryRunnerTestHelper.marketDimension, "total_market", 182));
|
||||
expectedHits.add(new SearchHit(QueryRunnerTestHelper.placementishDimension, "a", 91));
|
||||
expectedHits.add(new SearchHit(QueryRunnerTestHelper.partialNullDimension, "value", 182));
|
||||
|
||||
checkSearchQuery(searchQuery, mergedRunner, expectedHits);
|
||||
}
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
|
||||
package io.druid.query.search;
|
||||
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Maps;
|
||||
import com.google.common.collect.Sets;
|
||||
|
@ -36,6 +37,7 @@ import io.druid.segment.QueryableIndexSegment;
|
|||
import io.druid.segment.TestIndex;
|
||||
import io.druid.segment.incremental.IncrementalIndex;
|
||||
import org.joda.time.DateTime;
|
||||
import org.joda.time.Interval;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
|
@ -155,6 +157,34 @@ public class SearchQueryRunnerWithCaseTest
|
|||
checkSearchQuery(searchQuery, expectedResults);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSearchIntervals()
|
||||
{
|
||||
SearchQuery searchQuery;
|
||||
Druids.SearchQueryBuilder builder = testBuilder()
|
||||
.dimensions(Arrays.asList(qualityDimension))
|
||||
.intervals("2011-01-12T00:00:00.000Z/2011-01-13T00:00:00.000Z");
|
||||
Map<String, Set<String>> expectedResults = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER);
|
||||
|
||||
searchQuery = builder.query("otive").build();
|
||||
expectedResults.put(qualityDimension, Sets.newHashSet("AutoMotive"));
|
||||
checkSearchQuery(searchQuery, expectedResults);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSearchNoOverrappingIntervals()
|
||||
{
|
||||
SearchQuery searchQuery;
|
||||
Druids.SearchQueryBuilder builder = testBuilder()
|
||||
.dimensions(Arrays.asList(qualityDimension))
|
||||
.intervals("2011-01-10T00:00:00.000Z/2011-01-11T00:00:00.000Z");
|
||||
Map<String, Set<String>> expectedResults = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER);
|
||||
|
||||
searchQuery = builder.query("business").build();
|
||||
expectedResults.put(qualityDimension, Sets.<String>newHashSet());
|
||||
checkSearchQuery(searchQuery, expectedResults);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFragmentSearch()
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue