mirror of https://github.com/apache/druid.git
Interval bug fix for search query (#2903)
* support query granularity and interval for search query * skip unnecessary bitmap calculation when the query interval contains the whole data interval of the given segments. * use binary search to find the start and end indexes for the given interval * fix based on comment * bug fix based on the review comments and add unit tests
This commit is contained in:
parent
48dce88aab
commit
0076b5fc1a
|
@ -27,13 +27,14 @@ import com.google.common.collect.Lists;
|
||||||
import com.google.common.collect.Maps;
|
import com.google.common.collect.Maps;
|
||||||
import com.metamx.collections.bitmap.BitmapFactory;
|
import com.metamx.collections.bitmap.BitmapFactory;
|
||||||
import com.metamx.collections.bitmap.ImmutableBitmap;
|
import com.metamx.collections.bitmap.ImmutableBitmap;
|
||||||
|
import com.metamx.collections.bitmap.MutableBitmap;
|
||||||
|
import com.metamx.common.IAE;
|
||||||
import com.metamx.common.ISE;
|
import com.metamx.common.ISE;
|
||||||
import com.metamx.common.guava.Accumulator;
|
import com.metamx.common.guava.Accumulator;
|
||||||
import com.metamx.common.guava.FunctionalIterable;
|
import com.metamx.common.guava.FunctionalIterable;
|
||||||
import com.metamx.common.guava.Sequence;
|
import com.metamx.common.guava.Sequence;
|
||||||
import com.metamx.common.guava.Sequences;
|
import com.metamx.common.guava.Sequences;
|
||||||
import com.metamx.emitter.EmittingLogger;
|
import com.metamx.emitter.EmittingLogger;
|
||||||
import io.druid.granularity.QueryGranularities;
|
|
||||||
import io.druid.query.Druids;
|
import io.druid.query.Druids;
|
||||||
import io.druid.query.Query;
|
import io.druid.query.Query;
|
||||||
import io.druid.query.QueryRunner;
|
import io.druid.query.QueryRunner;
|
||||||
|
@ -53,9 +54,11 @@ import io.druid.segment.Segment;
|
||||||
import io.druid.segment.StorageAdapter;
|
import io.druid.segment.StorageAdapter;
|
||||||
import io.druid.segment.column.BitmapIndex;
|
import io.druid.segment.column.BitmapIndex;
|
||||||
import io.druid.segment.column.Column;
|
import io.druid.segment.column.Column;
|
||||||
|
import io.druid.segment.column.GenericColumn;
|
||||||
import io.druid.segment.data.IndexedInts;
|
import io.druid.segment.data.IndexedInts;
|
||||||
import io.druid.segment.filter.Filters;
|
import io.druid.segment.filter.Filters;
|
||||||
import org.apache.commons.lang.mutable.MutableInt;
|
import org.apache.commons.lang.mutable.MutableInt;
|
||||||
|
import org.joda.time.Interval;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
@ -90,6 +93,11 @@ public class SearchQueryRunner implements QueryRunner<Result<SearchResultValue>>
|
||||||
final SearchQuerySpec searchQuerySpec = query.getQuery();
|
final SearchQuerySpec searchQuerySpec = query.getQuery();
|
||||||
final int limit = query.getLimit();
|
final int limit = query.getLimit();
|
||||||
final boolean descending = query.isDescending();
|
final boolean descending = query.isDescending();
|
||||||
|
final List<Interval> intervals = query.getQuerySegmentSpec().getIntervals();
|
||||||
|
if (intervals.size() != 1) {
|
||||||
|
throw new IAE("Should only have one interval, got[%s]", intervals);
|
||||||
|
}
|
||||||
|
final Interval interval = intervals.get(0);
|
||||||
|
|
||||||
// Closing this will cause segfaults in unit tests.
|
// Closing this will cause segfaults in unit tests.
|
||||||
final QueryableIndex index = segment.asQueryableIndex();
|
final QueryableIndex index = segment.asQueryableIndex();
|
||||||
|
@ -109,6 +117,26 @@ public class SearchQueryRunner implements QueryRunner<Result<SearchResultValue>>
|
||||||
final ImmutableBitmap baseFilter =
|
final ImmutableBitmap baseFilter =
|
||||||
filter == null ? null : filter.getBitmapIndex(new ColumnSelectorBitmapIndexSelector(bitmapFactory, index));
|
filter == null ? null : filter.getBitmapIndex(new ColumnSelectorBitmapIndexSelector(bitmapFactory, index));
|
||||||
|
|
||||||
|
ImmutableBitmap timeFilteredBitmap;
|
||||||
|
if (!interval.contains(segment.getDataInterval())) {
|
||||||
|
MutableBitmap timeBitmap = bitmapFactory.makeEmptyMutableBitmap();
|
||||||
|
final Column timeColumn = index.getColumn(Column.TIME_COLUMN_NAME);
|
||||||
|
final GenericColumn timeValues = timeColumn.getGenericColumn();
|
||||||
|
|
||||||
|
int startIndex = Math.max(0, getStartIndexOfTime(timeValues, interval.getStartMillis(), true));
|
||||||
|
int endIndex = Math.min(timeValues.length() - 1, getStartIndexOfTime(timeValues, interval.getEndMillis(), false));
|
||||||
|
|
||||||
|
for (int i = startIndex; i <= endIndex; i++) {
|
||||||
|
timeBitmap.add(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
final ImmutableBitmap finalTimeBitmap = bitmapFactory.makeImmutableBitmap(timeBitmap);
|
||||||
|
timeFilteredBitmap =
|
||||||
|
(baseFilter == null) ? finalTimeBitmap : finalTimeBitmap.intersection(baseFilter);
|
||||||
|
} else {
|
||||||
|
timeFilteredBitmap = baseFilter;
|
||||||
|
}
|
||||||
|
|
||||||
for (DimensionSpec dimension : dimsToSearch) {
|
for (DimensionSpec dimension : dimsToSearch) {
|
||||||
final Column column = index.getColumn(dimension.getDimension());
|
final Column column = index.getColumn(dimension.getDimension());
|
||||||
if (column == null) {
|
if (column == null) {
|
||||||
|
@ -127,8 +155,8 @@ public class SearchQueryRunner implements QueryRunner<Result<SearchResultValue>>
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
ImmutableBitmap bitmap = bitmapIndex.getBitmap(i);
|
ImmutableBitmap bitmap = bitmapIndex.getBitmap(i);
|
||||||
if (baseFilter != null) {
|
if (timeFilteredBitmap != null) {
|
||||||
bitmap = bitmapFactory.intersection(Arrays.asList(baseFilter, bitmap));
|
bitmap = bitmapFactory.intersection(Arrays.asList(timeFilteredBitmap, bitmap));
|
||||||
}
|
}
|
||||||
if (bitmap.size() > 0) {
|
if (bitmap.size() > 0) {
|
||||||
MutableInt counter = new MutableInt(bitmap.size());
|
MutableInt counter = new MutableInt(bitmap.size());
|
||||||
|
@ -165,7 +193,7 @@ public class SearchQueryRunner implements QueryRunner<Result<SearchResultValue>>
|
||||||
dimsToSearch = dimensions;
|
dimsToSearch = dimensions;
|
||||||
}
|
}
|
||||||
|
|
||||||
final Sequence<Cursor> cursors = adapter.makeCursors(filter, segment.getDataInterval(), QueryGranularities.ALL, descending);
|
final Sequence<Cursor> cursors = adapter.makeCursors(filter, interval, query.getGranularity(), descending);
|
||||||
|
|
||||||
final TreeMap<SearchHit, MutableInt> retVal = cursors.accumulate(
|
final TreeMap<SearchHit, MutableInt> retVal = cursors.accumulate(
|
||||||
Maps.<SearchHit, SearchHit, MutableInt>newTreeMap(query.getSort().getComparator()),
|
Maps.<SearchHit, SearchHit, MutableInt>newTreeMap(query.getSort().getComparator()),
|
||||||
|
@ -219,6 +247,36 @@ public class SearchQueryRunner implements QueryRunner<Result<SearchResultValue>>
|
||||||
return makeReturnResult(limit, retVal);
|
return makeReturnResult(limit, retVal);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Binary-searches the rows of {@code timeValues} for {@code time} and returns a row index
 * that can serve as a bound of a row range.
 *
 * <p>Despite the name, the visible caller uses this for both ends of the query interval:
 * {@code inclusive == true} for the interval start and {@code inclusive == false} for the
 * interval end.
 *
 * <p>NOTE(review): the search assumes the rows of {@code timeValues} are sorted in
 * ascending order -- confirm against the segment format.
 *
 * @param timeValues column whose rows are read with {@code getLongSingleValueRow}
 * @param time       timestamp to locate
 * @param inclusive  if {@code true}, return the index of the first row equal to {@code time},
 *                   or the insertion index when no row matches (this can equal
 *                   {@code timeValues.length()}); if {@code false}, return the index just
 *                   before that position (this can be {@code -1})
 * @return the row index described above; callers must clamp it to the valid row range
 */
protected int getStartIndexOfTime(GenericColumn timeValues, long time, boolean inclusive)
|
||||||
|
{
|
||||||
|
int low = 0;
|
||||||
|
int high = timeValues.length() - 1;
|
||||||
|
|
||||||
|
while (low <= high) {
|
||||||
|
// unsigned shift keeps the midpoint correct even if low + high overflows int
int mid = (low + high) >>> 1;
|
||||||
|
long midVal = timeValues.getLongSingleValueRow(mid);
|
||||||
|
|
||||||
|
if (midVal < time)
|
||||||
|
low = mid + 1;
|
||||||
|
else if (midVal > time)
|
||||||
|
high = mid - 1;
|
||||||
|
else { // key found
|
||||||
|
int i;
|
||||||
|
// rewind the index of the same time values
|
||||||
|
// linear backward scan: one row read per duplicate of time that precedes mid
for (i = mid - 1; i >= 0; i--) {
|
||||||
|
long prev = timeValues.getLongSingleValueRow(i);
|
||||||
|
if (time != prev) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// i is the last row before mid whose value differs from time, or -1 if there is none
return inclusive ? i + 1 : i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// key not found.
|
||||||
|
// return insert index
|
||||||
|
// low is the position where time would be inserted; low - 1 is the row just before it
return inclusive ? low : low - 1;
|
||||||
|
}
|
||||||
|
|
||||||
private Sequence<Result<SearchResultValue>> makeReturnResult(
|
private Sequence<Result<SearchResultValue>> makeReturnResult(
|
||||||
int limit, TreeMap<SearchHit, MutableInt> retVal)
|
int limit, TreeMap<SearchHit, MutableInt> retVal)
|
||||||
{
|
{
|
||||||
|
|
|
@ -163,14 +163,14 @@ public class SearchQueryRunnerTest
|
||||||
);
|
);
|
||||||
|
|
||||||
List<SearchHit> expectedHits = Lists.newLinkedList();
|
List<SearchHit> expectedHits = Lists.newLinkedList();
|
||||||
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "automotive", 186));
|
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "automotive", 91));
|
||||||
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "mezzanine", 558));
|
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "mezzanine", 273));
|
||||||
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "travel", 186));
|
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "travel", 91));
|
||||||
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "health", 186));
|
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "health", 91));
|
||||||
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "entertainment", 186));
|
expectedHits.add(new SearchHit(QueryRunnerTestHelper.qualityDimension, "entertainment", 91));
|
||||||
expectedHits.add(new SearchHit(QueryRunnerTestHelper.marketDimension, "total_market", 372));
|
expectedHits.add(new SearchHit(QueryRunnerTestHelper.marketDimension, "total_market", 182));
|
||||||
expectedHits.add(new SearchHit(QueryRunnerTestHelper.placementishDimension, "a", 186));
|
expectedHits.add(new SearchHit(QueryRunnerTestHelper.placementishDimension, "a", 91));
|
||||||
expectedHits.add(new SearchHit(QueryRunnerTestHelper.partialNullDimension, "value", 372));
|
expectedHits.add(new SearchHit(QueryRunnerTestHelper.partialNullDimension, "value", 182));
|
||||||
|
|
||||||
checkSearchQuery(searchQuery, mergedRunner, expectedHits);
|
checkSearchQuery(searchQuery, mergedRunner, expectedHits);
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,6 +19,7 @@
|
||||||
|
|
||||||
package io.druid.query.search;
|
package io.druid.query.search;
|
||||||
|
|
||||||
|
import com.google.common.collect.ImmutableList;
|
||||||
import com.google.common.collect.Lists;
|
import com.google.common.collect.Lists;
|
||||||
import com.google.common.collect.Maps;
|
import com.google.common.collect.Maps;
|
||||||
import com.google.common.collect.Sets;
|
import com.google.common.collect.Sets;
|
||||||
|
@ -36,6 +37,7 @@ import io.druid.segment.QueryableIndexSegment;
|
||||||
import io.druid.segment.TestIndex;
|
import io.druid.segment.TestIndex;
|
||||||
import io.druid.segment.incremental.IncrementalIndex;
|
import io.druid.segment.incremental.IncrementalIndex;
|
||||||
import org.joda.time.DateTime;
|
import org.joda.time.DateTime;
|
||||||
|
import org.joda.time.Interval;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import org.junit.runner.RunWith;
|
import org.junit.runner.RunWith;
|
||||||
|
@ -155,6 +157,34 @@ public class SearchQueryRunnerWithCaseTest
|
||||||
checkSearchQuery(searchQuery, expectedResults);
|
checkSearchQuery(searchQuery, expectedResults);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Runs a search for the fragment "otive" on {@code qualityDimension}, restricted to the
 * interval 2011-01-12/2011-01-13, and expects the single value "AutoMotive" to be returned.
 */
@Test
|
||||||
|
public void testSearchIntervals()
|
||||||
|
{
|
||||||
|
SearchQuery searchQuery;
|
||||||
|
Druids.SearchQueryBuilder builder = testBuilder()
|
||||||
|
.dimensions(Arrays.asList(qualityDimension))
|
||||||
|
.intervals("2011-01-12T00:00:00.000Z/2011-01-13T00:00:00.000Z");
|
||||||
|
// expected values are keyed by dimension name, compared case-insensitively
Map<String, Set<String>> expectedResults = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER);
|
||||||
|
|
||||||
|
searchQuery = builder.query("otive").build();
|
||||||
|
expectedResults.put(qualityDimension, Sets.newHashSet("AutoMotive"));
|
||||||
|
checkSearchQuery(searchQuery, expectedResults);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Runs a search for "business" on {@code qualityDimension}, restricted to the interval
 * 2011-01-10/2011-01-11, and expects no values to be returned for that dimension.
 *
 * <p>NOTE(review): presumably the test index holds no rows inside this interval, which is
 * why an empty result is expected -- confirm against the test data.
 */
@Test
|
||||||
|
public void testSearchNoOverrappingIntervals()
|
||||||
|
{
|
||||||
|
SearchQuery searchQuery;
|
||||||
|
Druids.SearchQueryBuilder builder = testBuilder()
|
||||||
|
.dimensions(Arrays.asList(qualityDimension))
|
||||||
|
.intervals("2011-01-10T00:00:00.000Z/2011-01-11T00:00:00.000Z");
|
||||||
|
// expected values are keyed by dimension name, compared case-insensitively
Map<String, Set<String>> expectedResults = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER);
|
||||||
|
|
||||||
|
searchQuery = builder.query("business").build();
|
||||||
|
// an empty set: no hit is expected for this dimension
expectedResults.put(qualityDimension, Sets.<String>newHashSet());
|
||||||
|
checkSearchQuery(searchQuery, expectedResults);
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testFragmentSearch()
|
public void testFragmentSearch()
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue