add virtual column support to search query (#12720)

This commit is contained in:
Clint Wylie 2022-07-04 21:58:10 -07:00 committed by GitHub
parent 97a926fb29
commit 36e38b319b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 121 additions and 48 deletions

View File

@ -318,6 +318,7 @@ public class Druids
private int limit;
private QuerySegmentSpec querySegmentSpec;
private List<DimensionSpec> dimensions;
private VirtualColumns virtualColumns;
private SearchQuerySpec querySpec;
private SearchSortSpec sortSpec;
private Map<String, Object> context;
@ -330,6 +331,7 @@ public class Druids
limit = 0;
querySegmentSpec = null;
dimensions = null;
virtualColumns = null;
querySpec = null;
sortSpec = null;
context = null;
@ -344,6 +346,7 @@ public class Druids
limit,
querySegmentSpec,
dimensions,
virtualColumns,
querySpec,
sortSpec,
context
@ -359,6 +362,7 @@ public class Druids
.limit(query.getLimit())
.intervals(query.getQuerySegmentSpec())
.dimensions(query.getDimensions())
.virtualColumns(query.getVirtualColumns())
.query(query.getQuery())
.sortSpec(query.getSort())
.context(query.getContext());
@ -436,6 +440,18 @@ public class Druids
return this;
}
/**
 * Sets the virtual columns of the search query being built from individual columns.
 *
 * @param vc the virtual columns, collected into a list and wrapped with {@code VirtualColumns.create}
 * @return this builder, so calls can be chained
 */
public SearchQueryBuilder virtualColumns(VirtualColumn... vc)
{
  final List<VirtualColumn> columnList = Arrays.asList(vc);
  this.virtualColumns = VirtualColumns.create(columnList);
  return this;
}
/**
 * Sets the virtual columns of the search query being built from an existing {@code VirtualColumns} object.
 *
 * @param vc the virtual columns to use; stored as given, with no copy or null check here
 * @return this builder, so calls can be chained
 */
public SearchQueryBuilder virtualColumns(VirtualColumns vc)
{
  this.virtualColumns = vc;
  return this;
}
public SearchQueryBuilder dimensions(List<DimensionSpec> d)
{
dimensions = d;

View File

@ -27,7 +27,6 @@ import org.apache.druid.segment.ColumnSelectorColumnIndexSelector;
import org.apache.druid.segment.DeprecatedQueryableIndexColumnSelector;
import org.apache.druid.segment.QueryableIndex;
import org.apache.druid.segment.Segment;
import org.apache.druid.segment.VirtualColumns;
import org.apache.druid.segment.column.ColumnHolder;
import org.apache.druid.segment.column.ColumnIndexSupplier;
import org.apache.druid.segment.column.DictionaryEncodedStringValueIndex;
@ -59,7 +58,7 @@ public class AutoStrategy extends SearchStrategy
final ColumnSelector columnSelector = new DeprecatedQueryableIndexColumnSelector(index);
final ColumnIndexSelector selector = new ColumnSelectorColumnIndexSelector(
index.getBitmapFactoryForDimensions(),
VirtualColumns.EMPTY,
query.getVirtualColumns(),
columnSelector
);

View File

@ -30,7 +30,6 @@ import org.apache.druid.segment.Cursor;
import org.apache.druid.segment.DimensionHandlerUtils;
import org.apache.druid.segment.Segment;
import org.apache.druid.segment.StorageAdapter;
import org.apache.druid.segment.VirtualColumns;
import org.joda.time.Interval;
import java.util.List;
@ -90,7 +89,7 @@ public class CursorOnlyStrategy extends SearchStrategy
final Sequence<Cursor> cursors = adapter.makeCursors(
filter,
interval,
VirtualColumns.EMPTY,
query.getVirtualColumns(),
query.getGranularity(),
query.isDescending(),
null

View File

@ -20,6 +20,7 @@
package org.apache.druid.query.search;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions;
import org.apache.druid.java.util.common.granularity.Granularities;
@ -33,9 +34,11 @@ import org.apache.druid.query.dimension.DimensionSpec;
import org.apache.druid.query.filter.DimFilter;
import org.apache.druid.query.ordering.StringComparators;
import org.apache.druid.query.spec.QuerySegmentSpec;
import org.apache.druid.segment.VirtualColumns;
import java.util.List;
import java.util.Map;
import java.util.Objects;
/**
*/
@ -46,6 +49,8 @@ public class SearchQuery extends BaseQuery<Result<SearchResultValue>>
private final DimFilter dimFilter;
private final SearchSortSpec sortSpec;
private final List<DimensionSpec> dimensions;
private final VirtualColumns virtualColumns;
private final SearchQuerySpec querySpec;
private final int limit;
@ -57,6 +62,7 @@ public class SearchQuery extends BaseQuery<Result<SearchResultValue>>
@JsonProperty("limit") int limit,
@JsonProperty("intervals") QuerySegmentSpec querySegmentSpec,
@JsonProperty("searchDimensions") List<DimensionSpec> dimensions,
@JsonProperty("virtualColumns") VirtualColumns virtualColumns,
@JsonProperty("query") SearchQuerySpec querySpec,
@JsonProperty("sort") SearchSortSpec sortSpec,
@JsonProperty("context") Map<String, Object> context
@ -69,6 +75,7 @@ public class SearchQuery extends BaseQuery<Result<SearchResultValue>>
this.sortSpec = sortSpec == null ? DEFAULT_SORT_SPEC : sortSpec;
this.limit = (limit == 0) ? 1000 : limit;
this.dimensions = dimensions;
this.virtualColumns = VirtualColumns.nullToEmpty(virtualColumns);
this.querySpec = querySpec == null ? new AllSearchQuerySpec() : querySpec;
}
@ -127,6 +134,14 @@ public class SearchQuery extends BaseQuery<Result<SearchResultValue>>
return dimensions;
}
/**
 * Returns the virtual columns attached to this search query.
 *
 * The field is assigned in the constructor through {@code VirtualColumns.nullToEmpty}, and serialization of
 * this property is controlled by the custom {@code VirtualColumns.JsonIncludeFilter} value filter.
 * NOTE(review): presumably the filter omits the property when there are no virtual columns; confirm against
 * the filter implementation.
 *
 * @return the virtual columns of this query
 */
@Override
@JsonProperty
@JsonInclude(value = JsonInclude.Include.CUSTOM, valueFilter = VirtualColumns.JsonIncludeFilter.class)
public VirtualColumns getVirtualColumns()
{
  return this.virtualColumns;
}
@JsonProperty("query")
public SearchQuerySpec getQuery()
{
@ -152,6 +167,7 @@ public class SearchQuery extends BaseQuery<Result<SearchResultValue>>
", dimFilter=" + dimFilter +
", granularity='" + getGranularity() + '\'' +
", dimensions=" + dimensions +
", virtualColumns=" + virtualColumns +
", querySpec=" + querySpec +
", querySegmentSpec=" + getQuerySegmentSpec() +
", limit=" + limit +
@ -173,34 +189,25 @@ public class SearchQuery extends BaseQuery<Result<SearchResultValue>>
SearchQuery that = (SearchQuery) o;
if (limit != that.limit) {
return false;
}
if (dimFilter != null ? !dimFilter.equals(that.dimFilter) : that.dimFilter != null) {
return false;
}
if (dimensions != null ? !dimensions.equals(that.dimensions) : that.dimensions != null) {
return false;
}
if (querySpec != null ? !querySpec.equals(that.querySpec) : that.querySpec != null) {
return false;
}
if (sortSpec != null ? !sortSpec.equals(that.sortSpec) : that.sortSpec != null) {
return false;
}
return true;
return limit == that.limit &&
Objects.equals(dimFilter, that.dimFilter) &&
Objects.equals(dimensions, that.dimensions) &&
Objects.equals(virtualColumns, that.virtualColumns) &&
Objects.equals(querySpec, that.querySpec) &&
Objects.equals(sortSpec, that.sortSpec);
}
/**
 * Computes a hash code from the superclass hash and the query's own fields.
 *
 * NOTE(review): this diff view appears to show two bodies back to back: the earlier manual
 * {@code 31 * result + ...} accumulation, which does not include {@code virtualColumns}, followed by the
 * {@code Objects.hash(...)} form, which does. Confirm which lines survive in the committed file.
 */
@Override
public int hashCode()
{
// Manual accumulation over dimFilter, sortSpec, dimensions, querySpec and limit (no virtualColumns).
int result = super.hashCode();
result = 31 * result + (dimFilter != null ? dimFilter.hashCode() : 0);
result = 31 * result + (sortSpec != null ? sortSpec.hashCode() : 0);
result = 31 * result + (dimensions != null ? dimensions.hashCode() : 0);
result = 31 * result + (querySpec != null ? querySpec.hashCode() : 0);
result = 31 * result + limit;
return result;
// Objects.hash form: same inputs as above plus virtualColumns.
return Objects.hash(
super.hashCode(),
dimFilter,
sortSpec,
dimensions,
virtualColumns,
querySpec,
limit
);
}
}

View File

@ -19,11 +19,9 @@
package org.apache.druid.query.search;
import com.google.common.base.Function;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import it.unimi.dsi.fastutil.objects.Object2IntMap;
import it.unimi.dsi.fastutil.objects.Object2IntRBTreeMap;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.ISE;
@ -234,14 +232,10 @@ public class SearchQueryRunner implements QueryRunner<Result<SearchResultValue>>
)
{
Iterable<SearchHit> source = Iterables.transform(
retVal.object2IntEntrySet(), new Function<Object2IntMap.Entry<SearchHit>, SearchHit>()
{
@Override
public SearchHit apply(Object2IntMap.Entry<SearchHit> input)
{
SearchHit hit = input.getKey();
return new SearchHit(hit.getDimension(), hit.getValue(), input.getIntValue());
}
retVal.object2IntEntrySet(),
input -> {
SearchHit hit = input.getKey();
return new SearchHit(hit.getDimension(), hit.getValue(), input.getIntValue());
}
);

View File

@ -45,6 +45,7 @@ import org.apache.druid.segment.column.ColumnHolder;
import org.apache.druid.segment.column.ColumnIndexSupplier;
import org.apache.druid.segment.column.DictionaryEncodedStringValueIndex;
import org.apache.druid.segment.column.NumericColumn;
import org.apache.druid.segment.virtual.VirtualizedColumnInspector;
import org.joda.time.Interval;
import java.util.ArrayList;
@ -77,14 +78,18 @@ public class UseIndexesStrategy extends SearchStrategy
if (index != null) {
// pair of bitmap dims and non-bitmap dims
final Pair<List<DimensionSpec>, List<DimensionSpec>> pair = partitionDimensionList(adapter, searchDims);
final Pair<List<DimensionSpec>, List<DimensionSpec>> pair = partitionDimensionList(
adapter,
query.getVirtualColumns(),
searchDims
);
final List<DimensionSpec> bitmapSuppDims = pair.lhs;
final List<DimensionSpec> nonBitmapSuppDims = pair.rhs;
if (bitmapSuppDims.size() > 0) {
final ColumnIndexSelector selector = new ColumnSelectorColumnIndexSelector(
index.getBitmapFactoryForDimensions(),
VirtualColumns.EMPTY,
query.getVirtualColumns(),
new DeprecatedQueryableIndexColumnSelector(index)
);
@ -94,7 +99,13 @@ public class UseIndexesStrategy extends SearchStrategy
// the cursor-based plan. This can be optimized further. One possible optimization is generating a bitmap index
// from the non-bitmap-support filter, and then using it to compute the filtered result by intersecting bitmaps.
if (filter == null || filter.getBitmapColumnIndex(selector) != null) {
final ImmutableBitmap timeFilteredBitmap = makeTimeFilteredBitmap(index, segment, filter, interval);
final ImmutableBitmap timeFilteredBitmap = makeTimeFilteredBitmap(
index,
segment,
query.getVirtualColumns(),
filter,
interval
);
builder.add(new IndexOnlyExecutor(query, segment, timeFilteredBitmap, bitmapSuppDims));
} else {
// Fall back to cursor-based execution strategy
@ -118,6 +129,7 @@ public class UseIndexesStrategy extends SearchStrategy
*/
private static Pair<List<DimensionSpec>, List<DimensionSpec>> partitionDimensionList(
StorageAdapter adapter,
VirtualColumns virtualColumns,
List<DimensionSpec> dimensions
)
{
@ -127,9 +139,10 @@ public class UseIndexesStrategy extends SearchStrategy
adapter.getAvailableDimensions(),
dimensions
);
VirtualizedColumnInspector columnInspector = new VirtualizedColumnInspector(adapter, virtualColumns);
for (DimensionSpec spec : dimsToSearch) {
ColumnCapabilities capabilities = adapter.getColumnCapabilities(spec.getDimension());
ColumnCapabilities capabilities = columnInspector.getColumnCapabilities(spec.getDimension());
if (capabilities == null) {
continue;
}
@ -147,6 +160,7 @@ public class UseIndexesStrategy extends SearchStrategy
static ImmutableBitmap makeTimeFilteredBitmap(
final QueryableIndex index,
final Segment segment,
final VirtualColumns virtualColumns,
final Filter filter,
final Interval interval
)
@ -158,7 +172,7 @@ public class UseIndexesStrategy extends SearchStrategy
} else {
final ColumnIndexSelector selector = new ColumnSelectorColumnIndexSelector(
index.getBitmapFactoryForDimensions(),
VirtualColumns.EMPTY,
virtualColumns,
new DeprecatedQueryableIndexColumnSelector(index)
);
final BitmapColumnIndex columnIndex = filter.getBitmapColumnIndex(selector);
@ -249,18 +263,20 @@ public class UseIndexesStrategy extends SearchStrategy
final QueryableIndex index = segment.asQueryableIndex();
Preconditions.checkArgument(index != null, "Index should not be null");
ColumnSelectorColumnIndexSelector indexSelector = new ColumnSelectorColumnIndexSelector(
index.getBitmapFactoryForDimensions(),
query.getVirtualColumns(),
new DeprecatedQueryableIndexColumnSelector(index)
);
final Object2IntRBTreeMap<SearchHit> retVal = new Object2IntRBTreeMap<>(query.getSort().getComparator());
retVal.defaultReturnValue(0);
final BitmapFactory bitmapFactory = index.getBitmapFactoryForDimensions();
for (DimensionSpec dimension : dimsToSearch) {
final ColumnHolder columnHolder = index.getColumnHolder(dimension.getDimension());
if (columnHolder == null) {
continue;
}
final ColumnIndexSupplier indexSupplier = columnHolder.getIndexSupplier();
final ColumnIndexSupplier indexSupplier = indexSelector.getIndexSupplier(dimension.getDimension());
ExtractionFn extractionFn = dimension.getExtractionFn();
if (extractionFn == null) {

View File

@ -30,6 +30,7 @@ import org.apache.druid.query.Druids;
import org.apache.druid.query.Result;
import org.apache.druid.query.TableDataSource;
import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
import org.apache.druid.segment.VirtualColumns;
import org.junit.Assert;
import org.junit.Test;
@ -48,6 +49,7 @@ public class SearchQueryQueryToolChestTest
1,
new MultipleIntervalSegmentSpec(ImmutableList.of(Intervals.of("2015-01-01/2015-01-02"))),
ImmutableList.of(Druids.DIMENSION_IDENTITY.apply("dim1")),
VirtualColumns.EMPTY,
new FragmentSearchQuerySpec(ImmutableList.of("a", "b")),
null,
null

View File

@ -21,6 +21,7 @@ package org.apache.druid.query.search;
import com.google.common.base.Suppliers;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.data.input.MapBasedInputRow;
import org.apache.druid.java.util.common.DateTimes;
@ -59,6 +60,7 @@ import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.incremental.IncrementalIndexSchema;
import org.apache.druid.segment.incremental.OnheapIncrementalIndex;
import org.apache.druid.segment.virtual.ListFilteredVirtualColumn;
import org.apache.druid.testing.InitializedNullHandlingTest;
import org.apache.druid.timeline.SegmentId;
import org.junit.Assert;
@ -784,6 +786,44 @@ public class SearchQueryRunnerTest extends InitializedNullHandlingTest
checkSearchQuery(searchQuery, noHit);
}
/**
 * Runs a search for "e" over two list-filtered virtual columns, "v0" and "v1", instead of the physical
 * placement and placementish dimensions, and checks the hit counts reported for each virtual column.
 */
@Test
public void testSearchSameValueInMultiDimsVirtualColumns()
{
  // "v0" wraps the placement dimension with the value list {"preferred"}.
  final ListFilteredVirtualColumn placementColumn = new ListFilteredVirtualColumn(
      "v0",
      DefaultDimensionSpec.of(QueryRunnerTestHelper.PLACEMENT_DIMENSION),
      ImmutableSet.of("preferred"),
      true
  );
  // "v1" wraps the placementish dimension with the value list {"e"}.
  final ListFilteredVirtualColumn placementishColumn = new ListFilteredVirtualColumn(
      "v1",
      DefaultDimensionSpec.of(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION),
      ImmutableSet.of("e"),
      true
  );

  final SearchQuery searchQuery = Druids.newSearchQueryBuilder()
                                        .dataSource(QueryRunnerTestHelper.DATA_SOURCE)
                                        .granularity(QueryRunnerTestHelper.ALL_GRAN)
                                        .intervals(QueryRunnerTestHelper.FULL_ON_INTERVAL_SPEC)
                                        .dimensions(Arrays.asList("v0", "v1"))
                                        .virtualColumns(placementColumn, placementishColumn)
                                        .query("e")
                                        .build();

  // Same results as testSearchSameValueInMultiDims, except that v1 has no 'preferred' hit because it is
  // filtered down to just 'e'.
  final List<SearchHit> expectedHits = new ArrayList<>();
  expectedHits.add(new SearchHit("v0", "preferred", 1209));
  expectedHits.add(new SearchHit("v1", "e", 93));

  checkSearchQuery(searchQuery, expectedHits);
}
private void checkSearchQuery(Query searchQuery, List<SearchHit> expectedResults)
{
checkSearchQuery(searchQuery, runner, expectedResults);