add virtual column support to search query (#12720)

This commit is contained in:
Clint Wylie 2022-07-04 21:58:10 -07:00 committed by GitHub
parent 97a926fb29
commit 36e38b319b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 121 additions and 48 deletions

View File

@ -318,6 +318,7 @@ public class Druids
private int limit; private int limit;
private QuerySegmentSpec querySegmentSpec; private QuerySegmentSpec querySegmentSpec;
private List<DimensionSpec> dimensions; private List<DimensionSpec> dimensions;
private VirtualColumns virtualColumns;
private SearchQuerySpec querySpec; private SearchQuerySpec querySpec;
private SearchSortSpec sortSpec; private SearchSortSpec sortSpec;
private Map<String, Object> context; private Map<String, Object> context;
@ -330,6 +331,7 @@ public class Druids
limit = 0; limit = 0;
querySegmentSpec = null; querySegmentSpec = null;
dimensions = null; dimensions = null;
virtualColumns = null;
querySpec = null; querySpec = null;
sortSpec = null; sortSpec = null;
context = null; context = null;
@ -344,6 +346,7 @@ public class Druids
limit, limit,
querySegmentSpec, querySegmentSpec,
dimensions, dimensions,
virtualColumns,
querySpec, querySpec,
sortSpec, sortSpec,
context context
@ -359,6 +362,7 @@ public class Druids
.limit(query.getLimit()) .limit(query.getLimit())
.intervals(query.getQuerySegmentSpec()) .intervals(query.getQuerySegmentSpec())
.dimensions(query.getDimensions()) .dimensions(query.getDimensions())
.virtualColumns(query.getVirtualColumns())
.query(query.getQuery()) .query(query.getQuery())
.sortSpec(query.getSort()) .sortSpec(query.getSort())
.context(query.getContext()); .context(query.getContext());
@ -436,6 +440,18 @@ public class Druids
return this; return this;
} }
/**
 * Sets the virtual columns of the search query being built from individual columns.
 *
 * @param vc the virtual columns; they are wrapped into a {@link VirtualColumns} via
 *           {@code VirtualColumns.create}
 * @return this builder, for call chaining
 */
public SearchQueryBuilder virtualColumns(VirtualColumn... vc)
{
  final List<VirtualColumn> columnList = Arrays.asList(vc);
  this.virtualColumns = VirtualColumns.create(columnList);
  return this;
}
/**
 * Sets the virtual columns of the search query being built.
 *
 * @param vc the virtual columns, stored as given (no copy or null check is made here)
 * @return this builder, for call chaining
 */
public SearchQueryBuilder virtualColumns(VirtualColumns vc)
{
  this.virtualColumns = vc;
  return this;
}
public SearchQueryBuilder dimensions(List<DimensionSpec> d) public SearchQueryBuilder dimensions(List<DimensionSpec> d)
{ {
dimensions = d; dimensions = d;

View File

@ -27,7 +27,6 @@ import org.apache.druid.segment.ColumnSelectorColumnIndexSelector;
import org.apache.druid.segment.DeprecatedQueryableIndexColumnSelector; import org.apache.druid.segment.DeprecatedQueryableIndexColumnSelector;
import org.apache.druid.segment.QueryableIndex; import org.apache.druid.segment.QueryableIndex;
import org.apache.druid.segment.Segment; import org.apache.druid.segment.Segment;
import org.apache.druid.segment.VirtualColumns;
import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnHolder;
import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.column.ColumnIndexSupplier;
import org.apache.druid.segment.column.DictionaryEncodedStringValueIndex; import org.apache.druid.segment.column.DictionaryEncodedStringValueIndex;
@ -59,7 +58,7 @@ public class AutoStrategy extends SearchStrategy
final ColumnSelector columnSelector = new DeprecatedQueryableIndexColumnSelector(index); final ColumnSelector columnSelector = new DeprecatedQueryableIndexColumnSelector(index);
final ColumnIndexSelector selector = new ColumnSelectorColumnIndexSelector( final ColumnIndexSelector selector = new ColumnSelectorColumnIndexSelector(
index.getBitmapFactoryForDimensions(), index.getBitmapFactoryForDimensions(),
VirtualColumns.EMPTY, query.getVirtualColumns(),
columnSelector columnSelector
); );

View File

@ -30,7 +30,6 @@ import org.apache.druid.segment.Cursor;
import org.apache.druid.segment.DimensionHandlerUtils; import org.apache.druid.segment.DimensionHandlerUtils;
import org.apache.druid.segment.Segment; import org.apache.druid.segment.Segment;
import org.apache.druid.segment.StorageAdapter; import org.apache.druid.segment.StorageAdapter;
import org.apache.druid.segment.VirtualColumns;
import org.joda.time.Interval; import org.joda.time.Interval;
import java.util.List; import java.util.List;
@ -90,7 +89,7 @@ public class CursorOnlyStrategy extends SearchStrategy
final Sequence<Cursor> cursors = adapter.makeCursors( final Sequence<Cursor> cursors = adapter.makeCursors(
filter, filter,
interval, interval,
VirtualColumns.EMPTY, query.getVirtualColumns(),
query.getGranularity(), query.getGranularity(),
query.isDescending(), query.isDescending(),
null null

View File

@ -20,6 +20,7 @@
package org.apache.druid.query.search; package org.apache.druid.query.search;
import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions; import com.google.common.base.Preconditions;
import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.granularity.Granularities;
@ -33,9 +34,11 @@ import org.apache.druid.query.dimension.DimensionSpec;
import org.apache.druid.query.filter.DimFilter; import org.apache.druid.query.filter.DimFilter;
import org.apache.druid.query.ordering.StringComparators; import org.apache.druid.query.ordering.StringComparators;
import org.apache.druid.query.spec.QuerySegmentSpec; import org.apache.druid.query.spec.QuerySegmentSpec;
import org.apache.druid.segment.VirtualColumns;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Objects;
/** /**
*/ */
@ -46,6 +49,8 @@ public class SearchQuery extends BaseQuery<Result<SearchResultValue>>
private final DimFilter dimFilter; private final DimFilter dimFilter;
private final SearchSortSpec sortSpec; private final SearchSortSpec sortSpec;
private final List<DimensionSpec> dimensions; private final List<DimensionSpec> dimensions;
private final VirtualColumns virtualColumns;
private final SearchQuerySpec querySpec; private final SearchQuerySpec querySpec;
private final int limit; private final int limit;
@ -57,6 +62,7 @@ public class SearchQuery extends BaseQuery<Result<SearchResultValue>>
@JsonProperty("limit") int limit, @JsonProperty("limit") int limit,
@JsonProperty("intervals") QuerySegmentSpec querySegmentSpec, @JsonProperty("intervals") QuerySegmentSpec querySegmentSpec,
@JsonProperty("searchDimensions") List<DimensionSpec> dimensions, @JsonProperty("searchDimensions") List<DimensionSpec> dimensions,
@JsonProperty("virtualColumns") VirtualColumns virtualColumns,
@JsonProperty("query") SearchQuerySpec querySpec, @JsonProperty("query") SearchQuerySpec querySpec,
@JsonProperty("sort") SearchSortSpec sortSpec, @JsonProperty("sort") SearchSortSpec sortSpec,
@JsonProperty("context") Map<String, Object> context @JsonProperty("context") Map<String, Object> context
@ -69,6 +75,7 @@ public class SearchQuery extends BaseQuery<Result<SearchResultValue>>
this.sortSpec = sortSpec == null ? DEFAULT_SORT_SPEC : sortSpec; this.sortSpec = sortSpec == null ? DEFAULT_SORT_SPEC : sortSpec;
this.limit = (limit == 0) ? 1000 : limit; this.limit = (limit == 0) ? 1000 : limit;
this.dimensions = dimensions; this.dimensions = dimensions;
this.virtualColumns = VirtualColumns.nullToEmpty(virtualColumns);
this.querySpec = querySpec == null ? new AllSearchQuerySpec() : querySpec; this.querySpec = querySpec == null ? new AllSearchQuerySpec() : querySpec;
} }
@ -127,6 +134,14 @@ public class SearchQuery extends BaseQuery<Result<SearchResultValue>>
return dimensions; return dimensions;
} }
/**
 * Returns the virtual columns of this query; the getter is exposed as a JSON property.
 *
 * The value is the one stored by the constructor, i.e. the result of
 * {@code VirtualColumns.nullToEmpty} applied to the constructor argument.
 * Whether the property is written during serialization is decided by
 * {@code VirtualColumns.JsonIncludeFilter} (presumably it omits an empty set of
 * virtual columns; confirm against that class).
 */
@Override
@JsonProperty
@JsonInclude(value = JsonInclude.Include.CUSTOM, valueFilter = VirtualColumns.JsonIncludeFilter.class)
public VirtualColumns getVirtualColumns()
{
  return this.virtualColumns;
}
@JsonProperty("query") @JsonProperty("query")
public SearchQuerySpec getQuery() public SearchQuerySpec getQuery()
{ {
@ -152,6 +167,7 @@ public class SearchQuery extends BaseQuery<Result<SearchResultValue>>
", dimFilter=" + dimFilter + ", dimFilter=" + dimFilter +
", granularity='" + getGranularity() + '\'' + ", granularity='" + getGranularity() + '\'' +
", dimensions=" + dimensions + ", dimensions=" + dimensions +
", virtualColumns=" + virtualColumns +
", querySpec=" + querySpec + ", querySpec=" + querySpec +
", querySegmentSpec=" + getQuerySegmentSpec() + ", querySegmentSpec=" + getQuerySegmentSpec() +
", limit=" + limit + ", limit=" + limit +
@ -173,34 +189,25 @@ public class SearchQuery extends BaseQuery<Result<SearchResultValue>>
SearchQuery that = (SearchQuery) o; SearchQuery that = (SearchQuery) o;
if (limit != that.limit) { return limit == that.limit &&
return false; Objects.equals(dimFilter, that.dimFilter) &&
} Objects.equals(dimensions, that.dimensions) &&
if (dimFilter != null ? !dimFilter.equals(that.dimFilter) : that.dimFilter != null) { Objects.equals(virtualColumns, that.virtualColumns) &&
return false; Objects.equals(querySpec, that.querySpec) &&
} Objects.equals(sortSpec, that.sortSpec);
if (dimensions != null ? !dimensions.equals(that.dimensions) : that.dimensions != null) {
return false;
}
if (querySpec != null ? !querySpec.equals(that.querySpec) : that.querySpec != null) {
return false;
}
if (sortSpec != null ? !sortSpec.equals(that.sortSpec) : that.sortSpec != null) {
return false;
}
return true;
} }
@Override @Override
public int hashCode() public int hashCode()
{ {
int result = super.hashCode(); return Objects.hash(
result = 31 * result + (dimFilter != null ? dimFilter.hashCode() : 0); super.hashCode(),
result = 31 * result + (sortSpec != null ? sortSpec.hashCode() : 0); dimFilter,
result = 31 * result + (dimensions != null ? dimensions.hashCode() : 0); sortSpec,
result = 31 * result + (querySpec != null ? querySpec.hashCode() : 0); dimensions,
result = 31 * result + limit; virtualColumns,
return result; querySpec,
limit
);
} }
} }

View File

@ -19,11 +19,9 @@
package org.apache.druid.query.search; package org.apache.druid.query.search;
import com.google.common.base.Function;
import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables; import com.google.common.collect.Iterables;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import it.unimi.dsi.fastutil.objects.Object2IntMap;
import it.unimi.dsi.fastutil.objects.Object2IntRBTreeMap; import it.unimi.dsi.fastutil.objects.Object2IntRBTreeMap;
import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.ISE;
@ -234,15 +232,11 @@ public class SearchQueryRunner implements QueryRunner<Result<SearchResultValue>>
) )
{ {
Iterable<SearchHit> source = Iterables.transform( Iterable<SearchHit> source = Iterables.transform(
retVal.object2IntEntrySet(), new Function<Object2IntMap.Entry<SearchHit>, SearchHit>() retVal.object2IntEntrySet(),
{ input -> {
@Override
public SearchHit apply(Object2IntMap.Entry<SearchHit> input)
{
SearchHit hit = input.getKey(); SearchHit hit = input.getKey();
return new SearchHit(hit.getDimension(), hit.getValue(), input.getIntValue()); return new SearchHit(hit.getDimension(), hit.getValue(), input.getIntValue());
} }
}
); );
return Sequences.simple( return Sequences.simple(

View File

@ -45,6 +45,7 @@ import org.apache.druid.segment.column.ColumnHolder;
import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.column.ColumnIndexSupplier;
import org.apache.druid.segment.column.DictionaryEncodedStringValueIndex; import org.apache.druid.segment.column.DictionaryEncodedStringValueIndex;
import org.apache.druid.segment.column.NumericColumn; import org.apache.druid.segment.column.NumericColumn;
import org.apache.druid.segment.virtual.VirtualizedColumnInspector;
import org.joda.time.Interval; import org.joda.time.Interval;
import java.util.ArrayList; import java.util.ArrayList;
@ -77,14 +78,18 @@ public class UseIndexesStrategy extends SearchStrategy
if (index != null) { if (index != null) {
// pair of bitmap dims and non-bitmap dims // pair of bitmap dims and non-bitmap dims
final Pair<List<DimensionSpec>, List<DimensionSpec>> pair = partitionDimensionList(adapter, searchDims); final Pair<List<DimensionSpec>, List<DimensionSpec>> pair = partitionDimensionList(
adapter,
query.getVirtualColumns(),
searchDims
);
final List<DimensionSpec> bitmapSuppDims = pair.lhs; final List<DimensionSpec> bitmapSuppDims = pair.lhs;
final List<DimensionSpec> nonBitmapSuppDims = pair.rhs; final List<DimensionSpec> nonBitmapSuppDims = pair.rhs;
if (bitmapSuppDims.size() > 0) { if (bitmapSuppDims.size() > 0) {
final ColumnIndexSelector selector = new ColumnSelectorColumnIndexSelector( final ColumnIndexSelector selector = new ColumnSelectorColumnIndexSelector(
index.getBitmapFactoryForDimensions(), index.getBitmapFactoryForDimensions(),
VirtualColumns.EMPTY, query.getVirtualColumns(),
new DeprecatedQueryableIndexColumnSelector(index) new DeprecatedQueryableIndexColumnSelector(index)
); );
@ -94,7 +99,13 @@ public class UseIndexesStrategy extends SearchStrategy
// the cursor-based plan. This can be more optimized. One possible optimization is generating a bitmap index // the cursor-based plan. This can be more optimized. One possible optimization is generating a bitmap index
// from the non-bitmap-support filter, and then use it to compute the filtered result by intersecting bitmaps. // from the non-bitmap-support filter, and then use it to compute the filtered result by intersecting bitmaps.
if (filter == null || filter.getBitmapColumnIndex(selector) != null) { if (filter == null || filter.getBitmapColumnIndex(selector) != null) {
final ImmutableBitmap timeFilteredBitmap = makeTimeFilteredBitmap(index, segment, filter, interval); final ImmutableBitmap timeFilteredBitmap = makeTimeFilteredBitmap(
index,
segment,
query.getVirtualColumns(),
filter,
interval
);
builder.add(new IndexOnlyExecutor(query, segment, timeFilteredBitmap, bitmapSuppDims)); builder.add(new IndexOnlyExecutor(query, segment, timeFilteredBitmap, bitmapSuppDims));
} else { } else {
// Fall back to cursor-based execution strategy // Fall back to cursor-based execution strategy
@ -118,6 +129,7 @@ public class UseIndexesStrategy extends SearchStrategy
*/ */
private static Pair<List<DimensionSpec>, List<DimensionSpec>> partitionDimensionList( private static Pair<List<DimensionSpec>, List<DimensionSpec>> partitionDimensionList(
StorageAdapter adapter, StorageAdapter adapter,
VirtualColumns virtualColumns,
List<DimensionSpec> dimensions List<DimensionSpec> dimensions
) )
{ {
@ -127,9 +139,10 @@ public class UseIndexesStrategy extends SearchStrategy
adapter.getAvailableDimensions(), adapter.getAvailableDimensions(),
dimensions dimensions
); );
VirtualizedColumnInspector columnInspector = new VirtualizedColumnInspector(adapter, virtualColumns);
for (DimensionSpec spec : dimsToSearch) { for (DimensionSpec spec : dimsToSearch) {
ColumnCapabilities capabilities = adapter.getColumnCapabilities(spec.getDimension()); ColumnCapabilities capabilities = columnInspector.getColumnCapabilities(spec.getDimension());
if (capabilities == null) { if (capabilities == null) {
continue; continue;
} }
@ -147,6 +160,7 @@ public class UseIndexesStrategy extends SearchStrategy
static ImmutableBitmap makeTimeFilteredBitmap( static ImmutableBitmap makeTimeFilteredBitmap(
final QueryableIndex index, final QueryableIndex index,
final Segment segment, final Segment segment,
final VirtualColumns virtualColumns,
final Filter filter, final Filter filter,
final Interval interval final Interval interval
) )
@ -158,7 +172,7 @@ public class UseIndexesStrategy extends SearchStrategy
} else { } else {
final ColumnIndexSelector selector = new ColumnSelectorColumnIndexSelector( final ColumnIndexSelector selector = new ColumnSelectorColumnIndexSelector(
index.getBitmapFactoryForDimensions(), index.getBitmapFactoryForDimensions(),
VirtualColumns.EMPTY, virtualColumns,
new DeprecatedQueryableIndexColumnSelector(index) new DeprecatedQueryableIndexColumnSelector(index)
); );
final BitmapColumnIndex columnIndex = filter.getBitmapColumnIndex(selector); final BitmapColumnIndex columnIndex = filter.getBitmapColumnIndex(selector);
@ -249,18 +263,20 @@ public class UseIndexesStrategy extends SearchStrategy
final QueryableIndex index = segment.asQueryableIndex(); final QueryableIndex index = segment.asQueryableIndex();
Preconditions.checkArgument(index != null, "Index should not be null"); Preconditions.checkArgument(index != null, "Index should not be null");
ColumnSelectorColumnIndexSelector indexSelector = new ColumnSelectorColumnIndexSelector(
index.getBitmapFactoryForDimensions(),
query.getVirtualColumns(),
new DeprecatedQueryableIndexColumnSelector(index)
);
final Object2IntRBTreeMap<SearchHit> retVal = new Object2IntRBTreeMap<>(query.getSort().getComparator()); final Object2IntRBTreeMap<SearchHit> retVal = new Object2IntRBTreeMap<>(query.getSort().getComparator());
retVal.defaultReturnValue(0); retVal.defaultReturnValue(0);
final BitmapFactory bitmapFactory = index.getBitmapFactoryForDimensions(); final BitmapFactory bitmapFactory = index.getBitmapFactoryForDimensions();
for (DimensionSpec dimension : dimsToSearch) { for (DimensionSpec dimension : dimsToSearch) {
final ColumnHolder columnHolder = index.getColumnHolder(dimension.getDimension());
if (columnHolder == null) {
continue;
}
final ColumnIndexSupplier indexSupplier = columnHolder.getIndexSupplier(); final ColumnIndexSupplier indexSupplier = indexSelector.getIndexSupplier(dimension.getDimension());
ExtractionFn extractionFn = dimension.getExtractionFn(); ExtractionFn extractionFn = dimension.getExtractionFn();
if (extractionFn == null) { if (extractionFn == null) {

View File

@ -30,6 +30,7 @@ import org.apache.druid.query.Druids;
import org.apache.druid.query.Result; import org.apache.druid.query.Result;
import org.apache.druid.query.TableDataSource; import org.apache.druid.query.TableDataSource;
import org.apache.druid.query.spec.MultipleIntervalSegmentSpec; import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
import org.apache.druid.segment.VirtualColumns;
import org.junit.Assert; import org.junit.Assert;
import org.junit.Test; import org.junit.Test;
@ -48,6 +49,7 @@ public class SearchQueryQueryToolChestTest
1, 1,
new MultipleIntervalSegmentSpec(ImmutableList.of(Intervals.of("2015-01-01/2015-01-02"))), new MultipleIntervalSegmentSpec(ImmutableList.of(Intervals.of("2015-01-01/2015-01-02"))),
ImmutableList.of(Druids.DIMENSION_IDENTITY.apply("dim1")), ImmutableList.of(Druids.DIMENSION_IDENTITY.apply("dim1")),
VirtualColumns.EMPTY,
new FragmentSearchQuerySpec(ImmutableList.of("a", "b")), new FragmentSearchQuerySpec(ImmutableList.of("a", "b")),
null, null,
null null

View File

@ -21,6 +21,7 @@ package org.apache.druid.query.search;
import com.google.common.base.Suppliers; import com.google.common.base.Suppliers;
import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import org.apache.druid.common.config.NullHandling; import org.apache.druid.common.config.NullHandling;
import org.apache.druid.data.input.MapBasedInputRow; import org.apache.druid.data.input.MapBasedInputRow;
import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.DateTimes;
@ -59,6 +60,7 @@ import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.incremental.IncrementalIndexSchema; import org.apache.druid.segment.incremental.IncrementalIndexSchema;
import org.apache.druid.segment.incremental.OnheapIncrementalIndex; import org.apache.druid.segment.incremental.OnheapIncrementalIndex;
import org.apache.druid.segment.virtual.ListFilteredVirtualColumn;
import org.apache.druid.testing.InitializedNullHandlingTest; import org.apache.druid.testing.InitializedNullHandlingTest;
import org.apache.druid.timeline.SegmentId; import org.apache.druid.timeline.SegmentId;
import org.junit.Assert; import org.junit.Assert;
@ -784,6 +786,44 @@ public class SearchQueryRunnerTest extends InitializedNullHandlingTest
checkSearchQuery(searchQuery, noHit); checkSearchQuery(searchQuery, noHit);
} }
/**
 * Runs a search for "e" over two virtual columns ("v0" and "v1"), each a
 * {@link ListFilteredVirtualColumn} layered on a real dimension, and checks the hits
 * are reported under the virtual column names.
 */
@Test
public void testSearchSameValueInMultiDimsVirtualColumns()
{
  // "v0": placement dimension restricted to the value 'preferred'
  final ListFilteredVirtualColumn filteredPlacement = new ListFilteredVirtualColumn(
      "v0",
      DefaultDimensionSpec.of(QueryRunnerTestHelper.PLACEMENT_DIMENSION),
      ImmutableSet.of("preferred"),
      true
  );
  // "v1": placementish dimension restricted to the value 'e'
  final ListFilteredVirtualColumn filteredPlacementish = new ListFilteredVirtualColumn(
      "v1",
      DefaultDimensionSpec.of(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION),
      ImmutableSet.of("e"),
      true
  );

  final SearchQuery searchQuery = Druids.newSearchQueryBuilder()
                                        .dataSource(QueryRunnerTestHelper.DATA_SOURCE)
                                        .granularity(QueryRunnerTestHelper.ALL_GRAN)
                                        .intervals(QueryRunnerTestHelper.FULL_ON_INTERVAL_SPEC)
                                        .dimensions(Arrays.asList("v0", "v1"))
                                        .virtualColumns(filteredPlacement, filteredPlacementish)
                                        .query("e")
                                        .build();

  // same results as testSearchSameValueInMultiDims, except v1 has no 'preferred' hit
  // because it is filtered to just 'e'
  final List<SearchHit> expectedHits = new ArrayList<>();
  expectedHits.add(new SearchHit("v0", "preferred", 1209));
  expectedHits.add(new SearchHit("v1", "e", 93));

  checkSearchQuery(searchQuery, expectedHits);
}
private void checkSearchQuery(Query searchQuery, List<SearchHit> expectedResults) private void checkSearchQuery(Query searchQuery, List<SearchHit> expectedResults)
{ {
checkSearchQuery(searchQuery, runner, expectedResults); checkSearchQuery(searchQuery, runner, expectedResults);