From e012d5c41bbc5a056e3c99bf6073ac19ba523c7d Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Mon, 14 Sep 2020 19:29:35 -0700 Subject: [PATCH] allow vectorized query engines to utilize vectorized virtual columns (#10388) * allow vectorized query engines to utilize vectorized virtual column implementations * javadoc, refactor, checkstyle * intellij inspection and more javadoc * better * review stuffs * fix incorrect refactor, thanks tests * minor adjustments --- .../epinephelinae/GroupByQueryEngineV2.java | 21 +- .../vector/VectorGroupByEngine.java | 50 ++- .../QueryableIndexCursorSequenceBuilder.java | 51 ++- .../segment/QueryableIndexStorageAdapter.java | 5 +- .../apache/druid/segment/VirtualColumn.java | 187 ++++++++-- .../apache/druid/segment/VirtualColumns.java | 253 +++++++++---- ...yableIndexVectorColumnSelectorFactory.java | 225 ++++++++---- .../AlwaysTwoCounterAggregatorFactory.java | 257 ++++++++++++++ .../AlwaysTwoVectorizedVirtualColumn.java | 321 +++++++++++++++++ .../virtual/VectorizedVirtualColumnTest.java | 334 ++++++++++++++++++ 10 files changed, 1493 insertions(+), 211 deletions(-) create mode 100644 processing/src/test/java/org/apache/druid/segment/virtual/AlwaysTwoCounterAggregatorFactory.java create mode 100644 processing/src/test/java/org/apache/druid/segment/virtual/AlwaysTwoVectorizedVirtualColumn.java create mode 100644 processing/src/test/java/org/apache/druid/segment/virtual/VectorizedVirtualColumnTest.java diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java index b4cda9b54d9..f170550a102 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java @@ -55,6 +55,7 @@ import org.apache.druid.query.groupby.orderby.DefaultLimitSpec; import org.apache.druid.query.groupby.orderby.OrderByColumnSpec; import org.apache.druid.query.groupby.strategy.GroupByStrategyV2; import org.apache.druid.query.ordering.StringComparator; +import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.Cursor; @@ -74,7 +75,6 @@ import java.nio.ByteBuffer; import java.util.Iterator; import java.util.List; import java.util.NoSuchElementException; -import java.util.function.Function; import java.util.stream.Stream; /** @@ -231,7 +231,7 @@ public class GroupByQueryEngineV2 processingBuffer, fudgeTimestamp, dims, - isAllSingleValueDims(columnSelectorFactory::getColumnCapabilities, query.getDimensions(), false), + isAllSingleValueDims(columnSelectorFactory, query.getDimensions()), cardinalityForArrayAggregation ); } else { @@ -242,7 +242,7 @@ public class GroupByQueryEngineV2 processingBuffer, fudgeTimestamp, dims, - isAllSingleValueDims(columnSelectorFactory::getColumnCapabilities, query.getDimensions(), false) + isAllSingleValueDims(columnSelectorFactory, query.getDimensions()) ); } } @@ -319,13 +319,11 @@ public class GroupByQueryEngineV2 /** * Checks whether all "dimensions" are either single-valued, or if allowed, nonexistent. Since non-existent column * selectors will show up as full of nulls they are effectively single valued, however they can also be null during - * broker merge, for example with an 'inline' datasource subquery. 'missingMeansNonExistent' is sort of a hack to let - * the vectorized engine, which only operates on actual segments, to still work in this case for non-existent columns. + * broker merge, for example with an 'inline' datasource subquery. */ public static boolean isAllSingleValueDims( - final Function capabilitiesFunction, - final List dimensions, - final boolean missingMeansNonExistent + final ColumnInspector inspector, + final List dimensions ) { return dimensions @@ -338,10 +336,9 @@ public class GroupByQueryEngineV2 return false; } - // Now check column capabilities. - final ColumnCapabilities columnCapabilities = capabilitiesFunction.apply(dimension.getDimension()); - return (columnCapabilities != null && columnCapabilities.hasMultipleValues().isFalse()) || - (missingMeansNonExistent && columnCapabilities == null); + // Now check column capabilities, which must be present and explicitly not multi-valued + final ColumnCapabilities columnCapabilities = inspector.getColumnCapabilities(dimension.getDimension()); + return columnCapabilities != null && columnCapabilities.hasMultipleValues().isFalse(); }); } diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java index 99f14ae6c3c..e2f2e756be9 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java @@ -42,6 +42,8 @@ import org.apache.druid.query.groupby.epinephelinae.VectorGrouper; import org.apache.druid.query.vector.VectorCursorGranularizer; import org.apache.druid.segment.DimensionHandlerUtils; import org.apache.druid.segment.StorageAdapter; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.filter.Filters; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import org.apache.druid.segment.vector.VectorCursor; @@ -55,6 +57,7 @@ import java.util.Collections; import java.util.Iterator; import java.util.List; import java.util.NoSuchElementException; +import java.util.function.Function; import java.util.stream.Collectors; public class VectorGroupByEngine @@ -70,24 +73,47 @@ public class VectorGroupByEngine @Nullable final Filter filter ) { - // Multi-value dimensions are not yet supported. - // - // Two notes here about how we're handling this check: - // 1) After multi-value dimensions are supported, we could alter "GroupByQueryEngineV2.isAllSingleValueDims" - // to accept a ColumnSelectorFactory, which makes more sense than using a StorageAdapter (see #8013). - // 2) Technically using StorageAdapter here is bad since it only looks at real columns, but they might - // be shadowed by virtual columns (again, see #8013). But it's fine for now since adapter.canVectorize - // always returns false if there are any virtual columns. - // - // This situation should sort itself out pretty well once this engine supports multi-valued columns. Then we - // won't have to worry about having this all-single-value-dims check here. + Function capabilitiesFunction = name -> + query.getVirtualColumns().getColumnCapabilitiesWithFallback(adapter, name); - return GroupByQueryEngineV2.isAllSingleValueDims(adapter::getColumnCapabilities, query.getDimensions(), true) + return canVectorizeDimensions(capabilitiesFunction, query.getDimensions()) && query.getDimensions().stream().allMatch(DimensionSpec::canVectorize) && query.getAggregatorSpecs().stream().allMatch(aggregatorFactory -> aggregatorFactory.canVectorize(adapter)) && adapter.canVectorize(filter, query.getVirtualColumns(), false); } + public static boolean canVectorizeDimensions( + final Function capabilitiesFunction, + final List dimensions + ) + { + return dimensions + .stream() + .allMatch( + dimension -> { + if (dimension.mustDecorate()) { + // group by on multi value dimensions are not currently supported + // DimensionSpecs that decorate may turn singly-valued columns into multi-valued selectors. + // To be safe, we must return false here. + return false; + } + + // Now check column capabilities. + final ColumnCapabilities columnCapabilities = capabilitiesFunction.apply(dimension.getDimension()); + // null here currently means the column does not exist, nil columns can be vectorized + if (columnCapabilities == null) { + return true; + } + // strings must be single valued, dictionary encoded, and have unique dictionary entries + if (ValueType.STRING.equals(columnCapabilities.getType())) { + return columnCapabilities.hasMultipleValues().isFalse() && + columnCapabilities.isDictionaryEncoded().isTrue() && + columnCapabilities.areDictionaryValuesUnique().isTrue(); + } + return columnCapabilities.hasMultipleValues().isFalse(); + }); + } + public static Sequence process( final GroupByQuery query, final StorageAdapter storageAdapter, diff --git a/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorSequenceBuilder.java b/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorSequenceBuilder.java index dfd2fa624a9..e1a6c6d7c80 100644 --- a/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorSequenceBuilder.java +++ b/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorSequenceBuilder.java @@ -193,7 +193,6 @@ public class QueryableIndexCursorSequenceBuilder public VectorCursor buildVectorized(final int vectorSize) { // Sanity check - matches QueryableIndexStorageAdapter.canVectorize - Preconditions.checkState(virtualColumns.size() == 0, "virtualColumns.size == 0"); Preconditions.checkState(!descending, "!descending"); final Map columnCache = new HashMap<>(); @@ -229,17 +228,15 @@ public class QueryableIndexCursorSequenceBuilder ? new NoFilterVectorOffset(vectorSize, startOffset, endOffset) : new BitmapVectorOffset(vectorSize, filterBitmap, startOffset, endOffset); + // baseColumnSelectorFactory using baseOffset is the column selector for filtering. + final VectorColumnSelectorFactory baseColumnSelectorFactory = makeVectorColumnSelectorFactoryForOffset( + columnCache, + baseOffset, + closer + ); if (postFilter == null) { - return new QueryableIndexVectorCursor(index, baseOffset, closer, columnCache, vectorSize); + return new QueryableIndexVectorCursor(baseColumnSelectorFactory, baseOffset, vectorSize, closer); } else { - // baseColumnSelectorFactory using baseOffset is the column selector for filtering. - final VectorColumnSelectorFactory baseColumnSelectorFactory = new QueryableIndexVectorColumnSelectorFactory( - index, - baseOffset, - closer, - columnCache - ); - final VectorOffset filteredOffset = FilteredVectorOffset.create( baseOffset, baseColumnSelectorFactory, @@ -254,10 +251,31 @@ public class QueryableIndexCursorSequenceBuilder // object will get hit twice for some of the values (anything that matched the filter). This is probably most // noticeable if it causes thrashing of decompression buffers due to out-of-order reads. I haven't observed // this directly but it seems possible in principle. - return new QueryableIndexVectorCursor(index, filteredOffset, closer, columnCache, vectorSize); + // baseColumnSelectorFactory using baseOffset is the column selector for filtering. + final VectorColumnSelectorFactory filteredColumnSelectorFactory = makeVectorColumnSelectorFactoryForOffset( + columnCache, + filteredOffset, + closer + ); + return new QueryableIndexVectorCursor(filteredColumnSelectorFactory, filteredOffset, vectorSize, closer); } } + VectorColumnSelectorFactory makeVectorColumnSelectorFactoryForOffset( + Map columnCache, + VectorOffset baseOffset, + Closer closer + ) + { + return new QueryableIndexVectorColumnSelectorFactory( + index, + baseOffset, + closer, + columnCache, + virtualColumns + ); + } + /** * Search the time column using binary search. Benchmarks on various other approaches (linear search, binary * search that switches to linear at various closeness thresholds) indicated that a pure binary search worked best. @@ -318,17 +336,16 @@ public class QueryableIndexCursorSequenceBuilder private final VectorColumnSelectorFactory columnSelectorFactory; public QueryableIndexVectorCursor( - final QueryableIndex index, + final VectorColumnSelectorFactory vectorColumnSelectorFactory, final VectorOffset offset, - final Closer closer, - final Map columnCache, - final int vectorSize + final int vectorSize, + final Closer closer ) { + this.columnSelectorFactory = vectorColumnSelectorFactory; + this.vectorSize = vectorSize; this.offset = offset; this.closer = closer; - this.vectorSize = vectorSize; - this.columnSelectorFactory = new QueryableIndexVectorColumnSelectorFactory(index, offset, closer, columnCache); } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/QueryableIndexStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/QueryableIndexStorageAdapter.java index e9aa01e07c9..2ab9e7a536a 100644 --- a/processing/src/main/java/org/apache/druid/segment/QueryableIndexStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/QueryableIndexStorageAdapter.java @@ -219,9 +219,8 @@ public class QueryableIndexStorageAdapter implements StorageAdapter } } - // 1) Virtual columns can't vectorize yet - // 2) Vector cursors can't iterate backwards yet - return virtualColumns.size() == 0 && !descending; + // vector cursors can't iterate backwards yet + return virtualColumns.canVectorize(this) && !descending; } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/VirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/VirtualColumn.java index 217f72f8362..f7299dbad01 100644 --- a/processing/src/main/java/org/apache/druid/segment/VirtualColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/VirtualColumn.java @@ -26,14 +26,21 @@ import org.apache.druid.query.dimension.DimensionSpec; import org.apache.druid.segment.column.BitmapIndex; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.data.ReadableOffset; +import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; +import org.apache.druid.segment.vector.ReadableVectorOffset; +import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorObjectSelector; +import org.apache.druid.segment.vector.VectorValueSelector; import org.apache.druid.segment.virtual.ExpressionVirtualColumn; import javax.annotation.Nullable; import java.util.List; /** - * Virtual columns are "views" created over a ColumnSelectorFactory or ColumnSelector. They can potentially draw from multiple - * underlying columns, although they always present themselves as if they were a single column. + * Virtual columns are "views" created over a {@link ColumnSelectorFactory} or {@link ColumnSelector}. They can + * potentially draw from multiple underlying columns, although they always present themselves as if they were a single + * column. * * A virtual column object will be shared amongst threads and must be thread safe. The selectors returned * from the various makeXXXSelector methods need not be thread safe. @@ -56,59 +63,173 @@ public interface VirtualColumn extends Cacheable * virtual column was referenced with (through {@link DimensionSpec#getDimension()}, which * is useful if this column uses dot notation. The virtual column is expected to apply any * necessary decoration from the dimensionSpec. - * - * @param dimensionSpec the dimensionSpec this column was referenced with - * @param factory column selector factory - * - * @return the selector, must not be null */ DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec, ColumnSelectorFactory factory); /** - * Returns similar DimensionSelector object as returned by {@link #makeDimensionSelector(DimensionSpec, ColumnSelectorFactory)} - * except this method has full access to underlying column and can potentially provide a more efficient implementation. + * Returns similar {@link DimensionSelector} object as returned by + * {@link #makeDimensionSelector(DimensionSpec, ColumnSelectorFactory)} except this method has full access to the + * underlying column and can potentially provide a more efficient implementation. * - * Users of this interface must ensure to first call this method whenever possible. Typically this can not be called in - * query paths on top of IncrementalIndex which doesn't have columns as in persisted segments. - * - * @param dimensionSpec - * @param columnSelector - * @param offset - * @return the selector + * Users of this interface must ensure to first call this method whenever possible. Typically this can not be called + * in query paths on top of IncrementalIndex which doesn't have columns as in persisted segments. */ @SuppressWarnings("unused") @Nullable - default DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec, ColumnSelector columnSelector, ReadableOffset offset) + default DimensionSelector makeDimensionSelector( + DimensionSpec dimensionSpec, + ColumnSelector columnSelector, + ReadableOffset offset + ) { return null; } /** - * Build a selector corresponding to this virtual column. Also provides the name that the + * Build a {@link ColumnValueSelector} corresponding to this virtual column. Also provides the name that the * virtual column was referenced with, which is useful if this column uses dot notation. - * - * @param columnName the name this virtual column was referenced with - * @param factory column selector factory - * - * @return the selector, must not be null */ ColumnValueSelector makeColumnValueSelector(String columnName, ColumnSelectorFactory factory); /** - * Returns similar ColumnValueSelector object as returned by {@link #makeColumnValueSelector(String, ColumnSelectorFactory)} - * except this method has full access to underlying column and can potentially provide a more efficient implementation. + * Returns similar {@link ColumnValueSelector} object as returned by + * {@link #makeColumnValueSelector(String, ColumnSelectorFactory)} except this method has full access to the + * underlying column and can potentially provide a more efficient implementation. * - * Users of this interface must ensure to first call this method whenever possible. Typically this can not be called in - * query paths on top of IncrementalIndex which doesn't have columns as in persisted segments. - * - * @param columnName - * @param columnSelector - * @param offset - * @return the selector + * Users of this interface must ensure to first call this method whenever possible. Typically this can not be called + * in query paths on top of IncrementalIndex which doesn't have columns as in persisted segments. */ @SuppressWarnings("unused") @Nullable - default ColumnValueSelector makeColumnValueSelector(String columnName, ColumnSelector columnSelector, ReadableOffset offset) + default ColumnValueSelector makeColumnValueSelector( + String columnName, + ColumnSelector columnSelector, + ReadableOffset offset + ) + { + return null; + } + + default boolean canVectorize(ColumnInspector inspector) + { + return false; + } + + /** + * Build a {@link SingleValueDimensionVectorSelector} corresponding to this virtual column. Also provides the name + * that the virtual column was referenced with (through {@link DimensionSpec#getDimension()}, which is useful if this + * column uses dot notation. The virtual column is expected to apply any necessary decoration from the + * {@link DimensionSpec}. + */ + default SingleValueDimensionVectorSelector makeSingleValueVectorDimensionSelector( + DimensionSpec dimensionSpec, + VectorColumnSelectorFactory factory + ) + { + throw new UnsupportedOperationException("not supported"); + } + + /** + * Returns similar {@link SingleValueDimensionVectorSelector} object as returned by + * {@link #makeSingleValueVectorDimensionSelector(DimensionSpec, ColumnSelector, ReadableVectorOffset)} except this + * method has full access to the underlying column and can potentially provide a more efficient implementation. + * + * Users of this interface must ensure to first call this method whenever possible. + */ + @SuppressWarnings("unused") + @Nullable + default SingleValueDimensionVectorSelector makeSingleValueVectorDimensionSelector( + DimensionSpec dimensionSpec, + ColumnSelector columnSelector, + ReadableVectorOffset offset + ) + { + return null; + } + + /** + * Build a {@link MultiValueDimensionVectorSelector} corresponding to this virtual column. Also provides + * the name that the virtual column was referenced with (through {@link DimensionSpec#getDimension()}, which is useful + * if this column uses dot notation. The virtual column is expected to apply any necessary decoration from the + * {@link DimensionSpec}. + */ + default MultiValueDimensionVectorSelector makeMultiValueVectorDimensionSelector( + DimensionSpec dimensionSpec, + VectorColumnSelectorFactory factory + ) + { + throw new UnsupportedOperationException("not supported"); + } + + /** + * Returns similar {@link SingleValueDimensionVectorSelector} object as returned by + * {@link #makeSingleValueVectorDimensionSelector(DimensionSpec, ColumnSelector, ReadableVectorOffset)} except this + * method has full access to the underlying column and can potentially provide a more efficient implementation. + * + * Users of this interface must ensure to first call this method whenever possible. + */ + @SuppressWarnings("unused") + @Nullable + default MultiValueDimensionVectorSelector makeMultiValueVectorDimensionSelector( + DimensionSpec dimensionSpec, + ColumnSelector columnSelector, + ReadableVectorOffset offset + ) + { + return null; + } + + + /** + * Build a {@link VectorValueSelector} corresponding to this virtual column. Also provides the name that the + * virtual column was referenced with, which is useful if this column uses dot notation. + */ + default VectorValueSelector makeVectorValueSelector(String columnName, VectorColumnSelectorFactory factory) + { + throw new UnsupportedOperationException("not supported"); + } + + /** + * Returns similar {@link VectorValueSelector} object as returned by + * {@link #makeVectorValueSelector(String, VectorColumnSelectorFactory)} except this method has full access to the + * underlying column and can potentially provide a more efficient implementation. + * + * Users of this interface must ensure to first call this method whenever possible. + */ + @SuppressWarnings("unused") + @Nullable + default VectorValueSelector makeVectorValueSelector( + String columnName, + ColumnSelector columnSelector, + ReadableVectorOffset offset + ) + { + return null; + } + + /** + * Build a {@link VectorObjectSelector} corresponding to this virtual column. Also provides the name that the + * virtual column was referenced with, which is useful if this column uses dot notation. + */ + default VectorObjectSelector makeVectorObjectSelector(String columnName, VectorColumnSelectorFactory factory) + { + throw new UnsupportedOperationException("not supported"); + } + + /** + * Returns similar {@link VectorObjectSelector} object as returned by + * {@link #makeVectorObjectSelector(String, VectorColumnSelectorFactory)} except this method has full access to the + * underlying column and can potentially provide a more efficient implementation. + * + * Users of this interface must ensure to first call this method whenever possible. + */ + @SuppressWarnings("unused") + @Nullable + default VectorObjectSelector makeVectorObjectSelector( + String columnName, + ColumnSelector columnSelector, + ReadableVectorOffset offset + ) { return null; } diff --git a/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java b/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java index 7a8d7d4fa8f..db57bcd8ee7 100644 --- a/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java +++ b/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java @@ -35,6 +35,12 @@ import org.apache.druid.segment.column.BitmapIndex; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.data.ReadableOffset; +import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; +import org.apache.druid.segment.vector.ReadableVectorOffset; +import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorObjectSelector; +import org.apache.druid.segment.vector.VectorValueSelector; import org.apache.druid.segment.virtual.VirtualizedColumnSelectorFactory; import javax.annotation.Nullable; @@ -152,42 +158,36 @@ public class VirtualColumns implements Cacheable return withDotSupport.get(baseColumnName); } + @Nullable + public BitmapIndex getBitmapIndex(String columnName, ColumnSelector columnSelector) + { + final VirtualColumn virtualColumn = getVirtualColumnForSelector(columnName); + return virtualColumn.capabilities(columnName).hasBitmapIndexes() ? virtualColumn.getBitmapIndex( + columnName, + columnSelector + ) : null; + } + /** * Create a dimension (string) selector. * - * @param dimensionSpec the dimensionSpec for this selector - * @param factory base column selector factory - * - * @return selector - * * @throws IllegalArgumentException if the virtual column does not exist (see {@link #exists(String)} */ public DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec, ColumnSelectorFactory factory) { - final VirtualColumn virtualColumn = getVirtualColumn(dimensionSpec.getDimension()); - if (virtualColumn == null) { - throw new IAE("No such virtual column[%s]", dimensionSpec.getDimension()); - } else { - final DimensionSelector selector = virtualColumn.makeDimensionSelector(dimensionSpec, factory); - Preconditions.checkNotNull(selector, "selector"); - return selector; - } - } - - @Nullable - public BitmapIndex getBitmapIndex(String columnName, ColumnSelector columnSelector) - { - final VirtualColumn virtualColumn = getVirtualColumn(columnName); - if (virtualColumn == null) { - throw new IAE("No such virtual column[%s]", columnName); - } else { - return virtualColumn.capabilities(columnName).hasBitmapIndexes() ? virtualColumn.getBitmapIndex( - columnName, - columnSelector - ) : null; - } + final VirtualColumn virtualColumn = getVirtualColumnForSelector(dimensionSpec.getDimension()); + final DimensionSelector selector = virtualColumn.makeDimensionSelector(dimensionSpec, factory); + Preconditions.checkNotNull(selector, "selector"); + return selector; } + /** + * Try to create an optimized dimension (string) selector directly from a {@link ColumnSelector}. If this method + * returns null, callers should try to fallback to + * {@link #makeDimensionSelector(DimensionSpec, ColumnSelectorFactory)} instead. + * + * @throws IllegalArgumentException if the virtual column does not exist (see {@link #exists(String)} + */ @Nullable public DimensionSelector makeDimensionSelector( DimensionSpec dimensionSpec, @@ -195,14 +195,29 @@ public class VirtualColumns implements Cacheable ReadableOffset offset ) { - final VirtualColumn virtualColumn = getVirtualColumn(dimensionSpec.getDimension()); - if (virtualColumn == null) { - throw new IAE("No such virtual column[%s]", dimensionSpec.getDimension()); - } else { - return virtualColumn.makeDimensionSelector(dimensionSpec, columnSelector, offset); - } + final VirtualColumn virtualColumn = getVirtualColumnForSelector(dimensionSpec.getDimension()); + return virtualColumn.makeDimensionSelector(dimensionSpec, columnSelector, offset); } + /** + * Create a column value selector. + * + * @throws IllegalArgumentException if the virtual column does not exist (see {@link #exists(String)} + */ + public ColumnValueSelector makeColumnValueSelector(String columnName, ColumnSelectorFactory factory) + { + final VirtualColumn virtualColumn = getVirtualColumnForSelector(columnName); + final ColumnValueSelector selector = virtualColumn.makeColumnValueSelector(columnName, factory); + Preconditions.checkNotNull(selector, "selector"); + return selector; + } + + /** + * Try to create an optimized value selector directly from a {@link ColumnSelector}. If this method returns null, + * callers should try to fallback to {@link #makeColumnValueSelector(String, ColumnSelectorFactory)} instead. + * + * @throws IllegalArgumentException if the virtual column does not exist (see {@link #exists(String)} + */ @Nullable public ColumnValueSelector makeColumnValueSelector( String columnName, @@ -210,34 +225,149 @@ public class VirtualColumns implements Cacheable ReadableOffset offset ) { - final VirtualColumn virtualColumn = getVirtualColumn(columnName); - if (virtualColumn == null) { - throw new IAE("No such virtual column[%s]", columnName); - } else { - return virtualColumn.makeColumnValueSelector(columnName, columnSelector, offset); - } + final VirtualColumn virtualColumn = getVirtualColumnForSelector(columnName); + return virtualColumn.makeColumnValueSelector(columnName, columnSelector, offset); + } + + public boolean canVectorize(ColumnInspector columnInspector) + { + return virtualColumns.stream().allMatch(virtualColumn -> virtualColumn.canVectorize(columnInspector)); } /** - * Create a column value selector. - * - * @param columnName column mame - * @param factory base column selector factory - * - * @return selector + * Create a single value dimension vector (string) selector. * * @throws IllegalArgumentException if the virtual column does not exist (see {@link #exists(String)} */ - public ColumnValueSelector makeColumnValueSelector(String columnName, ColumnSelectorFactory factory) + public SingleValueDimensionVectorSelector makeSingleValueDimensionVectorSelector( + DimensionSpec dimensionSpec, + VectorColumnSelectorFactory factory + ) { - final VirtualColumn virtualColumn = getVirtualColumn(columnName); - if (virtualColumn == null) { - throw new IAE("No such virtual column[%s]", columnName); - } else { - final ColumnValueSelector selector = virtualColumn.makeColumnValueSelector(columnName, factory); - Preconditions.checkNotNull(selector, "selector"); - return selector; - } + final VirtualColumn virtualColumn = getVirtualColumnForSelector(dimensionSpec.getDimension()); + final SingleValueDimensionVectorSelector selector = virtualColumn.makeSingleValueVectorDimensionSelector( + dimensionSpec, + factory + ); + Preconditions.checkNotNull(selector, "selector"); + return selector; + } + + /** + * Try to create an optimized single value dimension (string) vector selector, directly from a + * {@link ColumnSelector}. If this method returns null, callers should try to fallback to + * {@link #makeSingleValueDimensionVectorSelector(DimensionSpec, VectorColumnSelectorFactory)} instead. + * + * @throws IllegalArgumentException if the virtual column does not exist (see {@link #exists(String)} + */ + @Nullable + public SingleValueDimensionVectorSelector makeSingleValueDimensionVectorSelector( + DimensionSpec dimensionSpec, + ColumnSelector columnSelector, + ReadableVectorOffset offset + ) + { + final VirtualColumn virtualColumn = getVirtualColumnForSelector(dimensionSpec.getDimension()); + return virtualColumn.makeSingleValueVectorDimensionSelector(dimensionSpec, columnSelector, offset); + } + + /** + * Create a multi value dimension vector (string) selector. + * + * @throws IllegalArgumentException if the virtual column does not exist (see {@link #exists(String)} + */ + public MultiValueDimensionVectorSelector makeMultiValueDimensionVectorSelector( + DimensionSpec dimensionSpec, + VectorColumnSelectorFactory factory + ) + { + final VirtualColumn virtualColumn = getVirtualColumnForSelector(dimensionSpec.getDimension()); + final MultiValueDimensionVectorSelector selector = virtualColumn.makeMultiValueVectorDimensionSelector( + dimensionSpec, + factory + ); + Preconditions.checkNotNull(selector, "selector"); + return selector; + } + + /** + * Try to create an optimized multi value dimension (string) vector selector, directly from a + * {@link ColumnSelector}. If this method returns null, callers should try to fallback to + * {@link #makeMultiValueDimensionVectorSelector(DimensionSpec, VectorColumnSelectorFactory)} instead. + * + * @throws IllegalArgumentException if the virtual column does not exist (see {@link #exists(String)} + */ + @Nullable + public MultiValueDimensionVectorSelector makeMultiValueDimensionVectorSelector( + DimensionSpec dimensionSpec, + ColumnSelector columnSelector, + ReadableVectorOffset offset + ) + { + final VirtualColumn virtualColumn = getVirtualColumnForSelector(dimensionSpec.getDimension()); + return virtualColumn.makeMultiValueVectorDimensionSelector(dimensionSpec, columnSelector, offset); + } + + /** + * Create a column vector value selector. + * + * @throws IllegalArgumentException if the virtual column does not exist (see {@link #exists(String)} + */ + public VectorValueSelector makeVectorValueSelector(String columnName, VectorColumnSelectorFactory factory) + { + final VirtualColumn virtualColumn = getVirtualColumnForSelector(columnName); + final VectorValueSelector selector = virtualColumn.makeVectorValueSelector(columnName, factory); + Preconditions.checkNotNull(selector, "selector"); + return selector; + } + + /** + * Try to create an optimized vector value selector directly from a {@link ColumnSelector}. If this method returns + * null, callers should try to fallback to {@link #makeVectorValueSelector(String, VectorColumnSelectorFactory)} + * instead. + * + * @throws IllegalArgumentException if the virtual column does not exist (see {@link #exists(String)} + */ + @Nullable + public VectorValueSelector makeVectorValueSelector( + String columnName, + ColumnSelector columnSelector, + ReadableVectorOffset offset + ) + { + final VirtualColumn virtualColumn = getVirtualColumnForSelector(columnName); + return virtualColumn.makeVectorValueSelector(columnName, columnSelector, offset); + } + + /** + * Create a column vector object selector. + * + * @throws IllegalArgumentException if the virtual column does not exist (see {@link #exists(String)} + */ + public VectorObjectSelector makeVectorObjectSelector(String columnName, VectorColumnSelectorFactory factory) + { + final VirtualColumn virtualColumn = getVirtualColumnForSelector(columnName); + final VectorObjectSelector selector = virtualColumn.makeVectorObjectSelector(columnName, factory); + Preconditions.checkNotNull(selector, "selector"); + return selector; + } + + /** + * Try to create an optimized vector object selector directly from a {@link ColumnSelector}.If this method returns + * null, callers should try to fallback to {@link #makeVectorObjectSelector(String, VectorColumnSelectorFactory)} + * instead. + * + * @throws IllegalArgumentException if the virtual column does not exist (see {@link #exists(String)} + */ + @Nullable + public VectorObjectSelector makeVectorObjectSelector( + String columnName, + ColumnSelector columnSelector, + ReadableVectorOffset offset + ) + { + final VirtualColumn virtualColumn = getVirtualColumnForSelector(columnName); + return virtualColumn.makeVectorObjectSelector(columnName, columnSelector, offset); } @Nullable @@ -273,11 +403,6 @@ public class VirtualColumns implements Cacheable return virtualColumns.toArray(new VirtualColumn[0]); } - public int size() - { - return virtualColumns.size(); - } - public ColumnSelectorFactory wrap(final ColumnSelectorFactory baseFactory) { if (virtualColumns.isEmpty()) { @@ -294,6 +419,15 @@ public class VirtualColumns implements Cacheable return new CacheKeyBuilder((byte) 0).appendCacheablesIgnoringOrder(virtualColumns).build(); } + private VirtualColumn getVirtualColumnForSelector(String columnName) + { + VirtualColumn virtualColumn = getVirtualColumn(columnName); + if (virtualColumn == null) { + throw new IAE("No such virtual column[%s]", columnName); + } + return virtualColumn; + } + private void detectCycles(VirtualColumn virtualColumn, @Nullable Set columnNames) { // Copy columnNames to avoid modifying it @@ -339,4 +473,5 @@ public class VirtualColumns implements Cacheable { return virtualColumns.toString(); } + } diff --git a/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java b/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java index 48c56c9ef6d..a92f0fad91e 100644 --- a/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java @@ -24,6 +24,7 @@ import org.apache.druid.java.util.common.io.Closer; import org.apache.druid.query.dimension.DimensionSpec; import org.apache.druid.segment.QueryableIndex; import org.apache.druid.segment.QueryableIndexStorageAdapter; +import org.apache.druid.segment.VirtualColumns; import org.apache.druid.segment.column.BaseColumn; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnHolder; @@ -33,9 +34,11 @@ import org.apache.druid.segment.column.ValueType; import javax.annotation.Nullable; import java.util.HashMap; import java.util.Map; +import java.util.function.Function; public class QueryableIndexVectorColumnSelectorFactory implements VectorColumnSelectorFactory { + private final VirtualColumns virtualColumns; private final QueryableIndex index; private final ReadableVectorOffset offset; private final Closer closer; @@ -52,12 +55,14 @@ public class QueryableIndexVectorColumnSelectorFactory implements VectorColumnSe final QueryableIndex index, final ReadableVectorOffset offset, final Closer closer, - final Map columnCache + final Map columnCache, + final VirtualColumns virtualColumns ) { this.index = index; this.offset = offset; this.closer = closer; + this.virtualColumns = virtualColumns; this.columnCache = columnCache; this.singleValueDimensionSelectorCache = new HashMap<>(); this.multiValueDimensionSelectorCache = new HashMap<>(); @@ -77,34 +82,53 @@ public class QueryableIndexVectorColumnSelectorFactory implements VectorColumnSe if (!dimensionSpec.canVectorize()) { throw new ISE("DimensionSpec[%s] cannot be vectorized", dimensionSpec); } - - return multiValueDimensionSelectorCache.computeIfAbsent( - dimensionSpec, - spec -> { - final ColumnHolder holder = index.getColumnHolder(spec.getDimension()); - if (holder == null - || holder.getCapabilities().isDictionaryEncoded().isFalse() - || holder.getCapabilities().getType() != ValueType.STRING - || holder.getCapabilities().hasMultipleValues().isFalse()) { - throw new ISE( - "Column[%s] is not a multi-value string column, do not ask for a multi-value selector", - spec.getDimension() - ); - } - - @SuppressWarnings("unchecked") - final DictionaryEncodedColumn dictionaryEncodedColumn = (DictionaryEncodedColumn) - getCachedColumn(spec.getDimension()); - - // dictionaryEncodedColumn is not null because of holder null check above - assert dictionaryEncodedColumn != null; - final MultiValueDimensionVectorSelector selector = dictionaryEncodedColumn.makeMultiValueDimensionVectorSelector( - offset - ); - - return spec.decorate(selector); + Function mappingFunction = spec -> { + if (virtualColumns.exists(spec.getDimension())) { + MultiValueDimensionVectorSelector dimensionSelector = virtualColumns.makeMultiValueDimensionVectorSelector( + dimensionSpec, + index, + offset + ); + if (dimensionSelector == null) { + return virtualColumns.makeMultiValueDimensionVectorSelector(dimensionSpec, this); + } else { + return dimensionSelector; } - ); + } + + final ColumnHolder holder = index.getColumnHolder(spec.getDimension()); + if (holder == null + || holder.getCapabilities().isDictionaryEncoded().isFalse() + || holder.getCapabilities().getType() != ValueType.STRING + || holder.getCapabilities().hasMultipleValues().isFalse()) { + throw new ISE( + "Column[%s] is not a multi-value string column, do not ask for a multi-value selector", + spec.getDimension() + ); + } + + @SuppressWarnings("unchecked") + final DictionaryEncodedColumn dictionaryEncodedColumn = (DictionaryEncodedColumn) + getCachedColumn(spec.getDimension()); + + // dictionaryEncodedColumn is not null because of holder null check above + assert dictionaryEncodedColumn != null; + final MultiValueDimensionVectorSelector selector = dictionaryEncodedColumn.makeMultiValueDimensionVectorSelector( + offset + ); + + return spec.decorate(selector); + }; + + // We cannot use computeIfAbsent() here since the function being applied may modify the cache itself through + // virtual column references, triggering a ConcurrentModificationException in JDK 9 and above. + MultiValueDimensionVectorSelector selector = multiValueDimensionSelectorCache.get(dimensionSpec); + if (selector == null) { + selector = mappingFunction.apply(dimensionSpec); + multiValueDimensionSelectorCache.put(dimensionSpec, selector); + } + + return selector; } @Override @@ -114,66 +138,114 @@ public class QueryableIndexVectorColumnSelectorFactory implements VectorColumnSe throw new ISE("DimensionSpec[%s] cannot be vectorized", dimensionSpec); } - return singleValueDimensionSelectorCache.computeIfAbsent( - dimensionSpec, - spec -> { - final ColumnHolder holder = index.getColumnHolder(spec.getDimension()); - if (holder == null - || !holder.getCapabilities().isDictionaryEncoded().isTrue() - || holder.getCapabilities().getType() != ValueType.STRING) { - // Asking for a single-value dimension selector on a non-string column gets you a bunch of nulls. - return NilVectorSelector.create(offset); - } - - if (holder.getCapabilities().hasMultipleValues().isMaybeTrue()) { - // Asking for a single-value dimension selector on a multi-value column gets you an error. - throw new ISE("Column[%s] is multi-value, do not ask for a single-value selector", spec.getDimension()); - } - - @SuppressWarnings("unchecked") - final DictionaryEncodedColumn dictionaryEncodedColumn = (DictionaryEncodedColumn) - getCachedColumn(spec.getDimension()); - - // dictionaryEncodedColumn is not null because of holder null check above - assert dictionaryEncodedColumn != null; - final SingleValueDimensionVectorSelector selector = - dictionaryEncodedColumn.makeSingleValueDimensionVectorSelector(offset); - - return spec.decorate(selector); + Function mappingFunction = spec -> { + if (virtualColumns.exists(spec.getDimension())) { + SingleValueDimensionVectorSelector dimensionSelector = virtualColumns.makeSingleValueDimensionVectorSelector( + dimensionSpec, + index, + offset + ); + if (dimensionSelector == null) { + return virtualColumns.makeSingleValueDimensionVectorSelector(dimensionSpec, this); + } else { + return dimensionSelector; } - ); + } + + final ColumnHolder holder = index.getColumnHolder(spec.getDimension()); + if (holder == null + || !holder.getCapabilities().isDictionaryEncoded().isTrue() + || holder.getCapabilities().getType() != ValueType.STRING) { + // Asking for a single-value dimension selector on a non-string column gets you a bunch of nulls. + return NilVectorSelector.create(offset); + } + + if (holder.getCapabilities().hasMultipleValues().isMaybeTrue()) { + // Asking for a single-value dimension selector on a multi-value column gets you an error. + throw new ISE("Column[%s] is multi-value, do not ask for a single-value selector", spec.getDimension()); + } + + @SuppressWarnings("unchecked") + final DictionaryEncodedColumn dictionaryEncodedColumn = (DictionaryEncodedColumn) + getCachedColumn(spec.getDimension()); + + // dictionaryEncodedColumn is not null because of holder null check above + assert dictionaryEncodedColumn != null; + final SingleValueDimensionVectorSelector selector = + dictionaryEncodedColumn.makeSingleValueDimensionVectorSelector(offset); + + return spec.decorate(selector); + }; + + // We cannot use computeIfAbsent() here since the function being applied may modify the cache itself through + // virtual column references, triggering a ConcurrentModificationException in JDK 9 and above. + SingleValueDimensionVectorSelector selector = singleValueDimensionSelectorCache.get(dimensionSpec); + if (selector == null) { + selector = mappingFunction.apply(dimensionSpec); + singleValueDimensionSelectorCache.put(dimensionSpec, selector); + } + + return selector; } @Override public VectorValueSelector makeValueSelector(final String columnName) { - return valueSelectorCache.computeIfAbsent( - columnName, - name -> { - final BaseColumn column = getCachedColumn(name); - if (column == null) { - return NilVectorSelector.create(offset); - } else { - return column.makeVectorValueSelector(offset); - } + Function mappingFunction = name -> { + if (virtualColumns.exists(columnName)) { + VectorValueSelector selector = virtualColumns.makeVectorValueSelector(columnName, index, offset); + if (selector == null) { + return virtualColumns.makeVectorValueSelector(columnName, this); + } else { + return selector; } - ); + } + final BaseColumn column = getCachedColumn(name); + if (column == null) { + return NilVectorSelector.create(offset); + } else { + return column.makeVectorValueSelector(offset); + } + }; + // We cannot use computeIfAbsent() here since the function being applied may modify the cache itself through + // virtual column references, triggering a ConcurrentModificationException in JDK 9 and above. + VectorValueSelector columnValueSelector = valueSelectorCache.get(columnName); + if (columnValueSelector == null) { + columnValueSelector = mappingFunction.apply(columnName); + valueSelectorCache.put(columnName, columnValueSelector); + } + + return columnValueSelector; } @Override public VectorObjectSelector makeObjectSelector(final String columnName) { - return objectSelectorCache.computeIfAbsent( - columnName, - name -> { - final BaseColumn column = getCachedColumn(name); - if (column == null) { - return NilVectorSelector.create(offset); - } else { - return column.makeVectorObjectSelector(offset); - } + Function mappingFunction = name -> { + if (virtualColumns.exists(columnName)) { + VectorObjectSelector selector = virtualColumns.makeVectorObjectSelector(columnName, index, offset); + if (selector == null) { + return virtualColumns.makeVectorObjectSelector(columnName, this); + } else { + return selector; } - ); + } + final BaseColumn column = getCachedColumn(name); + if (column == null) { + return NilVectorSelector.create(offset); + } else { + return column.makeVectorObjectSelector(offset); + } + }; + // We cannot use computeIfAbsent() here since the function being applied may modify the cache itself through + // virtual column references, triggering a ConcurrentModificationException in JDK 9 and above. + VectorObjectSelector columnValueSelector = objectSelectorCache.get(columnName); + if (columnValueSelector == null) { + columnValueSelector = mappingFunction.apply(columnName); + objectSelectorCache.put(columnName, columnValueSelector); + } + + return columnValueSelector; } @Nullable @@ -193,6 +265,9 @@ public class QueryableIndexVectorColumnSelectorFactory implements VectorColumnSe @Override public ColumnCapabilities getColumnCapabilities(final String columnName) { + if (virtualColumns.exists(columnName)) { + return virtualColumns.getColumnCapabilities(columnName); + } return QueryableIndexStorageAdapter.getColumnCapabilities(index, columnName); } } diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/AlwaysTwoCounterAggregatorFactory.java b/processing/src/test/java/org/apache/druid/segment/virtual/AlwaysTwoCounterAggregatorFactory.java new file mode 100644 index 00000000000..f4411430051 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/segment/virtual/AlwaysTwoCounterAggregatorFactory.java @@ -0,0 +1,257 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.virtual; + +import com.google.common.collect.ImmutableList; +import org.apache.druid.query.aggregation.Aggregator; +import org.apache.druid.query.aggregation.BufferAggregator; +import org.apache.druid.query.aggregation.CountAggregatorFactory; +import org.apache.druid.query.aggregation.CountVectorAggregator; +import org.apache.druid.query.aggregation.VectorAggregator; +import org.apache.druid.query.dimension.DefaultDimensionSpec; +import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.data.IndexedInts; +import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; +import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorObjectSelector; +import org.apache.druid.segment.vector.VectorValueSelector; +import org.junit.Assert; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; +import java.util.List; + +/** + * Specialized aggregator factory for testing the selectors produced by {@link AlwaysTwoVectorizedVirtualColumn}, used + * for counting the number of values read so that tests can ensure the correct number of values have been processed. A + * {@link AlwaysTwoCounterVectorAggregator} will be produced, backed by a type appropriate selector for a given + * {@link ColumnCapabilities}. + */ +public class AlwaysTwoCounterAggregatorFactory extends CountAggregatorFactory +{ + private final String fieldName; + + public AlwaysTwoCounterAggregatorFactory(String name, String field) + { + super(name); + this.fieldName = field; + } + + @Override + public Aggregator factorize(ColumnSelectorFactory metricFactory) + { + throw new IllegalStateException("don't call this"); + } + + @Override + public BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory) + { + throw new IllegalStateException("don't call this"); + } + + @Override + public VectorAggregator factorizeVector(VectorColumnSelectorFactory selectorFactory) + { + ColumnCapabilities capabilities = selectorFactory.getColumnCapabilities(fieldName); + switch (capabilities.getType()) { + case LONG: + case DOUBLE: + case FLOAT: + return new AlwaysTwoCounterVectorAggregator(selectorFactory.makeValueSelector(fieldName)); + case STRING: + if (capabilities.isDictionaryEncoded().isTrue()) { + if (capabilities.hasMultipleValues().isTrue()) { + return new AlwaysTwoCounterVectorAggregator(selectorFactory.makeMultiValueDimensionSelector( + DefaultDimensionSpec.of(fieldName))); + } + return new AlwaysTwoCounterVectorAggregator(selectorFactory.makeSingleValueDimensionSelector(DefaultDimensionSpec.of(fieldName))); + } + return new AlwaysTwoCounterVectorAggregator(selectorFactory.makeObjectSelector(fieldName)); + default: + throw new IllegalStateException("how did this happen"); + } + } + + public static class AlwaysTwoCounterVectorAggregator extends CountVectorAggregator + { + @Nullable + private final VectorValueSelector valueSelector; + @Nullable + private final VectorObjectSelector objectSelector; + @Nullable + private final SingleValueDimensionVectorSelector singleValueDimensionSelector; + @Nullable + private final MultiValueDimensionVectorSelector multiValueDimensionSelector; + + AlwaysTwoCounterVectorAggregator(VectorValueSelector valueSelector) + { + this(valueSelector, null, null, null); + } + + AlwaysTwoCounterVectorAggregator(VectorObjectSelector objectSelector) + { + this(null, objectSelector, null, null); + } + + AlwaysTwoCounterVectorAggregator(SingleValueDimensionVectorSelector dimSelector) + { + this(null, null, dimSelector, null); + } + + AlwaysTwoCounterVectorAggregator(MultiValueDimensionVectorSelector dimSelector) + { + this(null, null, null, dimSelector); + } + + AlwaysTwoCounterVectorAggregator( + @Nullable VectorValueSelector valueSelector, + @Nullable VectorObjectSelector objectSelector, + @Nullable SingleValueDimensionVectorSelector singleValueDimensionSelector, + @Nullable MultiValueDimensionVectorSelector multiValueDimensionSelector + ) + { + this.valueSelector = valueSelector; + this.objectSelector = objectSelector; + this.singleValueDimensionSelector = singleValueDimensionSelector; + this.multiValueDimensionSelector = multiValueDimensionSelector; + } + + @Override + public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) + { + if (valueSelector != null) { + final long[] vector = valueSelector.getLongVector(); + + long count = 0; + for (int i = startRow; i < endRow; i++) { + Assert.assertEquals(2L, vector[i]); + count += 1; + } + + buf.putLong(position, buf.getLong(position) + count); + return; + } + if (objectSelector != null) { + final Object[] vector = objectSelector.getObjectVector(); + + long count = 0; + for (int i = startRow; i < endRow; i++) { + if (vector[i] instanceof List) { + Assert.assertEquals(ImmutableList.of("2", "2"), vector[i]); + count += 2; + } else { + Assert.assertEquals("2", vector[i]); + count += 1; + } + } + + buf.putLong(position, buf.getLong(position) + count); + return; + } + if (singleValueDimensionSelector != null) { + final int[] rowVector = singleValueDimensionSelector.getRowVector(); + + long count = 0; + for (int i = startRow; i < endRow; i++) { + Assert.assertEquals("2", singleValueDimensionSelector.lookupName(rowVector[i])); + count += 1; + } + + buf.putLong(position, buf.getLong(position) + count); + return; + } + if (multiValueDimensionSelector != null) { + final IndexedInts[] rowVector = multiValueDimensionSelector.getRowVector(); + + long count = 0; + for (int i = startRow; i < endRow; i++) { + //noinspection SSBasedInspection + for (int j = 0; j < rowVector[i].size(); j++) { + Assert.assertEquals("2", multiValueDimensionSelector.lookupName(rowVector[i].get(j))); + count += 1; + } + } + + buf.putLong(position, buf.getLong(position) + count); + return; + } + throw new IllegalStateException("how did this happen"); + } + + @Override + public void aggregate( + ByteBuffer buf, + int numRows, + int[] positions, + @Nullable int[] rows, + int positionOffset + ) + { + if (valueSelector != null) { + final long[] vector = valueSelector.getLongVector(); + + for (int i = 0; i < numRows; i++) { + final int position = positions[i] + positionOffset; + Assert.assertEquals(2L, vector[i]); + buf.putLong(position, buf.getLong(position) + 1); + } + return; + } + if (objectSelector != null) { + final Object[] vector = objectSelector.getObjectVector(); + for (int i = 0; i < numRows; i++) { + final int position = positions[i] + positionOffset; + if (vector[i] instanceof List) { + Assert.assertEquals(ImmutableList.of("2", "2"), vector[i]); + buf.putLong(position, buf.getLong(position) + 2); + } else { + Assert.assertEquals("2", vector[i]); + buf.putLong(position, buf.getLong(position) + 1); + } + } + return; + } + if (singleValueDimensionSelector != null) { + final int[] rowVector = singleValueDimensionSelector.getRowVector(); + for (int i = 0; i < numRows; i++) { + final int position = positions[i] + positionOffset; + Assert.assertEquals("2", singleValueDimensionSelector.lookupName(rowVector[i])); + buf.putLong(position, buf.getLong(position) + 1); + } + return; + } + if (multiValueDimensionSelector != null) { + final IndexedInts[] rowVector = multiValueDimensionSelector.getRowVector(); + for (int i = 0; i < numRows; i++) { + final int position = positions[i] + positionOffset; + //noinspection SSBasedInspection + for (int j = 0; j < rowVector[i].size(); j++) { + Assert.assertEquals("2", multiValueDimensionSelector.lookupName(rowVector[i].get(j))); + buf.putLong(position, buf.getLong(position) + 1); + } + } + return; + } + throw new IllegalStateException("how did this happen"); + } + } +} diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/AlwaysTwoVectorizedVirtualColumn.java b/processing/src/test/java/org/apache/druid/segment/virtual/AlwaysTwoVectorizedVirtualColumn.java new file mode 100644 index 00000000000..78d031bb520 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/segment/virtual/AlwaysTwoVectorizedVirtualColumn.java @@ -0,0 +1,321 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.virtual; + +import com.google.common.collect.ImmutableList; +import org.apache.druid.query.dimension.DimensionSpec; +import org.apache.druid.segment.ColumnInspector; +import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.DimensionSelector; +import org.apache.druid.segment.IdLookup; +import org.apache.druid.segment.VirtualColumn; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.data.ArrayBasedIndexedInts; +import org.apache.druid.segment.data.IndexedInts; +import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; +import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorObjectSelector; +import org.apache.druid.segment.vector.VectorSizeInspector; +import org.apache.druid.segment.vector.VectorValueSelector; +import org.junit.Assert; + +import javax.annotation.Nullable; +import java.util.Arrays; +import java.util.List; + +/** + * {@link VirtualColumn} which only can produce all kinds of vector selectors and report any type of + * {@link ColumnCapabilities} + */ +public class AlwaysTwoVectorizedVirtualColumn implements VirtualColumn +{ + private final String outputName; + private final ColumnCapabilities capabilities; + private final boolean dictionaryEncoded; + + public AlwaysTwoVectorizedVirtualColumn( + String name, + ColumnCapabilities capabilites + ) + { + this.outputName = name; + this.capabilities = capabilites; + this.dictionaryEncoded = capabilites.isDictionaryEncoded().isTrue() && + capabilites.areDictionaryValuesUnique().isTrue(); + } + + @Override + public boolean canVectorize(ColumnInspector inspector) + { + Assert.assertNotNull(inspector); + return true; + } + + @Override + public String getOutputName() + { + return outputName; + } + + @Override + public DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec, ColumnSelectorFactory factory) + { + throw new IllegalStateException("don't call this"); + } + + @Override + public ColumnValueSelector makeColumnValueSelector(String columnName, ColumnSelectorFactory factory) + { + throw new IllegalStateException("don't call this"); + } + + @Override + public SingleValueDimensionVectorSelector makeSingleValueVectorDimensionSelector( + DimensionSpec dimensionSpec, + VectorColumnSelectorFactory factory + ) + { + Assert.assertEquals(outputName, dimensionSpec.getOutputName()); + return new SingleValueDimensionVectorSelector() + { + private final VectorSizeInspector inspector = factory.getVectorSizeInspector(); + private final int[] rowVector = new int[inspector.getMaxVectorSize()]; + + @Override + public int[] getRowVector() + { + + return rowVector; + } + + @Override + public int getValueCardinality() + { + return dictionaryEncoded ? 1 : CARDINALITY_UNKNOWN; + } + + @Nullable + @Override + public String lookupName(int id) + { + return "2"; + } + + @Override + public boolean nameLookupPossibleInAdvance() + { + return dictionaryEncoded; + } + + @Nullable + @Override + public IdLookup idLookup() + { + return null; + } + + @Override + public int getMaxVectorSize() + { + return inspector.getMaxVectorSize(); + } + + @Override + public int getCurrentVectorSize() + { + return inspector.getCurrentVectorSize(); + } + }; + } + + @Override + public MultiValueDimensionVectorSelector makeMultiValueVectorDimensionSelector( + DimensionSpec dimensionSpec, + VectorColumnSelectorFactory factory + ) + { + Assert.assertEquals(outputName, dimensionSpec.getOutputName()); + final IndexedInts[] rowVector = new IndexedInts[factory.getVectorSizeInspector().getMaxVectorSize()]; + Arrays.fill(rowVector, new ArrayBasedIndexedInts(new int[]{0, 0})); + return new MultiValueDimensionVectorSelector() + { + private final VectorSizeInspector inspector = factory.getVectorSizeInspector(); + + @Override + public IndexedInts[] getRowVector() + { + return rowVector; + } + + @Override + public int getValueCardinality() + { + return dictionaryEncoded ? 1 : CARDINALITY_UNKNOWN; + } + + @Nullable + @Override + public String lookupName(int id) + { + return "2"; + } + + @Override + public boolean nameLookupPossibleInAdvance() + { + return dictionaryEncoded; + } + + @Nullable + @Override + public IdLookup idLookup() + { + return null; + } + + @Override + public int getMaxVectorSize() + { + return inspector.getMaxVectorSize(); + } + + @Override + public int getCurrentVectorSize() + { + return inspector.getCurrentVectorSize(); + } + }; + } + + @Override + public VectorValueSelector makeVectorValueSelector( + String columnName, + VectorColumnSelectorFactory factory + ) + { + Assert.assertEquals(outputName, columnName); + final long[] longs = new long[factory.getVectorSizeInspector().getMaxVectorSize()]; + final double[] doubles = new double[factory.getVectorSizeInspector().getMaxVectorSize()]; + final float[] floats = new float[factory.getVectorSizeInspector().getMaxVectorSize()]; + Arrays.fill(longs, 2L); + Arrays.fill(doubles, 2.0); + Arrays.fill(floats, 2.0f); + return new VectorValueSelector() + { + @Override + public long[] getLongVector() + { + return longs; + } + + @Override + public float[] getFloatVector() + { + return floats; + } + + @Override + public double[] getDoubleVector() + { + return doubles; + } + + @Nullable + @Override + public boolean[] getNullVector() + { + return null; + } + + @Override + public int getMaxVectorSize() + { + return factory.getVectorSizeInspector().getMaxVectorSize(); + } + + @Override + public int getCurrentVectorSize() + { + return factory.getVectorSizeInspector().getCurrentVectorSize(); + } + }; + } + + @Override + public VectorObjectSelector makeVectorObjectSelector( + String columnName, + VectorColumnSelectorFactory factory + ) + { + Assert.assertEquals(outputName, columnName); + final Object[] objects = new Object[factory.getVectorSizeInspector().getMaxVectorSize()]; + if (capabilities.hasMultipleValues().isTrue()) { + Arrays.fill(objects, ImmutableList.of("2", "2")); + } else { + Arrays.fill(objects, "2"); + } + return new VectorObjectSelector() + { + @Override + public int getMaxVectorSize() + { + return factory.getVectorSizeInspector().getMaxVectorSize(); + } + + @Override + public int getCurrentVectorSize() + { + return factory.getVectorSizeInspector().getCurrentVectorSize(); + } + + @Override + public Object[] getObjectVector() + { + return objects; + } + }; + } + + @Override + public ColumnCapabilities capabilities(String columnName) + { + return capabilities; + } + + @Override + public List requiredColumns() + { + return ImmutableList.of(); + } + + @Override + public boolean usesDotNotation() + { + return false; + } + + @Override + public byte[] getCacheKey() + { + return new byte[0]; + } +} diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/VectorizedVirtualColumnTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/VectorizedVirtualColumnTest.java new file mode 100644 index 00000000000..ed6d1b4fe8a --- /dev/null +++ b/processing/src/test/java/org/apache/druid/segment/virtual/VectorizedVirtualColumnTest.java @@ -0,0 +1,334 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.virtual; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; +import org.apache.druid.java.util.common.DateTimes; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.java.util.common.guava.Sequence; +import org.apache.druid.query.Druids; +import org.apache.druid.query.QueryContexts; +import org.apache.druid.query.QueryRunnerTestHelper; +import org.apache.druid.query.Result; +import org.apache.druid.query.aggregation.AggregationTestHelper; +import org.apache.druid.query.dimension.DefaultDimensionSpec; +import org.apache.druid.query.groupby.GroupByQuery; +import org.apache.druid.query.groupby.GroupByQueryConfig; +import org.apache.druid.query.groupby.GroupByQueryRunnerTestHelper; +import org.apache.druid.query.groupby.ResultRow; +import org.apache.druid.query.timeseries.TimeseriesQuery; +import org.apache.druid.query.timeseries.TimeseriesResultValue; +import org.apache.druid.segment.QueryableIndexSegment; +import org.apache.druid.segment.Segment; +import org.apache.druid.segment.TestHelper; +import org.apache.druid.segment.TestIndex; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnCapabilitiesImpl; +import org.apache.druid.segment.column.ValueType; +import org.apache.druid.timeline.SegmentId; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.rules.TemporaryFolder; + +import java.util.Collections; +import java.util.List; +import java.util.Map; + +public class VectorizedVirtualColumnTest +{ + private static final String ALWAYS_TWO = "two"; + private static final String COUNT = "count"; + private static final Map CONTEXT = ImmutableMap.of(QueryContexts.VECTORIZE_KEY, "force"); + + @Rule + public final TemporaryFolder tmpFolder = new TemporaryFolder(); + + @Rule + public ExpectedException expectedException = ExpectedException.none(); + + private AggregationTestHelper groupByTestHelper; + private AggregationTestHelper timeseriesTestHelper; + private List segments = null; + + @Before + public void setup() + { + groupByTestHelper = AggregationTestHelper.createGroupByQueryAggregationTestHelper( + Collections.emptyList(), + new GroupByQueryConfig(), + tmpFolder + ); + timeseriesTestHelper = AggregationTestHelper.createTimeseriesQueryAggregationTestHelper( + Collections.emptyList(), + tmpFolder + ); + QueryableIndexSegment queryableIndexSegment = new QueryableIndexSegment( + TestIndex.getMMappedTestIndex(), + SegmentId.dummy(QueryRunnerTestHelper.DATA_SOURCE) + ); + + segments = Lists.newArrayList(queryableIndexSegment, queryableIndexSegment); + } + + @Test + public void testGroupBySingleValueString() + { + testGroupBy(new ColumnCapabilitiesImpl() + .setType(ValueType.STRING) + .setDictionaryEncoded(true) + .setDictionaryValuesUnique(true) + .setHasMultipleValues(false) + ); + } + + @Test + public void testGroupByMultiValueString() + { + // cannot currently group by string columns that might be multi valued + cannotVectorize(); + testGroupBy(new ColumnCapabilitiesImpl() + .setType(ValueType.STRING) + .setDictionaryEncoded(true) + .setDictionaryValuesUnique(true) + .setHasMultipleValues(true) + ); + } + + @Test + public void testGroupByMultiValueStringUnknown() + { + // cannot currently group by string columns that might be multi valued + cannotVectorize(); + testGroupBy(new ColumnCapabilitiesImpl() + .setType(ValueType.STRING) + .setDictionaryEncoded(true) + .setDictionaryValuesUnique(true) + ); + } + + @Test + public void testGroupBySingleValueStringNotDictionaryEncoded() + { + // cannot currently group by string columns that are not dictionary encoded + cannotVectorize(); + testGroupBy(new ColumnCapabilitiesImpl() + .setType(ValueType.STRING) + .setDictionaryEncoded(false) + .setDictionaryValuesUnique(false) + .setHasMultipleValues(false) + ); + } + + @Test + public void testGroupByMultiValueStringNotDictionaryEncoded() + { + // cannot currently group by string columns that might be multi valued + cannotVectorize(); + testGroupBy(new ColumnCapabilitiesImpl() + .setType(ValueType.STRING) + .setDictionaryEncoded(false) + .setDictionaryValuesUnique(false) + .setHasMultipleValues(true) + ); + } + + @Test + public void testGroupByLong() + { + testGroupBy(ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG)); + } + + @Test + public void testGroupByDouble() + { + testGroupBy(ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.DOUBLE)); + } + + @Test + public void testGroupByFloat() + { + testGroupBy(ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.FLOAT)); + } + + @Test + public void testTimeseriesSingleValueString() + { + testTimeseries(new ColumnCapabilitiesImpl() + .setType(ValueType.STRING) + .setDictionaryEncoded(true) + .setDictionaryValuesUnique(true) + .setHasMultipleValues(false) + ); + } + + @Test + public void testTimeseriesMultiValueString() + { + testTimeseries(new ColumnCapabilitiesImpl() + .setType(ValueType.STRING) + .setDictionaryEncoded(true) + .setDictionaryValuesUnique(true) + .setHasMultipleValues(true) + ); + } + + @Test + public void testTimeseriesMultiValueStringUnknown() + { + testTimeseries(new ColumnCapabilitiesImpl() + .setType(ValueType.STRING) + .setDictionaryEncoded(true) + .setDictionaryValuesUnique(true) + ); + } + + @Test + public void testTimeseriesSingleValueStringNotDictionaryEncoded() + { + testTimeseries(new ColumnCapabilitiesImpl() + .setType(ValueType.STRING) + .setDictionaryEncoded(false) + .setDictionaryValuesUnique(false) + .setHasMultipleValues(false) + ); + } + + @Test + public void testTimeseriesMultiValueStringNotDictionaryEncoded() + { + testTimeseries(new ColumnCapabilitiesImpl() + .setType(ValueType.STRING) + .setDictionaryEncoded(false) + .setDictionaryValuesUnique(false) + .setHasMultipleValues(true) + ); + } + + @Test + public void testTimeseriesLong() + { + testTimeseries(ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG)); + } + + @Test + public void testTimeseriesDouble() + { + testTimeseries(ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.DOUBLE)); + } + + @Test + public void testTimeseriesFloat() + { + testTimeseries(ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.FLOAT)); + } + + private void testTimeseries(ColumnCapabilities capabilities) + { + TimeseriesQuery query = Druids.newTimeseriesQueryBuilder() + .intervals("2000/2030") + .dataSource(QueryRunnerTestHelper.DATA_SOURCE) + .granularity(Granularities.ALL) + .virtualColumns(new AlwaysTwoVectorizedVirtualColumn(ALWAYS_TWO, capabilities)) + .aggregators(new AlwaysTwoCounterAggregatorFactory(COUNT, ALWAYS_TWO)) + .context(CONTEXT) + .build(); + + Sequence seq = timeseriesTestHelper.runQueryOnSegmentsObjs(segments, query); + + List> expectedResults = ImmutableList.of( + new Result<>( + DateTimes.of("2011-01-12T00:00:00.000Z"), + new TimeseriesResultValue( + ImmutableMap.of(COUNT, getCount(capabilities)) + ) + ) + ); + + TestHelper.assertExpectedObjects(expectedResults, seq.toList(), "failed"); + } + + private void testGroupBy(ColumnCapabilities capabilities) + { + GroupByQuery query = new GroupByQuery.Builder() + .setDataSource(QueryRunnerTestHelper.DATA_SOURCE) + .setGranularity(Granularities.ALL) + .setVirtualColumns( + new AlwaysTwoVectorizedVirtualColumn(ALWAYS_TWO, capabilities) + ) + .addDimension(new DefaultDimensionSpec(ALWAYS_TWO, ALWAYS_TWO, capabilities.getType())) + .setAggregatorSpecs(new AlwaysTwoCounterAggregatorFactory(COUNT, ALWAYS_TWO)) + .setInterval("2000/2030") + .setContext(CONTEXT) + .addOrderByColumn(ALWAYS_TWO) + .build(); + + List rows = groupByTestHelper.runQueryOnSegmentsObjs(segments, query).toList(); + + List expectedRows = Collections.singletonList( + GroupByQueryRunnerTestHelper.createExpectedRow( + query, + "2000", + COUNT, + getCount(capabilities), + ALWAYS_TWO, + getTwo(capabilities) + ) + ); + + TestHelper.assertExpectedObjects(expectedRows, rows, "failed"); + } + + private long getCount(ColumnCapabilities capabilities) + { + long modifier = 1L; + if (capabilities.hasMultipleValues().isTrue()) { + modifier = 2L; + } + return 2418L * modifier; + } + + private Object getTwo(ColumnCapabilities capabilities) + { + switch (capabilities.getType()) { + case LONG: + return 2L; + case DOUBLE: + return 2.0; + case FLOAT: + return 2.0f; + case STRING: + default: + if (capabilities.hasMultipleValues().isTrue()) { + return ImmutableList.of("2", "2"); + } + return "2"; + } + } + + private void cannotVectorize() + { + expectedException.expect(RuntimeException.class); + expectedException.expectMessage("Cannot vectorize!"); + } +}