mirror of https://github.com/apache/druid.git
vectorize constant expressions with optimized selectors (#10440)
This commit is contained in:
parent
2be1ae128f
commit
753bce324b
|
@ -0,0 +1,172 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.druid.segment.vector;
|
||||
|
||||
import org.apache.druid.segment.IdLookup;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.util.Arrays;
|
||||
|
||||
public class ConstantVectorSelectors
|
||||
{
|
||||
public static VectorValueSelector vectorValueSelector(VectorSizeInspector inspector, @Nullable Number constant)
|
||||
{
|
||||
if (constant == null) {
|
||||
return NilVectorSelector.create(inspector);
|
||||
}
|
||||
final long[] longVector = new long[inspector.getMaxVectorSize()];
|
||||
final float[] floatVector = new float[inspector.getMaxVectorSize()];
|
||||
final double[] doubleVector = new double[inspector.getMaxVectorSize()];
|
||||
Arrays.fill(longVector, constant.longValue());
|
||||
Arrays.fill(floatVector, constant.floatValue());
|
||||
Arrays.fill(doubleVector, constant.doubleValue());
|
||||
return new VectorValueSelector()
|
||||
{
|
||||
@Override
|
||||
public long[] getLongVector()
|
||||
{
|
||||
return longVector;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float[] getFloatVector()
|
||||
{
|
||||
return floatVector;
|
||||
}
|
||||
|
||||
@Override
|
||||
public double[] getDoubleVector()
|
||||
{
|
||||
return doubleVector;
|
||||
}
|
||||
|
||||
@Nullable
|
||||
@Override
|
||||
public boolean[] getNullVector()
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getMaxVectorSize()
|
||||
{
|
||||
return inspector.getMaxVectorSize();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getCurrentVectorSize()
|
||||
{
|
||||
return inspector.getCurrentVectorSize();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
public static VectorObjectSelector vectorObjectSelector(
|
||||
VectorSizeInspector inspector,
|
||||
@Nullable Object object
|
||||
)
|
||||
{
|
||||
if (object == null) {
|
||||
return NilVectorSelector.create(inspector);
|
||||
}
|
||||
|
||||
final Object[] objects = new Object[inspector.getMaxVectorSize()];
|
||||
Arrays.fill(objects, object);
|
||||
|
||||
return new VectorObjectSelector()
|
||||
{
|
||||
@Override
|
||||
public Object[] getObjectVector()
|
||||
{
|
||||
return objects;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getMaxVectorSize()
|
||||
{
|
||||
return inspector.getMaxVectorSize();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getCurrentVectorSize()
|
||||
{
|
||||
return inspector.getCurrentVectorSize();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
public static SingleValueDimensionVectorSelector singleValueDimensionVectorSelector(
|
||||
VectorSizeInspector inspector,
|
||||
@Nullable String value
|
||||
)
|
||||
{
|
||||
if (value == null) {
|
||||
return NilVectorSelector.create(inspector);
|
||||
}
|
||||
|
||||
final int[] row = new int[inspector.getMaxVectorSize()];
|
||||
return new SingleValueDimensionVectorSelector()
|
||||
{
|
||||
@Override
|
||||
public int[] getRowVector()
|
||||
{
|
||||
return row;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getValueCardinality()
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
@Nullable
|
||||
@Override
|
||||
public String lookupName(int id)
|
||||
{
|
||||
return value;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean nameLookupPossibleInAdvance()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
@Nullable
|
||||
@Override
|
||||
public IdLookup idLookup()
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getMaxVectorSize()
|
||||
{
|
||||
return inspector.getMaxVectorSize();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getCurrentVectorSize()
|
||||
{
|
||||
return inspector.getCurrentVectorSize();
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
|
@ -100,6 +100,11 @@ public class ExpressionPlan
|
|||
this.unappliedInputs = unappliedInputs;
|
||||
}
|
||||
|
||||
public boolean isConstant()
|
||||
{
|
||||
return analysis.getRequiredBindings().isEmpty();
|
||||
}
|
||||
|
||||
public Expr getExpression()
|
||||
{
|
||||
return expression;
|
||||
|
|
|
@ -23,8 +23,11 @@ import com.google.common.base.Preconditions;
|
|||
import org.apache.druid.math.expr.Expr;
|
||||
import org.apache.druid.math.expr.ExprType;
|
||||
import org.apache.druid.math.expr.vector.ExprVectorProcessor;
|
||||
import org.apache.druid.query.expression.ExprUtils;
|
||||
import org.apache.druid.segment.column.ColumnCapabilities;
|
||||
import org.apache.druid.segment.column.ValueType;
|
||||
import org.apache.druid.segment.vector.ConstantVectorSelectors;
|
||||
import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector;
|
||||
import org.apache.druid.segment.vector.VectorColumnSelectorFactory;
|
||||
import org.apache.druid.segment.vector.VectorObjectSelector;
|
||||
import org.apache.druid.segment.vector.VectorValueSelector;
|
||||
|
@ -38,6 +41,22 @@ public class ExpressionVectorSelectors
|
|||
// No instantiation.
|
||||
}
|
||||
|
||||
public static SingleValueDimensionVectorSelector makeSingleValueDimensionVectorSelector(
|
||||
VectorColumnSelectorFactory factory,
|
||||
Expr expression
|
||||
)
|
||||
{
|
||||
final ExpressionPlan plan = ExpressionPlanner.plan(factory, expression);
|
||||
Preconditions.checkArgument(plan.is(ExpressionPlan.Trait.VECTORIZABLE));
|
||||
// only constant expressions are currently supported, nothing else should get here
|
||||
|
||||
if (plan.isConstant()) {
|
||||
String constant = plan.getExpression().eval(ExprUtils.nilBindings()).asString();
|
||||
return ConstantVectorSelectors.singleValueDimensionVectorSelector(factory.getVectorSizeInspector(), constant);
|
||||
}
|
||||
throw new IllegalStateException("Only constant expressions currently support dimension selectors");
|
||||
}
|
||||
|
||||
public static VectorValueSelector makeVectorValueSelector(
|
||||
VectorColumnSelectorFactory factory,
|
||||
Expr expression
|
||||
|
@ -45,6 +64,13 @@ public class ExpressionVectorSelectors
|
|||
{
|
||||
final ExpressionPlan plan = ExpressionPlanner.plan(factory, expression);
|
||||
Preconditions.checkArgument(plan.is(ExpressionPlan.Trait.VECTORIZABLE));
|
||||
|
||||
if (plan.isConstant()) {
|
||||
return ConstantVectorSelectors.vectorValueSelector(
|
||||
factory.getVectorSizeInspector(),
|
||||
(Number) plan.getExpression().eval(ExprUtils.nilBindings()).value()
|
||||
);
|
||||
}
|
||||
final Expr.VectorInputBinding bindings = createVectorBindings(plan.getAnalysis(), factory);
|
||||
final ExprVectorProcessor<?> processor = plan.getExpression().buildVectorized(bindings);
|
||||
return new ExpressionVectorValueSelector(processor, bindings);
|
||||
|
@ -57,6 +83,14 @@ public class ExpressionVectorSelectors
|
|||
{
|
||||
final ExpressionPlan plan = ExpressionPlanner.plan(factory, expression);
|
||||
Preconditions.checkArgument(plan.is(ExpressionPlan.Trait.VECTORIZABLE));
|
||||
|
||||
if (plan.isConstant()) {
|
||||
return ConstantVectorSelectors.vectorObjectSelector(
|
||||
factory.getVectorSizeInspector(),
|
||||
plan.getExpression().eval(ExprUtils.nilBindings()).value()
|
||||
);
|
||||
}
|
||||
|
||||
final Expr.VectorInputBinding bindings = createVectorBindings(plan.getAnalysis(), factory);
|
||||
final ExprVectorProcessor<?> processor = plan.getExpression().buildVectorized(bindings);
|
||||
return new ExpressionVectorObjectSelector(processor, bindings);
|
||||
|
|
|
@ -42,6 +42,7 @@ import org.apache.druid.segment.VirtualColumn;
|
|||
import org.apache.druid.segment.column.ColumnCapabilities;
|
||||
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
|
||||
import org.apache.druid.segment.column.ValueType;
|
||||
import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector;
|
||||
import org.apache.druid.segment.vector.VectorColumnSelectorFactory;
|
||||
import org.apache.druid.segment.vector.VectorObjectSelector;
|
||||
import org.apache.druid.segment.vector.VectorValueSelector;
|
||||
|
@ -146,6 +147,15 @@ public class ExpressionVirtualColumn implements VirtualColumn
|
|||
return plan.is(ExpressionPlan.Trait.VECTORIZABLE);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SingleValueDimensionVectorSelector makeSingleValueVectorDimensionSelector(
|
||||
DimensionSpec dimensionSpec,
|
||||
VectorColumnSelectorFactory factory
|
||||
)
|
||||
{
|
||||
return ExpressionVectorSelectors.makeSingleValueDimensionVectorSelector(factory, parsedExpression.get());
|
||||
}
|
||||
|
||||
@Override
|
||||
public VectorValueSelector makeVectorValueSelector(String columnName, VectorColumnSelectorFactory factory)
|
||||
{
|
||||
|
@ -200,6 +210,9 @@ public class ExpressionVirtualColumn implements VirtualColumn
|
|||
return ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(outputType);
|
||||
}
|
||||
|
||||
// array types shouldn't escape the expression system currently, so coerce anything past this point into some
|
||||
// style of string
|
||||
|
||||
// we don't have to check for unknown input here because output type is unable to be inferred if we don't know
|
||||
// the complete set of input types
|
||||
if (plan.any(ExpressionPlan.Trait.NON_SCALAR_OUTPUT, ExpressionPlan.Trait.NEEDS_APPLIED)) {
|
||||
|
@ -207,7 +220,16 @@ public class ExpressionVirtualColumn implements VirtualColumn
|
|||
return new ColumnCapabilitiesImpl().setType(ValueType.STRING).setHasMultipleValues(true);
|
||||
}
|
||||
|
||||
// if we got here, lets call it single value string output
|
||||
// constant strings are supported as dimension selectors, set them as dictionary encoded and unique
|
||||
if (plan.isConstant()) {
|
||||
return new ColumnCapabilitiesImpl().setType(ValueType.STRING)
|
||||
.setDictionaryEncoded(true)
|
||||
.setDictionaryValuesUnique(true)
|
||||
.setDictionaryValuesSorted(true)
|
||||
.setHasMultipleValues(false);
|
||||
}
|
||||
|
||||
// if we got here, let's call it single value string output, non-dictionary encoded
|
||||
return new ColumnCapabilitiesImpl().setType(ValueType.STRING)
|
||||
.setHasMultipleValues(false)
|
||||
.setDictionaryEncoded(false);
|
||||
|
|
|
@ -28,6 +28,7 @@ import org.apache.druid.math.expr.Expr;
|
|||
import org.apache.druid.math.expr.ExprMacroTable;
|
||||
import org.apache.druid.math.expr.ExprType;
|
||||
import org.apache.druid.math.expr.Parser;
|
||||
import org.apache.druid.query.dimension.DefaultDimensionSpec;
|
||||
import org.apache.druid.query.expression.TestExprMacroTable;
|
||||
import org.apache.druid.segment.ColumnInspector;
|
||||
import org.apache.druid.segment.ColumnValueSelector;
|
||||
|
@ -39,6 +40,7 @@ import org.apache.druid.segment.column.ColumnCapabilities;
|
|||
import org.apache.druid.segment.generator.GeneratorBasicSchemas;
|
||||
import org.apache.druid.segment.generator.GeneratorSchemaInfo;
|
||||
import org.apache.druid.segment.generator.SegmentGenerator;
|
||||
import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector;
|
||||
import org.apache.druid.segment.vector.VectorCursor;
|
||||
import org.apache.druid.segment.vector.VectorObjectSelector;
|
||||
import org.apache.druid.segment.vector.VectorValueSelector;
|
||||
|
@ -75,7 +77,11 @@ public class ExpressionVectorSelectorsTest
|
|||
"parse_long(string1)",
|
||||
"parse_long(string1) * double3",
|
||||
"parse_long(string5) * parse_long(string1)",
|
||||
"parse_long(string5) * parse_long(string1) * double3"
|
||||
"parse_long(string5) * parse_long(string1) * double3",
|
||||
"'string constant'",
|
||||
"1",
|
||||
"192412.24124",
|
||||
"null"
|
||||
);
|
||||
|
||||
private static final int ROWS_PER_SEGMENT = 100_000;
|
||||
|
@ -167,7 +173,8 @@ public class ExpressionVectorSelectorsTest
|
|||
)
|
||||
)
|
||||
);
|
||||
VectorCursor cursor = new QueryableIndexStorageAdapter(index).makeVectorCursor(
|
||||
final QueryableIndexStorageAdapter storageAdapter = new QueryableIndexStorageAdapter(index);
|
||||
VectorCursor cursor = storageAdapter.makeVectorCursor(
|
||||
null,
|
||||
index.getDataInterval(),
|
||||
virtualColumns,
|
||||
|
@ -176,40 +183,55 @@ public class ExpressionVectorSelectorsTest
|
|||
null
|
||||
);
|
||||
|
||||
VectorValueSelector selector = null;
|
||||
VectorObjectSelector objectSelector = null;
|
||||
if (outputType.isNumeric()) {
|
||||
selector = cursor.getColumnSelectorFactory().makeValueSelector("v");
|
||||
} else {
|
||||
objectSelector = cursor.getColumnSelectorFactory().makeObjectSelector("v");
|
||||
}
|
||||
int rowCount = 0;
|
||||
while (!cursor.isDone()) {
|
||||
boolean[] nulls;
|
||||
switch (outputType) {
|
||||
case LONG:
|
||||
nulls = selector.getNullVector();
|
||||
long[] longs = selector.getLongVector();
|
||||
for (int i = 0; i < selector.getCurrentVectorSize(); i++, rowCount++) {
|
||||
results.add(nulls != null && nulls[i] ? null : longs[i]);
|
||||
}
|
||||
break;
|
||||
case DOUBLE:
|
||||
nulls = selector.getNullVector();
|
||||
double[] doubles = selector.getDoubleVector();
|
||||
for (int i = 0; i < selector.getCurrentVectorSize(); i++, rowCount++) {
|
||||
results.add(nulls != null && nulls[i] ? null : doubles[i]);
|
||||
}
|
||||
break;
|
||||
case STRING:
|
||||
Object[] objects = objectSelector.getObjectVector();
|
||||
for (int i = 0; i < objectSelector.getCurrentVectorSize(); i++, rowCount++) {
|
||||
results.add(objects[i]);
|
||||
}
|
||||
break;
|
||||
}
|
||||
ColumnCapabilities capabilities = virtualColumns.getColumnCapabilities(storageAdapter, "v");
|
||||
|
||||
cursor.advance();
|
||||
int rowCount = 0;
|
||||
if (capabilities.isDictionaryEncoded().isTrue()) {
|
||||
SingleValueDimensionVectorSelector selector = cursor.getColumnSelectorFactory().makeSingleValueDimensionSelector(
|
||||
DefaultDimensionSpec.of("v")
|
||||
);
|
||||
while (!cursor.isDone()) {
|
||||
int[] row = selector.getRowVector();
|
||||
for (int i = 0; i < selector.getCurrentVectorSize(); i++, rowCount++) {
|
||||
results.add(selector.lookupName(row[i]));
|
||||
}
|
||||
cursor.advance();
|
||||
}
|
||||
} else {
|
||||
VectorValueSelector selector = null;
|
||||
VectorObjectSelector objectSelector = null;
|
||||
if (outputType.isNumeric()) {
|
||||
selector = cursor.getColumnSelectorFactory().makeValueSelector("v");
|
||||
} else {
|
||||
objectSelector = cursor.getColumnSelectorFactory().makeObjectSelector("v");
|
||||
}
|
||||
while (!cursor.isDone()) {
|
||||
boolean[] nulls;
|
||||
switch (outputType) {
|
||||
case LONG:
|
||||
nulls = selector.getNullVector();
|
||||
long[] longs = selector.getLongVector();
|
||||
for (int i = 0; i < selector.getCurrentVectorSize(); i++, rowCount++) {
|
||||
results.add(nulls != null && nulls[i] ? null : longs[i]);
|
||||
}
|
||||
break;
|
||||
case DOUBLE:
|
||||
nulls = selector.getNullVector();
|
||||
double[] doubles = selector.getDoubleVector();
|
||||
for (int i = 0; i < selector.getCurrentVectorSize(); i++, rowCount++) {
|
||||
results.add(nulls != null && nulls[i] ? null : doubles[i]);
|
||||
}
|
||||
break;
|
||||
case STRING:
|
||||
Object[] objects = objectSelector.getObjectVector();
|
||||
for (int i = 0; i < objectSelector.getCurrentVectorSize(); i++, rowCount++) {
|
||||
results.add(objects[i]);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
cursor.advance();
|
||||
}
|
||||
}
|
||||
closer.register(cursor);
|
||||
|
||||
|
@ -224,10 +246,15 @@ public class ExpressionVectorSelectorsTest
|
|||
|
||||
int rowCountCursor = cursors
|
||||
.map(nonVectorized -> {
|
||||
final ColumnValueSelector nonSelector = nonVectorized.getColumnSelectorFactory().makeColumnValueSelector("v");
|
||||
final ColumnValueSelector nonSelector = nonVectorized.getColumnSelectorFactory()
|
||||
.makeColumnValueSelector("v");
|
||||
int rows = 0;
|
||||
while (!nonVectorized.isDone()) {
|
||||
Assert.assertEquals(StringUtils.format("Failed at row %s", rows), nonSelector.getObject(), results.get(rows));
|
||||
Assert.assertEquals(
|
||||
StringUtils.format("Failed at row %s", rows),
|
||||
nonSelector.getObject(),
|
||||
results.get(rows)
|
||||
);
|
||||
rows++;
|
||||
nonVectorized.advance();
|
||||
}
|
||||
|
|
|
@ -92,6 +92,7 @@ public class SqlVectorizedExpressionSanityTest extends InitializedNullHandlingTe
|
|||
"SELECT SUM(PARSE_LONG(string1)) FROM foo",
|
||||
"SELECT SUM(PARSE_LONG(string3)) FROM foo",
|
||||
"SELECT TIME_FLOOR(__time, 'PT1H'), string2, SUM(long1 * double4) FROM foo GROUP BY 1,2 ORDER BY 3",
|
||||
"SELECT TIME_FLOOR(__time, 'PT1H'), string2, SUM(long1 * double4) FROM foo WHERE string2 = '10' GROUP BY 1,2 ORDER BY 3",
|
||||
"SELECT TIME_FLOOR(__time, 'PT1H'), SUM(long1 * long4) FROM foo GROUP BY 1 ORDER BY 1",
|
||||
"SELECT TIME_FLOOR(__time, 'PT1H'), SUM(long1 * long4) FROM foo GROUP BY 1 ORDER BY 2",
|
||||
"SELECT TIME_FLOOR(TIMESTAMPADD(DAY, -1, __time), 'PT1H'), SUM(long1 * long4) FROM foo GROUP BY 1 ORDER BY 1",
|
||||
|
|
Loading…
Reference in New Issue