vectorize constant expressions with optimized selectors (#10440)

This commit is contained in:
Clint Wylie 2020-09-29 13:19:06 -07:00 committed by GitHub
parent 2be1ae128f
commit 753bce324b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 299 additions and 38 deletions

View File

@ -0,0 +1,172 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.segment.vector;
import org.apache.druid.segment.IdLookup;
import javax.annotation.Nullable;
import java.util.Arrays;
/**
 * Static factories for vector selectors that expose one constant value for every row.
 *
 * Each factory falls back to {@link NilVectorSelector#create} when the constant is null. Otherwise the backing
 * arrays are allocated once, sized by {@link VectorSizeInspector#getMaxVectorSize()}, and the same array instances
 * are handed out on every call. Vector sizes are always read through the supplied inspector.
 */
public class ConstantVectorSelectors
{
  private ConstantVectorSelectors()
  {
    // No instantiation.
  }

  /**
   * Creates a numeric selector whose long, float and double vectors are all pre-filled with {@code constant}.
   *
   * @param inspector source of the maximum and current vector sizes
   * @param constant  value to expose; when null, {@link NilVectorSelector#create} is returned instead
   * @return a selector returning the pre-filled arrays and a null null-vector (no row is flagged as null)
   */
  public static VectorValueSelector vectorValueSelector(VectorSizeInspector inspector, @Nullable Number constant)
  {
    if (constant == null) {
      return NilVectorSelector.create(inspector);
    }

    // Read the maximum size once; all three primitive views share the same length.
    final int maxVectorSize = inspector.getMaxVectorSize();
    final long[] longVector = new long[maxVectorSize];
    final float[] floatVector = new float[maxVectorSize];
    final double[] doubleVector = new double[maxVectorSize];
    Arrays.fill(longVector, constant.longValue());
    Arrays.fill(floatVector, constant.floatValue());
    Arrays.fill(doubleVector, constant.doubleValue());

    return new VectorValueSelector()
    {
      @Override
      public long[] getLongVector()
      {
        return longVector;
      }

      @Override
      public float[] getFloatVector()
      {
        return floatVector;
      }

      @Override
      public double[] getDoubleVector()
      {
        return doubleVector;
      }

      @Nullable
      @Override
      public boolean[] getNullVector()
      {
        // The constant is known to be non-null on this path, so there is no null vector to report.
        return null;
      }

      @Override
      public int getMaxVectorSize()
      {
        return inspector.getMaxVectorSize();
      }

      @Override
      public int getCurrentVectorSize()
      {
        return inspector.getCurrentVectorSize();
      }
    };
  }

  /**
   * Creates an object selector whose object vector holds the same {@code object} reference in every slot.
   *
   * @param inspector source of the maximum and current vector sizes
   * @param object    value to expose; when null, {@link NilVectorSelector#create} is returned instead
   * @return a selector returning the pre-filled object array
   */
  public static VectorObjectSelector vectorObjectSelector(
      VectorSizeInspector inspector,
      @Nullable Object object
  )
  {
    if (object == null) {
      return NilVectorSelector.create(inspector);
    }

    final Object[] objects = new Object[inspector.getMaxVectorSize()];
    Arrays.fill(objects, object);

    return new VectorObjectSelector()
    {
      @Override
      public Object[] getObjectVector()
      {
        return objects;
      }

      @Override
      public int getMaxVectorSize()
      {
        return inspector.getMaxVectorSize();
      }

      @Override
      public int getCurrentVectorSize()
      {
        return inspector.getCurrentVectorSize();
      }
    };
  }

  /**
   * Creates a single-value dimension selector over a one-entry dictionary containing {@code value}.
   *
   * @param inspector source of the maximum and current vector sizes
   * @param value     the only dictionary value; when null, {@link NilVectorSelector#create} is returned instead
   * @return a selector whose row vector is all zeros, whose cardinality is 1, and whose {@code lookupName}
   *         returns {@code value} for any id
   */
  public static SingleValueDimensionVectorSelector singleValueDimensionVectorSelector(
      VectorSizeInspector inspector,
      @Nullable String value
  )
  {
    if (value == null) {
      return NilVectorSelector.create(inspector);
    }

    // Never written after allocation, so every row carries dictionary id 0.
    final int[] row = new int[inspector.getMaxVectorSize()];

    return new SingleValueDimensionVectorSelector()
    {
      @Override
      public int[] getRowVector()
      {
        return row;
      }

      @Override
      public int getValueCardinality()
      {
        return 1;
      }

      @Nullable
      @Override
      public String lookupName(int id)
      {
        // The id is not inspected; the single constant is returned unconditionally.
        return value;
      }

      @Override
      public boolean nameLookupPossibleInAdvance()
      {
        return true;
      }

      @Nullable
      @Override
      public IdLookup idLookup()
      {
        return null;
      }

      @Override
      public int getMaxVectorSize()
      {
        return inspector.getMaxVectorSize();
      }

      @Override
      public int getCurrentVectorSize()
      {
        return inspector.getCurrentVectorSize();
      }
    };
  }
}

View File

@ -100,6 +100,11 @@ public class ExpressionPlan
this.unappliedInputs = unappliedInputs;
}
/**
 * Reports whether the expression analysis lists no required bindings.
 *
 * @return true when {@code analysis.getRequiredBindings()} is empty, false otherwise
 */
public boolean isConstant()
{
return analysis.getRequiredBindings().isEmpty();
}
public Expr getExpression()
{
return expression;

View File

@ -23,8 +23,11 @@ import com.google.common.base.Preconditions;
import org.apache.druid.math.expr.Expr;
import org.apache.druid.math.expr.ExprType;
import org.apache.druid.math.expr.vector.ExprVectorProcessor;
import org.apache.druid.query.expression.ExprUtils;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.vector.ConstantVectorSelectors;
import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector;
import org.apache.druid.segment.vector.VectorColumnSelectorFactory;
import org.apache.druid.segment.vector.VectorObjectSelector;
import org.apache.druid.segment.vector.VectorValueSelector;
@ -38,6 +41,22 @@ public class ExpressionVectorSelectors
// No instantiation.
}
/**
 * Builds a single-value dimension vector selector for a vectorizable expression.
 *
 * Only constant expressions are handled: the expression is evaluated once against nil bindings and its string
 * form is wrapped in a constant selector.
 *
 * @param factory    used to plan the expression and to obtain the vector size inspector
 * @param expression expression to expose as a dimension selector
 * @return a constant selector over the evaluated string value
 * @throws IllegalArgumentException if the plan is not vectorizable
 * @throws IllegalStateException    if the plan is not constant
 */
public static SingleValueDimensionVectorSelector makeSingleValueDimensionVectorSelector(
    VectorColumnSelectorFactory factory,
    Expr expression
)
{
  final ExpressionPlan plan = ExpressionPlanner.plan(factory, expression);
  Preconditions.checkArgument(plan.is(ExpressionPlan.Trait.VECTORIZABLE));

  // only constant expressions are currently supported, nothing else should get here
  if (!plan.isConstant()) {
    throw new IllegalStateException("Only constant expressions currently support dimension selectors");
  }

  final String evaluated = plan.getExpression().eval(ExprUtils.nilBindings()).asString();
  return ConstantVectorSelectors.singleValueDimensionVectorSelector(factory.getVectorSizeInspector(), evaluated);
}
public static VectorValueSelector makeVectorValueSelector(
VectorColumnSelectorFactory factory,
Expr expression
@ -45,6 +64,13 @@ public class ExpressionVectorSelectors
{
final ExpressionPlan plan = ExpressionPlanner.plan(factory, expression);
Preconditions.checkArgument(plan.is(ExpressionPlan.Trait.VECTORIZABLE));
if (plan.isConstant()) {
return ConstantVectorSelectors.vectorValueSelector(
factory.getVectorSizeInspector(),
(Number) plan.getExpression().eval(ExprUtils.nilBindings()).value()
);
}
final Expr.VectorInputBinding bindings = createVectorBindings(plan.getAnalysis(), factory);
final ExprVectorProcessor<?> processor = plan.getExpression().buildVectorized(bindings);
return new ExpressionVectorValueSelector(processor, bindings);
@ -57,6 +83,14 @@ public class ExpressionVectorSelectors
{
final ExpressionPlan plan = ExpressionPlanner.plan(factory, expression);
Preconditions.checkArgument(plan.is(ExpressionPlan.Trait.VECTORIZABLE));
if (plan.isConstant()) {
return ConstantVectorSelectors.vectorObjectSelector(
factory.getVectorSizeInspector(),
plan.getExpression().eval(ExprUtils.nilBindings()).value()
);
}
final Expr.VectorInputBinding bindings = createVectorBindings(plan.getAnalysis(), factory);
final ExprVectorProcessor<?> processor = plan.getExpression().buildVectorized(bindings);
return new ExpressionVectorObjectSelector(processor, bindings);

View File

@ -42,6 +42,7 @@ import org.apache.druid.segment.VirtualColumn;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector;
import org.apache.druid.segment.vector.VectorColumnSelectorFactory;
import org.apache.druid.segment.vector.VectorObjectSelector;
import org.apache.druid.segment.vector.VectorValueSelector;
@ -146,6 +147,15 @@ public class ExpressionVirtualColumn implements VirtualColumn
return plan.is(ExpressionPlan.Trait.VECTORIZABLE);
}
/**
 * Creates the vectorized single-value dimension selector for this virtual column by delegating to
 * {@code ExpressionVectorSelectors.makeSingleValueDimensionVectorSelector} with the result of
 * {@code parsedExpression.get()}.
 *
 * @param dimensionSpec not consulted by this implementation
 * @param factory       forwarded unchanged to the delegate
 * @return the selector produced by the delegate
 */
@Override
public SingleValueDimensionVectorSelector makeSingleValueVectorDimensionSelector(
DimensionSpec dimensionSpec,
VectorColumnSelectorFactory factory
)
{
// NOTE(review): dimensionSpec is unused here — confirm no caller expects it to be applied to the output
return ExpressionVectorSelectors.makeSingleValueDimensionVectorSelector(factory, parsedExpression.get());
}
@Override
public VectorValueSelector makeVectorValueSelector(String columnName, VectorColumnSelectorFactory factory)
{
@ -200,6 +210,9 @@ public class ExpressionVirtualColumn implements VirtualColumn
return ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(outputType);
}
// array types shouldn't escape the expression system currently, so coerce anything past this point into some
// style of string
// we don't have to check for unknown input here because output type is unable to be inferred if we don't know
// the complete set of input types
if (plan.any(ExpressionPlan.Trait.NON_SCALAR_OUTPUT, ExpressionPlan.Trait.NEEDS_APPLIED)) {
@ -207,7 +220,16 @@ public class ExpressionVirtualColumn implements VirtualColumn
return new ColumnCapabilitiesImpl().setType(ValueType.STRING).setHasMultipleValues(true);
}
// if we got here, lets call it single value string output
// constant strings are supported as dimension selectors, set them as dictionary encoded and unique
if (plan.isConstant()) {
return new ColumnCapabilitiesImpl().setType(ValueType.STRING)
.setDictionaryEncoded(true)
.setDictionaryValuesUnique(true)
.setDictionaryValuesSorted(true)
.setHasMultipleValues(false);
}
// if we got here, let's call it single value string output, non-dictionary encoded
return new ColumnCapabilitiesImpl().setType(ValueType.STRING)
.setHasMultipleValues(false)
.setDictionaryEncoded(false);

View File

@ -28,6 +28,7 @@ import org.apache.druid.math.expr.Expr;
import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.math.expr.ExprType;
import org.apache.druid.math.expr.Parser;
import org.apache.druid.query.dimension.DefaultDimensionSpec;
import org.apache.druid.query.expression.TestExprMacroTable;
import org.apache.druid.segment.ColumnInspector;
import org.apache.druid.segment.ColumnValueSelector;
@ -39,6 +40,7 @@ import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.generator.GeneratorBasicSchemas;
import org.apache.druid.segment.generator.GeneratorSchemaInfo;
import org.apache.druid.segment.generator.SegmentGenerator;
import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector;
import org.apache.druid.segment.vector.VectorCursor;
import org.apache.druid.segment.vector.VectorObjectSelector;
import org.apache.druid.segment.vector.VectorValueSelector;
@ -75,7 +77,11 @@ public class ExpressionVectorSelectorsTest
"parse_long(string1)",
"parse_long(string1) * double3",
"parse_long(string5) * parse_long(string1)",
"parse_long(string5) * parse_long(string1) * double3"
"parse_long(string5) * parse_long(string1) * double3",
"'string constant'",
"1",
"192412.24124",
"null"
);
private static final int ROWS_PER_SEGMENT = 100_000;
@ -167,7 +173,8 @@ public class ExpressionVectorSelectorsTest
)
)
);
VectorCursor cursor = new QueryableIndexStorageAdapter(index).makeVectorCursor(
final QueryableIndexStorageAdapter storageAdapter = new QueryableIndexStorageAdapter(index);
VectorCursor cursor = storageAdapter.makeVectorCursor(
null,
index.getDataInterval(),
virtualColumns,
@ -176,40 +183,55 @@ public class ExpressionVectorSelectorsTest
null
);
VectorValueSelector selector = null;
VectorObjectSelector objectSelector = null;
if (outputType.isNumeric()) {
selector = cursor.getColumnSelectorFactory().makeValueSelector("v");
} else {
objectSelector = cursor.getColumnSelectorFactory().makeObjectSelector("v");
}
int rowCount = 0;
while (!cursor.isDone()) {
boolean[] nulls;
switch (outputType) {
case LONG:
nulls = selector.getNullVector();
long[] longs = selector.getLongVector();
for (int i = 0; i < selector.getCurrentVectorSize(); i++, rowCount++) {
results.add(nulls != null && nulls[i] ? null : longs[i]);
}
break;
case DOUBLE:
nulls = selector.getNullVector();
double[] doubles = selector.getDoubleVector();
for (int i = 0; i < selector.getCurrentVectorSize(); i++, rowCount++) {
results.add(nulls != null && nulls[i] ? null : doubles[i]);
}
break;
case STRING:
Object[] objects = objectSelector.getObjectVector();
for (int i = 0; i < objectSelector.getCurrentVectorSize(); i++, rowCount++) {
results.add(objects[i]);
}
break;
}
ColumnCapabilities capabilities = virtualColumns.getColumnCapabilities(storageAdapter, "v");
cursor.advance();
int rowCount = 0;
if (capabilities.isDictionaryEncoded().isTrue()) {
SingleValueDimensionVectorSelector selector = cursor.getColumnSelectorFactory().makeSingleValueDimensionSelector(
DefaultDimensionSpec.of("v")
);
while (!cursor.isDone()) {
int[] row = selector.getRowVector();
for (int i = 0; i < selector.getCurrentVectorSize(); i++, rowCount++) {
results.add(selector.lookupName(row[i]));
}
cursor.advance();
}
} else {
VectorValueSelector selector = null;
VectorObjectSelector objectSelector = null;
if (outputType.isNumeric()) {
selector = cursor.getColumnSelectorFactory().makeValueSelector("v");
} else {
objectSelector = cursor.getColumnSelectorFactory().makeObjectSelector("v");
}
while (!cursor.isDone()) {
boolean[] nulls;
switch (outputType) {
case LONG:
nulls = selector.getNullVector();
long[] longs = selector.getLongVector();
for (int i = 0; i < selector.getCurrentVectorSize(); i++, rowCount++) {
results.add(nulls != null && nulls[i] ? null : longs[i]);
}
break;
case DOUBLE:
nulls = selector.getNullVector();
double[] doubles = selector.getDoubleVector();
for (int i = 0; i < selector.getCurrentVectorSize(); i++, rowCount++) {
results.add(nulls != null && nulls[i] ? null : doubles[i]);
}
break;
case STRING:
Object[] objects = objectSelector.getObjectVector();
for (int i = 0; i < objectSelector.getCurrentVectorSize(); i++, rowCount++) {
results.add(objects[i]);
}
break;
}
cursor.advance();
}
}
closer.register(cursor);
@ -224,10 +246,15 @@ public class ExpressionVectorSelectorsTest
int rowCountCursor = cursors
.map(nonVectorized -> {
final ColumnValueSelector nonSelector = nonVectorized.getColumnSelectorFactory().makeColumnValueSelector("v");
final ColumnValueSelector nonSelector = nonVectorized.getColumnSelectorFactory()
.makeColumnValueSelector("v");
int rows = 0;
while (!nonVectorized.isDone()) {
Assert.assertEquals(StringUtils.format("Failed at row %s", rows), nonSelector.getObject(), results.get(rows));
Assert.assertEquals(
StringUtils.format("Failed at row %s", rows),
nonSelector.getObject(),
results.get(rows)
);
rows++;
nonVectorized.advance();
}

View File

@ -92,6 +92,7 @@ public class SqlVectorizedExpressionSanityTest extends InitializedNullHandlingTe
"SELECT SUM(PARSE_LONG(string1)) FROM foo",
"SELECT SUM(PARSE_LONG(string3)) FROM foo",
"SELECT TIME_FLOOR(__time, 'PT1H'), string2, SUM(long1 * double4) FROM foo GROUP BY 1,2 ORDER BY 3",
"SELECT TIME_FLOOR(__time, 'PT1H'), string2, SUM(long1 * double4) FROM foo WHERE string2 = '10' GROUP BY 1,2 ORDER BY 3",
"SELECT TIME_FLOOR(__time, 'PT1H'), SUM(long1 * long4) FROM foo GROUP BY 1 ORDER BY 1",
"SELECT TIME_FLOOR(__time, 'PT1H'), SUM(long1 * long4) FROM foo GROUP BY 1 ORDER BY 2",
"SELECT TIME_FLOOR(TIMESTAMPADD(DAY, -1, __time), 'PT1H'), SUM(long1 * long4) FROM foo GROUP BY 1 ORDER BY 1",