From 753bce324bdf8c7c5b2b602f89c720749bfa6e22 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 29 Sep 2020 13:19:06 -0700 Subject: [PATCH] vectorize constant expressions with optimized selectors (#10440) --- .../vector/ConstantVectorSelectors.java | 172 ++++++++++++++++++ .../druid/segment/virtual/ExpressionPlan.java | 5 + .../virtual/ExpressionVectorSelectors.java | 34 ++++ .../virtual/ExpressionVirtualColumn.java | 24 ++- .../ExpressionVectorSelectorsTest.java | 101 ++++++---- .../SqlVectorizedExpressionSanityTest.java | 1 + 6 files changed, 299 insertions(+), 38 deletions(-) create mode 100644 processing/src/main/java/org/apache/druid/segment/vector/ConstantVectorSelectors.java diff --git a/processing/src/main/java/org/apache/druid/segment/vector/ConstantVectorSelectors.java b/processing/src/main/java/org/apache/druid/segment/vector/ConstantVectorSelectors.java new file mode 100644 index 00000000000..c1e3c3b1f26 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/vector/ConstantVectorSelectors.java @@ -0,0 +1,172 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.segment.vector; + +import org.apache.druid.segment.IdLookup; + +import javax.annotation.Nullable; +import java.util.Arrays; + +public class ConstantVectorSelectors +{ + public static VectorValueSelector vectorValueSelector(VectorSizeInspector inspector, @Nullable Number constant) + { + if (constant == null) { + return NilVectorSelector.create(inspector); + } + final long[] longVector = new long[inspector.getMaxVectorSize()]; + final float[] floatVector = new float[inspector.getMaxVectorSize()]; + final double[] doubleVector = new double[inspector.getMaxVectorSize()]; + Arrays.fill(longVector, constant.longValue()); + Arrays.fill(floatVector, constant.floatValue()); + Arrays.fill(doubleVector, constant.doubleValue()); + return new VectorValueSelector() + { + @Override + public long[] getLongVector() + { + return longVector; + } + + @Override + public float[] getFloatVector() + { + return floatVector; + } + + @Override + public double[] getDoubleVector() + { + return doubleVector; + } + + @Nullable + @Override + public boolean[] getNullVector() + { + return null; + } + + @Override + public int getMaxVectorSize() + { + return inspector.getMaxVectorSize(); + } + + @Override + public int getCurrentVectorSize() + { + return inspector.getCurrentVectorSize(); + } + }; + } + + public static VectorObjectSelector vectorObjectSelector( + VectorSizeInspector inspector, + @Nullable Object object + ) + { + if (object == null) { + return NilVectorSelector.create(inspector); + } + + final Object[] objects = new Object[inspector.getMaxVectorSize()]; + Arrays.fill(objects, object); + + return new VectorObjectSelector() + { + @Override + public Object[] getObjectVector() + { + return objects; + } + + @Override + public int getMaxVectorSize() + { + return inspector.getMaxVectorSize(); + } + + @Override + public int getCurrentVectorSize() + { + return inspector.getCurrentVectorSize(); + } + }; + } + + public static SingleValueDimensionVectorSelector 
singleValueDimensionVectorSelector( + VectorSizeInspector inspector, + @Nullable String value + ) + { + if (value == null) { + return NilVectorSelector.create(inspector); + } + + final int[] row = new int[inspector.getMaxVectorSize()]; + return new SingleValueDimensionVectorSelector() + { + @Override + public int[] getRowVector() + { + return row; + } + + @Override + public int getValueCardinality() + { + return 1; + } + + @Nullable + @Override + public String lookupName(int id) + { + return value; + } + + @Override + public boolean nameLookupPossibleInAdvance() + { + return true; + } + + @Nullable + @Override + public IdLookup idLookup() + { + return null; + } + + @Override + public int getMaxVectorSize() + { + return inspector.getMaxVectorSize(); + } + + @Override + public int getCurrentVectorSize() + { + return inspector.getCurrentVectorSize(); + } + }; + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlan.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlan.java index 07e6ce3aa98..7dd0d5d5f04 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlan.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlan.java @@ -100,6 +100,11 @@ public class ExpressionPlan this.unappliedInputs = unappliedInputs; } + public boolean isConstant() + { + return analysis.getRequiredBindings().isEmpty(); + } + public Expr getExpression() { return expression; diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVectorSelectors.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVectorSelectors.java index 25f2f22758b..3cb46e8ebd8 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVectorSelectors.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVectorSelectors.java @@ -23,8 +23,11 @@ import com.google.common.base.Preconditions; import 
org.apache.druid.math.expr.Expr; import org.apache.druid.math.expr.ExprType; import org.apache.druid.math.expr.vector.ExprVectorProcessor; +import org.apache.druid.query.expression.ExprUtils; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ValueType; +import org.apache.druid.segment.vector.ConstantVectorSelectors; +import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import org.apache.druid.segment.vector.VectorObjectSelector; import org.apache.druid.segment.vector.VectorValueSelector; @@ -38,6 +41,22 @@ public class ExpressionVectorSelectors // No instantiation. } + public static SingleValueDimensionVectorSelector makeSingleValueDimensionVectorSelector( + VectorColumnSelectorFactory factory, + Expr expression + ) + { + final ExpressionPlan plan = ExpressionPlanner.plan(factory, expression); + Preconditions.checkArgument(plan.is(ExpressionPlan.Trait.VECTORIZABLE)); + // only constant expressions are currently supported, nothing else should get here + + if (plan.isConstant()) { + String constant = plan.getExpression().eval(ExprUtils.nilBindings()).asString(); + return ConstantVectorSelectors.singleValueDimensionVectorSelector(factory.getVectorSizeInspector(), constant); + } + throw new IllegalStateException("Only constant expressions currently support dimension selectors"); + } + public static VectorValueSelector makeVectorValueSelector( VectorColumnSelectorFactory factory, Expr expression @@ -45,6 +64,13 @@ public class ExpressionVectorSelectors { final ExpressionPlan plan = ExpressionPlanner.plan(factory, expression); Preconditions.checkArgument(plan.is(ExpressionPlan.Trait.VECTORIZABLE)); + + if (plan.isConstant()) { + return ConstantVectorSelectors.vectorValueSelector( + factory.getVectorSizeInspector(), + (Number) plan.getExpression().eval(ExprUtils.nilBindings()).value() + ); + } final Expr.VectorInputBinding bindings = 
createVectorBindings(plan.getAnalysis(), factory); final ExprVectorProcessor processor = plan.getExpression().buildVectorized(bindings); return new ExpressionVectorValueSelector(processor, bindings); @@ -57,6 +83,14 @@ public class ExpressionVectorSelectors { final ExpressionPlan plan = ExpressionPlanner.plan(factory, expression); Preconditions.checkArgument(plan.is(ExpressionPlan.Trait.VECTORIZABLE)); + + if (plan.isConstant()) { + return ConstantVectorSelectors.vectorObjectSelector( + factory.getVectorSizeInspector(), + plan.getExpression().eval(ExprUtils.nilBindings()).value() + ); + } + final Expr.VectorInputBinding bindings = createVectorBindings(plan.getAnalysis(), factory); final ExprVectorProcessor processor = plan.getExpression().buildVectorized(bindings); return new ExpressionVectorObjectSelector(processor, bindings); diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java index 8ad46d6436e..3260afaa156 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java @@ -42,6 +42,7 @@ import org.apache.druid.segment.VirtualColumn; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.column.ValueType; +import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import org.apache.druid.segment.vector.VectorObjectSelector; import org.apache.druid.segment.vector.VectorValueSelector; @@ -146,6 +147,15 @@ public class ExpressionVirtualColumn implements VirtualColumn return plan.is(ExpressionPlan.Trait.VECTORIZABLE); } + @Override + public SingleValueDimensionVectorSelector makeSingleValueVectorDimensionSelector( + 
DimensionSpec dimensionSpec, + VectorColumnSelectorFactory factory + ) + { + return ExpressionVectorSelectors.makeSingleValueDimensionVectorSelector(factory, parsedExpression.get()); + } + @Override public VectorValueSelector makeVectorValueSelector(String columnName, VectorColumnSelectorFactory factory) { @@ -200,6 +210,9 @@ public class ExpressionVirtualColumn implements VirtualColumn return ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(outputType); } + // array types shouldn't escape the expression system currently, so coerce anything past this point into some + // style of string + // we don't have to check for unknown input here because output type is unable to be inferred if we don't know // the complete set of input types if (plan.any(ExpressionPlan.Trait.NON_SCALAR_OUTPUT, ExpressionPlan.Trait.NEEDS_APPLIED)) { @@ -207,7 +220,16 @@ public class ExpressionVirtualColumn implements VirtualColumn return new ColumnCapabilitiesImpl().setType(ValueType.STRING).setHasMultipleValues(true); } - // if we got here, lets call it single value string output + // constant strings are supported as dimension selectors, set them as dictionary encoded, unique, and sorted + if (plan.isConstant()) { + return new ColumnCapabilitiesImpl().setType(ValueType.STRING) + .setDictionaryEncoded(true) + .setDictionaryValuesUnique(true) + .setDictionaryValuesSorted(true) + .setHasMultipleValues(false); + } + + // if we got here, let's call it single value string output, non-dictionary encoded return new ColumnCapabilitiesImpl().setType(ValueType.STRING) .setHasMultipleValues(false) .setDictionaryEncoded(false); diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVectorSelectorsTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVectorSelectorsTest.java index 9c7926fe9bb..9e0a7b6cb31 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVectorSelectorsTest.java +++
b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVectorSelectorsTest.java @@ -28,6 +28,7 @@ import org.apache.druid.math.expr.Expr; import org.apache.druid.math.expr.ExprMacroTable; import org.apache.druid.math.expr.ExprType; import org.apache.druid.math.expr.Parser; +import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.expression.TestExprMacroTable; import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.ColumnValueSelector; @@ -39,6 +40,7 @@ import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.generator.GeneratorBasicSchemas; import org.apache.druid.segment.generator.GeneratorSchemaInfo; import org.apache.druid.segment.generator.SegmentGenerator; +import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; import org.apache.druid.segment.vector.VectorCursor; import org.apache.druid.segment.vector.VectorObjectSelector; import org.apache.druid.segment.vector.VectorValueSelector; @@ -75,7 +77,11 @@ public class ExpressionVectorSelectorsTest "parse_long(string1)", "parse_long(string1) * double3", "parse_long(string5) * parse_long(string1)", - "parse_long(string5) * parse_long(string1) * double3" + "parse_long(string5) * parse_long(string1) * double3", + "'string constant'", + "1", + "192412.24124", + "null" ); private static final int ROWS_PER_SEGMENT = 100_000; @@ -167,7 +173,8 @@ public class ExpressionVectorSelectorsTest ) ) ); - VectorCursor cursor = new QueryableIndexStorageAdapter(index).makeVectorCursor( + final QueryableIndexStorageAdapter storageAdapter = new QueryableIndexStorageAdapter(index); + VectorCursor cursor = storageAdapter.makeVectorCursor( null, index.getDataInterval(), virtualColumns, @@ -176,40 +183,55 @@ public class ExpressionVectorSelectorsTest null ); - VectorValueSelector selector = null; - VectorObjectSelector objectSelector = null; - if (outputType.isNumeric()) { - selector = 
cursor.getColumnSelectorFactory().makeValueSelector("v"); - } else { - objectSelector = cursor.getColumnSelectorFactory().makeObjectSelector("v"); - } - int rowCount = 0; - while (!cursor.isDone()) { - boolean[] nulls; - switch (outputType) { - case LONG: - nulls = selector.getNullVector(); - long[] longs = selector.getLongVector(); - for (int i = 0; i < selector.getCurrentVectorSize(); i++, rowCount++) { - results.add(nulls != null && nulls[i] ? null : longs[i]); - } - break; - case DOUBLE: - nulls = selector.getNullVector(); - double[] doubles = selector.getDoubleVector(); - for (int i = 0; i < selector.getCurrentVectorSize(); i++, rowCount++) { - results.add(nulls != null && nulls[i] ? null : doubles[i]); - } - break; - case STRING: - Object[] objects = objectSelector.getObjectVector(); - for (int i = 0; i < objectSelector.getCurrentVectorSize(); i++, rowCount++) { - results.add(objects[i]); - } - break; - } + ColumnCapabilities capabilities = virtualColumns.getColumnCapabilities(storageAdapter, "v"); - cursor.advance(); + int rowCount = 0; + if (capabilities.isDictionaryEncoded().isTrue()) { + SingleValueDimensionVectorSelector selector = cursor.getColumnSelectorFactory().makeSingleValueDimensionSelector( + DefaultDimensionSpec.of("v") + ); + while (!cursor.isDone()) { + int[] row = selector.getRowVector(); + for (int i = 0; i < selector.getCurrentVectorSize(); i++, rowCount++) { + results.add(selector.lookupName(row[i])); + } + cursor.advance(); + } + } else { + VectorValueSelector selector = null; + VectorObjectSelector objectSelector = null; + if (outputType.isNumeric()) { + selector = cursor.getColumnSelectorFactory().makeValueSelector("v"); + } else { + objectSelector = cursor.getColumnSelectorFactory().makeObjectSelector("v"); + } + while (!cursor.isDone()) { + boolean[] nulls; + switch (outputType) { + case LONG: + nulls = selector.getNullVector(); + long[] longs = selector.getLongVector(); + for (int i = 0; i < selector.getCurrentVectorSize(); i++, 
rowCount++) { + results.add(nulls != null && nulls[i] ? null : longs[i]); + } + break; + case DOUBLE: + nulls = selector.getNullVector(); + double[] doubles = selector.getDoubleVector(); + for (int i = 0; i < selector.getCurrentVectorSize(); i++, rowCount++) { + results.add(nulls != null && nulls[i] ? null : doubles[i]); + } + break; + case STRING: + Object[] objects = objectSelector.getObjectVector(); + for (int i = 0; i < objectSelector.getCurrentVectorSize(); i++, rowCount++) { + results.add(objects[i]); + } + break; + } + + cursor.advance(); + } } closer.register(cursor); @@ -224,10 +246,15 @@ public class ExpressionVectorSelectorsTest int rowCountCursor = cursors .map(nonVectorized -> { - final ColumnValueSelector nonSelector = nonVectorized.getColumnSelectorFactory().makeColumnValueSelector("v"); + final ColumnValueSelector nonSelector = nonVectorized.getColumnSelectorFactory() + .makeColumnValueSelector("v"); int rows = 0; while (!nonVectorized.isDone()) { - Assert.assertEquals(StringUtils.format("Failed at row %s", rows), nonSelector.getObject(), results.get(rows)); + Assert.assertEquals( + StringUtils.format("Failed at row %s", rows), + nonSelector.getObject(), + results.get(rows) + ); rows++; nonVectorized.advance(); } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/SqlVectorizedExpressionSanityTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/SqlVectorizedExpressionSanityTest.java index 874e7458d2c..47179371792 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/SqlVectorizedExpressionSanityTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/SqlVectorizedExpressionSanityTest.java @@ -92,6 +92,7 @@ public class SqlVectorizedExpressionSanityTest extends InitializedNullHandlingTe "SELECT SUM(PARSE_LONG(string1)) FROM foo", "SELECT SUM(PARSE_LONG(string3)) FROM foo", "SELECT TIME_FLOOR(__time, 'PT1H'), string2, SUM(long1 * double4) FROM foo GROUP BY 1,2 ORDER BY 3", + "SELECT TIME_FLOOR(__time, 'PT1H'), string2, 
SUM(long1 * double4) FROM foo WHERE string2 = '10' GROUP BY 1,2 ORDER BY 3", "SELECT TIME_FLOOR(__time, 'PT1H'), SUM(long1 * long4) FROM foo GROUP BY 1 ORDER BY 1", "SELECT TIME_FLOOR(__time, 'PT1H'), SUM(long1 * long4) FROM foo GROUP BY 1 ORDER BY 2", "SELECT TIME_FLOOR(TIMESTAMPADD(DAY, -1, __time), 'PT1H'), SUM(long1 * long4) FROM foo GROUP BY 1 ORDER BY 1",