mirror of https://github.com/apache/druid.git
vector group by support for string expressions (#11010)
* vector group by support for string expressions * fix test * comments, javadoc
This commit is contained in:
parent
de691808ce
commit
338886fd5f
|
@ -174,7 +174,11 @@ public class SqlExpressionBenchmark
|
|||
// 24: group by long expr with non-expr agg
|
||||
"SELECT (long1 * long2), SUM(double1) FROM foo GROUP BY 1 ORDER BY 2",
|
||||
// 25: group by non-expr with expr agg
|
||||
"SELECT string2, SUM(long1 * long4) FROM foo GROUP BY 1 ORDER BY 2"
|
||||
"SELECT string2, SUM(long1 * long4) FROM foo GROUP BY 1 ORDER BY 2",
|
||||
// 26: group by string expr with non-expr agg
|
||||
"SELECT CONCAT(string2, '-', long2), SUM(double1) FROM foo GROUP BY 1 ORDER BY 2",
|
||||
// 27: group by string expr with expr agg
|
||||
"SELECT CONCAT(string2, '-', long2), SUM(long1 * double4) FROM foo GROUP BY 1 ORDER BY 2"
|
||||
);
|
||||
|
||||
@Param({"5000000"})
|
||||
|
@ -211,7 +215,9 @@ public class SqlExpressionBenchmark
|
|||
"22",
|
||||
"23",
|
||||
"24",
|
||||
"25"
|
||||
"25",
|
||||
"26",
|
||||
"27"
|
||||
})
|
||||
private String query;
|
||||
|
||||
|
|
|
@ -24,6 +24,7 @@ import com.google.common.primitives.Ints;
|
|||
import org.apache.druid.common.config.NullHandling;
|
||||
import org.apache.druid.math.expr.vector.ExprVectorProcessor;
|
||||
import org.apache.druid.math.expr.vector.VectorMathProcessors;
|
||||
import org.apache.druid.math.expr.vector.VectorStringProcessors;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
|
||||
|
@ -63,12 +64,19 @@ final class BinPlusExpr extends BinaryEvalOpExprBase
|
|||
@Override
|
||||
public boolean canVectorize(InputBindingInspector inspector)
|
||||
{
|
||||
return inspector.areNumeric(left, right) && inspector.canVectorize(left, right);
|
||||
return inspector.areScalar(left, right) && inspector.canVectorize(left, right);
|
||||
}
|
||||
|
||||
@Override
|
||||
public <T> ExprVectorProcessor<T> buildVectorized(VectorInputBindingInspector inspector)
|
||||
{
|
||||
ExprType type = ExprTypeConversion.operator(
|
||||
left.getOutputType(inspector),
|
||||
right.getOutputType(inspector)
|
||||
);
|
||||
if (ExprType.STRING.equals(type)) {
|
||||
return VectorStringProcessors.concat(inspector, left, right);
|
||||
}
|
||||
return VectorMathProcessors.plus(inspector, left, right);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -213,6 +213,36 @@ public interface Expr
|
|||
return areNumeric(Arrays.asList(args));
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if all provided {@link Expr} can infer the output type as {@link ExprType#isScalar()} (non-array) with a
|
||||
* value of true.
|
||||
*
|
||||
* There must be at least one expression with a computable scalar output type for this method to return true.
|
||||
*/
|
||||
default boolean areScalar(List<Expr> args)
|
||||
{
|
||||
boolean scalar = true;
|
||||
for (Expr arg : args) {
|
||||
ExprType argType = arg.getOutputType(this);
|
||||
if (argType == null) {
|
||||
continue;
|
||||
}
|
||||
scalar &= argType.isScalar();
|
||||
}
|
||||
return scalar;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if all provided {@link Expr} can infer the output type as {@link ExprType#isScalar()} (non-array) with a
|
||||
* value of true.
|
||||
*
|
||||
* There must be at least one expression with a computable scalar output type for this method to return true.
|
||||
*/
|
||||
default boolean areScalar(Expr... args)
|
||||
{
|
||||
return areScalar(Arrays.asList(args));
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if every provided {@link Expr} computes {@link Expr#canVectorize(InputBindingInspector)} to a value of true
|
||||
*/
|
||||
|
|
|
@ -42,6 +42,11 @@ public enum ExprType
|
|||
return isNumeric(this);
|
||||
}
|
||||
|
||||
public boolean isScalar()
|
||||
{
|
||||
return isScalar(this);
|
||||
}
|
||||
|
||||
/**
|
||||
* The expression system does not distinguish between {@link ValueType#FLOAT} and {@link ValueType#DOUBLE}, and
|
||||
* cannot currently handle {@link ValueType#COMPLEX} inputs. This method will convert {@link ValueType#FLOAT} to
|
||||
|
@ -131,6 +136,11 @@ public enum ExprType
|
|||
return LONG.equals(type) || DOUBLE.equals(type);
|
||||
}
|
||||
|
||||
public static boolean isScalar(@Nullable ExprType exprType)
|
||||
{
|
||||
return !isArray(exprType);
|
||||
}
|
||||
|
||||
public static boolean isArray(@Nullable ExprType type)
|
||||
{
|
||||
return LONG_ARRAY.equals(type) || DOUBLE_ARRAY.equals(type) || STRING_ARRAY.equals(type);
|
||||
|
|
|
@ -30,6 +30,7 @@ import org.apache.druid.math.expr.vector.CastToTypeVectorProcessor;
|
|||
import org.apache.druid.math.expr.vector.ExprVectorProcessor;
|
||||
import org.apache.druid.math.expr.vector.VectorMathProcessors;
|
||||
import org.apache.druid.math.expr.vector.VectorProcessors;
|
||||
import org.apache.druid.math.expr.vector.VectorStringProcessors;
|
||||
import org.joda.time.DateTime;
|
||||
import org.joda.time.DateTimeZone;
|
||||
import org.joda.time.format.DateTimeFormat;
|
||||
|
@ -2191,6 +2192,21 @@ public interface Function
|
|||
{
|
||||
return ExprType.STRING;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canVectorize(Expr.InputBindingInspector inspector, List<Expr> args)
|
||||
{
|
||||
return inspector.areScalar(args) && inspector.canVectorize(args);
|
||||
}
|
||||
|
||||
@Override
|
||||
public <T> ExprVectorProcessor<T> asVectorProcessor(
|
||||
Expr.VectorInputBindingInspector inspector,
|
||||
List<Expr> args
|
||||
)
|
||||
{
|
||||
return VectorStringProcessors.concat(inspector, args);
|
||||
}
|
||||
}
|
||||
|
||||
class StrlenFunc implements Function
|
||||
|
|
|
@ -25,7 +25,10 @@ import org.apache.druid.math.expr.Expr;
|
|||
import java.lang.reflect.Array;
|
||||
|
||||
/**
|
||||
* Base {@link ExprVectorProcessor} for expressions and functions with 2 'object' typed inputs (strings, arrays)
|
||||
* Base {@link ExprVectorProcessor} for expressions and functions with 2 'object' typed inputs (strings, arrays).
|
||||
*
|
||||
* In SQL compatible null handling mode, for a row with either left or right input as a null value, it will be handled
|
||||
* by {@link #processNull(int)} instead of {@link #processIndex(Object, Object, int)}.
|
||||
*/
|
||||
public abstract class BivariateFunctionVectorObjectProcessor<TLeftInput, TRightInput, TOutput>
|
||||
implements ExprVectorProcessor<TOutput>
|
||||
|
|
|
@ -0,0 +1,68 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.druid.math.expr.vector;
|
||||
|
||||
import org.apache.druid.common.config.NullHandling;
|
||||
import org.apache.druid.math.expr.Expr;
|
||||
import org.apache.druid.math.expr.ExprType;
|
||||
|
||||
/**
|
||||
* many strings enter, one string leaves...
|
||||
*/
|
||||
public abstract class StringOutMultiStringInVectorProcessor implements ExprVectorProcessor<String[]>
|
||||
{
|
||||
final ExprVectorProcessor<String[]>[] inputs;
|
||||
final int maxVectorSize;
|
||||
final String[] outValues;
|
||||
final boolean sqlCompatible = NullHandling.sqlCompatible();
|
||||
|
||||
protected StringOutMultiStringInVectorProcessor(
|
||||
ExprVectorProcessor<String[]>[] inputs,
|
||||
int maxVectorSize
|
||||
)
|
||||
{
|
||||
this.inputs = inputs;
|
||||
this.maxVectorSize = maxVectorSize;
|
||||
this.outValues = new String[maxVectorSize];
|
||||
}
|
||||
|
||||
@Override
|
||||
public ExprType getOutputType()
|
||||
{
|
||||
return ExprType.STRING;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ExprEvalVector<String[]> evalVector(Expr.VectorInputBinding bindings)
|
||||
{
|
||||
final int currentSize = bindings.getCurrentVectorSize();
|
||||
final String[][] in = new String[inputs.length][];
|
||||
for (int i = 0; i < inputs.length; i++) {
|
||||
in[i] = inputs[i].evalVector(bindings).values();
|
||||
}
|
||||
|
||||
for (int i = 0; i < currentSize; i++) {
|
||||
processIndex(in, i);
|
||||
}
|
||||
return new ExprEvalStringVector(outValues);
|
||||
}
|
||||
|
||||
abstract void processIndex(String[][] in, int i);
|
||||
}
|
|
@ -19,7 +19,6 @@
|
|||
|
||||
package org.apache.druid.math.expr.vector;
|
||||
|
||||
import org.apache.druid.common.config.NullHandling;
|
||||
import org.apache.druid.math.expr.ExprType;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
|
@ -27,8 +26,6 @@ import javax.annotation.Nullable;
|
|||
public abstract class StringOutStringsInFunctionVectorProcessor
|
||||
extends BivariateFunctionVectorObjectProcessor<String[], String[], String[]>
|
||||
{
|
||||
final boolean sqlCompatible = NullHandling.sqlCompatible();
|
||||
|
||||
protected StringOutStringsInFunctionVectorProcessor(
|
||||
ExprVectorProcessor<String[]> left,
|
||||
ExprVectorProcessor<String[]> right,
|
||||
|
@ -44,7 +41,7 @@ public abstract class StringOutStringsInFunctionVectorProcessor
|
|||
}
|
||||
|
||||
@Nullable
|
||||
abstract String processValue(@Nullable String leftVal, @Nullable String rightVal);
|
||||
protected abstract String processValue(@Nullable String leftVal, @Nullable String rightVal);
|
||||
|
||||
@Override
|
||||
void processIndex(String[] strings, String[] strings2, int i)
|
||||
|
|
|
@ -0,0 +1,103 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.druid.math.expr.vector;
|
||||
|
||||
import org.apache.druid.common.config.NullHandling;
|
||||
import org.apache.druid.math.expr.Expr;
|
||||
import org.apache.druid.math.expr.ExprType;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.util.List;
|
||||
|
||||
public class VectorStringProcessors
|
||||
{
|
||||
public static <T> ExprVectorProcessor<T> concat(Expr.VectorInputBindingInspector inspector, Expr left, Expr right)
|
||||
{
|
||||
final ExprVectorProcessor processor;
|
||||
if (NullHandling.sqlCompatible()) {
|
||||
processor = new StringOutStringsInFunctionVectorProcessor(
|
||||
left.buildVectorized(inspector),
|
||||
right.buildVectorized(inspector),
|
||||
inspector.getMaxVectorSize()
|
||||
)
|
||||
{
|
||||
@Nullable
|
||||
@Override
|
||||
protected String processValue(@Nullable String leftVal, @Nullable String rightVal)
|
||||
{
|
||||
// in sql compatible mode, nulls are handled by super class and never make it here...
|
||||
return leftVal + rightVal;
|
||||
}
|
||||
};
|
||||
} else {
|
||||
processor = new StringOutStringsInFunctionVectorProcessor(
|
||||
left.buildVectorized(inspector),
|
||||
right.buildVectorized(inspector),
|
||||
inspector.getMaxVectorSize()
|
||||
)
|
||||
{
|
||||
@Nullable
|
||||
@Override
|
||||
protected String processValue(@Nullable String leftVal, @Nullable String rightVal)
|
||||
{
|
||||
return NullHandling.nullToEmptyIfNeeded(leftVal) + NullHandling.nullToEmptyIfNeeded(rightVal);
|
||||
}
|
||||
};
|
||||
}
|
||||
return processor;
|
||||
}
|
||||
|
||||
public static <T> ExprVectorProcessor<T> concat(Expr.VectorInputBindingInspector inspector, List<Expr> inputs)
|
||||
{
|
||||
final ExprVectorProcessor<String[]>[] inputProcessors = new ExprVectorProcessor[inputs.size()];
|
||||
for (int i = 0; i < inputs.size(); i++) {
|
||||
inputProcessors[i] = CastToTypeVectorProcessor.cast(inputs.get(i).buildVectorized(inspector), ExprType.STRING);
|
||||
}
|
||||
final ExprVectorProcessor processor = new StringOutMultiStringInVectorProcessor(
|
||||
inputProcessors,
|
||||
inspector.getMaxVectorSize()
|
||||
)
|
||||
{
|
||||
@Override
|
||||
void processIndex(String[][] in, int i)
|
||||
{
|
||||
// Result of concatenation is null if any of the Values is null.
|
||||
// e.g. 'select CONCAT(null, "abc") as c;' will return null as per Standard SQL spec.
|
||||
String first = NullHandling.nullToEmptyIfNeeded(in[0][i]);
|
||||
if (first == null) {
|
||||
outValues[i] = null;
|
||||
return;
|
||||
}
|
||||
final StringBuilder builder = new StringBuilder(first);
|
||||
for (int inputNumber = 1; inputNumber < in.length; inputNumber++) {
|
||||
final String s = NullHandling.nullToEmptyIfNeeded(in[inputNumber][i]);
|
||||
if (s == null) {
|
||||
outValues[i] = null;
|
||||
return;
|
||||
} else {
|
||||
builder.append(s);
|
||||
}
|
||||
}
|
||||
outValues[i] = builder.toString();
|
||||
}
|
||||
};
|
||||
return processor;
|
||||
}
|
||||
}
|
|
@ -202,6 +202,15 @@ public class VectorExprSanityTest extends InitializedNullHandlingTest
|
|||
testFunctions(types, templates, args);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testStringFns()
|
||||
{
|
||||
testExpression("s1 + s2", types);
|
||||
testExpression("s1 + '-' + s2", types);
|
||||
testExpression("concat(s1, s2)", types);
|
||||
testExpression("concat(s1,'-',s2,'-',l1,'-',d1)", types);
|
||||
}
|
||||
|
||||
static void testFunctions(Map<String, ExprType> types, String[] templates, String[] args)
|
||||
{
|
||||
for (String template : templates) {
|
||||
|
|
|
@ -177,9 +177,6 @@ public class BloomDimFilterSqlTest extends BaseCalciteQueryTest
|
|||
@Test
|
||||
public void testBloomFilterVirtualColumn() throws Exception
|
||||
{
|
||||
// Cannot vectorize due to expression virtual columns.
|
||||
cannotVectorize();
|
||||
|
||||
BloomKFilter filter = new BloomKFilter(1500);
|
||||
filter.addString("def-foo");
|
||||
byte[] bytes = BloomFilterSerializersModule.bloomKFilterToBytes(filter);
|
||||
|
|
|
@ -0,0 +1,107 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.druid.query.groupby.epinephelinae.vector;
|
||||
|
||||
import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap;
|
||||
import org.apache.datasketches.memory.Memory;
|
||||
import org.apache.datasketches.memory.WritableMemory;
|
||||
import org.apache.druid.common.config.NullHandling;
|
||||
import org.apache.druid.query.groupby.ResultRow;
|
||||
import org.apache.druid.segment.vector.VectorObjectSelector;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* A {@link GroupByVectorColumnSelector} that builds an internal String<->Integer dictionary, used for grouping
|
||||
* single-valued STRING columns which are not natively dictionary encoded, e.g. expression virtual columns.
|
||||
*
|
||||
* This is effectively the {@link VectorGroupByEngine} analog of
|
||||
* {@link org.apache.druid.query.groupby.epinephelinae.column.DictionaryBuildingStringGroupByColumnSelectorStrategy}
|
||||
*/
|
||||
public class DictionaryBuildingSingleValueStringGroupByVectorColumnSelector implements GroupByVectorColumnSelector
|
||||
{
|
||||
private static final int GROUP_BY_MISSING_VALUE = -1;
|
||||
|
||||
private final VectorObjectSelector selector;
|
||||
|
||||
private int nextId = 0;
|
||||
private final List<String> dictionary = new ArrayList<>();
|
||||
private final Object2IntOpenHashMap<String> reverseDictionary = new Object2IntOpenHashMap<>();
|
||||
|
||||
{
|
||||
reverseDictionary.defaultReturnValue(-1);
|
||||
}
|
||||
|
||||
public DictionaryBuildingSingleValueStringGroupByVectorColumnSelector(VectorObjectSelector selector)
|
||||
{
|
||||
this.selector = selector;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public int getGroupingKeySize()
|
||||
{
|
||||
return Integer.BYTES;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeKeys(
|
||||
final WritableMemory keySpace,
|
||||
final int keySize,
|
||||
final int keyOffset,
|
||||
final int startRow,
|
||||
final int endRow
|
||||
)
|
||||
{
|
||||
final Object[] vector = selector.getObjectVector();
|
||||
|
||||
for (int i = startRow, j = keyOffset; i < endRow; i++, j += keySize) {
|
||||
final String value = (String) vector[i];
|
||||
final int dictId = reverseDictionary.getInt(value);
|
||||
if (dictId < 0) {
|
||||
dictionary.add(value);
|
||||
reverseDictionary.put(value, nextId);
|
||||
keySpace.putInt(j, nextId);
|
||||
nextId++;
|
||||
} else {
|
||||
keySpace.putInt(j, dictId);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeKeyToResultRow(
|
||||
final Memory keyMemory,
|
||||
final int keyOffset,
|
||||
final ResultRow resultRow,
|
||||
final int resultRowPosition
|
||||
)
|
||||
{
|
||||
final int id = keyMemory.getInt(keyOffset);
|
||||
// GROUP_BY_MISSING_VALUE is used to indicate empty rows, which are omitted from the result map.
|
||||
if (id != GROUP_BY_MISSING_VALUE) {
|
||||
final String value = dictionary.get(id);
|
||||
resultRow.set(resultRowPosition, value);
|
||||
} else {
|
||||
resultRow.set(resultRowPosition, NullHandling.defaultStringValue());
|
||||
}
|
||||
}
|
||||
}
|
|
@ -113,9 +113,7 @@ public class GroupByVectorColumnProcessorFactory implements VectorColumnProcesso
|
|||
)
|
||||
{
|
||||
if (ValueType.STRING.equals(capabilities.getType())) {
|
||||
throw new UnsupportedOperationException(
|
||||
"Vectorized groupBys on non-dictionary encoded string columns with object selectors are not yet implemented"
|
||||
);
|
||||
return new DictionaryBuildingSingleValueStringGroupByVectorColumnSelector(selector);
|
||||
}
|
||||
return NilGroupByVectorColumnSelector.INSTANCE;
|
||||
}
|
||||
|
|
|
@ -46,7 +46,6 @@ import org.apache.druid.segment.ColumnProcessors;
|
|||
import org.apache.druid.segment.StorageAdapter;
|
||||
import org.apache.druid.segment.VirtualColumns;
|
||||
import org.apache.druid.segment.column.ColumnCapabilities;
|
||||
import org.apache.druid.segment.column.ValueType;
|
||||
import org.apache.druid.segment.filter.Filters;
|
||||
import org.apache.druid.segment.vector.VectorColumnSelectorFactory;
|
||||
import org.apache.druid.segment.vector.VectorCursor;
|
||||
|
@ -108,12 +107,7 @@ public class VectorGroupByEngine
|
|||
if (columnCapabilities == null) {
|
||||
return true;
|
||||
}
|
||||
// strings must be single valued, dictionary encoded, and have unique dictionary entries
|
||||
if (ValueType.STRING.equals(columnCapabilities.getType())) {
|
||||
return columnCapabilities.hasMultipleValues().isFalse() &&
|
||||
columnCapabilities.isDictionaryEncoded().isTrue() &&
|
||||
columnCapabilities.areDictionaryValuesUnique().isTrue();
|
||||
}
|
||||
// must be single valued
|
||||
return columnCapabilities.hasMultipleValues().isFalse();
|
||||
});
|
||||
}
|
||||
|
|
|
@ -1016,10 +1016,6 @@ public class GroupByQueryRunnerTest extends InitializedNullHandlingTest
|
|||
@Test
|
||||
public void testGroupByWithStringVirtualColumn()
|
||||
{
|
||||
// Cannot vectorize due to virtual columns.
|
||||
// all virtual columns are single input column, so it will work for group by v1, even with multi-value inputs
|
||||
cannotVectorize();
|
||||
|
||||
GroupByQuery query = makeQueryBuilder()
|
||||
.setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
|
||||
.setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
|
||||
|
@ -1080,6 +1076,69 @@ public class GroupByQueryRunnerTest extends InitializedNullHandlingTest
|
|||
TestHelper.assertExpectedObjects(expectedResults, results, "virtual-column");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGroupByWithStringVirtualColumnVectorizable()
|
||||
{
|
||||
GroupByQuery query = makeQueryBuilder()
|
||||
.setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
|
||||
.setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
|
||||
.setVirtualColumns(
|
||||
new ExpressionVirtualColumn(
|
||||
"vc",
|
||||
"cast(quality, 'STRING')",
|
||||
ValueType.STRING,
|
||||
TestExprMacroTable.INSTANCE
|
||||
)
|
||||
)
|
||||
.setDimensions(new DefaultDimensionSpec("vc", "alias"))
|
||||
.setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new LongSumAggregatorFactory("idx", "index"))
|
||||
.setGranularity(QueryRunnerTestHelper.DAY_GRAN)
|
||||
.build();
|
||||
|
||||
List<ResultRow> expectedResults = Arrays.asList(
|
||||
makeRow(query, "2011-04-01", "alias", "automotive", "rows", 1L, "idx", 135L),
|
||||
makeRow(query, "2011-04-01", "alias", "business", "rows", 1L, "idx", 118L),
|
||||
makeRow(
|
||||
query,
|
||||
"2011-04-01",
|
||||
"alias",
|
||||
"entertainment",
|
||||
"rows",
|
||||
1L,
|
||||
"idx",
|
||||
158L
|
||||
),
|
||||
makeRow(query, "2011-04-01", "alias", "health", "rows", 1L, "idx", 120L),
|
||||
makeRow(query, "2011-04-01", "alias", "mezzanine", "rows", 3L, "idx", 2870L),
|
||||
makeRow(query, "2011-04-01", "alias", "news", "rows", 1L, "idx", 121L),
|
||||
makeRow(query, "2011-04-01", "alias", "premium", "rows", 3L, "idx", 2900L),
|
||||
makeRow(query, "2011-04-01", "alias", "technology", "rows", 1L, "idx", 78L),
|
||||
makeRow(query, "2011-04-01", "alias", "travel", "rows", 1L, "idx", 119L),
|
||||
|
||||
makeRow(query, "2011-04-02", "alias", "automotive", "rows", 1L, "idx", 147L),
|
||||
makeRow(query, "2011-04-02", "alias", "business", "rows", 1L, "idx", 112L),
|
||||
makeRow(
|
||||
query,
|
||||
"2011-04-02",
|
||||
"alias",
|
||||
"entertainment",
|
||||
"rows",
|
||||
1L,
|
||||
"idx",
|
||||
166L
|
||||
),
|
||||
makeRow(query, "2011-04-02", "alias", "health", "rows", 1L, "idx", 113L),
|
||||
makeRow(query, "2011-04-02", "alias", "mezzanine", "rows", 3L, "idx", 2447L),
|
||||
makeRow(query, "2011-04-02", "alias", "news", "rows", 1L, "idx", 114L),
|
||||
makeRow(query, "2011-04-02", "alias", "premium", "rows", 3L, "idx", 2505L),
|
||||
makeRow(query, "2011-04-02", "alias", "technology", "rows", 1L, "idx", 97L),
|
||||
makeRow(query, "2011-04-02", "alias", "travel", "rows", 1L, "idx", 126L)
|
||||
);
|
||||
|
||||
Iterable<ResultRow> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
|
||||
TestHelper.assertExpectedObjects(expectedResults, results, "virtual-column");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGroupByWithDurationGranularity()
|
||||
{
|
||||
|
@ -6336,9 +6395,6 @@ public class GroupByQueryRunnerTest extends InitializedNullHandlingTest
|
|||
@Test
|
||||
public void testGroupByWithSubtotalsSpecOfDimensionsPrefixes()
|
||||
{
|
||||
// Cannot vectorize due to usage of expressions.
|
||||
cannotVectorize();
|
||||
|
||||
if (!config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V2)) {
|
||||
return;
|
||||
}
|
||||
|
@ -6452,9 +6508,6 @@ public class GroupByQueryRunnerTest extends InitializedNullHandlingTest
|
|||
@Test
|
||||
public void testGroupByWithSubtotalsSpecGeneral()
|
||||
{
|
||||
// Cannot vectorize due to usage of expressions.
|
||||
cannotVectorize();
|
||||
|
||||
if (!config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V2)) {
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -155,8 +155,6 @@ public class VectorizedVirtualColumnTest
|
|||
@Test
|
||||
public void testGroupBySingleValueStringNotDictionaryEncoded()
|
||||
{
|
||||
// cannot currently group by string columns that are not dictionary encoded
|
||||
cannotVectorize();
|
||||
testGroupBy(new ColumnCapabilitiesImpl()
|
||||
.setType(ValueType.STRING)
|
||||
.setDictionaryEncoded(false)
|
||||
|
|
|
@ -9198,9 +9198,6 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
|
|||
@Test
|
||||
public void testRegexpLikeFilter() throws Exception
|
||||
{
|
||||
// Cannot vectorize due to usage of regex filter.
|
||||
cannotVectorize();
|
||||
|
||||
testQuery(
|
||||
"SELECT COUNT(*)\n"
|
||||
+ "FROM foo\n"
|
||||
|
@ -14717,6 +14714,75 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
|
|||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testConcatGroup() throws Exception
|
||||
{
|
||||
testQuery(
|
||||
"SELECT CONCAT(dim1, '-', dim1, '_', dim1) as dimX FROM foo GROUP BY 1",
|
||||
ImmutableList.of(
|
||||
new GroupByQuery.Builder()
|
||||
.setDataSource(CalciteTests.DATASOURCE1)
|
||||
.setInterval(querySegmentSpec(Filtration.eternity()))
|
||||
.setVirtualColumns(expressionVirtualColumn(
|
||||
"v0",
|
||||
"concat(\"dim1\",'-',\"dim1\",'_',\"dim1\")",
|
||||
ValueType.STRING
|
||||
))
|
||||
.setDimensions(dimensions(new DefaultDimensionSpec("v0", "d0")))
|
||||
.setGranularity(Granularities.ALL)
|
||||
.setContext(QUERY_CONTEXT_DEFAULT)
|
||||
.build()
|
||||
),
|
||||
ImmutableList.of(
|
||||
new Object[]{"-_"},
|
||||
new Object[]{"1-1_1"},
|
||||
new Object[]{"10.1-10.1_10.1"},
|
||||
new Object[]{"2-2_2"},
|
||||
new Object[]{"abc-abc_abc"},
|
||||
new Object[]{"def-def_def"}
|
||||
)
|
||||
);
|
||||
|
||||
final List<Object[]> secondResults;
|
||||
if (useDefault) {
|
||||
secondResults = ImmutableList.of(
|
||||
new Object[]{"10.1x2.0999910.1"},
|
||||
new Object[]{"1ax4.099991"},
|
||||
new Object[]{"2x3.099992"},
|
||||
new Object[]{"abcx6.09999abc"},
|
||||
new Object[]{"ax1.09999"},
|
||||
new Object[]{"defabcx5.09999def"}
|
||||
);
|
||||
} else {
|
||||
secondResults = ImmutableList.of(
|
||||
new Object[]{null},
|
||||
new Object[]{"1ax4.099991"},
|
||||
new Object[]{"2x3.099992"},
|
||||
new Object[]{"ax1.09999"},
|
||||
new Object[]{"defabcx5.09999def"}
|
||||
);
|
||||
}
|
||||
testQuery(
|
||||
"SELECT CONCAT(dim1, CONCAT(dim2,'x'), m2, 9999, dim1) as dimX FROM foo GROUP BY 1",
|
||||
ImmutableList.of(
|
||||
new GroupByQuery.Builder()
|
||||
.setDataSource(CalciteTests.DATASOURCE1)
|
||||
.setInterval(querySegmentSpec(Filtration.eternity()))
|
||||
.setVirtualColumns(expressionVirtualColumn(
|
||||
"v0",
|
||||
"concat(\"dim1\",concat(\"dim2\",'x'),\"m2\",9999,\"dim1\")",
|
||||
ValueType.STRING
|
||||
))
|
||||
.setDimensions(dimensions(new DefaultDimensionSpec("v0", "d0")))
|
||||
.setGranularity(Granularities.ALL)
|
||||
.setContext(QUERY_CONTEXT_DEFAULT)
|
||||
.build()
|
||||
|
||||
),
|
||||
secondResults
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTextcat() throws Exception
|
||||
{
|
||||
|
|
|
@ -83,7 +83,10 @@ public class SqlVectorizedExpressionSanityTest extends InitializedNullHandlingTe
|
|||
"SELECT TIME_FLOOR(__time, 'PT1H'), SUM(long1 * long4) FROM foo GROUP BY 1 ORDER BY 2",
|
||||
"SELECT TIME_FLOOR(TIMESTAMPADD(DAY, -1, __time), 'PT1H'), SUM(long1 * long4) FROM foo GROUP BY 1 ORDER BY 1",
|
||||
"SELECT (long1 * long2), SUM(double1) FROM foo GROUP BY 1 ORDER BY 2",
|
||||
"SELECT string2, SUM(long1 * long4) FROM foo GROUP BY 1 ORDER BY 2"
|
||||
"SELECT string2, SUM(long1 * long4) FROM foo GROUP BY 1 ORDER BY 2",
|
||||
"SELECT string1 + string2, COUNT(*) FROM foo GROUP BY 1 ORDER BY 2",
|
||||
"SELECT CONCAT(string1, '-', string2), string3, COUNT(*) FROM foo GROUP BY 1,2 ORDER BY 3",
|
||||
"SELECT CONCAT(string1, '-', string2, '-', long1, '-', double1, '-', float1) FROM foo GROUP BY 1"
|
||||
);
|
||||
|
||||
private static final int ROWS_PER_SEGMENT = 100_000;
|
||||
|
|
Loading…
Reference in New Issue