mirror of https://github.com/apache/druid.git
Exposing optional replaceMissingValueWith in lookup function and macros (#14956)
* Exposing optional replaceMissingValueWith in lookup function and macros * args range validation * Updating docs * Addressing comments * Update docs/querying/sql-scalar.md Co-authored-by: Clint Wylie <cjwylie@gmail.com> * Update docs/querying/sql-functions.md Co-authored-by: Clint Wylie <cjwylie@gmail.com> * Addressing comments --------- Co-authored-by: Clint Wylie <cjwylie@gmail.com>
This commit is contained in:
parent
d038237ece
commit
f1edd671fb
|
@ -62,6 +62,12 @@ SELECT
|
|||
FROM sales
|
||||
GROUP BY 1
|
||||
```
|
||||
The lookup function also accepts the 3rd argument called `replaceMissingValueWith` as a constant string. If your value is missing a lookup for the queried key, the lookup function returns the result value from `replaceMissingValueWith`
|
||||
For example:
|
||||
```
|
||||
LOOKUP(store, 'store_to_country', 'NA')
|
||||
```
|
||||
If value is missing from `store_to_country` lookup for given key 'store' then it will return `NA`.
|
||||
|
||||
They can also be queried using the [JOIN operator](datasource.md#join):
|
||||
|
||||
|
|
|
@ -82,7 +82,7 @@ The following built-in functions are available.
|
|||
|concat|concat(expr, expr...) concatenate a list of strings|
|
||||
|format|format(pattern[, args...]) returns a string formatted in the manner of Java's [String.format](https://docs.oracle.com/javase/8/docs/api/java/lang/String.html#format-java.lang.String-java.lang.Object...-).|
|
||||
|like|like(expr, pattern[, escape]) is equivalent to SQL `expr LIKE pattern`|
|
||||
|lookup|lookup(expr, lookup-name) looks up expr in a registered [query-time lookup](../querying/lookups.md)|
|
||||
|lookup|lookup(expr, lookup-name[,replaceMissingValueWith]) looks up expr in a registered,`replaceMissingValueWith` is an optional constant string [query-time lookup](../querying/lookups.md)|
|
||||
|parse_long|parse_long(string[, radix]) parses a string as a long with the given radix, or 10 (decimal) if a radix is not provided.|
|
||||
|regexp_extract|regexp_extract(expr, pattern[, index]) applies a regular expression pattern and extracts a capture group index, or null if there is no match. If index is unspecified or zero, returns the substring that matched the pattern. The pattern may match anywhere inside `expr`; if you want to match the entire string instead, use the `^` and `$` markers at the start and end of your pattern.|
|
||||
|regexp_like|regexp_like(expr, pattern) returns whether `expr` matches regular expression `pattern`. The pattern may match anywhere inside `expr`; if you want to match the entire string instead, use the `^` and `$` markers at the start and end of your pattern. |
|
||||
|
|
|
@ -885,7 +885,7 @@ Calculates the base-10 of the numeric expression.
|
|||
|
||||
## LOOKUP
|
||||
|
||||
`LOOKUP(<CHARACTER>, <CHARACTER>)`
|
||||
`LOOKUP(<CHARACTER>, <CHARACTER>[, <CHARACTER>])`
|
||||
|
||||
**Function type:** [Scalar, string](sql-scalar.md#string-functions)
|
||||
|
||||
|
|
|
@ -98,7 +98,7 @@ String functions accept strings, and return a type appropriate to the function.
|
|||
|`CHAR_LENGTH(expr)`|Alias for `LENGTH`.|
|
||||
|`CHARACTER_LENGTH(expr)`|Alias for `LENGTH`.|
|
||||
|`STRLEN(expr)`|Alias for `LENGTH`.|
|
||||
|`LOOKUP(expr, lookupName)`|Look up `expr` in a registered [query-time lookup table](lookups.md). Note that lookups can also be queried directly using the [`lookup` schema](sql.md#from).|
|
||||
|`LOOKUP(expr, lookupName, [replaceMissingValueWith])`|Look up `expr` in a registered [query-time lookup table](lookups.md). Note that lookups can also be queried directly using the [`lookup` schema](sql.md#from). Optional constant replaceMissingValueWith can be passed as 3rd argument to be returned when value is missing from lookup.|
|
||||
|`LOWER(expr)`|Returns `expr` in all lowercase.|
|
||||
|`UPPER(expr)`|Returns `expr` in all uppercase.|
|
||||
|`PARSE_LONG(string, [radix])`|Parses a string into a long (BIGINT) with the given radix, or 10 (decimal) if a radix is not provided.|
|
||||
|
|
|
@ -22,6 +22,7 @@ package org.apache.druid.query.expression;
|
|||
import com.google.inject.Inject;
|
||||
import org.apache.druid.common.config.NullHandling;
|
||||
import org.apache.druid.java.util.common.StringUtils;
|
||||
import org.apache.druid.math.expr.Evals;
|
||||
import org.apache.druid.math.expr.Expr;
|
||||
import org.apache.druid.math.expr.ExprEval;
|
||||
import org.apache.druid.math.expr.ExprMacroTable;
|
||||
|
@ -54,10 +55,11 @@ public class LookupExprMacro implements ExprMacroTable.ExprMacro
|
|||
@Override
|
||||
public Expr apply(final List<Expr> args)
|
||||
{
|
||||
validationHelperCheckArgumentCount(args, 2);
|
||||
validationHelperCheckArgumentRange(args, 2, 3);
|
||||
|
||||
final Expr arg = args.get(0);
|
||||
final Expr lookupExpr = args.get(1);
|
||||
final Expr replaceMissingValueWith = getReplaceMissingValueWith(args);
|
||||
|
||||
validationHelperCheckArgIsLiteral(lookupExpr, "second argument");
|
||||
if (lookupExpr.getLiteralValue() == null) {
|
||||
|
@ -69,7 +71,9 @@ public class LookupExprMacro implements ExprMacroTable.ExprMacro
|
|||
lookupExtractorFactoryContainerProvider,
|
||||
lookupName,
|
||||
false,
|
||||
null,
|
||||
replaceMissingValueWith != null && replaceMissingValueWith.isLiteral()
|
||||
? Evals.asString(replaceMissingValueWith.getLiteralValue())
|
||||
: null,
|
||||
false,
|
||||
null
|
||||
);
|
||||
|
@ -104,6 +108,15 @@ public class LookupExprMacro implements ExprMacroTable.ExprMacro
|
|||
@Override
|
||||
public String stringify()
|
||||
{
|
||||
if (replaceMissingValueWith != null) {
|
||||
return StringUtils.format(
|
||||
"%s(%s, %s, %s)",
|
||||
FN_NAME,
|
||||
arg.stringify(),
|
||||
lookupExpr.stringify(),
|
||||
replaceMissingValueWith.stringify()
|
||||
);
|
||||
}
|
||||
return StringUtils.format("%s(%s, %s)", FN_NAME, arg.stringify(), lookupExpr.stringify());
|
||||
}
|
||||
|
||||
|
@ -116,4 +129,14 @@ public class LookupExprMacro implements ExprMacroTable.ExprMacro
|
|||
|
||||
return new LookupExpr(arg);
|
||||
}
|
||||
|
||||
private Expr getReplaceMissingValueWith(final List<Expr> args)
|
||||
{
|
||||
if (args.size() > 2) {
|
||||
final Expr missingValExpr = args.get(2);
|
||||
validationHelperCheckArgIsLiteral(missingValExpr, "third argument");
|
||||
return missingValExpr;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,95 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.druid.query.expression;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import org.apache.commons.compress.utils.Sets;
|
||||
import org.apache.druid.math.expr.Expr;
|
||||
import org.apache.druid.math.expr.ExprEval;
|
||||
import org.apache.druid.query.lookup.LookupExtractorFactoryContainer;
|
||||
import org.apache.druid.query.lookup.LookupExtractorFactoryContainerProvider;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class LookupExprMacroTest extends MacroTestBase
|
||||
{
|
||||
public LookupExprMacroTest()
|
||||
{
|
||||
super(
|
||||
new LookupExprMacro(new LookupExtractorFactoryContainerProvider()
|
||||
{
|
||||
@Override
|
||||
public Set<String> getAllLookupNames()
|
||||
{
|
||||
return Sets.newHashSet("test_lookup");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Optional<LookupExtractorFactoryContainer> get(String lookupName)
|
||||
{
|
||||
return Optional.empty();
|
||||
}
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTooFewArgs()
|
||||
{
|
||||
expectException(IllegalArgumentException.class, "Function[lookup] requires 2 to 3 arguments");
|
||||
apply(Collections.emptyList());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNonLiteralLookupName()
|
||||
{
|
||||
expectException(
|
||||
IllegalArgumentException.class,
|
||||
"Function[lookup] second argument must be a registered lookup name"
|
||||
);
|
||||
apply(getArgs(Lists.newArrayList("1", new ArrayList<String>())));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testValidCalls()
|
||||
{
|
||||
Assert.assertNotNull(apply(getArgs(Lists.newArrayList("1", "test_lookup"))));
|
||||
Assert.assertNotNull(apply(getArgs(Lists.newArrayList("null", "test_lookup"))));
|
||||
Assert.assertNotNull(apply(getArgs(Lists.newArrayList("1", "test_lookup", null))));
|
||||
Assert.assertNotNull(apply(getArgs(Lists.newArrayList("1", "test_lookup", "N/A"))));
|
||||
}
|
||||
|
||||
private List<Expr> getArgs(List<Object> args)
|
||||
{
|
||||
return args.stream().map(a -> {
|
||||
if (a != null && a instanceof String) {
|
||||
return ExprEval.of(a.toString()).toExpr();
|
||||
}
|
||||
return ExprEval.bestEffortOf(null).toExpr();
|
||||
}).collect(Collectors.toList());
|
||||
}
|
||||
}
|
|
@ -47,7 +47,12 @@ public class LookupExprMacroTest extends InitializedNullHandlingTest
|
|||
{
|
||||
assertExpr("lookup(x, 'lookyloo')", "xfoo");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLookupMissingValue()
|
||||
{
|
||||
assertExpr("lookup(y, 'lookyloo', 'N/A')", "N/A");
|
||||
assertExpr("lookup(y, 'lookyloo', null)", null);
|
||||
}
|
||||
@Test
|
||||
public void testLookupNotFound()
|
||||
{
|
||||
|
|
|
@ -26,20 +26,33 @@ import org.apache.calcite.sql.SqlFunctionCategory;
|
|||
import org.apache.calcite.sql.type.SqlTypeFamily;
|
||||
import org.apache.calcite.sql.type.SqlTypeName;
|
||||
import org.apache.druid.java.util.common.StringUtils;
|
||||
import org.apache.druid.math.expr.Evals;
|
||||
import org.apache.druid.math.expr.Expr;
|
||||
import org.apache.druid.query.lookup.LookupExtractorFactoryContainerProvider;
|
||||
import org.apache.druid.query.lookup.RegisteredLookupExtractionFn;
|
||||
import org.apache.druid.segment.column.RowSignature;
|
||||
import org.apache.druid.sql.calcite.expression.BasicOperandTypeChecker;
|
||||
import org.apache.druid.sql.calcite.expression.DruidExpression;
|
||||
import org.apache.druid.sql.calcite.expression.OperatorConversions;
|
||||
import org.apache.druid.sql.calcite.expression.SqlOperatorConversion;
|
||||
import org.apache.druid.sql.calcite.planner.PlannerContext;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public class QueryLookupOperatorConversion implements SqlOperatorConversion
|
||||
{
|
||||
private static final SqlFunction SQL_FUNCTION = OperatorConversions
|
||||
.operatorBuilder("LOOKUP")
|
||||
.operandTypes(SqlTypeFamily.CHARACTER, SqlTypeFamily.CHARACTER)
|
||||
.operandTypeChecker(
|
||||
BasicOperandTypeChecker.builder()
|
||||
.operandTypes(
|
||||
SqlTypeFamily.CHARACTER,
|
||||
SqlTypeFamily.CHARACTER,
|
||||
SqlTypeFamily.CHARACTER
|
||||
)
|
||||
.requiredOperandCount(2)
|
||||
.literalOperands(2)
|
||||
.build())
|
||||
.returnTypeNullable(SqlTypeName.VARCHAR)
|
||||
.functionCategory(SqlFunctionCategory.STRING)
|
||||
.build();
|
||||
|
@ -73,6 +86,7 @@ public class QueryLookupOperatorConversion implements SqlOperatorConversion
|
|||
inputExpressions -> {
|
||||
final DruidExpression arg = inputExpressions.get(0);
|
||||
final Expr lookupNameExpr = plannerContext.parseExpression(inputExpressions.get(1).getExpression());
|
||||
final String replaceMissingValueWith = getReplaceMissingValueWith(inputExpressions, plannerContext);
|
||||
|
||||
if (arg.isSimpleExtraction() && lookupNameExpr.isLiteral()) {
|
||||
return arg.getSimpleExtraction().cascade(
|
||||
|
@ -80,7 +94,7 @@ public class QueryLookupOperatorConversion implements SqlOperatorConversion
|
|||
lookupExtractorFactoryContainerProvider,
|
||||
(String) lookupNameExpr.getLiteralValue(),
|
||||
false,
|
||||
null,
|
||||
replaceMissingValueWith,
|
||||
null,
|
||||
true
|
||||
)
|
||||
|
@ -91,4 +105,18 @@ public class QueryLookupOperatorConversion implements SqlOperatorConversion
|
|||
}
|
||||
);
|
||||
}
|
||||
|
||||
private String getReplaceMissingValueWith(
|
||||
final List<DruidExpression> inputExpressions,
|
||||
final PlannerContext plannerContext
|
||||
)
|
||||
{
|
||||
if (inputExpressions.size() > 2) {
|
||||
final Expr missingValExpr = plannerContext.parseExpression(inputExpressions.get(2).getExpression());
|
||||
if (missingValExpr.isLiteral()) {
|
||||
return Evals.asString(missingValExpr.getLiteralValue());
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -8966,6 +8966,65 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
|
|||
)
|
||||
);
|
||||
}
|
||||
@Test
|
||||
public void testLookupReplaceMissingValueWith()
|
||||
{
|
||||
// Cannot vectorize due to extraction dimension specs.
|
||||
cannotVectorize();
|
||||
|
||||
final RegisteredLookupExtractionFn extractionFn1 = new RegisteredLookupExtractionFn(
|
||||
null,
|
||||
"lookyloo",
|
||||
false,
|
||||
"Missing_Value",
|
||||
null,
|
||||
true
|
||||
);
|
||||
final RegisteredLookupExtractionFn extractionFnRMVNull = new RegisteredLookupExtractionFn(
|
||||
null,
|
||||
"lookyloo",
|
||||
false,
|
||||
null,
|
||||
null,
|
||||
true
|
||||
);
|
||||
testQuery(
|
||||
"SELECT LOOKUP(dim1, 'lookyloo', 'Missing_Value'), LOOKUP(dim1, 'lookyloo', null) as rmvNull, COUNT(*) FROM foo group by 1,2",
|
||||
ImmutableList.of(
|
||||
GroupByQuery.builder()
|
||||
.setDataSource(CalciteTests.DATASOURCE1)
|
||||
.setInterval(querySegmentSpec(Filtration.eternity()))
|
||||
.setGranularity(Granularities.ALL)
|
||||
.setDimensions(
|
||||
dimensions(
|
||||
new ExtractionDimensionSpec(
|
||||
"dim1",
|
||||
"d0",
|
||||
ColumnType.STRING,
|
||||
extractionFn1
|
||||
),
|
||||
new ExtractionDimensionSpec(
|
||||
"dim1",
|
||||
"d1",
|
||||
ColumnType.STRING,
|
||||
extractionFnRMVNull
|
||||
)
|
||||
)
|
||||
)
|
||||
.setAggregatorSpecs(
|
||||
aggregators(
|
||||
new CountAggregatorFactory("a0")
|
||||
)
|
||||
)
|
||||
.setContext(QUERY_CONTEXT_DEFAULT)
|
||||
.build()
|
||||
),
|
||||
ImmutableList.of(
|
||||
new Object[]{"Missing_Value", NullHandling.defaultStringValue(), 5L},
|
||||
new Object[]{"xabc", "xabc", 1L}
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCountDistinctOfLookup()
|
||||
|
|
|
@ -1130,6 +1130,7 @@ simpleJson
|
|||
dimensionSpec
|
||||
flattenSpec
|
||||
binaryAsString
|
||||
replaceMissingValueWith
|
||||
sslFactory
|
||||
sslMode
|
||||
Proto
|
||||
|
|
Loading…
Reference in New Issue