diff --git a/docs/misc/math-expr.md b/docs/misc/math-expr.md index dc356479ad5..3867fcb5718 100644 --- a/docs/misc/math-expr.md +++ b/docs/misc/math-expr.md @@ -78,6 +78,8 @@ The following built-in functions are available. |parse_long|parse_long(string[, radix]) parses a string as a long with the given radix, or 10 (decimal) if a radix is not provided.| |regexp_extract|regexp_extract(expr, pattern[, index]) applies a regular expression pattern and extracts a capture group index, or null if there is no match. If index is unspecified or zero, returns the substring that matched the pattern. The pattern may match anywhere inside `expr`; if you want to match the entire string instead, use the `^` and `$` markers at the start and end of your pattern.| |regexp_like|regexp_like(expr, pattern) returns whether `expr` matches regular expression `pattern`. The pattern may match anywhere inside `expr`; if you want to match the entire string instead, use the `^` and `$` markers at the start and end of your pattern. | +|contains_string|contains_string(expr, string) returns whether `expr` contains `string` as a substring. This method is case-sensitive.| +|icontains_string|icontains_string(expr, string) returns whether `expr` contains `string` as a substring. This method is case-insensitive.| |replace|replace(expr, pattern, replacement) replaces pattern with replacement| |substring|substring(expr, index, length) behaves like java.lang.String's substring| |right|right(expr, length) returns the rightmost length characters from a string| diff --git a/docs/querying/sql.md b/docs/querying/sql.md index 1595c274db7..94cfcca15d5 100644 --- a/docs/querying/sql.md +++ b/docs/querying/sql.md @@ -397,6 +397,8 @@ String functions accept strings, and return a type appropriate to the function. |`POSITION(needle IN haystack [FROM fromIndex])`|Returns the index of needle within haystack, with indexes starting from 1. The search will begin at fromIndex, or 1 if fromIndex is not specified. 
If the needle is not found, returns 0.| |`REGEXP_EXTRACT(expr, pattern, [index])`|Apply regular expression `pattern` to `expr` and extract a capture group, or `NULL` if there is no match. If index is unspecified or zero, returns the first substring that matched the pattern. The pattern may match anywhere inside `expr`; if you want to match the entire string instead, use the `^` and `$` markers at the start and end of your pattern. Note: when `druid.generic.useDefaultValueForNull = true`, it is not possible to differentiate an empty-string match from a non-match (both will return `NULL`).| |`REGEXP_LIKE(expr, pattern)`|Returns whether `expr` matches regular expression `pattern`. The pattern may match anywhere inside `expr`; if you want to match the entire string instead, use the `^` and `$` markers at the start and end of your pattern. Similar to [`LIKE`](#comparison-operators), but uses regexps instead of LIKE patterns. Especially useful in WHERE clauses.| +|`CONTAINS_STRING(<expr>, str)`|Returns true if the `str` is a substring of `expr`.| +|`ICONTAINS_STRING(<expr>, str)`|Returns true if the `str` is a substring of `expr`. The match is case-insensitive.| |`REPLACE(expr, pattern, replacement)`|Replaces pattern with replacement in expr, and returns the result.| |`STRPOS(haystack, needle)`|Returns the index of needle within haystack, with indexes starting from 1. 
If the needle is not found, returns 0.| |`SUBSTRING(expr, index, [length])`|Returns a substring of expr starting at index, with a max length, both measured in UTF-16 code units.| diff --git a/processing/src/main/java/org/apache/druid/query/expression/CaseInsensitiveContainsExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/CaseInsensitiveContainsExprMacro.java new file mode 100644 index 00000000000..5f69c382b7f --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/expression/CaseInsensitiveContainsExprMacro.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.expression; + +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.math.expr.Expr; +import org.apache.druid.math.expr.ExprMacroTable; + +import java.util.List; + +/** + * This class implements a function that checks if one string contains another string. It is required that second + * string be a literal. This expression is case-insensitive. + * signature: + * long icontains_string(string, string) + *

+ * Examples: + * - {@code icontains_string("foobar", "bar") - 1 } + * - {@code icontains_string("foobar", "car") - 0 } + * - {@code icontains_string("foobar", "Bar") - 1 } + *

+ * See {@link ContainsExprMacro} for the case-sensitive version. + */ + +public class CaseInsensitiveContainsExprMacro implements ExprMacroTable.ExprMacro +{ + public static final String FN_NAME = "icontains_string"; + + @Override + public String name() + { + return FN_NAME; + } + + @Override + public Expr apply(final List args) + { + if (args.size() != 2) { + throw new IAE("Function[%s] must have 2 arguments", name()); + } + + final Expr arg = args.get(0); + final Expr searchStr = args.get(1); + return new ContainsExpr(FN_NAME, arg, searchStr, false); + } +} diff --git a/processing/src/main/java/org/apache/druid/query/expression/ContainsExpr.java b/processing/src/main/java/org/apache/druid/query/expression/ContainsExpr.java new file mode 100644 index 00000000000..f9550f32429 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/expression/ContainsExpr.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.expression; + +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.math.expr.Expr; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExprMacroTable; +import org.apache.druid.math.expr.ExprType; + +import javax.annotation.Nonnull; +import java.util.function.Function; + +/** + * {@link Expr} class returned by {@link ContainsExprMacro} and {@link CaseInsensitiveContainsExprMacro} for + * evaluating the expression. + */ +class ContainsExpr extends ExprMacroTable.BaseScalarUnivariateMacroFunctionExpr +{ + private final Function searchFunction; + private final Expr searchStrExpr; + + ContainsExpr(String functioName, Expr arg, Expr searchStrExpr, boolean caseSensitive) + { + super(functioName, arg); + this.searchStrExpr = validateSearchExpr(searchStrExpr, functioName); + // Creates the function eagerly to avoid branching in eval. + this.searchFunction = createFunction(searchStrExpr, caseSensitive); + } + + private ContainsExpr(String functioName, Expr arg, Expr searchStrExpr, Function searchFunction) + { + super(functioName, arg); + this.searchFunction = searchFunction; + this.searchStrExpr = validateSearchExpr(searchStrExpr, functioName); + } + + @Nonnull + @Override + public ExprEval eval(final Expr.ObjectBinding bindings) + { + final String s = NullHandling.nullToEmptyIfNeeded(arg.eval(bindings).asString()); + + if (s == null) { + // same behavior as regexp_like. 
+ return ExprEval.of(false, ExprType.LONG); + } else { + final boolean doesContain = searchFunction.apply(s); + return ExprEval.of(doesContain, ExprType.LONG); + } + } + + @Override + public Expr visit(Expr.Shuttle shuttle) + { + Expr newArg = arg.visit(shuttle); + return shuttle.visit(new ContainsExpr(name, newArg, searchStrExpr, searchFunction)); + } + + @Override + public String stringify() + { + return StringUtils.format("%s(%s, %s)", name, arg.stringify(), searchStrExpr.stringify()); + } + + private Function createFunction(Expr searchStrExpr, boolean caseSensitive) + { + String searchStr = StringUtils.nullToEmptyNonDruidDataString((String) searchStrExpr.getLiteralValue()); + if (caseSensitive) { + return s -> s.contains(searchStr); + } + return s -> org.apache.commons.lang.StringUtils.containsIgnoreCase(s, searchStr); + } + + private Expr validateSearchExpr(Expr searchExpr, String functioName) + { + if (!ExprUtils.isStringLiteral(searchExpr)) { + throw new IAE("Function[%s] substring must be a string literal", functioName); + } + return searchExpr; + } +} diff --git a/processing/src/main/java/org/apache/druid/query/expression/ContainsExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/ContainsExprMacro.java new file mode 100644 index 00000000000..a1744f63b71 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/expression/ContainsExprMacro.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.expression; + +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.math.expr.Expr; +import org.apache.druid.math.expr.ExprMacroTable; + +import java.util.List; + +/** + * This class implements a function that checks if one string contains another string. It is required that second + * string be a literal. This expression is case-sensitive. + * signature: + * long contains_string(string, string) + *

+ * Examples: + * - {@code contains_string("foobar", "bar") - 1 } + * - {@code contains_string("foobar", "car") - 0 } + * - {@code contains_string("foobar", "Bar") - 0 } + *

+ * See {@link CaseInsensitiveContainsExprMacro} for the case-insensitive version. + */ +public class ContainsExprMacro implements ExprMacroTable.ExprMacro +{ + public static final String FN_NAME = "contains_string"; + + @Override + public String name() + { + return FN_NAME; + } + + @Override + public Expr apply(final List args) + { + if (args.size() != 2) { + throw new IAE("Function[%s] must have 2 arguments", name()); + } + + final Expr arg = args.get(0); + final Expr searchStr = args.get(1); + return new ContainsExpr(FN_NAME, arg, searchStr, true); + } +} diff --git a/processing/src/test/java/org/apache/druid/query/expression/CaseInsensitiveExprMacroTest.java b/processing/src/test/java/org/apache/druid/query/expression/CaseInsensitiveExprMacroTest.java new file mode 100644 index 00000000000..1722ad32fe0 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/query/expression/CaseInsensitiveExprMacroTest.java @@ -0,0 +1,156 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.expression; + +import com.google.common.collect.ImmutableMap; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExprType; +import org.apache.druid.math.expr.Parser; +import org.junit.Assert; +import org.junit.Test; + +public class CaseInsensitiveExprMacroTest extends MacroTestBase +{ + public CaseInsensitiveExprMacroTest() + { + super(new CaseInsensitiveContainsExprMacro()); + } + + @Test + public void testErrorZeroArguments() + { + expectException(IllegalArgumentException.class, "Function[icontains_string] must have 2 arguments"); + eval("icontains_string()", Parser.withMap(ImmutableMap.of())); + } + + @Test + public void testErrorThreeArguments() + { + expectException(IllegalArgumentException.class, "Function[icontains_string] must have 2 arguments"); + eval("icontains_string('a', 'b', 'c')", Parser.withMap(ImmutableMap.of())); + } + + @Test + public void testMatchSearchLowerCase() + { + final ExprEval result = eval("icontains_string(a, 'OBA')", Parser.withMap(ImmutableMap.of("a", "foobar"))); + Assert.assertEquals( + ExprEval.of(true, ExprType.LONG).value(), + result.value() + ); + } + + @Test + public void testMatchSearchUpperCase() + { + final ExprEval result = eval("icontains_string(a, 'oba')", Parser.withMap(ImmutableMap.of("a", "FOOBAR"))); + Assert.assertEquals( + ExprEval.of(true, ExprType.LONG).value(), + result.value() + ); + } + + @Test + public void testNoMatch() + { + final ExprEval result = eval("icontains_string(a, 'bar')", Parser.withMap(ImmutableMap.of("a", "foo"))); + Assert.assertEquals( + ExprEval.of(false, ExprType.LONG).value(), + result.value() + ); + } + + @Test + public void testNullSearch() + { + if (NullHandling.sqlCompatible()) { + expectException(IllegalArgumentException.class, "Function[icontains_string] substring must be a string literal"); + } + + final ExprEval result = eval("icontains_string(a, null)", 
Parser.withMap(ImmutableMap.of("a", "foo"))); + Assert.assertEquals( + ExprEval.of(true, ExprType.LONG).value(), + result.value() + ); + } + + @Test + public void testEmptyStringSearch() + { + final ExprEval result = eval("icontains_string(a, '')", Parser.withMap(ImmutableMap.of("a", "foo"))); + Assert.assertEquals( + ExprEval.of(true, ExprType.LONG).value(), + result.value() + ); + } + + @Test + public void testNullSearchOnEmptyString() + { + if (NullHandling.sqlCompatible()) { + expectException(IllegalArgumentException.class, "Function[icontains_string] substring must be a string literal"); + } + + final ExprEval result = eval("icontains_string(a, null)", Parser.withMap(ImmutableMap.of("a", ""))); + Assert.assertEquals( + ExprEval.of(true, ExprType.LONG).value(), + result.value() + ); + } + + @Test + public void testEmptyStringSearchOnEmptyString() + { + final ExprEval result = eval("icontains_string(a, '')", Parser.withMap(ImmutableMap.of("a", ""))); + Assert.assertEquals( + ExprEval.of(true, ExprType.LONG).value(), + result.value() + ); + } + + @Test + public void testNullSearchOnNull() + { + if (NullHandling.sqlCompatible()) { + expectException(IllegalArgumentException.class, "Function[icontains_string] substring must be a string literal"); + } + + final ExprEval result = eval( + "icontains_string(a, null)", + Parser.withSuppliers(ImmutableMap.of("a", () -> null)) + ); + Assert.assertEquals( + ExprEval.of(true, ExprType.LONG).value(), + result.value() + ); + } + + @Test + public void testEmptyStringSearchOnNull() + { + final ExprEval result = eval("icontains_string(a, '')", Parser.withSuppliers(ImmutableMap.of("a", () -> null))); + Assert.assertEquals( + ExprEval.of(!NullHandling.sqlCompatible(), ExprType.LONG).value(), + result.value() + ); + } + +} diff --git a/processing/src/test/java/org/apache/druid/query/expression/ContainsExprMacroTest.java b/processing/src/test/java/org/apache/druid/query/expression/ContainsExprMacroTest.java new file mode 100644 index 
00000000000..bfbff7d0ab5 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/query/expression/ContainsExprMacroTest.java @@ -0,0 +1,142 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.expression; + +import com.google.common.collect.ImmutableMap; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExprType; +import org.apache.druid.math.expr.Parser; +import org.junit.Assert; +import org.junit.Test; + +public class ContainsExprMacroTest extends MacroTestBase +{ + public ContainsExprMacroTest() + { + super(new ContainsExprMacro()); + } + + @Test + public void testErrorZeroArguments() + { + expectException(IllegalArgumentException.class, "Function[contains_string] must have 2 arguments"); + eval("contains_string()", Parser.withMap(ImmutableMap.of())); + } + + @Test + public void testErrorThreeArguments() + { + expectException(IllegalArgumentException.class, "Function[contains_string] must have 2 arguments"); + eval("contains_string('a', 'b', 'c')", Parser.withMap(ImmutableMap.of())); + } + + @Test + public void testMatch() + { + final ExprEval result = eval("contains_string(a, 'oba')", 
Parser.withMap(ImmutableMap.of("a", "foobar"))); + Assert.assertEquals( + ExprEval.of(true, ExprType.LONG).value(), + result.value() + ); + } + + @Test + public void testNoMatch() + { + final ExprEval result = eval("contains_string(a, 'bar')", Parser.withMap(ImmutableMap.of("a", "foo"))); + Assert.assertEquals( + ExprEval.of(false, ExprType.LONG).value(), + result.value() + ); + } + + @Test + public void testNullSearch() + { + if (NullHandling.sqlCompatible()) { + expectException(IllegalArgumentException.class, "Function[contains_string] substring must be a string literal"); + } + + final ExprEval result = eval("contains_string(a, null)", Parser.withMap(ImmutableMap.of("a", "foo"))); + Assert.assertEquals( + ExprEval.of(true, ExprType.LONG).value(), + result.value() + ); + } + + @Test + public void testEmptyStringSearch() + { + final ExprEval result = eval("contains_string(a, '')", Parser.withMap(ImmutableMap.of("a", "foo"))); + Assert.assertEquals( + ExprEval.of(true, ExprType.LONG).value(), + result.value() + ); + } + + @Test + public void testNullSearchOnEmptyString() + { + if (NullHandling.sqlCompatible()) { + expectException(IllegalArgumentException.class, "Function[contains_string] substring must be a string literal"); + } + + final ExprEval result = eval("contains_string(a, null)", Parser.withMap(ImmutableMap.of("a", ""))); + Assert.assertEquals( + ExprEval.of(true, ExprType.LONG).value(), + result.value() + ); + } + + @Test + public void testEmptyStringSearchOnEmptyString() + { + final ExprEval result = eval("contains_string(a, '')", Parser.withMap(ImmutableMap.of("a", ""))); + Assert.assertEquals( + ExprEval.of(true, ExprType.LONG).value(), + result.value() + ); + } + + @Test + public void testNullSearchOnNull() + { + if (NullHandling.sqlCompatible()) { + expectException(IllegalArgumentException.class, "Function[contains_string] substring must be a string literal"); + } + + final ExprEval result = eval("contains_string(a, null)", 
Parser.withSuppliers(ImmutableMap.of("a", () -> null))); + Assert.assertEquals( + ExprEval.of(true, ExprType.LONG).value(), + result.value() + ); + } + + @Test + public void testEmptyStringSearchOnNull() + { + final ExprEval result = eval("contains_string(a, '')", Parser.withSuppliers(ImmutableMap.of("a", () -> null))); + Assert.assertEquals( + ExprEval.of(!NullHandling.sqlCompatible(), ExprType.LONG).value(), + result.value() + ); + } +} diff --git a/server/src/main/java/org/apache/druid/guice/ExpressionModule.java b/server/src/main/java/org/apache/druid/guice/ExpressionModule.java index 9e451e8f12f..7a25f92e37b 100644 --- a/server/src/main/java/org/apache/druid/guice/ExpressionModule.java +++ b/server/src/main/java/org/apache/druid/guice/ExpressionModule.java @@ -25,6 +25,8 @@ import com.google.inject.Binder; import com.google.inject.multibindings.Multibinder; import org.apache.druid.initialization.DruidModule; import org.apache.druid.math.expr.ExprMacroTable; +import org.apache.druid.query.expression.CaseInsensitiveContainsExprMacro; +import org.apache.druid.query.expression.ContainsExprMacro; import org.apache.druid.query.expression.GuiceExprMacroTable; import org.apache.druid.query.expression.IPv4AddressMatchExprMacro; import org.apache.druid.query.expression.IPv4AddressParseExprMacro; @@ -52,6 +54,8 @@ public class ExpressionModule implements DruidModule .add(LikeExprMacro.class) .add(RegexpExtractExprMacro.class) .add(RegexpLikeExprMacro.class) + .add(ContainsExprMacro.class) + .add(CaseInsensitiveContainsExprMacro.class) .add(TimestampCeilExprMacro.class) .add(TimestampExtractExprMacro.class) .add(TimestampFloorExprMacro.class) diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ContainsOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ContainsOperatorConversion.java new file mode 100644 index 00000000000..aca2260de94 --- /dev/null +++ 
b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ContainsOperatorConversion.java @@ -0,0 +1,161 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.sql.calcite.expression.builtin; + +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.SqlFunction; +import org.apache.calcite.sql.SqlFunctionCategory; +import org.apache.calcite.sql.SqlOperator; +import org.apache.calcite.sql.type.SqlTypeFamily; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.query.expression.CaseInsensitiveContainsExprMacro; +import org.apache.druid.query.expression.ContainsExprMacro; +import org.apache.druid.query.filter.DimFilter; +import org.apache.druid.query.filter.SearchQueryDimFilter; +import org.apache.druid.query.search.ContainsSearchQuerySpec; +import org.apache.druid.query.search.SearchQuerySpec; +import org.apache.druid.segment.VirtualColumn; +import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.sql.calcite.expression.DruidExpression; +import 
org.apache.druid.sql.calcite.expression.Expressions; +import org.apache.druid.sql.calcite.expression.OperatorConversions; +import org.apache.druid.sql.calcite.expression.SqlOperatorConversion; +import org.apache.druid.sql.calcite.planner.PlannerContext; +import org.apache.druid.sql.calcite.rel.VirtualColumnRegistry; + +import javax.annotation.Nullable; +import java.util.List; + +/** + * Register {@code contains_string} and {@code icontains_string} functions with calcite that internally + * translate these functions into {@link SearchQueryDimFilter} with {@link ContainsSearchQuerySpec} as + * search query spec. + */ +public class ContainsOperatorConversion implements SqlOperatorConversion +{ + private final SqlOperator operator; + private final boolean caseSensitive; + + private ContainsOperatorConversion( + final SqlFunction sqlFunction, + final boolean caseSensitive + ) + { + this.operator = sqlFunction; + this.caseSensitive = caseSensitive; + } + + public static SqlOperatorConversion caseSensitive() + { + final SqlFunction sqlFunction = createSqlFunction(ContainsExprMacro.FN_NAME); + return new ContainsOperatorConversion(sqlFunction, true); + } + + public static SqlOperatorConversion caseInsensitive() + { + final SqlFunction sqlFunction = createSqlFunction(CaseInsensitiveContainsExprMacro.FN_NAME); + return new ContainsOperatorConversion(sqlFunction, false); + } + + private static SqlFunction createSqlFunction(final String functionName) + { + return OperatorConversions + .operatorBuilder(StringUtils.toUpperCase(functionName)) + .operandTypes(SqlTypeFamily.CHARACTER, SqlTypeFamily.CHARACTER) + .requiredOperands(2) + .literalOperands(1) + .returnTypeNonNull(SqlTypeName.BOOLEAN) + .functionCategory(SqlFunctionCategory.STRING) + .build(); + } + + @Override + public SqlOperator calciteOperator() + { + return operator; + } + + @Nullable + @Override + public DruidExpression toDruidExpression( + PlannerContext plannerContext, + RowSignature rowSignature, + RexNode 
rexNode + ) + { + return OperatorConversions.convertCall( + plannerContext, + rowSignature, + rexNode, + operands -> DruidExpression.fromExpression(DruidExpression.functionCall( + StringUtils.toLowerCase(operator.getName()), + operands + )) + ); + } + + @Nullable + @Override + public DimFilter toDruidFilter( + PlannerContext plannerContext, + RowSignature rowSignature, + @Nullable VirtualColumnRegistry virtualColumnRegistry, + RexNode rexNode + ) + { + final List operands = ((RexCall) rexNode).getOperands(); + final DruidExpression druidExpression = Expressions.toDruidExpression( + plannerContext, + rowSignature, + operands.get(0) + ); + + if (druidExpression == null) { + return null; + } + + final String search = RexLiteral.stringValue(operands.get(1)); + final SearchQuerySpec spec = new ContainsSearchQuerySpec(search, caseSensitive); + + if (druidExpression.isSimpleExtraction()) { + return new SearchQueryDimFilter( + druidExpression.getSimpleExtraction().getColumn(), + spec, + druidExpression.getSimpleExtraction().getExtractionFn(), + null + ); + } else if (virtualColumnRegistry != null) { + VirtualColumn v = virtualColumnRegistry.getOrCreateVirtualColumnForExpression( + plannerContext, + druidExpression, + operands.get(0).getType() + ); + + return new SearchQueryDimFilter( + v.getOutputName(), spec, null, null); + } else { + return null; + } + } + +} diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidOperatorTable.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidOperatorTable.java index 3f7699fa9f1..e1fb0b48fe5 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidOperatorTable.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidOperatorTable.java @@ -61,6 +61,7 @@ import org.apache.druid.sql.calcite.expression.builtin.BTrimOperatorConversion; import org.apache.druid.sql.calcite.expression.builtin.CastOperatorConversion; import 
org.apache.druid.sql.calcite.expression.builtin.CeilOperatorConversion; import org.apache.druid.sql.calcite.expression.builtin.ConcatOperatorConversion; +import org.apache.druid.sql.calcite.expression.builtin.ContainsOperatorConversion; import org.apache.druid.sql.calcite.expression.builtin.DateTruncOperatorConversion; import org.apache.druid.sql.calcite.expression.builtin.ExtractOperatorConversion; import org.apache.druid.sql.calcite.expression.builtin.FloorOperatorConversion; @@ -181,6 +182,8 @@ public class DruidOperatorTable implements SqlOperatorTable .add(new AliasedOperatorConversion(new TruncateOperatorConversion(), "TRUNC")) .add(new LPadOperatorConversion()) .add(new RPadOperatorConversion()) + .add(ContainsOperatorConversion.caseSensitive()) + .add(ContainsOperatorConversion.caseInsensitive()) .build(); private static final List VALUE_COERCION_OPERATOR_CONVERSIONS = diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/expression/ExpressionTestHelper.java b/sql/src/test/java/org/apache/druid/sql/calcite/expression/ExpressionTestHelper.java index 1d842175ef4..6e05ce8b49c 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/expression/ExpressionTestHelper.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/expression/ExpressionTestHelper.java @@ -38,7 +38,9 @@ import org.apache.druid.query.filter.ValueMatcher; import org.apache.druid.segment.RowAdapters; import org.apache.druid.segment.RowBasedColumnSelectorFactory; import org.apache.druid.segment.VirtualColumn; +import org.apache.druid.segment.VirtualColumns; import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.segment.virtual.VirtualizedColumnSelectorFactory; import org.apache.druid.sql.calcite.planner.Calcites; import org.apache.druid.sql.calcite.planner.PlannerConfig; import org.apache.druid.sql.calcite.planner.PlannerContext; @@ -283,11 +285,14 @@ class ExpressionTestHelper ); final ValueMatcher matcher = expectedFilter.toFilter().makeMatcher( - 
RowBasedColumnSelectorFactory.create( - RowAdapters.standardRow(), - () -> new MapBasedRow(0L, bindings), - rowSignature, - false + new VirtualizedColumnSelectorFactory( + RowBasedColumnSelectorFactory.create( + RowAdapters.standardRow(), + () -> new MapBasedRow(0L, bindings), + rowSignature, + false + ), + VirtualColumns.create(virtualColumns) ) ); diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/expression/ExpressionsTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/expression/ExpressionsTest.java index 9975cffe64c..bf4bd4a0cb5 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/expression/ExpressionsTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/expression/ExpressionsTest.java @@ -35,9 +35,12 @@ import org.apache.druid.java.util.common.IAE; import org.apache.druid.query.expression.TestExprMacroTable; import org.apache.druid.query.extraction.RegexDimExtractionFn; import org.apache.druid.query.filter.RegexDimFilter; +import org.apache.druid.query.filter.SearchQueryDimFilter; +import org.apache.druid.query.search.ContainsSearchQuerySpec; import org.apache.druid.segment.column.RowSignature; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.virtual.ExpressionVirtualColumn; +import org.apache.druid.sql.calcite.expression.builtin.ContainsOperatorConversion; import org.apache.druid.sql.calcite.expression.builtin.DateTruncOperatorConversion; import org.apache.druid.sql.calcite.expression.builtin.LPadOperatorConversion; import org.apache.druid.sql.calcite.expression.builtin.LeftOperatorConversion; @@ -1072,6 +1075,231 @@ public class ExpressionsTest extends ExpressionTestBase ); } + /** Exercises the case-sensitive and case-insensitive ContainsOperatorConversion operators through testHelper.testExpression: with a plain input ref as first operand, with a CONCAT call as first operand, and nested inside an AND call. Each case supplies the Druid expression expected from the conversion (contains_string or icontains_string) followed by 1L or 0L, presumably the expected evaluation result; confirm against ExpressionTestHelper.testExpression. */ @Test + public void testContains() + { + testHelper.testExpression( + ContainsOperatorConversion.caseSensitive().calciteOperator(), + ImmutableList.of( + testHelper.makeInputRef("spacey"), + testHelper.makeLiteral("there") + ), + DruidExpression.fromExpression("contains_string(\"spacey\",'there')"), + 1L + ); + + 
testHelper.testExpression( + ContainsOperatorConversion.caseSensitive().calciteOperator(), + ImmutableList.of( + testHelper.makeInputRef("spacey"), + testHelper.makeLiteral("There") + ), + DruidExpression.fromExpression("contains_string(\"spacey\",'There')"), + 0L /* same inputs as the first case except for the capitalised search string; the case-sensitive operator is expected to yield 0 */ + ); + + testHelper.testExpression( + ContainsOperatorConversion.caseInsensitive().calciteOperator(), + ImmutableList.of( + testHelper.makeInputRef("spacey"), + testHelper.makeLiteral("There") + ), + DruidExpression.fromExpression("icontains_string(\"spacey\",'There')"), + 1L + ); + + testHelper.testExpression( + ContainsOperatorConversion.caseSensitive().calciteOperator(), + ImmutableList.of( + testHelper.makeCall( + SqlStdOperatorTable.CONCAT, + testHelper.makeLiteral("what is"), + testHelper.makeInputRef("spacey") + ), + testHelper.makeLiteral("what") + ), + DruidExpression.fromExpression("contains_string(concat('what is',\"spacey\"),'what')"), + 1L + ); + + testHelper.testExpression( + ContainsOperatorConversion.caseSensitive().calciteOperator(), + ImmutableList.of( + testHelper.makeCall( + SqlStdOperatorTable.CONCAT, + testHelper.makeLiteral("what is"), + testHelper.makeInputRef("spacey") + ), + testHelper.makeLiteral("there") + ), + DruidExpression.fromExpression("contains_string(concat('what is',\"spacey\"),'there')"), + 1L + ); + + testHelper.testExpression( + ContainsOperatorConversion.caseInsensitive().calciteOperator(), + ImmutableList.of( + testHelper.makeCall( + SqlStdOperatorTable.CONCAT, + testHelper.makeLiteral("what is"), + testHelper.makeInputRef("spacey") + ), + testHelper.makeLiteral("There") + ), + DruidExpression.fromExpression("icontains_string(concat('what is',\"spacey\"),'There')"), + 1L + ); + + testHelper.testExpression( + SqlStdOperatorTable.AND, + ImmutableList.of( + testHelper.makeCall( + ContainsOperatorConversion.caseSensitive().calciteOperator(), + testHelper.makeInputRef("spacey"), + testHelper.makeLiteral("there") + ), + testHelper.makeCall( + SqlStdOperatorTable.EQUALS, 
+ testHelper.makeLiteral("yes"), + testHelper.makeLiteral("yes") + ) + ), + DruidExpression.fromExpression("(contains_string(\"spacey\",'there') && ('yes' == 'yes'))"), + 1L + ); + + testHelper.testExpression( + SqlStdOperatorTable.AND, + ImmutableList.of( + testHelper.makeCall( + ContainsOperatorConversion.caseInsensitive().calciteOperator(), + testHelper.makeInputRef("spacey"), + testHelper.makeLiteral("There") + ), + testHelper.makeCall( + SqlStdOperatorTable.EQUALS, + testHelper.makeLiteral("yes"), + testHelper.makeLiteral("yes") + ) + ), + DruidExpression.fromExpression("(icontains_string(\"spacey\",'There') && ('yes' == 'yes'))"), + 1L + ); + } + + /** Exercises the same two operators through testHelper.testFilter. Each case supplies a list of virtual columns (empty for a direct column reference, a single ExpressionVirtualColumn named v0 when the first operand is a CONCAT call), a SearchQueryDimFilter wrapping a ContainsSearchQuerySpec whose boolean argument is true for the case-sensitive operator and false for the case-insensitive one, and a trailing boolean, presumably the expected match outcome; confirm against ExpressionTestHelper.testFilter. */ @Test + public void testContainsAsFilter() + { + testHelper.testFilter( + ContainsOperatorConversion.caseSensitive().calciteOperator(), + ImmutableList.of( + testHelper.makeInputRef("spacey"), + testHelper.makeLiteral("there") + ), + Collections.emptyList(), + new SearchQueryDimFilter("spacey", new ContainsSearchQuerySpec("there", true), null), + true + ); + + testHelper.testFilter( + ContainsOperatorConversion.caseSensitive().calciteOperator(), + ImmutableList.of( + testHelper.makeInputRef("spacey"), + testHelper.makeLiteral("There") + ), + Collections.emptyList(), + new SearchQueryDimFilter("spacey", new ContainsSearchQuerySpec("There", true), null), + false + ); + + testHelper.testFilter( + ContainsOperatorConversion.caseInsensitive().calciteOperator(), + ImmutableList.of( + testHelper.makeInputRef("spacey"), + testHelper.makeLiteral("There") + ), + Collections.emptyList(), + new SearchQueryDimFilter("spacey", new ContainsSearchQuerySpec("There", false), null), + true + ); + + testHelper.testFilter( + ContainsOperatorConversion.caseSensitive().calciteOperator(), + ImmutableList.of( + testHelper.makeCall( + SqlStdOperatorTable.CONCAT, + testHelper.makeLiteral("what is"), + testHelper.makeInputRef("spacey") + ), + testHelper.makeLiteral("what") + ), + ImmutableList.of( + new ExpressionVirtualColumn( + "v0", + 
"concat('what is',\"spacey\")", + ValueType.STRING, + TestExprMacroTable.INSTANCE + ) + ), + new SearchQueryDimFilter("v0", new ContainsSearchQuerySpec("what", true), null), + true + ); + + testHelper.testFilter( + ContainsOperatorConversion.caseSensitive().calciteOperator(), + ImmutableList.of( + testHelper.makeCall( + SqlStdOperatorTable.CONCAT, + testHelper.makeLiteral("what is"), + testHelper.makeInputRef("spacey") + ), + testHelper.makeLiteral("there") + ), + ImmutableList.of( + new ExpressionVirtualColumn( + "v0", + "concat('what is',\"spacey\")", + ValueType.STRING, + TestExprMacroTable.INSTANCE + ) + ), + new SearchQueryDimFilter("v0", new ContainsSearchQuerySpec("there", true), null), + true + ); + + testHelper.testFilter( + ContainsOperatorConversion.caseInsensitive().calciteOperator(), + ImmutableList.of( + testHelper.makeCall( + SqlStdOperatorTable.CONCAT, + testHelper.makeLiteral("what is"), + testHelper.makeInputRef("spacey") + ), + testHelper.makeLiteral("What") + ), + ImmutableList.of( + new ExpressionVirtualColumn( + "v0", + "concat('what is',\"spacey\")", + ValueType.STRING, + TestExprMacroTable.INSTANCE + ) + ), + new SearchQueryDimFilter("v0", new ContainsSearchQuerySpec("What", false), null), + true + ); + + testHelper.testFilter( + ContainsOperatorConversion.caseSensitive().calciteOperator(), + ImmutableList.of( + testHelper.makeInputRef("spacey"), + testHelper.makeLiteral("") + ), + Collections.emptyList(), + new SearchQueryDimFilter("spacey", new ContainsSearchQuerySpec("", true), null), + true /* empty search string: this case expects true */ + ); + } @Test public void testTimeFloor() diff --git a/website/.spelling b/website/.spelling index 14fd910f4a7..d9698fb54ee 100644 --- a/website/.spelling +++ b/website/.spelling @@ -1095,6 +1095,8 @@ nvl parse_long regexp_extract regexp_like +contains_string +icontains_string result1 result2 rint