SQL: Add support for single parameter text manipulating functions (#31874)

Added support for ASCII, BIT_LENGTH, CHAR, CHAR_LENGTH, LCASE, LENGTH, LTRIM, RTRIM, SPACE, UCASE functions.
Wherever Painless scripting is necessary (WHERE conditions, ORDER BY, etc.), these functions are translated into Painless scripts.
This commit is contained in:
Andrei Stefan 2018-07-12 15:05:42 +03:00 committed by GitHub
parent 2cfe703299
commit edf83c1d87
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
31 changed files with 1227 additions and 3 deletions

View File

@ -58,6 +58,16 @@ import org.elasticsearch.xpack.sql.expression.function.scalar.math.Sin;
import org.elasticsearch.xpack.sql.expression.function.scalar.math.Sinh;
import org.elasticsearch.xpack.sql.expression.function.scalar.math.Sqrt;
import org.elasticsearch.xpack.sql.expression.function.scalar.math.Tan;
import org.elasticsearch.xpack.sql.expression.function.scalar.string.Ascii;
import org.elasticsearch.xpack.sql.expression.function.scalar.string.BitLength;
import org.elasticsearch.xpack.sql.expression.function.scalar.string.Char;
import org.elasticsearch.xpack.sql.expression.function.scalar.string.CharLength;
import org.elasticsearch.xpack.sql.expression.function.scalar.string.LCase;
import org.elasticsearch.xpack.sql.expression.function.scalar.string.LTrim;
import org.elasticsearch.xpack.sql.expression.function.scalar.string.Length;
import org.elasticsearch.xpack.sql.expression.function.scalar.string.RTrim;
import org.elasticsearch.xpack.sql.expression.function.scalar.string.Space;
import org.elasticsearch.xpack.sql.expression.function.scalar.string.UCase;
import org.elasticsearch.xpack.sql.parser.ParsingException;
import org.elasticsearch.xpack.sql.tree.Location;
import org.elasticsearch.xpack.sql.util.StringUtils;
@ -134,6 +144,17 @@ public class FunctionRegistry {
def(Sinh.class, Sinh::new),
def(Sqrt.class, Sqrt::new),
def(Tan.class, Tan::new),
// String
def(Ascii.class, Ascii::new),
def(Char.class, Char::new),
def(BitLength.class, BitLength::new),
def(CharLength.class, CharLength::new),
def(LCase.class, LCase::new),
def(Length.class, Length::new),
def(LTrim.class, LTrim::new),
def(RTrim.class, RTrim::new),
def(Space.class, Space::new),
def(UCase.class, UCase::new),
// Special
def(Score.class, Score::new)));
@ -299,6 +320,7 @@ public class FunctionRegistry {
T build(Location location, Expression lhs, Expression rhs);
}
@SuppressWarnings("overloads")
private static FunctionDefinition def(Class<? extends Function> function, FunctionBuilder builder,
boolean datetime, String... aliases) {
String primaryName = normalize(function.getSimpleName());

View File

@ -17,6 +17,7 @@ import org.elasticsearch.xpack.sql.expression.function.scalar.processor.runtime.
import org.elasticsearch.xpack.sql.expression.function.scalar.processor.runtime.ConstantProcessor;
import org.elasticsearch.xpack.sql.expression.function.scalar.processor.runtime.HitExtractorProcessor;
import org.elasticsearch.xpack.sql.expression.function.scalar.processor.runtime.Processor;
import org.elasticsearch.xpack.sql.expression.function.scalar.string.StringProcessor;
import java.util.ArrayList;
import java.util.List;
@ -46,6 +47,8 @@ public final class Processors {
entries.add(new Entry(Processor.class, DateTimeProcessor.NAME, DateTimeProcessor::new));
// math
entries.add(new Entry(Processor.class, MathProcessor.NAME, MathProcessor::new));
// string
entries.add(new Entry(Processor.class, StringProcessor.NAME, StringProcessor::new));
return entries;
}
}

View File

@ -30,7 +30,7 @@ public class ATan2 extends BinaryNumericFunction {
}
@Override
protected NodeInfo<? extends Expression> info() {
protected NodeInfo<ATan2> info() {
return NodeInfo.create(this, ATan2::new, left(), right());
}

View File

@ -26,7 +26,7 @@ public class Power extends BinaryNumericFunction {
}
@Override
protected NodeInfo<? extends Expression> info() {
protected NodeInfo<Power> info() {
return NodeInfo.create(this, Power::new, left(), right());
}

View File

@ -0,0 +1,42 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.sql.expression.function.scalar.string;
import org.elasticsearch.xpack.sql.expression.Expression;
import org.elasticsearch.xpack.sql.expression.function.scalar.string.StringProcessor.StringOperation;
import org.elasticsearch.xpack.sql.tree.Location;
import org.elasticsearch.xpack.sql.tree.NodeInfo;
import org.elasticsearch.xpack.sql.type.DataType;
/**
 * SQL {@code ASCII} function: yields the ASCII code of the leftmost character
 * of the given (char) expression.
 */
public class Ascii extends UnaryStringFunction {

    public Ascii(Location location, Expression field) {
        super(location, field);
    }

    /** Binds this function to {@link StringOperation#ASCII}. */
    @Override
    protected StringOperation operation() {
        return StringOperation.ASCII;
    }

    /** The character code is reported as an integer. */
    @Override
    public DataType dataType() {
        return DataType.INTEGER;
    }

    /** Creates the same function around {@code newChild}, keeping this node's location. */
    @Override
    protected Ascii replaceChild(Expression newChild) {
        return new Ascii(location(), newChild);
    }

    @Override
    protected NodeInfo<Ascii> info() {
        return NodeInfo.create(this, Ascii::new, field());
    }
}

View File

@ -0,0 +1,43 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.sql.expression.function.scalar.string;
import org.elasticsearch.xpack.sql.expression.Expression;
import org.elasticsearch.xpack.sql.expression.function.scalar.string.StringProcessor.StringOperation;
import org.elasticsearch.xpack.sql.tree.Location;
import org.elasticsearch.xpack.sql.tree.NodeInfo;
import org.elasticsearch.xpack.sql.type.DataType;
/**
 * SQL {@code BIT_LENGTH} function: returns the number of bits contained within the value expression.
 */
public class BitLength extends UnaryStringFunction {

    public BitLength(Location location, Expression field) {
        super(location, field);
    }

    /** Binds this function to {@link StringOperation#BIT_LENGTH}. */
    @Override
    protected StringOperation operation() {
        return StringOperation.BIT_LENGTH;
    }

    /** The bit count is reported as an integer. */
    @Override
    public DataType dataType() {
        //TODO investigate if a data type Long (BIGINT) wouldn't be more appropriate here
        return DataType.INTEGER;
    }

    /** Creates the same function around {@code newChild}, keeping this node's location. */
    @Override
    protected BitLength replaceChild(Expression newChild) {
        return new BitLength(location(), newChild);
    }

    @Override
    protected NodeInfo<BitLength> info() {
        return NodeInfo.create(this, BitLength::new, field());
    }
}

View File

@ -0,0 +1,42 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.sql.expression.function.scalar.string;
import org.elasticsearch.xpack.sql.expression.Expression;
import org.elasticsearch.xpack.sql.expression.function.scalar.string.StringProcessor.StringOperation;
import org.elasticsearch.xpack.sql.tree.Location;
import org.elasticsearch.xpack.sql.tree.NodeInfo;
import org.elasticsearch.xpack.sql.type.DataType;
/**
 * SQL {@code CHAR} function: converts an int ASCII code to a character value.
 */
public class Char extends UnaryStringIntFunction {

    public Char(Location location, Expression field) {
        super(location, field);
    }

    /** Binds this function to {@link StringOperation#CHAR}. */
    @Override
    protected StringOperation operation() {
        return StringOperation.CHAR;
    }

    /** The resulting character is reported as a keyword. */
    @Override
    public DataType dataType() {
        return DataType.KEYWORD;
    }

    /** Creates the same function around {@code newChild}, keeping this node's location. */
    @Override
    protected Char replaceChild(Expression newChild) {
        return new Char(location(), newChild);
    }

    @Override
    protected NodeInfo<Char> info() {
        return NodeInfo.create(this, Char::new, field());
    }
}

View File

@ -0,0 +1,42 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.sql.expression.function.scalar.string;
import org.elasticsearch.xpack.sql.expression.Expression;
import org.elasticsearch.xpack.sql.expression.function.scalar.string.StringProcessor.StringOperation;
import org.elasticsearch.xpack.sql.tree.Location;
import org.elasticsearch.xpack.sql.tree.NodeInfo;
import org.elasticsearch.xpack.sql.type.DataType;
/**
 * SQL {@code CHAR_LENGTH} function: returns the length (in characters) of the string expression.
 */
public class CharLength extends UnaryStringFunction {

    public CharLength(Location location, Expression field) {
        super(location, field);
    }

    /** Binds this function to {@link StringOperation#CHAR_LENGTH}. */
    @Override
    protected StringOperation operation() {
        return StringOperation.CHAR_LENGTH;
    }

    /** The length is reported as an integer. */
    @Override
    public DataType dataType() {
        return DataType.INTEGER;
    }

    /** Creates the same function around {@code newChild}, keeping this node's location. */
    @Override
    protected CharLength replaceChild(Expression newChild) {
        return new CharLength(location(), newChild);
    }

    @Override
    protected NodeInfo<CharLength> info() {
        return NodeInfo.create(this, CharLength::new, field());
    }
}

View File

@ -0,0 +1,42 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.sql.expression.function.scalar.string;
import org.elasticsearch.xpack.sql.expression.Expression;
import org.elasticsearch.xpack.sql.expression.function.scalar.string.StringProcessor.StringOperation;
import org.elasticsearch.xpack.sql.tree.Location;
import org.elasticsearch.xpack.sql.tree.NodeInfo;
import org.elasticsearch.xpack.sql.type.DataType;
/**
 * SQL {@code LCASE} function: lowercases all uppercase letters in a string.
 */
public class LCase extends UnaryStringFunction {

    public LCase(Location location, Expression field) {
        super(location, field);
    }

    /** Binds this function to {@link StringOperation#LCASE}. */
    @Override
    protected StringOperation operation() {
        return StringOperation.LCASE;
    }

    /** The lowercased text is reported as a keyword. */
    @Override
    public DataType dataType() {
        return DataType.KEYWORD;
    }

    /** Creates the same function around {@code newChild}, keeping this node's location. */
    @Override
    protected LCase replaceChild(Expression newChild) {
        return new LCase(location(), newChild);
    }

    @Override
    protected NodeInfo<LCase> info() {
        return NodeInfo.create(this, LCase::new, field());
    }
}

View File

@ -0,0 +1,43 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.sql.expression.function.scalar.string;
import org.elasticsearch.xpack.sql.expression.Expression;
import org.elasticsearch.xpack.sql.expression.function.scalar.string.StringProcessor.StringOperation;
import org.elasticsearch.xpack.sql.tree.Location;
import org.elasticsearch.xpack.sql.tree.NodeInfo;
import org.elasticsearch.xpack.sql.type.DataType;
/**
 * SQL {@code LTRIM} function: trims the leading whitespaces.
 */
public class LTrim extends UnaryStringFunction {

    public LTrim(Location location, Expression field) {
        super(location, field);
    }

    /** Binds this function to {@link StringOperation#LTRIM}. */
    @Override
    protected StringOperation operation() {
        return StringOperation.LTRIM;
    }

    /** The trimmed text is reported as a keyword. */
    @Override
    public DataType dataType() {
        return DataType.KEYWORD;
    }

    /** Creates the same function around {@code newChild}, keeping this node's location. */
    @Override
    protected LTrim replaceChild(Expression newChild) {
        return new LTrim(location(), newChild);
    }

    @Override
    protected NodeInfo<LTrim> info() {
        return NodeInfo.create(this, LTrim::new, field());
    }
}

View File

@ -0,0 +1,43 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.sql.expression.function.scalar.string;
import org.elasticsearch.xpack.sql.expression.Expression;
import org.elasticsearch.xpack.sql.expression.function.scalar.string.StringProcessor.StringOperation;
import org.elasticsearch.xpack.sql.tree.Location;
import org.elasticsearch.xpack.sql.tree.NodeInfo;
import org.elasticsearch.xpack.sql.type.DataType;
/**
 * SQL {@code LENGTH} function: returns the length (number of characters) of a string,
 * excluding the trailing blanks.
 */
public class Length extends UnaryStringFunction {

    public Length(Location location, Expression field) {
        super(location, field);
    }

    /** Binds this function to {@link StringOperation#LENGTH}. */
    @Override
    protected StringOperation operation() {
        return StringOperation.LENGTH;
    }

    /** The length is reported as an integer. */
    @Override
    public DataType dataType() {
        return DataType.INTEGER;
    }

    /** Creates the same function around {@code newChild}, keeping this node's location. */
    @Override
    protected Length replaceChild(Expression newChild) {
        return new Length(location(), newChild);
    }

    @Override
    protected NodeInfo<Length> info() {
        return NodeInfo.create(this, Length::new, field());
    }
}

View File

@ -0,0 +1,43 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.sql.expression.function.scalar.string;
import org.elasticsearch.xpack.sql.expression.Expression;
import org.elasticsearch.xpack.sql.expression.function.scalar.string.StringProcessor.StringOperation;
import org.elasticsearch.xpack.sql.tree.Location;
import org.elasticsearch.xpack.sql.tree.NodeInfo;
import org.elasticsearch.xpack.sql.type.DataType;
/**
 * SQL {@code RTRIM} function: trims the trailing whitespaces.
 */
public class RTrim extends UnaryStringFunction {

    public RTrim(Location location, Expression field) {
        super(location, field);
    }

    /** Binds this function to {@link StringOperation#RTRIM}. */
    @Override
    protected StringOperation operation() {
        return StringOperation.RTRIM;
    }

    /** The trimmed text is reported as a keyword. */
    @Override
    public DataType dataType() {
        return DataType.KEYWORD;
    }

    /** Creates the same function around {@code newChild}, keeping this node's location. */
    @Override
    protected RTrim replaceChild(Expression newChild) {
        return new RTrim(location(), newChild);
    }

    @Override
    protected NodeInfo<RTrim> info() {
        return NodeInfo.create(this, RTrim::new, field());
    }
}

View File

@ -0,0 +1,42 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.sql.expression.function.scalar.string;
import org.elasticsearch.xpack.sql.expression.Expression;
import org.elasticsearch.xpack.sql.expression.function.scalar.string.StringProcessor.StringOperation;
import org.elasticsearch.xpack.sql.tree.Location;
import org.elasticsearch.xpack.sql.tree.NodeInfo;
import org.elasticsearch.xpack.sql.type.DataType;
/**
 * SQL {@code SPACE} function: generates a string consisting of count spaces.
 */
public class Space extends UnaryStringIntFunction {

    public Space(Location location, Expression field) {
        super(location, field);
    }

    /** Binds this function to {@link StringOperation#SPACE}. */
    @Override
    protected StringOperation operation() {
        return StringOperation.SPACE;
    }

    /** The generated text is reported as a keyword. */
    @Override
    public DataType dataType() {
        return DataType.KEYWORD;
    }

    /** Creates the same function around {@code newChild}, keeping this node's location. */
    @Override
    protected Space replaceChild(Expression newChild) {
        return new Space(location(), newChild);
    }

    @Override
    protected NodeInfo<Space> info() {
        return NodeInfo.create(this, Space::new, field());
    }
}

View File

@ -0,0 +1,51 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.sql.expression.function.scalar.string;
/**
 * Whitespace-trimming helpers used by the SQL string functions.
 */
abstract class StringFunctionUtils {

    /**
     * Trims the trailing whitespace characters from the given String. Uses {@link Character#isWhitespace(char)}
     * to determine if a character is whitespace or not.
     *
     * @param s the original String, may be {@code null}
     * @return {@code s} itself when it is {@code null} or empty, otherwise {@code s} without its trailing whitespace
     */
    static String trimTrailingWhitespaces(String s) {
        if (!hasLength(s)) {
            return s;
        }
        // Walk back from the end to the last non-whitespace character; a single substring
        // replaces the per-character StringBuilder deletions.
        int end = s.length();
        while (end > 0 && Character.isWhitespace(s.charAt(end - 1))) {
            end--;
        }
        return s.substring(0, end);
    }

    /**
     * Trims the leading whitespace characters from the given String. Uses {@link Character#isWhitespace(char)}
     * to determine if a character is whitespace or not.
     *
     * @param s the original String, may be {@code null}
     * @return {@code s} itself when it is {@code null} or empty, otherwise {@code s} without its leading whitespace
     */
    static String trimLeadingWhitespaces(String s) {
        if (!hasLength(s)) {
            return s;
        }
        // Scanning with an index keeps this linear; deleting index 0 repeatedly shifts the
        // whole remaining buffer on every iteration.
        final int length = s.length();
        int start = 0;
        while (start < length && Character.isWhitespace(s.charAt(start))) {
            start++;
        }
        return s.substring(start);
    }

    /** Tells whether {@code s} is neither {@code null} nor empty. */
    private static boolean hasLength(String s) {
        return (s != null && s.length() > 0);
    }
}

View File

@ -0,0 +1,160 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.sql.expression.function.scalar.string;
import org.apache.lucene.util.UnicodeUtil;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.xpack.sql.SqlIllegalArgumentException;
import org.elasticsearch.xpack.sql.expression.function.scalar.processor.runtime.Processor;
import java.io.IOException;
import java.util.Arrays;
import java.util.Locale;
import java.util.function.Function;
/**
 * {@link Processor} that applies one single-argument text operation (a {@link StringOperation})
 * to its input. Only the chosen operation is written to / read from the wire.
 */
public class StringProcessor implements Processor {

    /**
     * Operation over textual input. {@link #apply(Object)} rejects anything that is not a
     * {@link String} or {@link Character} and hands the argument's string form to {@link #doApply(String)}.
     */
    @FunctionalInterface
    private interface StringFunction<R> {
        default R apply(Object o) {
            if (!(o instanceof String || o instanceof Character)) {
                throw new SqlIllegalArgumentException("A string/char is required; received [{}]", o);
            }
            return doApply(o.toString());
        }

        R doApply(String s);
    }

    /**
     * Operation over numeric input. {@link #apply(Object)} rejects anything that is not a
     * {@link Number} before delegating to {@link #doApply(Number)}.
     */
    @FunctionalInterface
    private interface NumericFunction<R> {
        default R apply(Object o) {
            if (!(o instanceof Number)) {
                throw new SqlIllegalArgumentException("A number is required; received [{}]", o);
            }
            return doApply((Number) o);
        }

        R doApply(Number s);
    }

    /**
     * The supported operations. Every constant declared here returns {@code null} for a {@code null} input.
     */
    public enum StringOperation {
        // code of the first character; an empty string has none, hence null
        ASCII((String s) -> s.length() == 0 ? null : Integer.valueOf(s.charAt(0))),
        // only codes within [0, 255] are turned into a character, anything else gives null
        CHAR((Number n) -> {
            int i = n.intValue();
            return i < 0 || i > 255 ? null : String.valueOf((char) i);
        }),
        LCASE((String s) -> s.toLowerCase(Locale.ROOT)),
        UCASE((String s) -> s.toUpperCase(Locale.ROOT)),
        // trailing whitespace does not count towards the length
        LENGTH((String s) -> StringFunctionUtils.trimTrailingWhitespaces(s).length()),
        RTRIM((String s) -> StringFunctionUtils.trimTrailingWhitespaces(s)),
        LTRIM((String s) -> StringFunctionUtils.trimLeadingWhitespaces(s)),
        // a negative count gives null; otherwise a string made of that many spaces
        SPACE((Number n) -> {
            int i = n.intValue();
            if (i < 0) {
                return null;
            }
            char[] spaces = new char[i];
            char whitespace = ' ';
            Arrays.fill(spaces, whitespace);
            return new String(spaces);
        }),
        // size of the UTF-8 form of the string, expressed in bits
        BIT_LENGTH((String s) -> UnicodeUtil.calcUTF16toUTF8Length(s, 0, s.length()) * 8),
        CHAR_LENGTH(String::length);

        private final Function<Object, Object> apply;

        StringOperation(StringFunction<Object> apply) {
            this.apply = l -> l == null ? null : apply.apply(l);
        }

        StringOperation(NumericFunction<Object> apply) {
            this.apply = l -> l == null ? null : apply.apply(l);
        }

        StringOperation(Function<Object, Object> apply) {
            this(apply, false);
        }

        /**
         * Wrapper for nulls around the given function.
         * If true, nulls are passed through, otherwise the function is short-circuited
         * and null returned.
         */
        StringOperation(Function<Object, Object> apply, boolean nullAware) {
            if (nullAware) {
                this.apply = apply;
            } else {
                this.apply = l -> l == null ? null : apply.apply(l);
            }
        }

        /**
         * Runs this operation on {@code l}.
         *
         * @param l the input value; a string/char or a number, depending on the operation
         * @return the operation's result, possibly {@code null}
         * @throws SqlIllegalArgumentException if {@code l} is not of the kind the operation requires
         */
        public final Object apply(Object l) {
            return apply.apply(l);
        }

        /**
         * "translate" the function name ("char") into a function name that is not a reserved keyword in java.
         * Used in {@code InternalSqlScriptUtils#character(Number)}.
         */
        @Override
        public String toString() {
            return this == CHAR ? "character" : super.toString();
        }
    }

    /** Name under which this processor is registered as a named writeable. */
    public static final String NAME = "s";

    private final StringOperation processor;

    public StringProcessor(StringOperation processor) {
        this.processor = processor;
    }

    /** Reads the operation back from the stream; counterpart of {@link #writeTo(StreamOutput)}. */
    public StringProcessor(StreamInput in) throws IOException {
        processor = in.readEnum(StringOperation.class);
    }

    @Override
    public void writeTo(StreamOutput out) throws IOException {
        out.writeEnum(processor);
    }

    @Override
    public String getWriteableName() {
        return NAME;
    }

    /** Applies the configured operation to {@code input}. */
    @Override
    public Object process(Object input) {
        return processor.apply(input);
    }

    StringOperation processor() {
        return processor;
    }

    /** Two processors are equal when they are of the same class and wrap the same operation. */
    @Override
    public boolean equals(Object obj) {
        if (obj == null || obj.getClass() != getClass()) {
            return false;
        }
        StringProcessor other = (StringProcessor) obj;
        return processor == other.processor;
    }

    @Override
    public int hashCode() {
        return processor.hashCode();
    }

    @Override
    public String toString() {
        return processor.toString();
    }
}

View File

@ -0,0 +1,43 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.sql.expression.function.scalar.string;
import org.elasticsearch.xpack.sql.expression.Expression;
import org.elasticsearch.xpack.sql.expression.function.scalar.string.StringProcessor.StringOperation;
import org.elasticsearch.xpack.sql.tree.Location;
import org.elasticsearch.xpack.sql.tree.NodeInfo;
import org.elasticsearch.xpack.sql.type.DataType;
/**
 * SQL {@code UCASE} function: uppercases all lowercase letters in a string.
 */
public class UCase extends UnaryStringFunction {

    public UCase(Location location, Expression field) {
        super(location, field);
    }

    /** Binds this function to {@link StringOperation#UCASE}. */
    @Override
    protected StringOperation operation() {
        return StringOperation.UCASE;
    }

    /** The uppercased text is reported as a keyword. */
    @Override
    public DataType dataType() {
        return DataType.KEYWORD;
    }

    /** Creates the same function around {@code newChild}, keeping this node's location. */
    @Override
    protected UCase replaceChild(Expression newChild) {
        return new UCase(location(), newChild);
    }

    @Override
    protected NodeInfo<UCase> info() {
        return NodeInfo.create(this, UCase::new, field());
    }
}

View File

@ -0,0 +1,89 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.sql.expression.function.scalar.string;
import org.elasticsearch.xpack.sql.expression.Expression;
import org.elasticsearch.xpack.sql.expression.FieldAttribute;
import org.elasticsearch.xpack.sql.expression.function.scalar.UnaryScalarFunction;
import org.elasticsearch.xpack.sql.expression.function.scalar.processor.definition.ProcessorDefinition;
import org.elasticsearch.xpack.sql.expression.function.scalar.processor.definition.ProcessorDefinitions;
import org.elasticsearch.xpack.sql.expression.function.scalar.processor.definition.UnaryProcessorDefinition;
import org.elasticsearch.xpack.sql.expression.function.scalar.script.ScriptTemplate;
import org.elasticsearch.xpack.sql.expression.function.scalar.string.StringProcessor.StringOperation;
import org.elasticsearch.xpack.sql.tree.Location;
import org.elasticsearch.xpack.sql.util.StringUtils;
import java.util.Locale;
import java.util.Objects;
import static java.lang.String.format;
import static org.elasticsearch.xpack.sql.expression.function.scalar.script.ParamsBuilder.paramsBuilder;
/**
 * Base unary function for text manipulating SQL functions that receive a string as parameter.
 */
public abstract class UnaryStringFunction extends UnaryScalarFunction {

    protected UnaryStringFunction(Location location, Expression field) {
        super(location, field);
    }

    /** The operation this function evaluates, both when folded and when run by the processor. */
    protected abstract StringOperation operation();

    /** Foldable exactly when the argument is. */
    @Override
    public boolean foldable() {
        return field().foldable();
    }

    /** Applies the operation to the folded argument. */
    @Override
    public Object fold() {
        StringOperation op = operation();
        Object argument = field().fold();
        return op.apply(argument);
    }

    /** Resolves only when the children are resolved and the argument has a string type. */
    @Override
    protected TypeResolution resolveType() {
        if (!childrenResolved()) {
            return new TypeResolution("Unresolved children");
        }
        if (field().dataType().isString()) {
            return TypeResolution.TYPE_RESOLVED;
        }
        return new TypeResolution("'%s' requires a string type, received %s", operation(), field().dataType().esType);
    }

    @Override
    protected final ProcessorDefinition makeProcessorDefinition() {
        return new UnaryProcessorDefinition(
                location(),
                this,
                ProcessorDefinitions.toProcessorDefinition(field()),
                new StringProcessor(operation()));
    }

    @Override
    protected ScriptTemplate asScriptFrom(FieldAttribute field) {
        //TODO change this to use _source instead of the exact form (aka field.keyword for text fields)
        String script = formatScript("doc[{}].value");
        String fieldName = field.isInexact() ? field.exactAttribute().name() : field.name();
        return new ScriptTemplate(script, paramsBuilder().variable(fieldName).build(), dataType());
    }

    @Override
    protected String formatScript(String template) {
        // basically, transform the script to InternalSqlScriptUtils.[function_name](other_function_or_field_name)
        String functionName = StringUtils.underscoreToLowerCamelCase(operation().toString());
        String call = format(Locale.ROOT, "{sql}.%s(%s)", functionName, template);
        return super.formatScript(call);
    }

    /** Equal when the other object has exactly this class and an equal argument. */
    @Override
    public boolean equals(Object obj) {
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        UnaryStringFunction that = (UnaryStringFunction) obj;
        return Objects.equals(that.field(), field());
    }

    @Override
    public int hashCode() {
        return Objects.hash(field());
    }
}

View File

@ -0,0 +1,90 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.sql.expression.function.scalar.string;
import org.elasticsearch.xpack.sql.expression.Expression;
import org.elasticsearch.xpack.sql.expression.FieldAttribute;
import org.elasticsearch.xpack.sql.expression.function.scalar.UnaryScalarFunction;
import org.elasticsearch.xpack.sql.expression.function.scalar.processor.definition.ProcessorDefinition;
import org.elasticsearch.xpack.sql.expression.function.scalar.processor.definition.ProcessorDefinitions;
import org.elasticsearch.xpack.sql.expression.function.scalar.processor.definition.UnaryProcessorDefinition;
import org.elasticsearch.xpack.sql.expression.function.scalar.script.ScriptTemplate;
import org.elasticsearch.xpack.sql.expression.function.scalar.string.StringProcessor.StringOperation;
import org.elasticsearch.xpack.sql.tree.Location;
import org.elasticsearch.xpack.sql.util.StringUtils;
import java.util.Locale;
import java.util.Objects;
import static java.lang.String.format;
import static org.elasticsearch.xpack.sql.expression.function.scalar.script.ParamsBuilder.paramsBuilder;
/**
 * Base unary function for text manipulating SQL functions that receive a number as parameter.
 */
public abstract class UnaryStringIntFunction extends UnaryScalarFunction {

    protected UnaryStringIntFunction(Location location, Expression field) {
        super(location, field);
    }

    /** The operation this function evaluates, both when folded and when run by the processor. */
    protected abstract StringOperation operation();

    /** Foldable exactly when the argument is. */
    @Override
    public boolean foldable() {
        return field().foldable();
    }

    /** Applies the operation to the folded argument. */
    @Override
    public Object fold() {
        StringOperation op = operation();
        Object argument = field().fold();
        return op.apply(argument);
    }

    /** Resolves only when the children are resolved and the argument has an integer type. */
    @Override
    protected TypeResolution resolveType() {
        if (!childrenResolved()) {
            return new TypeResolution("Unresolved children");
        }
        if (field().dataType().isInteger) {
            return TypeResolution.TYPE_RESOLVED;
        }
        return new TypeResolution("'%s' requires a integer type, received %s", operation(), field().dataType().esType);
    }

    @Override
    protected final ProcessorDefinition makeProcessorDefinition() {
        return new UnaryProcessorDefinition(
                location(),
                this,
                ProcessorDefinitions.toProcessorDefinition(field()),
                new StringProcessor(operation()));
    }

    @Override
    protected ScriptTemplate asScriptFrom(FieldAttribute field) {
        String script = formatScript("doc[{}].value");
        return new ScriptTemplate(script, paramsBuilder().variable(field.name()).build(), dataType());
    }

    @Override
    protected String formatScript(String template) {
        // wraps the template in a call to the script helper named after the operation
        String functionName = StringUtils.underscoreToLowerCamelCase(operation().toString());
        String call = format(Locale.ROOT, "{sql}.%s(%s)", functionName, template);
        return super.formatScript(call);
    }

    /** Equal when the other object has exactly this class and an equal argument. */
    @Override
    public boolean equals(Object obj) {
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        UnaryStringIntFunction that = (UnaryStringIntFunction) obj;
        return Objects.equals(that.field(), field());
    }

    @Override
    public int hashCode() {
        return Objects.hash(field());
    }
}

View File

@ -6,6 +6,7 @@
package org.elasticsearch.xpack.sql.expression.function.scalar.whitelist;
import org.elasticsearch.xpack.sql.expression.function.scalar.datetime.DateTimeFunction;
import org.elasticsearch.xpack.sql.expression.function.scalar.string.StringProcessor.StringOperation;
/**
* Whitelisted class for SQL scripts.
@ -19,4 +20,44 @@ public final class InternalSqlScriptUtils {
/** Script-side entry point that forwards to {@link DateTimeFunction#dateTimeChrono}. */
public static Integer dateTimeChrono(long millis, String tzId, String chronoName) {
    Integer extracted = DateTimeFunction.dateTimeChrono(millis, tzId, chronoName);
    return extracted;
}
/** Script-side entry point for {@link StringOperation#ASCII}. */
public static Integer ascii(String s) {
    Object code = StringOperation.ASCII.apply(s);
    return (Integer) code;
}
/** Script-side entry point for {@link StringOperation#BIT_LENGTH}. */
public static Integer bitLength(String s) {
    Object bits = StringOperation.BIT_LENGTH.apply(s);
    return (Integer) bits;
}
/**
 * Script-side entry point for {@link StringOperation#CHAR}; named {@code character}
 * because {@code char} is a reserved word in Java.
 */
public static String character(Number n) {
    Object converted = StringOperation.CHAR.apply(n);
    return (String) converted;
}
/** Script-side entry point for {@link StringOperation#CHAR_LENGTH}. */
public static Integer charLength(String s) {
    Object count = StringOperation.CHAR_LENGTH.apply(s);
    return (Integer) count;
}
/** Script-side entry point for {@link StringOperation#LCASE}. */
public static String lcase(String s) {
    Object lowered = StringOperation.LCASE.apply(s);
    return (String) lowered;
}
/** Script-side entry point for {@link StringOperation#UCASE}. */
public static String ucase(String s) {
    Object raised = StringOperation.UCASE.apply(s);
    return (String) raised;
}
/** Script-side entry point for {@link StringOperation#LENGTH}. */
public static Integer length(String s) {
    Object count = StringOperation.LENGTH.apply(s);
    return (Integer) count;
}
/** Script-side entry point for {@link StringOperation#RTRIM}. */
public static String rtrim(String s) {
    Object trimmed = StringOperation.RTRIM.apply(s);
    return (String) trimmed;
}
/** Script-side entry point for {@link StringOperation#LTRIM}. */
public static String ltrim(String s) {
    Object trimmed = StringOperation.LTRIM.apply(s);
    return (String) trimmed;
}
/** Script-side entry point for {@link StringOperation#SPACE}. */
public static String space(Number n) {
    Object blanks = StringOperation.SPACE.apply(n);
    return (String) blanks;
}
}

View File

@ -414,6 +414,9 @@ abstract class QueryTranslator {
FieldAttribute fa = (FieldAttribute) e.left();
inexact = fa.isInexact();
target = nameOf(inexact ? fa : fa.exactAttribute());
} else {
throw new SqlIllegalArgumentException("Scalar function ({}) not allowed (yet) as arguments for LIKE",
Expressions.name(e.left()));
}
if (e instanceof Like) {

View File

@ -58,6 +58,33 @@ public abstract class StringUtils {
return sb.toString().toUpperCase(Locale.ROOT);
}
/**
 * Converts an underscore separated name to lower camel case (CAMEL_CASE to camelCase).
 * The input is trimmed and lowercased first; underscores are dropped and the character
 * following an underscore is uppercased.
 *
 * @param string the name to convert
 * @return the converted name, or {@code EMPTY} when the input has no text
 */
public static String underscoreToLowerCamelCase(String string) {
    if (!Strings.hasText(string)) {
        return EMPTY;
    }
    String normalized = string.trim().toLowerCase(Locale.ROOT);
    StringBuilder camel = new StringBuilder();
    boolean capitalizeNext = false;
    for (char ch : normalized.toCharArray()) {
        if (ch == '_') {
            // the separator itself is never emitted, it only flags the next character
            capitalizeNext = true;
            continue;
        }
        camel.append(capitalizeNext ? Character.toUpperCase(ch) : ch);
        capitalizeNext = false;
    }
    return camel.toString();
}
public static String nullAsEmpty(String string) {
return string == null ? EMPTY : string;
}

View File

@ -9,4 +9,14 @@
class org.elasticsearch.xpack.sql.expression.function.scalar.whitelist.InternalSqlScriptUtils {
Integer dateTimeChrono(long, String, String)
Integer ascii(String)
Integer bitLength(String)
String character(Number)
Integer charLength(String)
String lcase(String)
String ucase(String)
Integer length(String)
String rtrim(String)
String ltrim(String)
String space(Number)
}

View File

@ -0,0 +1,160 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.sql.expression.function.scalar.string;
import org.elasticsearch.common.io.stream.Writeable.Reader;
import org.elasticsearch.test.AbstractWireSerializingTestCase;
import org.elasticsearch.xpack.sql.SqlIllegalArgumentException;
import org.elasticsearch.xpack.sql.expression.function.scalar.string.StringProcessor.StringOperation;
import java.io.IOException;
public class StringFunctionProcessorTests extends AbstractWireSerializingTestCase<StringProcessor> {
/** Builds a processor around a randomly picked {@link StringOperation}. */
public static StringProcessor randomStringFunctionProcessor() {
    StringOperation picked = randomFrom(StringOperation.values());
    return new StringProcessor(picked);
}
/** Supplies a random processor as the instance under test. */
@Override
protected StringProcessor createTestInstance() {
    StringProcessor instance = randomStringFunctionProcessor();
    return instance;
}
/** Reads instances back through the stream constructor of {@link StringProcessor}. */
@Override
protected Reader<StringProcessor> instanceReader() {
    return in -> new StringProcessor(in);
}
/** Produces a processor whose operation differs from the one of {@code instance}. */
@Override
protected StringProcessor mutateInstance(StringProcessor instance) throws IOException {
    StringOperation different = randomValueOtherThan(instance.processor(), () -> randomFrom(StringOperation.values()));
    return new StringProcessor(different);
}
private void stringCharInputValidation(StringProcessor proc) {
SqlIllegalArgumentException siae = expectThrows(SqlIllegalArgumentException.class, () -> proc.process(123));
assertEquals("A string/char is required; received [123]", siae.getMessage());
}
private void numericInputValidation(StringProcessor proc) {
SqlIllegalArgumentException siae = expectThrows(SqlIllegalArgumentException.class, () -> proc.process("A"));
assertEquals("A number is required; received [A]", siae.getMessage());
}
public void testAscii() {
StringProcessor proc = new StringProcessor(StringOperation.ASCII);
assertNull(proc.process(null));
assertEquals(65, proc.process("A"));
// accepts chars as well
assertEquals(65, proc.process('A'));
assertEquals(65, proc.process("Alpha"));
// validate input
stringCharInputValidation(proc);
}
public void testChar() {
StringProcessor proc = new StringProcessor(StringOperation.CHAR);
assertNull(proc.process(null));
assertEquals("A", proc.process(65));
assertNull(proc.process(256));
assertNull(proc.process(-1));
// validate input
numericInputValidation(proc);
}
public void testLCase() {
StringProcessor proc = new StringProcessor(StringOperation.LCASE);
assertNull(proc.process(null));
assertEquals("fulluppercase", proc.process("FULLUPPERCASE"));
assertEquals("someuppercase", proc.process("SomeUpPerCasE"));
assertEquals("fulllowercase", proc.process("fulllowercase"));
assertEquals("a", proc.process('A'));
stringCharInputValidation(proc);
}
public void testUCase() {
StringProcessor proc = new StringProcessor(StringOperation.UCASE);
assertNull(proc.process(null));
assertEquals("FULLLOWERCASE", proc.process("fulllowercase"));
assertEquals("SOMELOWERCASE", proc.process("SomeLoweRCasE"));
assertEquals("FULLUPPERCASE", proc.process("FULLUPPERCASE"));
assertEquals("A", proc.process('a'));
stringCharInputValidation(proc);
}
public void testLength() {
StringProcessor proc = new StringProcessor(StringOperation.LENGTH);
assertNull(proc.process(null));
assertEquals(7, proc.process("foo bar"));
assertEquals(0, proc.process(""));
assertEquals(0, proc.process(" "));
assertEquals(7, proc.process("foo bar "));
assertEquals(10, proc.process(" foo bar "));
assertEquals(1, proc.process('f'));
stringCharInputValidation(proc);
}
public void testRTrim() {
StringProcessor proc = new StringProcessor(StringOperation.RTRIM);
assertNull(proc.process(null));
assertEquals("foo bar", proc.process("foo bar"));
assertEquals("", proc.process(""));
assertEquals("", proc.process(" "));
assertEquals("foo bar", proc.process("foo bar "));
assertEquals(" foo bar", proc.process(" foo bar "));
assertEquals("f", proc.process('f'));
stringCharInputValidation(proc);
}
public void testLTrim() {
StringProcessor proc = new StringProcessor(StringOperation.LTRIM);
assertNull(proc.process(null));
assertEquals("foo bar", proc.process("foo bar"));
assertEquals("", proc.process(""));
assertEquals("", proc.process(" "));
assertEquals("foo bar", proc.process(" foo bar"));
assertEquals("foo bar ", proc.process(" foo bar "));
assertEquals("f", proc.process('f'));
stringCharInputValidation(proc);
}
public void testSpace() {
StringProcessor proc = new StringProcessor(StringOperation.SPACE);
int count = 7;
assertNull(proc.process(null));
assertEquals(" ", proc.process(count));
assertEquals(count, ((String) proc.process(count)).length());
assertNotNull(proc.process(0));
assertEquals("", proc.process(0));
assertNull(proc.process(-1));
numericInputValidation(proc);
}
public void testBitLength() {
StringProcessor proc = new StringProcessor(StringOperation.BIT_LENGTH);
assertNull(proc.process(null));
assertEquals(56, proc.process("foo bar"));
assertEquals(0, proc.process(""));
assertEquals(8, proc.process('f'));
stringCharInputValidation(proc);
}
public void testCharLength() {
StringProcessor proc = new StringProcessor(StringOperation.CHAR_LENGTH);
assertNull(proc.process(null));
assertEquals(7, proc.process("foo bar"));
assertEquals(0, proc.process(""));
assertEquals(1, proc.process('f'));
assertEquals(1, proc.process('€'));
stringCharInputValidation(proc);
}
}

View File

@ -139,4 +139,14 @@ public class QueryTranslatorTests extends ESTestCase {
assertEquals("date", rq.field());
assertEquals(DateTime.parse("1969-05-13T12:34:56Z"), rq.lower());
}
/**
 * A scalar function used as the left-hand side of LIKE must be rejected by the query translator
 * with a descriptive error.
 */
public void testLikeConstructsNotSupported() {
    LogicalPlan projection = plan("SELECT LTRIM(keyword) lt FROM test WHERE LTRIM(keyword) LIKE '%a%'");
    assertTrue(projection instanceof Project);

    // the filter carrying the LIKE condition sits directly below the projection
    LogicalPlan filter = ((Project) projection).child();
    assertTrue(filter instanceof Filter);

    Expression likeCondition = ((Filter) filter).condition();
    SqlIllegalArgumentException failure = expectThrows(
            SqlIllegalArgumentException.class,
            () -> QueryTranslator.toQuery(likeCondition, false));
    assertEquals("Scalar function (LTRIM(keyword)) not allowed (yet) as arguments for LIKE", failure.getMessage());
}
}

View File

@ -48,6 +48,9 @@ public abstract class ShowTestCase extends CliIntegrationTestCase {
assertThat(readLine(), containsString("----------"));
assertThat(readLine(), RegexMatcher.matches("\\s*LOG\\s*\\|\\s*SCALAR\\s*"));
assertThat(readLine(), RegexMatcher.matches("\\s*LOG10\\s*\\|\\s*SCALAR\\s*"));
assertThat(readLine(), RegexMatcher.matches("\\s*LCASE\\s*\\|\\s*SCALAR\\s*"));
assertThat(readLine(), RegexMatcher.matches("\\s*LENGTH\\s*\\|\\s*SCALAR\\s*"));
assertThat(readLine(), RegexMatcher.matches("\\s*LTRIM\\s*\\|\\s*SCALAR\\s*"));
assertEquals("", readLine());
}

View File

@ -37,6 +37,7 @@ public abstract class CsvSpecTestCase extends SpecBaseIntegrationTestCase {
tests.addAll(readScriptSpec("/alias.csv-spec", parser));
tests.addAll(readScriptSpec("/nulls.csv-spec", parser));
tests.addAll(readScriptSpec("/nested.csv-spec", parser));
tests.addAll(readScriptSpec("/functions.csv-spec", parser));
return tests;
}

View File

@ -34,6 +34,7 @@ public abstract class SqlSpecTestCase extends SpecBaseIntegrationTestCase {
tests.addAll(readScriptSpec("/math.sql-spec", parser));
tests.addAll(readScriptSpec("/agg.sql-spec", parser));
tests.addAll(readScriptSpec("/arithmetic.sql-spec", parser));
tests.addAll(readScriptSpec("/string-functions.sql-spec", parser));
return tests;
}

View File

@ -69,6 +69,16 @@ SIN |SCALAR
SINH |SCALAR
SQRT |SCALAR
TAN |SCALAR
ASCII |SCALAR
CHAR |SCALAR
BIT_LENGTH |SCALAR
CHAR_LENGTH |SCALAR
LCASE |SCALAR
LENGTH |SCALAR
LTRIM |SCALAR
RTRIM |SCALAR
SPACE |SCALAR
UCASE |SCALAR
SCORE |SCORE
;
@ -90,6 +100,7 @@ ACOS |SCALAR
ASIN |SCALAR
ATAN |SCALAR
ATAN2 |SCALAR
ASCII |SCALAR
;
showFunctionsWithPatternChar

View File

@ -222,6 +222,16 @@ SIN |SCALAR
SINH |SCALAR
SQRT |SCALAR
TAN |SCALAR
ASCII |SCALAR
CHAR |SCALAR
BIT_LENGTH |SCALAR
CHAR_LENGTH |SCALAR
LCASE |SCALAR
LENGTH |SCALAR
LTRIM |SCALAR
RTRIM |SCALAR
SPACE |SCALAR
UCASE |SCALAR
SCORE |SCORE
// end::showFunctions
@ -250,6 +260,7 @@ ACOS |SCALAR
ASIN |SCALAR
ATAN |SCALAR
ATAN2 |SCALAR
ASCII |SCALAR
// end::showFunctionsLikeWildcard
;

View File

@ -0,0 +1,30 @@
bitLengthGroupByAndOrderBy
SELECT BIT_LENGTH(first_name), COUNT(*) count FROM "test_emp" GROUP BY BIT_LENGTH(first_name) ORDER BY BIT_LENGTH(first_name) LIMIT 10;
BIT_LENGTH(first_name):i| count:l
24 |4
32 |11
40 |16
48 |24
56 |19
64 |14
72 |10
80 |1
88 |1
;
bitLengthOrderByFieldWithWhere
SELECT BIT_LENGTH(first_name) len, first_name FROM "test_emp" WHERE BIT_LENGTH(first_name) > 64 ORDER BY first_name LIMIT 10;
len:i | first_name:s
80 |Adamantios
72 |Alejandro
72 |Alejandro
72 |Chirstian
72 |Cristinel
72 |Duangkaew
72 |Eberhardt
72 |Margareta
72 |Prasadram
88 |Sreekrishna
;

View File

@ -0,0 +1,76 @@
stringAscii
SELECT ASCII(first_name) s FROM "test_emp" WHERE emp_no < 10010 ORDER BY emp_no;
stringChar
SELECT CHAR(emp_no % 10000) m, first_name FROM "test_emp" WHERE emp_no < 10010 ORDER BY emp_no;
stringAsciiFilter
SELECT emp_no, ASCII(first_name) a FROM "test_emp" WHERE ASCII(first_name) < 10010 ORDER BY emp_no;
stringAsciiEqualsConstant
SELECT emp_no, ASCII(first_name) a, first_name name FROM "test_emp" WHERE ASCII(first_name) = 65 ORDER BY emp_no;
//https://github.com/elastic/elasticsearch/issues/31863
//stringSelectConstantAsciiEqualsConstant
//SELECT ASCII('A') = 65 a FROM "test_emp" WHERE ASCII('A') = 65 ORDER BY emp_no;
stringCharFilter
SELECT emp_no, CHAR(emp_no % 10000) m FROM "test_emp" WHERE CHAR(emp_no % 10000) = 'A';
lcaseFilter
SELECT LCASE(first_name) lc, CHAR(ASCII(LCASE(first_name))) chr FROM "test_emp" WHERE CHAR(ASCII(LCASE(first_name))) = 'a';
ltrimFilter
SELECT LTRIM(first_name) lt FROM "test_emp" WHERE LTRIM(first_name) = 'Bob';
//Not supported yet: scalar functions as arguments for LIKE
//ltrimFilterWithLike
//SELECT LTRIM("first_name") lt FROM "test_emp" WHERE LTRIM("first_name") LIKE '%a%';
rtrimFilter
SELECT RTRIM(first_name) rt FROM "test_emp" WHERE RTRIM(first_name) = 'Johnny';
spaceFilter
SELECT SPACE(languages) spaces, languages FROM "test_emp" WHERE SPACE(languages) = ' ';
spaceFilterWithLengthFunctions
SELECT SPACE(languages) spaces, languages, first_name FROM "test_emp" WHERE CHAR_LENGTH(SPACE(languages)) = 3 ORDER BY first_name;
ucaseFilter
SELECT UCASE(gender) uppercased, COUNT(*) count FROM "test_emp" WHERE UCASE(gender) = 'F' GROUP BY UCASE(gender);
//
// Group and order by
//
asciiGroupByAndOrderBy
SELECT ASCII(first_name) A, COUNT(*) count FROM "test_emp" WHERE ASCII(first_name) < 75 GROUP BY ASCII(first_name) ORDER BY ASCII(first_name) DESC;
charGroupByAndOrderBy
SELECT CHAR(emp_no % 10000) C FROM "test_emp" WHERE emp_no > 10010 GROUP BY CHAR(emp_no % 10000) ORDER BY CHAR(emp_no % 10000) DESC LIMIT 20;
//this would fail because H2 returns the result of char_length as a Long, while we use a DataType of type String (size Integer.MAX_VALUE) and we return an Integer
//CAST is used as a "workaround"
charLengthGroupByAndHavingAndOrderBy
SELECT CAST(CHAR_LENGTH("first_name") AS INT) cl, COUNT(*) count FROM "test_emp" GROUP BY "first_name" HAVING COUNT(*)>1 ORDER BY CHAR_LENGTH("first_name") ;
//this one, without ORDER BY, would return different results than H2. In ES, the default ordering of the composite aggregation
//values is "asc" while in H2 there is no default ordering
lcaseGroupByAndOrderBy
SELECT LCASE(first_name) lc, CHAR(ASCII(LCASE(first_name))) chr FROM "test_emp" GROUP BY LCASE(first_name) ORDER BY LCASE(first_name);
ucaseGroupByAndOrderBy
SELECT UCASE(gender) uc, COUNT(*) count FROM "test_emp" GROUP BY UCASE(gender) ORDER BY UCASE(gender) DESC;
rtrimGroupByAndOrderBy
SELECT RTRIM(first_name) rt FROM "test_emp" GROUP BY RTRIM(first_name) HAVING COUNT(*)>1;
ltrimGroupByAndOrderBy
SELECT LTRIM(first_name) lt FROM "test_emp" GROUP BY LTRIM(first_name) HAVING COUNT(*)>1;
spaceGroupByWithCharLength
SELECT CAST(CHAR_LENGTH(SPACE(languages)) AS INT) cls FROM "test_emp" GROUP BY CHAR_LENGTH(SPACE(languages));
spaceGroupByAndOrderBy
SELECT SPACE("languages") s, COUNT(*) count FROM "test_emp" GROUP BY SPACE("languages") ORDER BY SPACE(languages);
spaceGroupByAndOrderByWithCharLength
SELECT SPACE("languages") s, COUNT(*) count, CAST(CHAR_LENGTH(SPACE("languages")) AS INT) cls FROM "test_emp" WHERE "languages" IS NOT NULL GROUP BY SPACE("languages") ORDER BY SPACE("languages");