From 3ccfc3de58f87249bd811432ec60f78dfd70d371 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Thu, 4 Oct 2018 16:03:57 +0200 Subject: [PATCH] SCRIPTING: Terms set query expression (#33856) * SCRIPTING: Add Expr. Compile for TermSetQuery Ctx. * Follow up to #33602 adding the ability to compile TermsSetQuery scripts with the expressions engine in the same way we support SearchScript in Expressions * Duplicated the code here for now to make the change less complex, the only difference to SearchScript is that `_score` and `_value` are not handled for TermsSetQuery * remove redundant check --- .../expression/ExpressionScriptEngine.java | 225 ++++++++++-------- .../ExpressionTermSetQueryScript.java | 76 ++++++ .../ExpressionTermsSetQueryTests.java | 105 ++++++++ .../script/TermsSetQueryScript.java | 10 +- 4 files changed, 321 insertions(+), 95 deletions(-) create mode 100644 modules/lang-expression/src/main/java/org/elasticsearch/script/expression/ExpressionTermSetQueryScript.java create mode 100644 modules/lang-expression/src/test/java/org/elasticsearch/script/expression/ExpressionTermsSetQueryTests.java diff --git a/modules/lang-expression/src/main/java/org/elasticsearch/script/expression/ExpressionScriptEngine.java b/modules/lang-expression/src/main/java/org/elasticsearch/script/expression/ExpressionScriptEngine.java index 9d305a5f2d9..7330bd6ea5d 100644 --- a/modules/lang-expression/src/main/java/org/elasticsearch/script/expression/ExpressionScriptEngine.java +++ b/modules/lang-expression/src/main/java/org/elasticsearch/script/expression/ExpressionScriptEngine.java @@ -37,7 +37,6 @@ import org.elasticsearch.index.fielddata.IndexNumericFieldData; import org.elasticsearch.index.mapper.DateFieldMapper; import org.elasticsearch.index.mapper.GeoPointFieldMapper.GeoPointFieldType; import org.elasticsearch.index.mapper.MappedFieldType; -import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.script.BucketAggregationScript; import org.elasticsearch.script.BucketAggregationSelectorScript; import org.elasticsearch.script.ClassPermission; @@ -47,6 +46,7 @@ import org.elasticsearch.script.ScriptContext; import org.elasticsearch.script.ScriptEngine; import org.elasticsearch.script.ScriptException; import org.elasticsearch.script.SearchScript; +import org.elasticsearch.script.TermsSetQueryScript; import org.elasticsearch.search.lookup.SearchLookup; import java.io.IOException; @@ -128,6 +128,9 @@ public class ExpressionScriptEngine extends AbstractComponent implements ScriptE } else if (context.instanceClazz.equals(ScoreScript.class)) { ScoreScript.Factory factory = (p, lookup) -> newScoreScript(expr, lookup, p); return context.factoryClazz.cast(factory); + } else if (context.instanceClazz.equals(TermsSetQueryScript.class)) { + TermsSetQueryScript.Factory factory = (p, lookup) -> newTermsSetQueryScript(expr, lookup, p); + return context.factoryClazz.cast(factory); } throw new IllegalArgumentException("expression engine does not know how to handle script context [" + context.name + "]"); } @@ -164,7 +167,6 @@ public class ExpressionScriptEngine extends AbstractComponent implements ScriptE } private SearchScript.LeafFactory newSearchScript(Expression expr, SearchLookup lookup, @Nullable Map vars) { - MapperService mapper = lookup.doc().mapperService(); // NOTE: if we need to do anything complicated with bindings in the future, we can just extend Bindings, // instead of complicating SimpleBindings (which should stay simple) SimpleBindings bindings = new SimpleBindings(); @@ -183,100 +185,11 @@ public class ExpressionScriptEngine extends AbstractComponent implements ScriptE // way to know this is for aggregations and so _value is ok to have... } else if (vars != null && vars.containsKey(variable)) { - // TODO: document and/or error if vars contains _score? - // NOTE: by checking for the variable in vars first, it allows masking document fields with a global constant, - // but if we were to reverse it, we could provide a way to supply dynamic defaults for documents missing the field? - Object value = vars.get(variable); - if (value instanceof Number) { - bindings.add(variable, new DoubleConstValueSource(((Number) value).doubleValue()).asDoubleValuesSource()); - } else { - throw new ParseException("Parameter [" + variable + "] must be a numeric type", 0); - } - + bindFromParams(vars, bindings, variable); } else { - String fieldname = null; - String methodname = null; - String variablename = "value"; // .value is the default for doc['field'], its optional. - boolean dateAccessor = false; // true if the variable is of type doc['field'].date.xxx - VariableContext[] parts = VariableContext.parse(variable); - if (parts[0].text.equals("doc") == false) { - throw new ParseException("Unknown variable [" + parts[0].text + "]", 0); - } - if (parts.length < 2 || parts[1].type != VariableContext.Type.STR_INDEX) { - throw new ParseException("Variable 'doc' must be used with a specific field like: doc['myfield']", 3); - } else { - fieldname = parts[1].text; - } - if (parts.length == 3) { - if (parts[2].type == VariableContext.Type.METHOD) { - methodname = parts[2].text; - } else if (parts[2].type == VariableContext.Type.MEMBER) { - variablename = parts[2].text; - } else { - throw new IllegalArgumentException("Only member variables or member methods may be accessed on a field when not accessing the field directly"); - } - } - if (parts.length > 3) { - // access to the .date "object" within the field - if (parts.length == 4 && ("date".equals(parts[2].text) || "getDate".equals(parts[2].text))) { - if (parts[3].type == VariableContext.Type.METHOD) { - methodname = parts[3].text; - dateAccessor = true; - } else if (parts[3].type == VariableContext.Type.MEMBER) { - variablename = parts[3].text; - dateAccessor = true; - } - } - if (!dateAccessor) { - throw new IllegalArgumentException("Variable [" + variable + "] does not follow an allowed format of either doc['field'] or doc['field'].method()"); - } - } - - MappedFieldType fieldType = mapper.fullName(fieldname); - - if (fieldType == null) { - throw new ParseException("Field [" + fieldname + "] does not exist in mappings", 5); - } - - IndexFieldData fieldData = lookup.doc().getForField(fieldType); - // delegate valuesource creation based on field's type // there are three types of "fields" to expressions, and each one has a different "api" of variables and methods. - - final ValueSource valueSource; - if (fieldType instanceof GeoPointFieldType) { - // geo - if (methodname == null) { - valueSource = GeoField.getVariable(fieldData, fieldname, variablename); - } else { - valueSource = GeoField.getMethod(fieldData, fieldname, methodname); - } - } else if (fieldType instanceof DateFieldMapper.DateFieldType) { - if (dateAccessor) { - // date object - if (methodname == null) { - valueSource = DateObject.getVariable(fieldData, fieldname, variablename); - } else { - valueSource = DateObject.getMethod(fieldData, fieldname, methodname); - } - } else { - // date field itself - if (methodname == null) { - valueSource = DateField.getVariable(fieldData, fieldname, variablename); - } else { - valueSource = DateField.getMethod(fieldData, fieldname, methodname); - } - } - } else if (fieldData instanceof IndexNumericFieldData) { - // number - if (methodname == null) { - valueSource = NumericField.getVariable(fieldData, fieldname, variablename); - } else { - valueSource = NumericField.getMethod(fieldData, fieldname, methodname); - } - } else { - throw new ParseException("Field [" + fieldname + "] must be numeric, date, or geopoint", 5); - } + final ValueSource valueSource = getDocValueSource(variable, lookup); needsScores |= valueSource.getSortField(false).needsScores(); bindings.add(variable, valueSource.asDoubleValuesSource()); } @@ -288,6 +201,30 @@ public class ExpressionScriptEngine extends AbstractComponent implements ScriptE return new ExpressionSearchScript(expr, bindings, specialValue, needsScores); } + private TermsSetQueryScript.LeafFactory newTermsSetQueryScript(Expression expr, SearchLookup lookup, + @Nullable Map vars) { + // NOTE: if we need to do anything complicated with bindings in the future, we can just extend Bindings, + // instead of complicating SimpleBindings (which should stay simple) + SimpleBindings bindings = new SimpleBindings(); + for (String variable : expr.variables) { + try { + if (vars != null && vars.containsKey(variable)) { + bindFromParams(vars, bindings, variable); + } else { + // delegate valuesource creation based on field's type + // there are three types of "fields" to expressions, and each one has a different "api" of variables and methods. + final ValueSource valueSource = getDocValueSource(variable, lookup); + bindings.add(variable, valueSource.asDoubleValuesSource()); + } + } catch (Exception e) { + // we defer "binding" of variables until here: give context for that variable + throw convertToScriptException("link error", expr.sourceText, variable, e); + } + } + ReplaceableConstDoubleValueSource specialValue = null; + return new ExpressionTermSetQueryScript(expr, bindings, specialValue); + } + /** * This is a hack for filter scripts, which must return booleans instead of doubles as expression do. * See https://github.com/elastic/elasticsearch/issues/26429. @@ -362,4 +299,106 @@ public class ExpressionScriptEngine extends AbstractComponent implements ScriptE stack.add(pointer.toString()); throw new ScriptException(message, cause, stack, source, NAME); } + + private static ValueSource getDocValueSource(String variable, SearchLookup lookup) throws ParseException { + VariableContext[] parts = VariableContext.parse(variable); + if (parts[0].text.equals("doc") == false) { + throw new ParseException("Unknown variable [" + parts[0].text + "]", 0); + } + if (parts.length < 2 || parts[1].type != VariableContext.Type.STR_INDEX) { + throw new ParseException("Variable 'doc' must be used with a specific field like: doc['myfield']", 3); + } + + // .value is the default for doc['field'], its optional. + String variablename = "value"; + String methodname = null; + if (parts.length == 3) { + if (parts[2].type == VariableContext.Type.METHOD) { + methodname = parts[2].text; + } else if (parts[2].type == VariableContext.Type.MEMBER) { + variablename = parts[2].text; + } else { + throw new IllegalArgumentException( + "Only member variables or member methods may be accessed on a field when not accessing the field directly" + ); + } + } + // true if the variable is of type doc['field'].date.xxx + boolean dateAccessor = false; + if (parts.length > 3) { + // access to the .date "object" within the field + if (parts.length == 4 && ("date".equals(parts[2].text) || "getDate".equals(parts[2].text))) { + if (parts[3].type == VariableContext.Type.METHOD) { + methodname = parts[3].text; + dateAccessor = true; + } else if (parts[3].type == VariableContext.Type.MEMBER) { + variablename = parts[3].text; + dateAccessor = true; + } + } + if (!dateAccessor) { + throw new IllegalArgumentException( + "Variable [" + variable + "] does not follow an allowed format of either doc['field'] or doc['field'].method()" + ); + } + } + + String fieldname = parts[1].text; + MappedFieldType fieldType = lookup.doc().mapperService().fullName(fieldname); + + if (fieldType == null) { + throw new ParseException("Field [" + fieldname + "] does not exist in mappings", 5); + } + + IndexFieldData fieldData = lookup.doc().getForField(fieldType); + final ValueSource valueSource; + if (fieldType instanceof GeoPointFieldType) { + // geo + if (methodname == null) { + valueSource = GeoField.getVariable(fieldData, fieldname, variablename); + } else { + valueSource = GeoField.getMethod(fieldData, fieldname, methodname); + } + } else if (fieldType instanceof DateFieldMapper.DateFieldType) { + if (dateAccessor) { + // date object + if (methodname == null) { + valueSource = DateObject.getVariable(fieldData, fieldname, variablename); + } else { + valueSource = DateObject.getMethod(fieldData, fieldname, methodname); + } + } else { + // date field itself + if (methodname == null) { + valueSource = DateField.getVariable(fieldData, fieldname, variablename); + } else { + valueSource = DateField.getMethod(fieldData, fieldname, methodname); + } + } + } else if (fieldData instanceof IndexNumericFieldData) { + // number + if (methodname == null) { + valueSource = NumericField.getVariable(fieldData, fieldname, variablename); + } else { + valueSource = NumericField.getMethod(fieldData, fieldname, methodname); + } + } else { + throw new ParseException("Field [" + fieldname + "] must be numeric, date, or geopoint", 5); + } + return valueSource; + } + + // TODO: document and/or error if params contains _score? + // NOTE: by checking for the variable in params first, it allows masking document fields with a global constant, + // but if we were to reverse it, we could provide a way to supply dynamic defaults for documents missing the field? + private static void bindFromParams(@Nullable final Map params, final SimpleBindings bindings, final String variable) throws ParseException { + // NOTE: by checking for the variable in vars first, it allows masking document fields with a global constant, + // but if we were to reverse it, we could provide a way to supply dynamic defaults for documents missing the field? + Object value = params.get(variable); + if (value instanceof Number) { + bindings.add(variable, new DoubleConstValueSource(((Number) value).doubleValue()).asDoubleValuesSource()); + } else { + throw new ParseException("Parameter [" + variable + "] must be a numeric type", 0); + } + } } diff --git a/modules/lang-expression/src/main/java/org/elasticsearch/script/expression/ExpressionTermSetQueryScript.java b/modules/lang-expression/src/main/java/org/elasticsearch/script/expression/ExpressionTermSetQueryScript.java new file mode 100644 index 00000000000..d2fd77713ea --- /dev/null +++ b/modules/lang-expression/src/main/java/org/elasticsearch/script/expression/ExpressionTermSetQueryScript.java @@ -0,0 +1,76 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.script.expression; + +import java.io.IOException; +import org.apache.lucene.expressions.Bindings; +import org.apache.lucene.expressions.Expression; +import org.apache.lucene.expressions.SimpleBindings; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.DoubleValues; +import org.apache.lucene.search.DoubleValuesSource; +import org.elasticsearch.script.GeneralScriptException; +import org.elasticsearch.script.TermsSetQueryScript; + +/** + * A bridge to evaluate an {@link Expression} against {@link Bindings} in the context + * of a {@link TermsSetQueryScript}. + */ +class ExpressionTermSetQueryScript implements TermsSetQueryScript.LeafFactory { + + final Expression exprScript; + final SimpleBindings bindings; + final DoubleValuesSource source; + final ReplaceableConstDoubleValueSource specialValue; // _value + + ExpressionTermSetQueryScript(Expression e, SimpleBindings b, ReplaceableConstDoubleValueSource v) { + exprScript = e; + bindings = b; + source = exprScript.getDoubleValuesSource(bindings); + specialValue = v; + } + + @Override + public TermsSetQueryScript newInstance(final LeafReaderContext leaf) throws IOException { + return new TermsSetQueryScript() { + // Fake the scorer until setScorer is called. + DoubleValues values = source.getValues(leaf, null); + + @Override + public Number execute() { + try { + return values.doubleValue(); + } catch (Exception exception) { + throw new GeneralScriptException("Error evaluating " + exprScript, exception); + } + } + + @Override + public void setDocument(int d) { + try { + values.advanceExact(d); + } catch (IOException e) { + throw new IllegalStateException("Can't advance to doc using " + exprScript, e); + } + } + }; + } + +} diff --git a/modules/lang-expression/src/test/java/org/elasticsearch/script/expression/ExpressionTermsSetQueryTests.java b/modules/lang-expression/src/test/java/org/elasticsearch/script/expression/ExpressionTermsSetQueryTests.java new file mode 100644 index 00000000000..c7eae2446a6 --- /dev/null +++ b/modules/lang-expression/src/test/java/org/elasticsearch/script/expression/ExpressionTermsSetQueryTests.java @@ -0,0 +1,105 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.script.expression; + +import java.io.IOException; +import java.text.ParseException; +import java.util.Collections; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.fielddata.AtomicNumericFieldData; +import org.elasticsearch.index.fielddata.IndexNumericFieldData; +import org.elasticsearch.index.fielddata.SortedNumericDoubleValues; +import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.index.mapper.NumberFieldMapper.NumberFieldType; +import org.elasticsearch.index.mapper.NumberFieldMapper.NumberType; +import org.elasticsearch.script.ScriptException; +import org.elasticsearch.script.TermsSetQueryScript; +import org.elasticsearch.search.lookup.SearchLookup; +import org.elasticsearch.test.ESTestCase; + +import static org.mockito.Matchers.anyInt; +import static org.mockito.Matchers.anyObject; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class ExpressionTermsSetQueryTests extends ESTestCase { + private ExpressionScriptEngine service; + private SearchLookup lookup; + + @Override + public void setUp() throws Exception { + super.setUp(); + + NumberFieldType fieldType = new NumberFieldType(NumberType.DOUBLE); + MapperService mapperService = mock(MapperService.class); + when(mapperService.fullName("field")).thenReturn(fieldType); + when(mapperService.fullName("alias")).thenReturn(fieldType); + + SortedNumericDoubleValues doubleValues = mock(SortedNumericDoubleValues.class); + when(doubleValues.advanceExact(anyInt())).thenReturn(true); + when(doubleValues.nextValue()).thenReturn(2.718); + + AtomicNumericFieldData atomicFieldData = mock(AtomicNumericFieldData.class); + when(atomicFieldData.getDoubleValues()).thenReturn(doubleValues); + + IndexNumericFieldData fieldData = mock(IndexNumericFieldData.class); + when(fieldData.getFieldName()).thenReturn("field"); + when(fieldData.load(anyObject())).thenReturn(atomicFieldData); + + service = new ExpressionScriptEngine(Settings.EMPTY); + lookup = new SearchLookup(mapperService, ignored -> fieldData, null); + } + + private TermsSetQueryScript.LeafFactory compile(String expression) { + TermsSetQueryScript.Factory factory = + service.compile(null, expression, TermsSetQueryScript.CONTEXT, Collections.emptyMap()); + return factory.newFactory(Collections.emptyMap(), lookup); + } + + public void testCompileError() { + ScriptException e = expectThrows(ScriptException.class, () -> { + compile("doc['field'].value * *@#)(@$*@#$ + 4"); + }); + assertTrue(e.getCause() instanceof ParseException); + } + + public void testLinkError() { + ScriptException e = expectThrows(ScriptException.class, () -> { + compile("doc['nonexistent'].value * 5"); + }); + assertTrue(e.getCause() instanceof ParseException); + } + + public void testFieldAccess() throws IOException { + TermsSetQueryScript script = compile("doc['field'].value").newInstance(null); + script.setDocument(1); + + double result = script.execute().doubleValue(); + assertEquals(2.718, result, 0.0); + } + + public void testFieldAccessWithFieldAlias() throws IOException { + TermsSetQueryScript script = compile("doc['alias'].value").newInstance(null); + script.setDocument(1); + + double result = script.execute().doubleValue(); + assertEquals(2.718, result, 0.0); + } +} diff --git a/server/src/main/java/org/elasticsearch/script/TermsSetQueryScript.java b/server/src/main/java/org/elasticsearch/script/TermsSetQueryScript.java index 085f40e0d7a..122e3defe75 100644 --- a/server/src/main/java/org/elasticsearch/script/TermsSetQueryScript.java +++ b/server/src/main/java/org/elasticsearch/script/TermsSetQueryScript.java @@ -61,15 +61,21 @@ public abstract class TermsSetQueryScript { private final LeafSearchLookup leafLookup; public TermsSetQueryScript(Map params, SearchLookup lookup, LeafReaderContext leafContext) { - this.params = new ParameterMap(params, DEPRECATIONS); + Map parameters = new HashMap<>(params); this.leafLookup = lookup.getLeafSearchLookup(leafContext); + parameters.putAll(leafLookup.asMap()); + this.params = new ParameterMap(parameters, DEPRECATIONS); + } + + protected TermsSetQueryScript() { + params = null; + leafLookup = null; } /** * Return the parameters for this script. */ public Map getParams() { - this.params.putAll(leafLookup.asMap()); return params; }