Handle missing values in painless (#30975)

* Handle missing values in painless

Throw an exception for `doc['field'].value`
if this document is missing a value for the `field`.

For 7.0:
This is the default behaviour from 7.0

For 6.x:
To enable this behavior from 6.x, a user can set a jvm.option:
 `-Des.script.exception_for_missing_value=true` on a node.
If a user does not enable this behavior, a deprecation warning is logged on start up.

Closes #29286
This commit is contained in:
Mayya Sharipova 2018-07-09 11:59:49 -04:00 committed by GitHub
parent 584fa261cc
commit 5481fbc249
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 303 additions and 17 deletions

View File

@ -691,6 +691,7 @@ class BuildPlugin implements Plugin<Project> {
systemProperty 'tests.task', path
systemProperty 'tests.security.manager', 'true'
systemProperty 'jna.nosys', 'true'
systemProperty 'es.scripting.exception_for_missing_value', 'true'
// TODO: remove setting logging level via system property
systemProperty 'tests.logger.level', 'WARN'
for (Map.Entry<String, String> property : System.properties.entrySet()) {

View File

@ -119,6 +119,30 @@ GET hockey/_search
----------------------------------------------------------------
// CONSOLE
[float]
===== Missing values
If you request the value from a field `field` that isnt in
the document, `doc['field'].value` for this document returns:
- `0` if a `field` has a numeric datatype (long, double etc.)
- `false` is a `field` has a boolean datatype
- epoch date if a `field` has a date datatype
- `null` if a `field` has a string datatype
- `null` if a `field` has a geo datatype
- `""` if a `field` has a binary datatype
IMPORTANT: Starting in 7.0, `doc['field'].value` throws an exception if
the field is missing in a document. To enable this behavior now,
set a <<jvm-options,`jvm.option`>>
`-Des.scripting.exception_for_missing_value=true` on a node. If you do not enable
this behavior, a deprecation warning is logged on start up.
To check if a document is missing a value, you can call
`doc['field'].size() == 0`.
[float]
==== Updating Fields with Painless

View File

@ -17,7 +17,7 @@
* under the License.
*/
import org.apache.tools.ant.types.Path
esplugin {
description 'An easy, safe and fast scripting language for Elasticsearch'

View File

@ -88,14 +88,13 @@ setup:
---
"Scripted Field with a null safe dereference (null)":
# Change this to ?: once we have it implemented
- do:
search:
body:
script_fields:
bar:
script:
source: "(doc['missing'].value?.length() ?: 0) + params.x;"
source: "(doc['missing'].size() == 0 ? 0 : doc['missing'].value.length()) + params.x;"
params:
x: 5

View File

@ -329,7 +329,7 @@ if (isEclipse == false || project.path == ":server-tests") {
task integTest(type: RandomizedTestingTask,
group: JavaBasePlugin.VERIFICATION_GROUP,
description: 'Multi-node tests',
dependsOn: test.dependsOn) {
dependsOn: test.dependsOn.collect()) {
configure(BuildPlugin.commonTestConfig(project))
classpath = project.test.classpath
testClassesDirs = project.test.testClassesDirs
@ -338,3 +338,13 @@ if (isEclipse == false || project.path == ":server-tests") {
check.dependsOn integTest
integTest.mustRunAfter test
}
// TODO: remove ScriptDocValuesMissingV6BehaviourTests in 7.0
additionalTest('testScriptDocValuesMissingV6Behaviour'){
include '**/ScriptDocValuesMissingV6BehaviourTests.class'
systemProperty 'es.scripting.exception_for_missing_value', 'false'
}
test {
// these are tested explicitly in separate test tasks
exclude '**/*ScriptDocValuesMissingV6BehaviourTests.class'
}

View File

@ -29,6 +29,7 @@ import org.elasticsearch.common.geo.GeoPoint;
import org.elasticsearch.common.geo.GeoUtils;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.common.logging.ESLoggerFactory;
import org.elasticsearch.script.ScriptModule;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.MutableDateTime;
@ -125,6 +126,10 @@ public abstract class ScriptDocValues<T> extends AbstractList<T> {
public long getValue() {
if (count == 0) {
if (ScriptModule.EXCEPTION_FOR_MISSING_VALUE) {
throw new IllegalStateException("A document doesn't have a value for a field! " +
"Use doc[<field>].size()==0 to check if a document is missing a field!");
}
return 0L;
}
return values[0];
@ -167,6 +172,10 @@ public abstract class ScriptDocValues<T> extends AbstractList<T> {
*/
public ReadableDateTime getValue() {
if (count == 0) {
if (ScriptModule.EXCEPTION_FOR_MISSING_VALUE) {
throw new IllegalStateException("A document doesn't have a value for a field! " +
"Use doc[<field>].size()==0 to check if a document is missing a field!");
}
return EPOCH;
}
return get(0);
@ -268,6 +277,10 @@ public abstract class ScriptDocValues<T> extends AbstractList<T> {
public double getValue() {
if (count == 0) {
if (ScriptModule.EXCEPTION_FOR_MISSING_VALUE) {
throw new IllegalStateException("A document doesn't have a value for a field! " +
"Use doc[<field>].size()==0 to check if a document is missing a field!");
}
return 0d;
}
return values[0];
@ -324,6 +337,10 @@ public abstract class ScriptDocValues<T> extends AbstractList<T> {
public GeoPoint getValue() {
if (count == 0) {
if (ScriptModule.EXCEPTION_FOR_MISSING_VALUE) {
throw new IllegalStateException("A document doesn't have a value for a field! " +
"Use doc[<field>].size()==0 to check if a document is missing a field!");
}
return null;
}
return values[0];
@ -436,7 +453,14 @@ public abstract class ScriptDocValues<T> extends AbstractList<T> {
}
public boolean getValue() {
return count != 0 && values[0];
if (count == 0) {
if (ScriptModule.EXCEPTION_FOR_MISSING_VALUE) {
throw new IllegalStateException("A document doesn't have a value for a field! " +
"Use doc[<field>].size()==0 to check if a document is missing a field!");
}
return false;
}
return values[0];
}
@Override
@ -519,7 +543,14 @@ public abstract class ScriptDocValues<T> extends AbstractList<T> {
}
public String getValue() {
return count == 0 ? null : get(0);
if (count == 0) {
if (ScriptModule.EXCEPTION_FOR_MISSING_VALUE) {
throw new IllegalStateException("A document doesn't have a value for a field! " +
"Use doc[<field>].size()==0 to check if a document is missing a field!");
}
return null;
}
return get(0);
}
}
@ -540,7 +571,14 @@ public abstract class ScriptDocValues<T> extends AbstractList<T> {
}
public BytesRef getValue() {
return count == 0 ? new BytesRef() : get(0);
if (count == 0) {
if (ScriptModule.EXCEPTION_FOR_MISSING_VALUE) {
throw new IllegalStateException("A document doesn't have a value for a field! " +
"Use doc[<field>].size()==0 to check if a document is missing a field!");
}
return new BytesRef();
}
return get(0);
}
}

View File

@ -31,6 +31,9 @@ import org.elasticsearch.common.settings.ClusterSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.plugins.ScriptPlugin;
import org.elasticsearch.search.aggregations.pipeline.movfn.MovingFunctionScript;
import org.elasticsearch.common.Booleans;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.common.logging.Loggers;
/**
* Manages building {@link ScriptService}.
@ -61,6 +64,11 @@ public class ScriptModule {
).collect(Collectors.toMap(c -> c.name, Function.identity()));
}
public static final boolean EXCEPTION_FOR_MISSING_VALUE =
Booleans.parseBoolean(System.getProperty("es.scripting.exception_for_missing_value", "false"));
private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(Loggers.getLogger(ScriptModule.class));
private final ScriptService scriptService;
public ScriptModule(Settings settings, List<ScriptPlugin> scriptPlugins) {
@ -84,6 +92,10 @@ public class ScriptModule {
}
}
}
if (EXCEPTION_FOR_MISSING_VALUE == false)
DEPRECATION_LOGGER.deprecated("Script: returning default values for missing document values is deprecated. " +
"Set system property '-Des.scripting.exception_for_missing_value=true' " +
"to make behaviour compatible with future major versions.");
scriptService = new ScriptService(settings, Collections.unmodifiableMap(engines), Collections.unmodifiableMap(contexts));
}

View File

@ -44,7 +44,13 @@ public class ScriptDocValuesDatesTests extends ESTestCase {
for (int round = 0; round < 10; round++) {
int d = between(0, values.length - 1);
dates.setNextDocId(d);
assertEquals(expectedDates[d].length > 0 ? expectedDates[d][0] : new DateTime(0, DateTimeZone.UTC), dates.getValue());
if (expectedDates[d].length > 0) {
assertEquals(expectedDates[d][0] , dates.getValue());
} else {
Exception e = expectThrows(IllegalStateException.class, () -> dates.getValue());
assertEquals("A document doesn't have a value for a field! " +
"Use doc[<field>].size()==0 to check if a document is missing a field!", e.getMessage());
}
assertEquals(values[d].length, dates.size());
for (int i = 0; i < values[d].length; i++) {

View File

@ -24,7 +24,6 @@ import org.elasticsearch.test.ESTestCase;
import java.io.IOException;
public class ScriptDocValuesLongsTests extends ESTestCase {
public void testLongs() throws IOException {
long[][] values = new long[between(3, 10)][];
@ -39,8 +38,13 @@ public class ScriptDocValuesLongsTests extends ESTestCase {
for (int round = 0; round < 10; round++) {
int d = between(0, values.length - 1);
longs.setNextDocId(d);
assertEquals(values[d].length > 0 ? values[d][0] : 0, longs.getValue());
if (values[d].length > 0) {
assertEquals(values[d][0], longs.getValue());
} else {
Exception e = expectThrows(IllegalStateException.class, () -> longs.getValue());
assertEquals("A document doesn't have a value for a field! " +
"Use doc[<field>].size()==0 to check if a document is missing a field!", e.getMessage());
}
assertEquals(values[d].length, longs.size());
assertEquals(values[d].length, longs.getValues().size());
for (int i = 0; i < values[d].length; i++) {

View File

@ -0,0 +1,195 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.fielddata;
import org.elasticsearch.common.geo.GeoPoint;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.fielddata.ScriptDocValues.Longs;
import org.elasticsearch.index.fielddata.ScriptDocValues.Dates;
import org.elasticsearch.index.fielddata.ScriptDocValues.Booleans;
import org.elasticsearch.plugins.ScriptPlugin;
import org.elasticsearch.script.MockScriptEngine;
import org.elasticsearch.script.ScriptContext;
import org.elasticsearch.script.ScriptEngine;
import org.elasticsearch.script.ScriptModule;
import org.elasticsearch.test.ESTestCase;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.ReadableDateTime;
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import static java.util.Collections.singletonList;
public class ScriptDocValuesMissingV6BehaviourTests extends ESTestCase {
public void testScriptMissingValuesWarning(){
new ScriptModule(Settings.EMPTY, singletonList(new ScriptPlugin() {
@Override
public ScriptEngine getScriptEngine(Settings settings, Collection<ScriptContext<?>> contexts) {
return new MockScriptEngine(MockScriptEngine.NAME, Collections.singletonMap("1", script -> "1"));
}
}));
assertWarnings("Script: returning default values for missing document values is deprecated. " +
"Set system property '-Des.scripting.exception_for_missing_value=true' " +
"to make behaviour compatible with future major versions.");
}
public void testZeroForMissingValueLong() throws IOException {
long[][] values = new long[between(3, 10)][];
for (int d = 0; d < values.length; d++) {
values[d] = new long[0];
}
Longs longs = wrap(values);
for (int round = 0; round < 10; round++) {
int d = between(0, values.length - 1);
longs.setNextDocId(d);
assertEquals(0, longs.getValue());
}
}
public void testEpochForMissingValueDate() throws IOException {
final ReadableDateTime EPOCH = new DateTime(0, DateTimeZone.UTC);
long[][] values = new long[between(3, 10)][];
for (int d = 0; d < values.length; d++) {
values[d] = new long[0];
}
Dates dates = wrapDates(values);
for (int round = 0; round < 10; round++) {
int d = between(0, values.length - 1);
dates.setNextDocId(d);
assertEquals(EPOCH, dates.getValue());
}
}
public void testFalseForMissingValueBoolean() throws IOException {
long[][] values = new long[between(3, 10)][];
for (int d = 0; d < values.length; d++) {
values[d] = new long[0];
}
Booleans bools = wrapBooleans(values);
for (int round = 0; round < 10; round++) {
int d = between(0, values.length - 1);
bools.setNextDocId(d);
assertEquals(false, bools.getValue());
}
}
public void testNullForMissingValueGeo() throws IOException{
final MultiGeoPointValues values = wrap(new GeoPoint[0]);
final ScriptDocValues.GeoPoints script = new ScriptDocValues.GeoPoints(values);
script.setNextDocId(0);
assertEquals(null, script.getValue());
}
private Longs wrap(long[][] values) {
return new Longs(new AbstractSortedNumericDocValues() {
long[] current;
int i;
@Override
public boolean advanceExact(int doc) {
i = 0;
current = values[doc];
return current.length > 0;
}
@Override
public int docValueCount() {
return current.length;
}
@Override
public long nextValue() {
return current[i++];
}
});
}
private Booleans wrapBooleans(long[][] values) {
return new Booleans(new AbstractSortedNumericDocValues() {
long[] current;
int i;
@Override
public boolean advanceExact(int doc) {
i = 0;
current = values[doc];
return current.length > 0;
}
@Override
public int docValueCount() {
return current.length;
}
@Override
public long nextValue() {
return current[i++];
}
});
}
private Dates wrapDates(long[][] values) {
return new Dates(new AbstractSortedNumericDocValues() {
long[] current;
int i;
@Override
public boolean advanceExact(int doc) {
current = values[doc];
i = 0;
return current.length > 0;
}
@Override
public int docValueCount() {
return current.length;
}
@Override
public long nextValue() {
return current[i++];
}
});
}
private static MultiGeoPointValues wrap(final GeoPoint... points) {
return new MultiGeoPointValues() {
int docID = -1;
int i;
@Override
public GeoPoint nextValue() {
if (docID != 0) {
fail();
}
return points[i++];
}
@Override
public boolean advanceExact(int docId) {
docID = docId;
return points.length > 0;
}
@Override
public int docValueCount() {
if (docID != 0) {
return 0;
}
return points.length;
}
};
}
}

View File

@ -134,15 +134,12 @@ public class FunctionScoreIT extends ESIntegTestCase {
}
public void testMinScoreFunctionScoreBasic() throws IOException {
index(INDEX, TYPE, jsonBuilder().startObject().field("num", 2).endObject());
refresh();
float score = randomFloat();
float minScore = randomFloat();
index(INDEX, TYPE, jsonBuilder().startObject()
.field("num", 2)
.field("random_score", score) // Pass the random score as a document field so that it can be extracted in the script
.endObject());
.field("num", 2)
.field("random_score", score) // Pass the random score as a document field so that it can be extracted in the script
.endObject());
refresh();
ensureYellow();