Expose multi-valued dates to scripts and document painless's date functions (#22875)

Implemented by wrapping an array of reused `MutableDateTime`s that
we grow when needed. The `MutableDateTime`s are reused when we
move to the next document.

Also improves the error message returned when attempting to modify
the `ScriptDocValues`, removes a couple of allocations, and documents
that the date functions are available in Painless.

Relates to #22162
This commit is contained in:
Nik Everett 2017-02-01 21:57:07 -05:00 committed by GitHub
parent 7f59bed87b
commit dacc150934
9 changed files with 405 additions and 66 deletions

View File

@ -25,32 +25,62 @@ import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.geo.GeoHashUtils;
import org.elasticsearch.common.geo.GeoPoint;
import org.elasticsearch.common.geo.GeoUtils;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.MutableDateTime;
import org.joda.time.ReadableDateTime;
import java.util.AbstractList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.function.UnaryOperator;
/**
* Script level doc values, the assumption is that any implementation will implement a <code>getValue</code>
* and a <code>getValues</code> that return the relevant type that then can be used in scripts.
*/
public interface ScriptDocValues<T> extends List<T> {
public abstract class ScriptDocValues<T> extends AbstractList<T> {
/**
* Set the current doc ID.
*/
void setNextDocId(int docId);
public abstract void setNextDocId(int docId);
/**
* Return the list of the values for the current document. The returned list is this object itself, not a copy.
*/
List<T> getValues();
public final List<T> getValues() {
    // This object already is the list of values, so it is returned directly instead of being copied or wrapped.
    return this;
}
public static final class Strings extends AbstractList<String> implements ScriptDocValues<String> {
// Throw meaningful exceptions if someone tries to modify the ScriptDocValues.
// AbstractList routes its remaining mutators through the methods below: add(E) and addAll(...) call add(int, E),
// while clear(), removeAll(...), retainAll(...), removeIf(...) and Iterator.remove() end up in remove(int).

/**
 * Always fails because doc values are read-only.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public final void add(int index, T element) {
    throw new UnsupportedOperationException("doc values are unmodifiable");
}

/**
 * Always fails because doc values are read-only.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public final boolean remove(Object o) {
    throw new UnsupportedOperationException("doc values are unmodifiable");
}

/**
 * Always fails because doc values are read-only. Overridden so that index based removal and the iterator based
 * bulk removals inherited from {@link AbstractList} report the same message as the other mutators instead of
 * a bare {@link UnsupportedOperationException}.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public final T remove(int index) {
    throw new UnsupportedOperationException("doc values are unmodifiable");
}

/**
 * Always fails because doc values are read-only.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public final void replaceAll(UnaryOperator<T> operator) {
    throw new UnsupportedOperationException("doc values are unmodifiable");
}

/**
 * Always fails because doc values are read-only.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public final T set(int index, T element) {
    throw new UnsupportedOperationException("doc values are unmodifiable");
}

/**
 * Always fails because doc values are read-only.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public final void sort(Comparator<? super T> c) {
    throw new UnsupportedOperationException("doc values are unmodifiable");
}
public static final class Strings extends ScriptDocValues<String> {
private final SortedBinaryDocValues values;
@ -84,11 +114,6 @@ public interface ScriptDocValues<T> extends List<T> {
}
}
@Override
public List<String> getValues() {
return Collections.unmodifiableList(this);
}
@Override
public String get(int index) {
return values.valueAt(index).utf8ToString();
@ -101,10 +126,10 @@ public interface ScriptDocValues<T> extends List<T> {
}
public static class Longs extends AbstractList<Long> implements ScriptDocValues<Long> {
public static final class Longs extends ScriptDocValues<Long> {
private final SortedNumericDocValues values;
private final MutableDateTime date = new MutableDateTime(0, DateTimeZone.UTC);
private Dates dates;
public Longs(SortedNumericDocValues values) {
this.values = values;
@ -113,6 +138,9 @@ public interface ScriptDocValues<T> extends List<T> {
/**
 * Positions the underlying values on {@code docId} and, if the date view has already been created,
 * refreshes it as well.
 */
@Override
public void setNextDocId(int docId) {
    values.setDocument(docId);
    if (dates == null) {
        // Neither getDate() nor getDates() has been called yet, so there is no date view to refresh.
        return;
    }
    dates.refreshArray();
}
public SortedNumericDocValues getInternalValues() {
@ -127,14 +155,20 @@ public interface ScriptDocValues<T> extends List<T> {
return values.valueAt(0);
}
@Override
public List<Long> getValues() {
return Collections.unmodifiableList(this);
/**
 * Returns the value of {@link Dates#getValue()} for the date view of these longs, creating that view on first use.
 */
public ReadableDateTime getDate() {
    if (dates != null) {
        return dates.getValue();
    }
    // First date access on this instance: create the view lazily and fill it from the current document.
    dates = new Dates(values);
    dates.refreshArray();
    return dates.getValue();
}
public ReadableDateTime getDate() {
date.setMillis(getValue());
return date;
/**
 * Returns the date view of these longs, creating it on first use.
 */
public List<ReadableDateTime> getDates() {
    if (dates != null) {
        return dates;
    }
    // First date access on this instance: create the view lazily and fill it from the current document.
    dates = new Dates(values);
    dates.refreshArray();
    return dates;
}
@Override
@ -146,10 +180,87 @@ public interface ScriptDocValues<T> extends List<T> {
public int size() {
return values.count();
}
}
public static class Doubles extends AbstractList<Double> implements ScriptDocValues<Double> {
public static final class Dates extends ScriptDocValues<ReadableDateTime> {
    /** Returned by {@link #getValue()} when the current document has no values. */
    private static final ReadableDateTime EPOCH = new DateTime(0, DateTimeZone.UTC);

    private final SortedNumericDocValues values;

    /**
     * The values wrapped in {@link MutableDateTime}s. Null by default and allocated on first usage so it can be given a
     * reasonable size. The array is kept around so new {@link MutableDateTime}s don't have to be allocated on every
     * usage; the existing ones are reused for every document instead.
     */
    private MutableDateTime[] dates;

    public Dates(SortedNumericDocValues values) {
        this.values = values;
    }

    /**
     * Fetch the first field value or 0 millis after epoch if there are no values.
     */
    public ReadableDateTime getValue() {
        return values.count() == 0 ? EPOCH : get(0);
    }

    @Override
    public ReadableDateTime get(int index) {
        if (index < values.count()) {
            return dates[index];
        }
        throw new IndexOutOfBoundsException(
                "attempted to fetch the [" + index + "] date when there are only [" + values.count() + "] dates.");
    }

    @Override
    public int size() {
        return values.count();
    }

    @Override
    public void setNextDocId(int docId) {
        values.setDocument(docId);
        refreshArray();
    }

    /**
     * Refresh the backing array. Package private so it can be called when {@link Longs} loads dates.
     */
    void refreshArray() {
        final int count = values.count();
        if (count == 0) {
            return;
        }
        // Number of leading slots that already hold a MutableDateTime which can simply be re-pointed.
        final int reusable;
        if (dates == null) {
            // Happens for the first document with values. Allocation was delayed so the array gets a reasonable size.
            dates = new MutableDateTime[count];
            reusable = 0;
        } else if (count > dates.length) {
            // Happens when we move to a document that has more dates than any document before it.
            final MutableDateTime[] previous = dates;
            dates = new MutableDateTime[count];
            System.arraycopy(previous, 0, dates, 0, previous.length);
            reusable = previous.length;
        } else {
            reusable = count;
        }
        for (int i = 0; i < reusable; i++) {
            dates[i].setMillis(values.valueAt(i));
        }
        for (int i = reusable; i < count; i++) {
            dates[i] = new MutableDateTime(values.valueAt(i), DateTimeZone.UTC);
        }
    }
}
public static final class Doubles extends ScriptDocValues<Double> {
private final SortedNumericDoubleValues values;
@ -174,11 +285,6 @@ public interface ScriptDocValues<T> extends List<T> {
return values.valueAt(0);
}
@Override
public List<Double> getValues() {
return Collections.unmodifiableList(this);
}
@Override
public Double get(int index) {
return values.valueAt(index);
@ -190,7 +296,7 @@ public interface ScriptDocValues<T> extends List<T> {
}
}
class GeoPoints extends AbstractList<GeoPoint> implements ScriptDocValues<GeoPoint> {
public static final class GeoPoints extends ScriptDocValues<GeoPoint> {
private final MultiGeoPointValues values;
@ -237,11 +343,6 @@ public interface ScriptDocValues<T> extends List<T> {
return getValue().lon();
}
@Override
public List<GeoPoint> getValues() {
return Collections.unmodifiableList(this);
}
@Override
public GeoPoint get(int index) {
final GeoPoint point = values.valueAt(index);
@ -291,7 +392,7 @@ public interface ScriptDocValues<T> extends List<T> {
}
}
final class Booleans extends AbstractList<Boolean> implements ScriptDocValues<Boolean> {
public static final class Booleans extends ScriptDocValues<Boolean> {
private final SortedNumericDocValues values;
@ -304,11 +405,6 @@ public interface ScriptDocValues<T> extends List<T> {
values.setDocument(docId);
}
@Override
public List<Boolean> getValues() {
return this;
}
/**
 * Returns {@code true} only when the current document has at least one value and the first one equals 1.
 */
public boolean getValue() {
    if (values.count() == 0) {
        return false;
    }
    return values.valueAt(0) == 1;
}
@ -325,7 +421,7 @@ public interface ScriptDocValues<T> extends List<T> {
}
public static class BytesRefs extends AbstractList<BytesRef> implements ScriptDocValues<BytesRef> {
public static final class BytesRefs extends ScriptDocValues<BytesRef> {
private final SortedBinaryDocValues values;
@ -350,11 +446,6 @@ public interface ScriptDocValues<T> extends List<T> {
return values.valueAt(0);
}
@Override
public List<BytesRef> getValues() {
return Collections.unmodifiableList(this);
}
@Override
public BytesRef get(int index) {
return values.valueAt(index);
@ -365,5 +456,4 @@ public interface ScriptDocValues<T> extends List<T> {
return values.count();
}
}
}

View File

@ -46,9 +46,7 @@ import org.joda.time.DateTimeZone;
import java.io.IOException;
import java.net.InetAddress;
import java.util.AbstractList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
@ -233,11 +231,11 @@ public class IpFieldMapper extends FieldMapper {
InetAddressPoint.decode(min), InetAddressPoint.decode(max));
}
public static final class IpScriptDocValues extends AbstractList<String> implements ScriptDocValues<String> {
public static final class IpScriptDocValues extends ScriptDocValues<String> {
private final RandomAccessOrds values;
IpScriptDocValues(RandomAccessOrds values) {
/**
 * Creates script doc values backed by the given ordinals.
 */
public IpScriptDocValues(RandomAccessOrds values) {
this.values = values;
}
@ -254,11 +252,6 @@ public class IpFieldMapper extends FieldMapper {
}
}
@Override
public List<String> getValues() {
return Collections.unmodifiableList(this);
}
@Override
public String get(int index) {
BytesRef encoded = values.lookupOrd(values.ordAt(0));

View File

@ -0,0 +1,76 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.fielddata;
import org.apache.lucene.index.SortedNumericDocValues;
import org.elasticsearch.index.fielddata.ScriptDocValues.Dates;
import org.elasticsearch.test.ESTestCase;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.ReadableDateTime;
/**
 * Checks {@link Dates} against randomly generated documents that hold zero, one or many values, visiting the
 * documents in random order with the same {@link Dates} instance.
 */
public class ScriptDocValuesDatesTests extends ESTestCase {

    public void test() {
        final int docCount = between(3, 10);
        long[][] values = new long[docCount][];
        ReadableDateTime[][] expectedDates = new ReadableDateTime[docCount][];
        for (int doc = 0; doc < docCount; doc++) {
            // Half of the documents have zero or one value, the other half have many.
            final int valueCount;
            if (randomBoolean()) {
                valueCount = randomBoolean() ? 0 : 1;
            } else {
                valueCount = between(2, 100);
            }
            long[] millis = new long[valueCount];
            ReadableDateTime[] expected = new ReadableDateTime[valueCount];
            for (int i = 0; i < valueCount; i++) {
                expected[i] = new DateTime(randomNonNegativeLong(), DateTimeZone.UTC);
                millis[i] = expected[i].getMillis();
            }
            values[doc] = millis;
            expectedDates[doc] = expected;
        }

        Dates dates = wrap(values);

        int round = 0;
        while (round < 10) {
            int doc = between(0, values.length - 1);
            dates.setNextDocId(doc);

            ReadableDateTime[] expected = expectedDates[doc];
            // A document without values is expected to report the epoch as its single value.
            assertEquals(expected.length > 0 ? expected[0] : new DateTime(0, DateTimeZone.UTC), dates.getValue());
            assertEquals(values[doc].length, dates.size());
            for (int i = 0; i < values[doc].length; i++) {
                assertEquals(expected[i], dates.get(i));
            }

            Exception e = expectThrows(UnsupportedOperationException.class, () -> dates.add(new DateTime()));
            assertEquals("doc values are unmodifiable", e.getMessage());
            round++;
        }
    }

    /**
     * Wraps {@code values} so that {@code values[doc]} holds the values of document {@code doc}.
     */
    private Dates wrap(long[][] values) {
        SortedNumericDocValues docValues = new SortedNumericDocValues() {
            long[] current;

            @Override
            public void setDocument(int doc) {
                current = values[doc];
            }

            @Override
            public int count() {
                return current.length;
            }

            @Override
            public long valueAt(int index) {
                return current[index];
            }
        };
        return new Dates(docValues);
    }
}

View File

@ -25,7 +25,7 @@ import org.elasticsearch.test.ESTestCase;
import java.util.Arrays;
public class ScriptDocValuesTests extends ESTestCase {
public class ScriptDocValuesGeoPointsTests extends ESTestCase {
private static MultiGeoPointValues wrap(final GeoPoint... points) {
return new MultiGeoPointValues() {

View File

@ -0,0 +1,103 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.fielddata;
import org.apache.lucene.index.SortedNumericDocValues;
import org.elasticsearch.index.fielddata.ScriptDocValues.Longs;
import org.elasticsearch.test.ESTestCase;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.ReadableDateTime;
public class ScriptDocValuesLongsTests extends ESTestCase {
public void testLongs() {
long[][] values = new long[between(3, 10)][];
for (int d = 0; d < values.length; d++) {
values[d] = new long[randomBoolean() ? randomBoolean() ? 0 : 1 : between(2, 100)];
for (int i = 0; i < values[d].length; i++) {
values[d][i] = randomLong();
}
}
Longs longs = wrap(values);
for (int round = 0; round < 10; round++) {
int d = between(0, values.length - 1);
longs.setNextDocId(d);
assertEquals(values[d].length > 0 ? values[d][0] : 0, longs.getValue());
assertEquals(values[d].length, longs.size());
assertEquals(values[d].length, longs.getValues().size());
for (int i = 0; i < values[d].length; i++) {
assertEquals(values[d][i], longs.get(i).longValue());
assertEquals(values[d][i], longs.getValues().get(i).longValue());
}
Exception e = expectThrows(UnsupportedOperationException.class, () -> longs.getValues().add(100L));
assertEquals("doc values are unmodifiable", e.getMessage());
}
}
public void testDates() {
long[][] values = new long[between(3, 10)][];
ReadableDateTime[][] dates = new ReadableDateTime[values.length][];
for (int d = 0; d < values.length; d++) {
values[d] = new long[randomBoolean() ? randomBoolean() ? 0 : 1 : between(2, 100)];
dates[d] = new ReadableDateTime[values[d].length];
for (int i = 0; i < values[d].length; i++) {
dates[d][i] = new DateTime(randomNonNegativeLong(), DateTimeZone.UTC);
values[d][i] = dates[d][i].getMillis();
}
}
Longs longs = wrap(values);
for (int round = 0; round < 10; round++) {
int d = between(0, values.length - 1);
longs.setNextDocId(d);
assertEquals(dates[d].length > 0 ? dates[d][0] : new DateTime(0, DateTimeZone.UTC), longs.getDate());
assertEquals(values[d].length, longs.getDates().size());
for (int i = 0; i < values[d].length; i++) {
assertEquals(dates[d][i], longs.getDates().get(i));
}
Exception e = expectThrows(UnsupportedOperationException.class, () -> longs.getDates().add(new DateTime()));
assertEquals("doc values are unmodifiable", e.getMessage());
}
}
private Longs wrap(long[][] values) {
return new Longs(new SortedNumericDocValues() {
long[] current;
@Override
public void setDocument(int doc) {
current = values[doc];
}
@Override
public int count() {
return current.length;
}
@Override
public long valueAt(int index) {
return current[index];
}
});
}
}

View File

@ -39,27 +39,27 @@ To illustrate how Painless works, let's load some hockey stats into an Elasticse
----------------------------------------------------------------
PUT hockey/player/_bulk?refresh
{"index":{"_id":1}}
{"first":"johnny","last":"gaudreau","goals":[9,27,1],"assists":[17,46,0],"gp":[26,82,1]}
{"first":"johnny","last":"gaudreau","goals":[9,27,1],"assists":[17,46,0],"gp":[26,82,1],"born":"1993/08/13"}
{"index":{"_id":2}}
{"first":"sean","last":"monohan","goals":[7,54,26],"assists":[11,26,13],"gp":[26,82,82]}
{"first":"sean","last":"monohan","goals":[7,54,26],"assists":[11,26,13],"gp":[26,82,82],"born":"1994/10/12"}
{"index":{"_id":3}}
{"first":"jiri","last":"hudler","goals":[5,34,36],"assists":[11,62,42],"gp":[24,80,79]}
{"first":"jiri","last":"hudler","goals":[5,34,36],"assists":[11,62,42],"gp":[24,80,79],"born":"1984/01/04"}
{"index":{"_id":4}}
{"first":"micheal","last":"frolik","goals":[4,6,15],"assists":[8,23,15],"gp":[26,82,82]}
{"first":"micheal","last":"frolik","goals":[4,6,15],"assists":[8,23,15],"gp":[26,82,82],"born":"1988/02/17"}
{"index":{"_id":5}}
{"first":"sam","last":"bennett","goals":[5,0,0],"assists":[8,1,0],"gp":[26,1,0]}
{"first":"sam","last":"bennett","goals":[5,0,0],"assists":[8,1,0],"gp":[26,1,0],"born":"1996/06/20"}
{"index":{"_id":6}}
{"first":"dennis","last":"wideman","goals":[0,26,15],"assists":[11,30,24],"gp":[26,81,82]}
{"first":"dennis","last":"wideman","goals":[0,26,15],"assists":[11,30,24],"gp":[26,81,82],"born":"1983/03/20"}
{"index":{"_id":7}}
{"first":"david","last":"jones","goals":[7,19,5],"assists":[3,17,4],"gp":[26,45,34]}
{"first":"david","last":"jones","goals":[7,19,5],"assists":[3,17,4],"gp":[26,45,34],"born":"1984/08/10"}
{"index":{"_id":8}}
{"first":"tj","last":"brodie","goals":[2,14,7],"assists":[8,42,30],"gp":[26,82,82]}
{"first":"tj","last":"brodie","goals":[2,14,7],"assists":[8,42,30],"gp":[26,82,82],"born":"1990/06/07"}
{"index":{"_id":9}}
{"first":"mark","last":"giordano","goals":[6,30,15],"assists":[3,30,24],"gp":[26,60,63]}
{"first":"mark","last":"giordano","goals":[6,30,15],"assists":[3,30,24],"gp":[26,60,63],"born":"1983/10/03"}
{"index":{"_id":10}}
{"first":"mikael","last":"backlund","goals":[3,15,13],"assists":[6,24,18],"gp":[26,82,82]}
{"first":"mikael","last":"backlund","goals":[3,15,13],"assists":[6,24,18],"gp":[26,82,82],"born":"1989/03/17"}
{"index":{"_id":11}}
{"first":"joe","last":"colborne","goals":[3,18,13],"assists":[6,20,24],"gp":[26,67,82]}
{"first":"joe","last":"colborne","goals":[3,18,13],"assists":[6,20,24],"gp":[26,67,82],"born":"1990/01/30"}
----------------------------------------------------------------
// CONSOLE
// TESTSETUP
@ -194,6 +194,40 @@ POST hockey/player/1/_update
----------------------------------------------------------------
// CONSOLE
[float]
[[modules-scripting-painless-dates]]
=== Dates
Dates are a little different to work with than regular values. Here is an
example returning the year of every player's birth:
[source,js]
----------------------------------------------------------------
GET hockey/_search
{
"script_fields": {
"birth_year": {
"script": {
"inline": "doc.born.date.year"
}
}
}
}
----------------------------------------------------------------
// CONSOLE
The key here is that instead of indexing directly into `doc.born` like you would
a normal field you have to call `doc.born.date` to get a
<<painless-api-reference-org-joda-time-ReadableDateTime, `ReadableDateTime`>>.
From there you can call methods like
<<painless-api-reference-org-joda-time-ReadableDateTime-getYear-0, `getYear`>>,
and <<painless-api-reference-org-joda-time-ReadableDateTime-getDayOfWeek-0, `getDayOfWeek`>>.
In the example above `year` is a shortcut to `getYear()`.
If the date field is a list then `date` will always return the first date. To
access all the dates use `dates` instead of `date`.
[float]
[[modules-scripting-painless-regex]]
=== Regular expressions

View File

@ -6,6 +6,7 @@ Rebuild by running `gradle generatePainlessApi`.
[[painless-api-reference-org-elasticsearch-index-fielddata-ScriptDocValues-Longs]]++org.elasticsearch.index.fielddata.ScriptDocValues.Longs++::
* ++[[painless-api-reference-org-elasticsearch-index-fielddata-ScriptDocValues-Longs-get-1]]<<painless-api-reference-Long,Long>> link:{elasticsearch-javadoc}/org/elasticsearch/index/fielddata/ScriptDocValues$Longs.html#get%2Dint%2D[get](int)++
* ++[[painless-api-reference-org-elasticsearch-index-fielddata-ScriptDocValues-Longs-getDate-0]]<<painless-api-reference-org-joda-time-ReadableDateTime,org.joda.time.ReadableDateTime>> link:{elasticsearch-javadoc}/org/elasticsearch/index/fielddata/ScriptDocValues$Longs.html#getDate%2D%2D[getDate]()++
* ++[[painless-api-reference-org-elasticsearch-index-fielddata-ScriptDocValues-Longs-getDates-0]]<<painless-api-reference-List,List>> link:{elasticsearch-javadoc}/org/elasticsearch/index/fielddata/ScriptDocValues$Longs.html#getDates%2D%2D[getDates]()++
* ++[[painless-api-reference-org-elasticsearch-index-fielddata-ScriptDocValues-Longs-getValue-0]]long link:{elasticsearch-javadoc}/org/elasticsearch/index/fielddata/ScriptDocValues$Longs.html#getValue%2D%2D[getValue]()++
* ++[[painless-api-reference-org-elasticsearch-index-fielddata-ScriptDocValues-Longs-getValues-0]]<<painless-api-reference-List,List>> link:{elasticsearch-javadoc}/org/elasticsearch/index/fielddata/ScriptDocValues$Longs.html#getValues%2D%2D[getValues]()++
* Inherits methods from ++<<painless-api-reference-Collection,Collection>>++, ++<<painless-api-reference-Iterable,Iterable>>++, ++<<painless-api-reference-List,List>>++, ++<<painless-api-reference-Object,Object>>++

View File

@ -82,6 +82,7 @@ class org.elasticsearch.index.fielddata.ScriptDocValues.Longs -> org.elasticsear
long getValue()
List getValues()
org.joda.time.ReadableDateTime getDate()
List getDates()
}
class org.elasticsearch.index.fielddata.ScriptDocValues.Doubles -> org.elasticsearch.index.fielddata.ScriptDocValues$Doubles extends List,Collection,Iterable,Object {

View File

@ -12,12 +12,22 @@ setup:
type: keyword
missing:
type: keyword
date:
type: date
format: yyyy/MM/dd
dates:
type: date
format: yyyy/MM/dd
- do:
index:
index: test
type: test
id: 1
body: { "foo": "aaa" }
body: {
"foo": "aaa",
"date": "2017/01/01",
"dates": ["2017/01/01", "2017/02/01", "2017/03/01"]
}
- do:
indices.refresh: {}
@ -34,6 +44,7 @@ setup:
x: "bbb"
- match: { hits.hits.0.fields.bar.0: "aaabbb"}
---
"Scripted Field Doing Compare":
- do:
@ -60,6 +71,7 @@ setup:
x: "bbb"
- match: { hits.hits.0.fields.bar.0: false}
---
"Scripted Field with a null safe dereference (non-null)":
- do:
@ -89,6 +101,35 @@ setup:
- match: { hits.hits.0.fields.bar.0: 5}
---
"Access a date":
- do:
search:
body:
script_fields:
bar:
script:
inline: "doc.date.date.dayOfWeek"
- match: { hits.hits.0.fields.bar.0: 7}
---
"Access many dates":
- do:
search:
body:
script_fields:
bar:
script:
inline: >
StringBuilder b = new StringBuilder();
for (def date : doc.dates.dates) {
b.append(" ").append(date.getDayOfWeek());
}
return b.toString().trim()
- match: { hits.hits.0.fields.bar.0: "7 3 3"}
---
"Scripted Field with script error":
- do: