diff --git a/docs/reference/search/request/sort.asciidoc b/docs/reference/search/request/sort.asciidoc index 39d1262a050..1e4218bb61d 100644 --- a/docs/reference/search/request/sort.asciidoc +++ b/docs/reference/search/request/sort.asciidoc @@ -50,6 +50,8 @@ to. The `mode` option can have the following values: number based array fields. `avg`:: Use the average of all values as sort value. Only applicable for number based array fields. +`median`:: Use the median of all values as sort value. Only applicable + for number based array fields. ===== Sort mode example usage diff --git a/src/main/java/org/elasticsearch/script/expression/CountMethodFunctionValues.java b/src/main/java/org/elasticsearch/script/expression/CountMethodFunctionValues.java new file mode 100644 index 00000000000..818404e98e2 --- /dev/null +++ b/src/main/java/org/elasticsearch/script/expression/CountMethodFunctionValues.java @@ -0,0 +1,44 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.script.expression; + +import org.apache.lucene.queries.function.ValueSource; +import org.apache.lucene.queries.function.docvalues.DoubleDocValues; +import org.elasticsearch.index.fielddata.AtomicNumericFieldData; +import org.elasticsearch.index.fielddata.SortedNumericDoubleValues; + +/** + * FunctionValues whose doubleVal(doc) reports, as a double, how many values the wrapped numeric field data holds for that document. + */ +public class CountMethodFunctionValues extends DoubleDocValues { + SortedNumericDoubleValues values; /* obtained once from fieldData.getDoubleValues() in the constructor */ + + CountMethodFunctionValues(ValueSource parent, AtomicNumericFieldData fieldData) { + super(parent); + + values = fieldData.getDoubleValues(); + } + + @Override + public double doubleVal(int doc) { + values.setDocument(doc); /* select doc before asking for its value count */ + return values.count(); + } +} diff --git a/src/main/java/org/elasticsearch/script/expression/CountMethodValueSource.java b/src/main/java/org/elasticsearch/script/expression/CountMethodValueSource.java new file mode 100644 index 00000000000..0fa83d92d76 --- /dev/null +++ b/src/main/java/org/elasticsearch/script/expression/CountMethodValueSource.java @@ -0,0 +1,73 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ + +package org.elasticsearch.script.expression; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.queries.function.FunctionValues; +import org.apache.lucene.queries.function.ValueSource; +import org.elasticsearch.index.fielddata.AtomicFieldData; +import org.elasticsearch.index.fielddata.AtomicNumericFieldData; +import org.elasticsearch.index.fielddata.IndexFieldData; +import org.elasticsearch.search.MultiValueMode; + +import java.io.IOException; +import java.util.Map; +import java.util.Objects; + +/** + * A ValueSource that creates, for each leaf reader, the FunctionValues reporting how many values a field holds for a document. Two instances are equal when they wrap equal field data. + */ +public class CountMethodValueSource extends ValueSource { + protected IndexFieldData fieldData; + + protected CountMethodValueSource(IndexFieldData fieldData) { + Objects.requireNonNull(fieldData); + + this.fieldData = fieldData; + } + + @Override + public FunctionValues getValues(Map context, LeafReaderContext leaf) throws IOException { + AtomicFieldData leafData = fieldData.load(leaf); + assert(leafData instanceof AtomicNumericFieldData); + + return new CountMethodFunctionValues(this, (AtomicNumericFieldData)leafData); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + /* o has exactly this class here, so cast to CountMethodValueSource; this class is not a FieldDataValueSource and casting to that type would throw ClassCastException. */ + CountMethodValueSource that = (CountMethodValueSource) o; + + return fieldData.equals(that.fieldData); + } + + @Override + public int hashCode() { + return fieldData.hashCode(); + } + + @Override + public String description() { + return "count: field(" + fieldData.getFieldNames().toString() + ")"; + } +} diff --git a/src/main/java/org/elasticsearch/script/expression/DateMethodFunctionValues.java b/src/main/java/org/elasticsearch/script/expression/DateMethodFunctionValues.java index f71a3697664..f7198fc0ae2 100644 --- a/src/main/java/org/elasticsearch/script/expression/DateMethodFunctionValues.java +++
b/src/main/java/org/elasticsearch/script/expression/DateMethodFunctionValues.java @@ -25,13 +25,14 @@ import java.util.TimeZone; import org.apache.lucene.queries.function.ValueSource; import org.elasticsearch.index.fielddata.AtomicNumericFieldData; +import org.elasticsearch.search.MultiValueMode; class DateMethodFunctionValues extends FieldDataFunctionValues { private final int calendarType; private final Calendar calendar; - DateMethodFunctionValues(ValueSource parent, AtomicNumericFieldData data, int calendarType) { - super(parent, data); + DateMethodFunctionValues(ValueSource parent, MultiValueMode multiValueMode, AtomicNumericFieldData data, int calendarType) { + super(parent, multiValueMode, data); this.calendarType = calendarType; calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT); diff --git a/src/main/java/org/elasticsearch/script/expression/DateMethodValueSource.java b/src/main/java/org/elasticsearch/script/expression/DateMethodValueSource.java index a157790e2bb..522b546656d 100644 --- a/src/main/java/org/elasticsearch/script/expression/DateMethodValueSource.java +++ b/src/main/java/org/elasticsearch/script/expression/DateMethodValueSource.java @@ -29,14 +29,15 @@ import org.apache.lucene.queries.function.FunctionValues; import org.elasticsearch.index.fielddata.AtomicFieldData; import org.elasticsearch.index.fielddata.AtomicNumericFieldData; import org.elasticsearch.index.fielddata.IndexFieldData; +import org.elasticsearch.search.MultiValueMode; class DateMethodValueSource extends FieldDataValueSource { protected final String methodName; protected final int calendarType; - DateMethodValueSource(IndexFieldData indexFieldData, String methodName, int calendarType) { - super(indexFieldData); + DateMethodValueSource(IndexFieldData indexFieldData, MultiValueMode multiValueMode, String methodName, int calendarType) { + super(indexFieldData, multiValueMode); Objects.requireNonNull(methodName); @@ -44,6 +45,19 @@ class DateMethodValueSource 
extends FieldDataValueSource { this.calendarType = calendarType; } + @Override + public FunctionValues getValues(Map context, LeafReaderContext leaf) throws IOException { + AtomicFieldData leafData = fieldData.load(leaf); + assert(leafData instanceof AtomicNumericFieldData); + + return new DateMethodFunctionValues(this, multiValueMode, (AtomicNumericFieldData)leafData, calendarType); + } + + @Override + public String description() { + return methodName + ": field(" + fieldData.getFieldNames().toString() + ")"; + } + @Override public boolean equals(Object o) { if (this == o) return true; @@ -64,17 +78,4 @@ class DateMethodValueSource extends FieldDataValueSource { result = 31 * result + calendarType; return result; } - - @Override - public FunctionValues getValues(Map context, LeafReaderContext leaf) throws IOException { - AtomicFieldData leafData = fieldData.load(leaf); - assert(leafData instanceof AtomicNumericFieldData); - - return new DateMethodFunctionValues(this, (AtomicNumericFieldData)leafData, calendarType); - } - - @Override - public String description() { - return methodName + ": field(" + fieldData.getFieldNames().toString() + ")"; - } } diff --git a/src/main/java/org/elasticsearch/script/expression/ExpressionScriptEngineService.java b/src/main/java/org/elasticsearch/script/expression/ExpressionScriptEngineService.java index 6d6f986432b..5e94422314a 100644 --- a/src/main/java/org/elasticsearch/script/expression/ExpressionScriptEngineService.java +++ b/src/main/java/org/elasticsearch/script/expression/ExpressionScriptEngineService.java @@ -39,6 +39,7 @@ import org.elasticsearch.script.CompiledScript; import org.elasticsearch.script.ExecutableScript; import org.elasticsearch.script.ScriptEngineService; import org.elasticsearch.script.SearchScript; +import org.elasticsearch.search.MultiValueMode; import org.elasticsearch.search.lookup.SearchLookup; import java.text.ParseException; @@ -60,6 +61,13 @@ public class ExpressionScriptEngineService extends 
AbstractComponent implements protected static final String GET_MINUTES_METHOD = "getMinutes"; protected static final String GET_SECONDS_METHOD = "getSeconds"; + protected static final String MINIMUM_METHOD = "min"; + protected static final String MAXIMUM_METHOD = "max"; + protected static final String AVERAGE_METHOD = "avg"; + protected static final String MEDIAN_METHOD = "median"; + protected static final String SUM_METHOD = "sum"; + protected static final String COUNT_METHOD = "count"; + @Inject public ExpressionScriptEngineService(Settings settings) { super(settings); @@ -156,7 +164,7 @@ public class ExpressionScriptEngineService extends AbstractComponent implements IndexFieldData fieldData = lookup.doc().fieldDataService().getForField((NumberFieldMapper)field); if (methodname == null) { - bindings.add(variable, new FieldDataValueSource(fieldData)); + bindings.add(variable, new FieldDataValueSource(fieldData, MultiValueMode.MIN)); } else { bindings.add(variable, getMethodValueSource(field, fieldData, fieldname, methodname)); } @@ -180,6 +188,18 @@ public class ExpressionScriptEngineService extends AbstractComponent implements return getDateMethodValueSource(field, fieldData, fieldName, methodName, Calendar.MINUTE); case GET_SECONDS_METHOD: return getDateMethodValueSource(field, fieldData, fieldName, methodName, Calendar.SECOND); + case MINIMUM_METHOD: + return new FieldDataValueSource(fieldData, MultiValueMode.MIN); + case MAXIMUM_METHOD: + return new FieldDataValueSource(fieldData, MultiValueMode.MAX); + case AVERAGE_METHOD: + return new FieldDataValueSource(fieldData, MultiValueMode.AVG); + case MEDIAN_METHOD: + return new FieldDataValueSource(fieldData, MultiValueMode.MEDIAN); + case SUM_METHOD: + return new FieldDataValueSource(fieldData, MultiValueMode.SUM); + case COUNT_METHOD: + return new CountMethodValueSource(fieldData); default: throw new IllegalArgumentException("Member method [" + methodName + "] does not exist."); } @@ -190,7 +210,7 @@ public class 
ExpressionScriptEngineService extends AbstractComponent implements throw new IllegalArgumentException("Member method [" + methodName + "] can only be used with a date field type, not the field [" + fieldName + "]."); } - return new DateMethodValueSource(fieldData, methodName, calendarType); + return new DateMethodValueSource(fieldData, MultiValueMode.MIN, methodName, calendarType); } @Override diff --git a/src/main/java/org/elasticsearch/script/expression/FieldDataFunctionValues.java b/src/main/java/org/elasticsearch/script/expression/FieldDataFunctionValues.java index 7f25b3e1931..b3e06d6b9f2 100644 --- a/src/main/java/org/elasticsearch/script/expression/FieldDataFunctionValues.java +++ b/src/main/java/org/elasticsearch/script/expression/FieldDataFunctionValues.java @@ -31,9 +31,9 @@ import org.elasticsearch.search.MultiValueMode; class FieldDataFunctionValues extends DoubleDocValues { NumericDoubleValues dataAccessor; - FieldDataFunctionValues(ValueSource parent, AtomicNumericFieldData d) { + FieldDataFunctionValues(ValueSource parent, MultiValueMode m, AtomicNumericFieldData d) { super(parent); - dataAccessor = MultiValueMode.MIN.select(d.getDoubleValues(), 0d); + dataAccessor = m.select(d.getDoubleValues(), 0d); } @Override diff --git a/src/main/java/org/elasticsearch/script/expression/FieldDataValueSource.java b/src/main/java/org/elasticsearch/script/expression/FieldDataValueSource.java index 7a97532068a..39386ee4913 100644 --- a/src/main/java/org/elasticsearch/script/expression/FieldDataValueSource.java +++ b/src/main/java/org/elasticsearch/script/expression/FieldDataValueSource.java @@ -25,6 +25,7 @@ import org.apache.lucene.queries.function.ValueSource; import org.elasticsearch.index.fielddata.AtomicFieldData; import org.elasticsearch.index.fielddata.AtomicNumericFieldData; import org.elasticsearch.index.fielddata.IndexFieldData; +import org.elasticsearch.search.MultiValueMode; import java.io.IOException; import java.util.Map; @@ -36,18 +37,14 @@ import 
java.util.Objects; class FieldDataValueSource extends ValueSource { protected IndexFieldData fieldData; + protected MultiValueMode multiValueMode; - protected FieldDataValueSource(IndexFieldData d) { + protected FieldDataValueSource(IndexFieldData d, MultiValueMode m) { Objects.requireNonNull(d); + Objects.requireNonNull(m); fieldData = d; - } - - @Override - public FunctionValues getValues(Map context, LeafReaderContext leaf) throws IOException { - AtomicFieldData leafData = fieldData.load(leaf); - assert(leafData instanceof AtomicNumericFieldData); - return new FieldDataFunctionValues(this, (AtomicNumericFieldData)leafData); + multiValueMode = m; } @Override @@ -57,12 +54,23 @@ class FieldDataValueSource extends ValueSource { FieldDataValueSource that = (FieldDataValueSource) o; - return fieldData.equals(that.fieldData); + if (!fieldData.equals(that.fieldData)) return false; + return multiValueMode == that.multiValueMode; + } @Override public int hashCode() { - return fieldData.hashCode(); + int result = fieldData.hashCode(); + result = 31 * result + multiValueMode.hashCode(); + return result; + } + + @Override + public FunctionValues getValues(Map context, LeafReaderContext leaf) throws IOException { + AtomicFieldData leafData = fieldData.load(leaf); + assert(leafData instanceof AtomicNumericFieldData); + return new FieldDataFunctionValues(this, multiValueMode, (AtomicNumericFieldData)leafData); } @Override diff --git a/src/main/java/org/elasticsearch/search/MultiValueMode.java b/src/main/java/org/elasticsearch/search/MultiValueMode.java index 839d4714dbe..50b9ad6b193 100644 --- a/src/main/java/org/elasticsearch/search/MultiValueMode.java +++ b/src/main/java/org/elasticsearch/search/MultiValueMode.java @@ -105,6 +105,51 @@ public enum
MultiValueMode { } }, + /** + * Median of the values. For an even number of values the two middle values are averaged + * (the long variant truncates that average toward zero). + * + * Note that apply/reduce do not work with MEDIAN since median cannot be derived from + * an accumulator algorithm without using internal memory. + */ + MEDIAN { + @Override + protected long pick(SortedNumericDocValues values, long missingValue) { + int count = values.count(); + if (count > 0) { + if (count % 2 == 0) { + count /= 2; + /* Average the two middle values without forming their sum, which can overflow long; the result equals the truncated (sum / 2) whenever that sum fits. */ + final long lo = values.valueAt(count - 1); + final long hi = values.valueAt(count); + final long floor = (lo & hi) + ((lo ^ hi) >> 1); + return floor < 0 ? floor + ((lo ^ hi) & 1) : floor; + } else { + count /= 2; + return values.valueAt(count); + } + } else { + return missingValue; + } + } + + @Override + protected double pick(SortedNumericDoubleValues values, double missingValue) { + int count = values.count(); + if (count > 0) { + if (count % 2 == 0) { + count /= 2; + return (values.valueAt(count - 1) + values.valueAt(count))/2; + } else { + count /= 2; + return values.valueAt(count); + } + } else { + return missingValue; + } + } + }, + /** * Pick the lowest value. */ @@ -288,7 +333,9 @@ public enum MultiValueMode { * @param b another argument * @return the result of the function. */ - public abstract double apply(double a, double b); + public double apply(double a, double b) { + throw new UnsupportedOperationException(); + } /** * Applies the sort mode and returns the result. This method is meant to be @@ -302,7 +349,9 @@ public enum MultiValueMode { * @param b another argument * @return the result of the function.
*/ - public abstract long apply(long a, long b); + public long apply(long a, long b) { + throw new UnsupportedOperationException(); + } public int applyOrd(int ord1, int ord2) { throw new UnsupportedOperationException(); diff --git a/src/test/java/org/elasticsearch/script/expression/ExpressionScriptTests.java b/src/test/java/org/elasticsearch/script/expression/ExpressionScriptTests.java index 2c8a7ddbc0d..cf01a3e4cc9 100644 --- a/src/test/java/org/elasticsearch/script/expression/ExpressionScriptTests.java +++ b/src/test/java/org/elasticsearch/script/expression/ExpressionScriptTests.java @@ -125,6 +125,72 @@ public class ExpressionScriptTests extends ElasticsearchIntegrationTest { assertEquals(1983.0, hits.getAt(1).field("foo").getValue()); } + public void testMultiValueMethods() throws Exception { + ElasticsearchAssertions.assertAcked(prepareCreate("test").addMapping("doc", "double0", "type=double", "double1", "type=double")); + ensureGreen("test"); + indexRandom(true, + client().prepareIndex("test", "doc", "1").setSource("double0", "5.0", "double0", "1.0", "double0", "1.5", "double1", "1.2", "double1", "2.4"), + client().prepareIndex("test", "doc", "2").setSource("double0", "5.0", "double1", "3.0"), + client().prepareIndex("test", "doc", "3").setSource("double0", "5.0", "double0", "1.0", "double0", "1.5", "double0", "-1.5", "double1", "4.0")); + + + SearchResponse rsp = buildRequest("doc['double0'].count() + doc['double1'].count()").get(); + assertSearchResponse(rsp); + SearchHits hits = rsp.getHits(); + assertEquals(3, hits.getTotalHits()); + assertEquals(5.0, hits.getAt(0).field("foo").getValue()); + assertEquals(2.0, hits.getAt(1).field("foo").getValue()); + assertEquals(5.0, hits.getAt(2).field("foo").getValue()); + + rsp = buildRequest("doc['double0'].sum()").get(); + assertSearchResponse(rsp); + hits = rsp.getHits(); + assertEquals(3, hits.getTotalHits()); + assertEquals(7.5, hits.getAt(0).field("foo").getValue()); + assertEquals(5.0, 
hits.getAt(1).field("foo").getValue()); + assertEquals(6.0, hits.getAt(2).field("foo").getValue()); + + rsp = buildRequest("doc['double0'].avg() + doc['double1'].avg()").get(); + assertSearchResponse(rsp); + hits = rsp.getHits(); + assertEquals(3, hits.getTotalHits()); + assertEquals(4.3, hits.getAt(0).field("foo").getValue()); + assertEquals(8.0, hits.getAt(1).field("foo").getValue()); + assertEquals(5.5, hits.getAt(2).field("foo").getValue()); + + rsp = buildRequest("doc['double0'].median()").get(); + assertSearchResponse(rsp); + hits = rsp.getHits(); + assertEquals(3, hits.getTotalHits()); + assertEquals(1.5, hits.getAt(0).field("foo").getValue()); + assertEquals(5.0, hits.getAt(1).field("foo").getValue()); + assertEquals(1.25, hits.getAt(2).field("foo").getValue()); + + rsp = buildRequest("doc['double0'].min()").get(); + assertSearchResponse(rsp); + hits = rsp.getHits(); + assertEquals(3, hits.getTotalHits()); + assertEquals(1.0, hits.getAt(0).field("foo").getValue()); + assertEquals(5.0, hits.getAt(1).field("foo").getValue()); + assertEquals(-1.5, hits.getAt(2).field("foo").getValue()); + + rsp = buildRequest("doc['double0'].max()").get(); + assertSearchResponse(rsp); + hits = rsp.getHits(); + assertEquals(3, hits.getTotalHits()); + assertEquals(5.0, hits.getAt(0).field("foo").getValue()); + assertEquals(5.0, hits.getAt(1).field("foo").getValue()); + assertEquals(5.0, hits.getAt(2).field("foo").getValue()); + + rsp = buildRequest("doc['double0'].sum()/doc['double0'].count()").get(); + assertSearchResponse(rsp); + hits = rsp.getHits(); + assertEquals(3, hits.getTotalHits()); + assertEquals(2.5, hits.getAt(0).field("foo").getValue()); + assertEquals(5.0, hits.getAt(1).field("foo").getValue()); + assertEquals(1.5, hits.getAt(2).field("foo").getValue()); + } + public void testInvalidDateMethodCall() throws Exception { ElasticsearchAssertions.assertAcked(prepareCreate("test").addMapping("doc", "double", "type=double")); ensureGreen("test"); diff --git 
a/src/test/java/org/elasticsearch/search/MultiValueModeTests.java b/src/test/java/org/elasticsearch/search/MultiValueModeTests.java index 9f8c9ef6239..61a3e557aa9 100644 --- a/src/test/java/org/elasticsearch/search/MultiValueModeTests.java +++ b/src/test/java/org/elasticsearch/search/MultiValueModeTests.java @@ -32,6 +32,8 @@ import org.elasticsearch.test.ElasticsearchTestCase; import java.io.IOException; import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; public class MultiValueModeTests extends ElasticsearchTestCase { @@ -122,6 +124,9 @@ public class MultiValueModeTests extends ElasticsearchTestCase { private void verify(SortedNumericDocValues values, int maxDoc) { for (long missingValue : new long[] { 0, randomLong() }) { for (MultiValueMode mode : MultiValueMode.values()) { + if (MultiValueMode.MEDIAN.equals(mode)) { + continue; + } final NumericDocValues selected = mode.select(values, missingValue); for (int i = 0; i < maxDoc; ++i) { final long actual = selected.get(i); @@ -147,6 +152,9 @@ public class MultiValueModeTests extends ElasticsearchTestCase { private void verify(SortedNumericDocValues values, int maxDoc, FixedBitSet rootDocs, FixedBitSet innerDocs) throws IOException { for (long missingValue : new long[] { 0, randomLong() }) { for (MultiValueMode mode : MultiValueMode.values()) { + if (MultiValueMode.MEDIAN.equals(mode)) { + continue; + } final NumericDocValues selected = mode.select(values, missingValue, rootDocs, new BitDocIdSet(innerDocs), maxDoc); int prevRoot = -1; for (int root = rootDocs.nextSetBit(0); root != -1; root = root + 1 < maxDoc ? 
rootDocs.nextSetBit(root + 1) : -1) { @@ -239,6 +247,9 @@ public class MultiValueModeTests extends ElasticsearchTestCase { private void verify(SortedNumericDoubleValues values, int maxDoc) { for (long missingValue : new long[] { 0, randomLong() }) { for (MultiValueMode mode : MultiValueMode.values()) { + if (MultiValueMode.MEDIAN.equals(mode)) { + continue; + } final NumericDoubleValues selected = mode.select(values, missingValue); for (int i = 0; i < maxDoc; ++i) { final double actual = selected.get(i); @@ -264,6 +275,9 @@ public class MultiValueModeTests extends ElasticsearchTestCase { private void verify(SortedNumericDoubleValues values, int maxDoc, FixedBitSet rootDocs, FixedBitSet innerDocs) throws IOException { for (long missingValue : new long[] { 0, randomLong() }) { for (MultiValueMode mode : MultiValueMode.values()) { + if (MultiValueMode.MEDIAN.equals(mode)) { + continue; + } final NumericDoubleValues selected = mode.select(values, missingValue, rootDocs, new BitDocIdSet(innerDocs), maxDoc); int prevRoot = -1; for (int root = rootDocs.nextSetBit(0); root != -1; root = root + 1 < maxDoc ? rootDocs.nextSetBit(root + 1) : -1) {