From 5d54e36964cbaaeb5a87c78867f6c17e26d4ad5d Mon Sep 17 00:00:00 2001 From: kimchy Date: Sun, 17 Oct 2010 16:22:20 +0200 Subject: [PATCH] Statistical Facet: Allow to compute statistical facets on more than one field, closes #436. --- .../statistical/StatisticalFacetBuilder.java | 21 ++- .../StatisticalFacetCollectorParser.java | 17 ++- .../StatisticalFieldsFacetCollector.java | 132 ++++++++++++++++++ .../terms/TermsFacetCollectorParser.java | 29 ++-- .../search/facets/SimpleFacetsTests.java | 16 +++ 5 files changed, 198 insertions(+), 17 deletions(-) create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/search/facets/statistical/StatisticalFieldsFacetCollector.java diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facets/statistical/StatisticalFacetBuilder.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facets/statistical/StatisticalFacetBuilder.java index 947ea047f61..79df5f0e60d 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facets/statistical/StatisticalFacetBuilder.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facets/statistical/StatisticalFacetBuilder.java @@ -30,6 +30,7 @@ import java.io.IOException; * @author kimchy (shay.banon) */ public class StatisticalFacetBuilder extends AbstractFacetBuilder { + private String[] fieldsNames; private String fieldName; public StatisticalFacetBuilder(String name) { @@ -41,6 +42,14 @@ public class StatisticalFacetBuilder extends AbstractFacetBuilder { return this; } + /** + * The fields the statistics will be computed on. + */ + public StatisticalFacetBuilder fields(String... 
fields) { + this.fieldsNames = fields; + return this; + } + public StatisticalFacetBuilder global(boolean global) { this.global = global; return this; @@ -52,13 +61,21 @@ public class StatisticalFacetBuilder extends AbstractFacetBuilder { } @Override public void toXContent(XContentBuilder builder, Params params) throws IOException { - if (fieldName == null) { + if (fieldName == null && fieldsNames == null) { throw new SearchSourceBuilderException("field must be set on statistical facet for facet [" + name + "]"); } builder.startObject(name); builder.startObject(StatisticalFacetCollectorParser.NAME); - builder.field("field", fieldName); + if (fieldsNames != null) { + if (fieldsNames.length == 1) { + builder.field("field", fieldsNames[0]); + } else { + builder.field("fields", fieldsNames); + } + } else { + builder.field("field", fieldName); + } builder.endObject(); addFilterFacetAndGlobal(builder, params); diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facets/statistical/StatisticalFacetCollectorParser.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facets/statistical/StatisticalFacetCollectorParser.java index 0fe3328dc66..4f0793b0f56 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facets/statistical/StatisticalFacetCollectorParser.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facets/statistical/StatisticalFacetCollectorParser.java @@ -19,6 +19,7 @@ package org.elasticsearch.search.facets.statistical; +import org.elasticsearch.common.collect.Lists; import org.elasticsearch.common.thread.ThreadLocals; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.search.facets.FacetPhaseExecutionException; @@ -28,6 +29,7 @@ import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; import java.util.HashMap; +import java.util.List; import java.util.Map; /** @@ -49,12 +51,14 @@ public class StatisticalFacetCollectorParser 
implements FacetCollectorParser { @Override public FacetCollector parse(String facetName, XContentParser parser, SearchContext context) throws IOException { String field = null; + String[] fieldsNames = null; - String currentFieldName = null; String script = null; String scriptLang = null; Map params = cachedParams.get().get(); params.clear(); + + String currentFieldName = null; XContentParser.Token token; while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { if (token == XContentParser.Token.FIELD_NAME) { @@ -63,6 +67,14 @@ public class StatisticalFacetCollectorParser implements FacetCollectorParser { if ("params".equals(currentFieldName)) { params = parser.map(); } + } else if (token == XContentParser.Token.START_ARRAY) { + if ("fields".equals(currentFieldName)) { + List fields = Lists.newArrayListWithCapacity(4); + while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { + fields.add(parser.text()); + } + fieldsNames = fields.toArray(new String[fields.size()]); + } } else if (token.isValue()) { if ("field".equals(currentFieldName)) { field = parser.text(); @@ -73,6 +85,9 @@ public class StatisticalFacetCollectorParser implements FacetCollectorParser { } } } + if (fieldsNames != null) { + return new StatisticalFieldsFacetCollector(facetName, fieldsNames, context); + } if (script == null && field == null) { throw new FacetPhaseExecutionException(facetName, "statistical facet requires either [script] or [field] to be set"); } diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facets/statistical/StatisticalFieldsFacetCollector.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facets/statistical/StatisticalFieldsFacetCollector.java new file mode 100644 index 00000000000..7a6856713fb --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facets/statistical/StatisticalFieldsFacetCollector.java @@ -0,0 +1,132 @@ +/* + * Licensed to Elastic Search and Shay Banon under 
one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.facets.statistical; + +import org.apache.lucene.index.IndexReader; +import org.elasticsearch.common.Strings; +import org.elasticsearch.index.cache.field.data.FieldDataCache; +import org.elasticsearch.index.field.data.FieldDataType; +import org.elasticsearch.index.field.data.NumericFieldData; +import org.elasticsearch.index.mapper.FieldMapper; +import org.elasticsearch.search.facets.Facet; +import org.elasticsearch.search.facets.FacetPhaseExecutionException; +import org.elasticsearch.search.facets.support.AbstractFacetCollector; +import org.elasticsearch.search.internal.SearchContext; + +import java.io.IOException; + +/** + * @author kimchy (shay.banon) + */ +public class StatisticalFieldsFacetCollector extends AbstractFacetCollector { + + private final String[] fieldsNames; + + private final String[] indexFieldsNames; + + private final FieldDataCache fieldDataCache; + + private final FieldDataType[] fieldsDataType; + + private NumericFieldData[] fieldsData; + + private final StatsProc statsProc = new StatsProc(); + + public StatisticalFieldsFacetCollector(String facetName, String[] fieldsNames, SearchContext context) { + super(facetName); + this.fieldsNames = fieldsNames; + 
this.fieldDataCache = context.fieldDataCache(); + + fieldsDataType = new FieldDataType[fieldsNames.length]; + fieldsData = new NumericFieldData[fieldsNames.length]; + indexFieldsNames = new String[fieldsNames.length]; + + + for (int i = 0; i < fieldsNames.length; i++) { + FieldMapper mapper = context.mapperService().smartNameFieldMapper(fieldsNames[i]); + if (mapper == null) { + throw new FacetPhaseExecutionException(facetName, "No mapping found for field [" + fieldsNames[i] + "]"); + } + indexFieldsNames[i] = mapper.names().indexName(); + fieldsDataType[i] = mapper.fieldDataType(); + } + } + + @Override protected void doCollect(int doc) throws IOException { + for (NumericFieldData fieldData : fieldsData) { + fieldData.forEachValueInDoc(doc, statsProc); + } + } + + @Override protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { + for (int i = 0; i < fieldsNames.length; i++) { + fieldsData[i] = (NumericFieldData) fieldDataCache.cache(fieldsDataType[i], reader, indexFieldsNames[i]); + } + } + + @Override public Facet facet() { + return new InternalStatisticalFacet(facetName, Strings.arrayToCommaDelimitedString(fieldsNames), statsProc.min(), statsProc.max(), statsProc.total(), statsProc.sumOfSquares(), statsProc.count()); + } + + public static class StatsProc implements NumericFieldData.DoubleValueInDocProc { + + private double min = Double.NaN; + + private double max = Double.NaN; + + private double total = 0; + + private double sumOfSquares = 0.0; + + private long count; + + @Override public void onValue(int docId, double value) { + if (value < min || Double.isNaN(min)) { + min = value; + } + if (value > max || Double.isNaN(max)) { + max = value; + } + sumOfSquares += value * value; + total += value; + count++; + } + + public final double min() { + return min; + } + + public final double max() { + return max; + } + + public final double total() { + return total; + } + + public final long count() { + return count; + } + + public final 
double sumOfSquares() { + return sumOfSquares; + } + } +} diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facets/terms/TermsFacetCollectorParser.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facets/terms/TermsFacetCollectorParser.java index 9a68746a01a..9eb60eb52eb 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facets/terms/TermsFacetCollectorParser.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facets/terms/TermsFacetCollectorParser.java @@ -47,9 +47,7 @@ public class TermsFacetCollectorParser implements FacetCollectorParser { String field = null; int size = 10; - String fieldName = null; String[] fieldsNames = null; - XContentParser.Token token; ImmutableSet excluded = ImmutableSet.of(); String regex = null; String regexFlags = null; @@ -57,21 +55,24 @@ public class TermsFacetCollectorParser implements FacetCollectorParser { String scriptLang = null; String script = null; Map params = null; + + String currentFieldName = null; + XContentParser.Token token; while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { if (token == XContentParser.Token.FIELD_NAME) { - fieldName = parser.currentName(); + currentFieldName = parser.currentName(); } else if (token == XContentParser.Token.START_OBJECT) { - if ("params".equals(fieldName)) { + if ("params".equals(currentFieldName)) { params = parser.map(); } } else if (token == XContentParser.Token.START_ARRAY) { - if ("exclude".equals(fieldName)) { + if ("exclude".equals(currentFieldName)) { ImmutableSet.Builder builder = ImmutableSet.builder(); while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { builder.add(parser.text()); } excluded = builder.build(); - } else if ("fields".equals(fieldName)) { + } else if ("fields".equals(currentFieldName)) { List fields = Lists.newArrayListWithCapacity(4); while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { fields.add(parser.text()); 
@@ -79,21 +80,21 @@ public class TermsFacetCollectorParser implements FacetCollectorParser { fieldsNames = fields.toArray(new String[fields.size()]); } } else if (token.isValue()) { - if ("field".equals(fieldName)) { + if ("field".equals(currentFieldName)) { field = parser.text(); - } else if ("script_field".equals(fieldName)) { + } else if ("script_field".equals(currentFieldName)) { script = parser.text(); - } else if ("size".equals(fieldName)) { + } else if ("size".equals(currentFieldName)) { size = parser.intValue(); - } else if ("regex".equals(fieldName)) { + } else if ("regex".equals(currentFieldName)) { regex = parser.text(); - } else if ("regex_flags".equals(fieldName) || "regexFlags".equals(fieldName)) { + } else if ("regex_flags".equals(currentFieldName) || "regexFlags".equals(currentFieldName)) { regexFlags = parser.text(); - } else if ("order".equals(fieldName) || "comparator".equals(field)) { + } else if ("order".equals(currentFieldName) || "comparator".equals(field)) { comparatorType = TermsFacet.ComparatorType.fromString(parser.text()); - } else if ("script".equals(fieldName)) { + } else if ("script".equals(currentFieldName)) { script = parser.text(); - } else if ("lang".equals(fieldName)) { + } else if ("lang".equals(currentFieldName)) { scriptLang = parser.text(); } } diff --git a/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/facets/SimpleFacetsTests.java b/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/facets/SimpleFacetsTests.java index 66deb7ff33b..3c195e5a420 100644 --- a/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/facets/SimpleFacetsTests.java +++ b/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/facets/SimpleFacetsTests.java @@ -560,6 +560,22 @@ public class SimpleFacetsTests extends AbstractNodesTests { assertThat(facet.mean(), equalTo(3d)); assertThat(facet.sumOfSquares(), equalTo(20d)); + // test multi 
field facet + searchResponse = client.prepareSearch() + .setQuery(matchAllQuery()) + .addFacet(statisticalFacet("stats").fields("num", "multi_num")) + .execute().actionGet(); + + + facet = searchResponse.facets().facet("stats"); + assertThat(facet.name(), equalTo(facet.name())); + assertThat(facet.count(), equalTo(6l)); + assertThat(facet.total(), equalTo(13d)); + assertThat(facet.min(), equalTo(1d)); + assertThat(facet.max(), equalTo(4d)); + assertThat(facet.mean(), equalTo(13d / 6d)); + assertThat(facet.sumOfSquares(), equalTo(35d)); + // test cross field facet using the same facet name... searchResponse = client.prepareSearch() .setQuery(matchAllQuery())