Statistical Facet: Allow to compute statistical facets on more than one field, closes #436.

This commit is contained in:
kimchy 2010-10-17 16:22:20 +02:00
parent 2fc0022477
commit 5d54e36964
5 changed files with 198 additions and 17 deletions

View File

@ -30,6 +30,7 @@ import java.io.IOException;
* @author kimchy (shay.banon) * @author kimchy (shay.banon)
*/ */
public class StatisticalFacetBuilder extends AbstractFacetBuilder { public class StatisticalFacetBuilder extends AbstractFacetBuilder {
private String[] fieldsNames;
private String fieldName; private String fieldName;
public StatisticalFacetBuilder(String name) { public StatisticalFacetBuilder(String name) {
@ -41,6 +42,14 @@ public class StatisticalFacetBuilder extends AbstractFacetBuilder {
return this; return this;
} }
/**
* The fields the statistical values will be computed on.
*/
public StatisticalFacetBuilder fields(String... fields) {
this.fieldsNames = fields;
return this;
}
public StatisticalFacetBuilder global(boolean global) { public StatisticalFacetBuilder global(boolean global) {
this.global = global; this.global = global;
return this; return this;
@ -52,13 +61,21 @@ public class StatisticalFacetBuilder extends AbstractFacetBuilder {
} }
@Override public void toXContent(XContentBuilder builder, Params params) throws IOException { @Override public void toXContent(XContentBuilder builder, Params params) throws IOException {
if (fieldName == null) { if (fieldName == null && fieldsNames == null) {
throw new SearchSourceBuilderException("field must be set on statistical facet for facet [" + name + "]"); throw new SearchSourceBuilderException("field must be set on statistical facet for facet [" + name + "]");
} }
builder.startObject(name); builder.startObject(name);
builder.startObject(StatisticalFacetCollectorParser.NAME); builder.startObject(StatisticalFacetCollectorParser.NAME);
if (fieldsNames != null) {
if (fieldsNames.length == 1) {
builder.field("field", fieldsNames[0]);
} else {
builder.field("fields", fieldsNames);
}
} else {
builder.field("field", fieldName); builder.field("field", fieldName);
}
builder.endObject(); builder.endObject();
addFilterFacetAndGlobal(builder, params); addFilterFacetAndGlobal(builder, params);

View File

@ -19,6 +19,7 @@
package org.elasticsearch.search.facets.statistical; package org.elasticsearch.search.facets.statistical;
import org.elasticsearch.common.collect.Lists;
import org.elasticsearch.common.thread.ThreadLocals; import org.elasticsearch.common.thread.ThreadLocals;
import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.search.facets.FacetPhaseExecutionException; import org.elasticsearch.search.facets.FacetPhaseExecutionException;
@ -28,6 +29,7 @@ import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException; import java.io.IOException;
import java.util.HashMap; import java.util.HashMap;
import java.util.List;
import java.util.Map; import java.util.Map;
/** /**
@ -49,12 +51,14 @@ public class StatisticalFacetCollectorParser implements FacetCollectorParser {
@Override public FacetCollector parse(String facetName, XContentParser parser, SearchContext context) throws IOException { @Override public FacetCollector parse(String facetName, XContentParser parser, SearchContext context) throws IOException {
String field = null; String field = null;
String[] fieldsNames = null;
String currentFieldName = null;
String script = null; String script = null;
String scriptLang = null; String scriptLang = null;
Map<String, Object> params = cachedParams.get().get(); Map<String, Object> params = cachedParams.get().get();
params.clear(); params.clear();
String currentFieldName = null;
XContentParser.Token token; XContentParser.Token token;
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) { if (token == XContentParser.Token.FIELD_NAME) {
@ -63,6 +67,14 @@ public class StatisticalFacetCollectorParser implements FacetCollectorParser {
if ("params".equals(currentFieldName)) { if ("params".equals(currentFieldName)) {
params = parser.map(); params = parser.map();
} }
} else if (token == XContentParser.Token.START_ARRAY) {
if ("fields".equals(currentFieldName)) {
List<String> fields = Lists.newArrayListWithCapacity(4);
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
fields.add(parser.text());
}
fieldsNames = fields.toArray(new String[fields.size()]);
}
} else if (token.isValue()) { } else if (token.isValue()) {
if ("field".equals(currentFieldName)) { if ("field".equals(currentFieldName)) {
field = parser.text(); field = parser.text();
@ -73,6 +85,9 @@ public class StatisticalFacetCollectorParser implements FacetCollectorParser {
} }
} }
} }
if (fieldsNames != null) {
return new StatisticalFieldsFacetCollector(facetName, fieldsNames, context);
}
if (script == null && field == null) { if (script == null && field == null) {
throw new FacetPhaseExecutionException(facetName, "statistical facet requires either [script] or [field] to be set"); throw new FacetPhaseExecutionException(facetName, "statistical facet requires either [script] or [field] to be set");
} }

View File

@ -0,0 +1,132 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.facets.statistical;
import org.apache.lucene.index.IndexReader;
import org.elasticsearch.common.Strings;
import org.elasticsearch.index.cache.field.data.FieldDataCache;
import org.elasticsearch.index.field.data.FieldDataType;
import org.elasticsearch.index.field.data.NumericFieldData;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.search.facets.Facet;
import org.elasticsearch.search.facets.FacetPhaseExecutionException;
import org.elasticsearch.search.facets.support.AbstractFacetCollector;
import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
/**
* @author kimchy (shay.banon)
*/
/**
 * A facet collector that computes a single statistical facet (count, total, min, max,
 * sum of squares) aggregated over the values of <b>several</b> numeric fields. All values
 * of all configured fields in each collected doc are folded into one {@link StatsProc}
 * accumulator, and the resulting facet reports the comma-delimited field names.
 *
 * @author kimchy (shay.banon)
 */
public class StatisticalFieldsFacetCollector extends AbstractFacetCollector {

    // logical field names as requested by the user
    private final String[] fieldsNames;

    // resolved index names (may differ from logical names via mappings)
    private final String[] indexFieldsNames;

    private final FieldDataCache fieldDataCache;

    private final FieldDataType[] fieldsDataType;

    // per-segment field data, refreshed in doSetNextReader; the array itself is
    // allocated once in the constructor, only its elements are replaced — so final
    private final NumericFieldData[] fieldsData;

    private final StatsProc statsProc = new StatsProc();

    /**
     * @param facetName   the facet name reported back in the search response
     * @param fieldsNames the field names to aggregate statistics over
     * @param context     search context used to resolve field mappings and the field data cache
     * @throws FacetPhaseExecutionException if any field has no mapping
     */
    public StatisticalFieldsFacetCollector(String facetName, String[] fieldsNames, SearchContext context) {
        super(facetName);
        this.fieldsNames = fieldsNames;
        this.fieldDataCache = context.fieldDataCache();

        fieldsDataType = new FieldDataType[fieldsNames.length];
        fieldsData = new NumericFieldData[fieldsNames.length];
        indexFieldsNames = new String[fieldsNames.length];

        for (int i = 0; i < fieldsNames.length; i++) {
            FieldMapper mapper = context.mapperService().smartNameFieldMapper(fieldsNames[i]);
            if (mapper == null) {
                throw new FacetPhaseExecutionException(facetName, "No mapping found for field [" + fieldsNames[i] + "]");
            }
            indexFieldsNames[i] = mapper.names().indexName();
            fieldsDataType[i] = mapper.fieldDataType();
        }
    }

    @Override protected void doCollect(int doc) throws IOException {
        // fold every value of every configured field into the shared accumulator
        for (NumericFieldData fieldData : fieldsData) {
            fieldData.forEachValueInDoc(doc, statsProc);
        }
    }

    @Override protected void doSetNextReader(IndexReader reader, int docBase) throws IOException {
        // reload field data for the new segment reader
        for (int i = 0; i < fieldsNames.length; i++) {
            fieldsData[i] = (NumericFieldData) fieldDataCache.cache(fieldsDataType[i], reader, indexFieldsNames[i]);
        }
    }

    @Override public Facet facet() {
        // the facet's "field name" is the comma-delimited list of all collected fields
        return new InternalStatisticalFacet(facetName, Strings.arrayToCommaDelimitedString(fieldsNames), statsProc.min(), statsProc.max(), statsProc.total(), statsProc.sumOfSquares(), statsProc.count());
    }

    /**
     * Running-statistics accumulator invoked once per (doc, value) pair.
     * min/max start as NaN so they report NaN when no value was ever collected;
     * the NaN checks below make the first real value win any comparison.
     */
    public static class StatsProc implements NumericFieldData.DoubleValueInDocProc {

        private double min = Double.NaN;

        private double max = Double.NaN;

        private double total = 0;

        private double sumOfSquares = 0.0;

        private long count;

        @Override public void onValue(int docId, double value) {
            // NaN compares false to everything, so explicitly seed on the first value
            if (value < min || Double.isNaN(min)) {
                min = value;
            }
            if (value > max || Double.isNaN(max)) {
                max = value;
            }
            sumOfSquares += value * value;
            total += value;
            count++;
        }

        public final double min() {
            return min;
        }

        public final double max() {
            return max;
        }

        public final double total() {
            return total;
        }

        public final long count() {
            return count;
        }

        public final double sumOfSquares() {
            return sumOfSquares;
        }
    }
}

View File

@ -47,9 +47,7 @@ public class TermsFacetCollectorParser implements FacetCollectorParser {
String field = null; String field = null;
int size = 10; int size = 10;
String fieldName = null;
String[] fieldsNames = null; String[] fieldsNames = null;
XContentParser.Token token;
ImmutableSet<String> excluded = ImmutableSet.of(); ImmutableSet<String> excluded = ImmutableSet.of();
String regex = null; String regex = null;
String regexFlags = null; String regexFlags = null;
@ -57,21 +55,24 @@ public class TermsFacetCollectorParser implements FacetCollectorParser {
String scriptLang = null; String scriptLang = null;
String script = null; String script = null;
Map<String, Object> params = null; Map<String, Object> params = null;
String currentFieldName = null;
XContentParser.Token token;
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) { if (token == XContentParser.Token.FIELD_NAME) {
fieldName = parser.currentName(); currentFieldName = parser.currentName();
} else if (token == XContentParser.Token.START_OBJECT) { } else if (token == XContentParser.Token.START_OBJECT) {
if ("params".equals(fieldName)) { if ("params".equals(currentFieldName)) {
params = parser.map(); params = parser.map();
} }
} else if (token == XContentParser.Token.START_ARRAY) { } else if (token == XContentParser.Token.START_ARRAY) {
if ("exclude".equals(fieldName)) { if ("exclude".equals(currentFieldName)) {
ImmutableSet.Builder<String> builder = ImmutableSet.builder(); ImmutableSet.Builder<String> builder = ImmutableSet.builder();
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
builder.add(parser.text()); builder.add(parser.text());
} }
excluded = builder.build(); excluded = builder.build();
} else if ("fields".equals(fieldName)) { } else if ("fields".equals(currentFieldName)) {
List<String> fields = Lists.newArrayListWithCapacity(4); List<String> fields = Lists.newArrayListWithCapacity(4);
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
fields.add(parser.text()); fields.add(parser.text());
@ -79,21 +80,21 @@ public class TermsFacetCollectorParser implements FacetCollectorParser {
fieldsNames = fields.toArray(new String[fields.size()]); fieldsNames = fields.toArray(new String[fields.size()]);
} }
} else if (token.isValue()) { } else if (token.isValue()) {
if ("field".equals(fieldName)) { if ("field".equals(currentFieldName)) {
field = parser.text(); field = parser.text();
} else if ("script_field".equals(fieldName)) { } else if ("script_field".equals(currentFieldName)) {
script = parser.text(); script = parser.text();
} else if ("size".equals(fieldName)) { } else if ("size".equals(currentFieldName)) {
size = parser.intValue(); size = parser.intValue();
} else if ("regex".equals(fieldName)) { } else if ("regex".equals(currentFieldName)) {
regex = parser.text(); regex = parser.text();
} else if ("regex_flags".equals(fieldName) || "regexFlags".equals(fieldName)) { } else if ("regex_flags".equals(currentFieldName) || "regexFlags".equals(currentFieldName)) {
regexFlags = parser.text(); regexFlags = parser.text();
} else if ("order".equals(fieldName) || "comparator".equals(field)) { } else if ("order".equals(currentFieldName) || "comparator".equals(field)) {
comparatorType = TermsFacet.ComparatorType.fromString(parser.text()); comparatorType = TermsFacet.ComparatorType.fromString(parser.text());
} else if ("script".equals(fieldName)) { } else if ("script".equals(currentFieldName)) {
script = parser.text(); script = parser.text();
} else if ("lang".equals(fieldName)) { } else if ("lang".equals(currentFieldName)) {
scriptLang = parser.text(); scriptLang = parser.text();
} }
} }

View File

@ -560,6 +560,22 @@ public class SimpleFacetsTests extends AbstractNodesTests {
assertThat(facet.mean(), equalTo(3d)); assertThat(facet.mean(), equalTo(3d));
assertThat(facet.sumOfSquares(), equalTo(20d)); assertThat(facet.sumOfSquares(), equalTo(20d));
// test multi field facet
searchResponse = client.prepareSearch()
.setQuery(matchAllQuery())
.addFacet(statisticalFacet("stats").fields("num", "multi_num"))
.execute().actionGet();
facet = searchResponse.facets().facet("stats");
assertThat(facet.name(), equalTo(facet.name()));
assertThat(facet.count(), equalTo(6l));
assertThat(facet.total(), equalTo(13d));
assertThat(facet.min(), equalTo(1d));
assertThat(facet.max(), equalTo(4d));
assertThat(facet.mean(), equalTo(13d / 6d));
assertThat(facet.sumOfSquares(), equalTo(35d));
// test cross field facet using the same facet name... // test cross field facet using the same facet name...
searchResponse = client.prepareSearch() searchResponse = client.prepareSearch()
.setQuery(matchAllQuery()) .setQuery(matchAllQuery())