SOLR-6354: stats.field can now be used to generate stats over the numeric results of arbitrary functions

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1626856 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Chris M. Hostetter 2014-09-22 17:45:53 +00:00
parent d89d1d091d
commit 34890299da
9 changed files with 776 additions and 271 deletions

View File

@ -131,6 +131,10 @@ New Features
* SOLR-6482: Add an onlyIfDown flag for DELETEREPLICA collections API command * SOLR-6482: Add an onlyIfDown flag for DELETEREPLICA collections API command
(Erick Erickson) (Erick Erickson)
* SOLR-6354: stats.field can now be used to generate stats over the numeric results
of arbitrary functions, e.g. stats.field={!func}product(price,popularity)
(hossman)
Bug Fixes Bug Fixes
---------------------- ----------------------

View File

@ -46,9 +46,8 @@ import org.apache.solr.search.SolrIndexSearcher;
public class FieldFacetStats { public class FieldFacetStats {
public final String name; public final String name;
final StatsField statsField;
final SchemaField facet_sf; final SchemaField facet_sf;
final SchemaField field_sf;
final boolean calcDistinct;
public final Map<String, StatsValues> facetStatsValues; public final Map<String, StatsValues> facetStatsValues;
private final Map<Integer, Integer> missingStats; private final Map<Integer, Integer> missingStats;
@ -62,11 +61,10 @@ public class FieldFacetStats {
SortedDocValues topLevelSortedValues = null; SortedDocValues topLevelSortedValues = null;
public FieldFacetStats(SolrIndexSearcher searcher, String name, SchemaField field_sf, SchemaField facet_sf, boolean calcDistinct) { public FieldFacetStats(SolrIndexSearcher searcher, SchemaField facet_sf, StatsField statsField) {
this.name = name; this.statsField = statsField;
this.field_sf = field_sf;
this.facet_sf = facet_sf; this.facet_sf = facet_sf;
this.calcDistinct = calcDistinct; this.name = facet_sf.getName();
topLevelReader = searcher.getAtomicReader(); topLevelReader = searcher.getAtomicReader();
valueSource = facet_sf.getType().getValueSource(facet_sf, null); valueSource = facet_sf.getType().getValueSource(facet_sf, null);
@ -79,7 +77,7 @@ public class FieldFacetStats {
private StatsValues getStatsValues(String key) throws IOException { private StatsValues getStatsValues(String key) throws IOException {
StatsValues stats = facetStatsValues.get(key); StatsValues stats = facetStatsValues.get(key);
if (stats == null) { if (stats == null) {
stats = StatsValuesFactory.createStatsValues(field_sf, calcDistinct); stats = StatsValuesFactory.createStatsValues(statsField);
facetStatsValues.put(key, stats); facetStatsValues.put(key, stats);
stats.setNextReader(context); stats.setNextReader(context);
} }
@ -142,7 +140,7 @@ public class FieldFacetStats {
String key = (String) pairs.getKey(); String key = (String) pairs.getKey();
StatsValues facetStats = facetStatsValues.get(key); StatsValues facetStats = facetStatsValues.get(key);
if (facetStats == null) { if (facetStats == null) {
facetStats = StatsValuesFactory.createStatsValues(field_sf, calcDistinct); facetStats = StatsValuesFactory.createStatsValues(statsField);
facetStatsValues.put(key, facetStats); facetStatsValues.put(key, facetStats);
} }
Integer count = (Integer) pairs.getValue(); Integer count = (Integer) pairs.getValue();

View File

@ -20,36 +20,19 @@ package org.apache.solr.handler.component;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.Collection;
import java.util.LinkedHashMap; import java.util.LinkedHashMap;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import org.apache.lucene.search.*;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ShardParams; import org.apache.solr.common.params.ShardParams;
import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.StatsParams; import org.apache.solr.common.params.StatsParams;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.request.DocValuesStats;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocSet; import org.apache.solr.search.DocSet;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QueryParsing;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SyntaxError;
/** /**
* Stats component calculates simple statistics on numeric field values * Stats component calculates simple statistics on numeric field values
@ -198,7 +181,8 @@ class StatsInfo {
for (String paramValue : statsParams) { for (String paramValue : statsParams) {
StatsField current = new StatsField(rb, paramValue); StatsField current = new StatsField(rb, paramValue);
statsFields.add(current); statsFields.add(current);
distribStatsValues.put(current.getOutputKey(), current.buildNewStatsValues()); distribStatsValues.put(current.getOutputKey(),
StatsValuesFactory.createStatsValues(current));
} }
} }
@ -224,200 +208,3 @@ class StatsInfo {
} }
/**
 * Captures everything the stats component needs to know about one
 * {@link StatsParams#STATS_FIELD} request parameter: the field it targets,
 * the key used in the response, any facet fields, and any tagged filters
 * that should be ignored when building the base set of documents.
 */
class StatsField {

  private final SolrIndexSearcher searcher;
  private final ResponseBuilder rb;
  private final String originalParam; // kept verbatim for error messages
  private final SolrParams localParams;
  private final SchemaField sf;
  private final String fieldName;
  private final String key;
  private final boolean calcDistinct;
  private final String[] facets;
  private final List<String> excludeTagList;

  /**
   * Parses a single raw stats.field value, including optional local params.
   *
   * @param rb the current request/response
   * @param statsParam the raw {@link StatsParams#STATS_FIELD} string
   */
  public StatsField(ResponseBuilder rb, String statsParam) {
    this.rb = rb;
    this.searcher = rb.req.getSearcher();
    this.originalParam = statsParam;

    final SolrParams requestParams = rb.req.getParams();
    try {
      final SolrParams parsed = QueryParsing.getLocalParams(statsParam, requestParams);
      // no local params at all => fall back to an empty param set
      this.localParams = (null != parsed) ? parsed : new ModifiableSolrParams();
    } catch (SyntaxError e) {
      throw new SolrException(ErrorCode.BAD_REQUEST, "Unable to parse " +
          StatsParams.STATS_FIELD + ": " + originalParam + " due to: "
          + e.getMessage(), e);
    }

    // the field name is the local param "value" when present, else the raw param
    this.fieldName = localParams.get(CommonParams.VALUE, statsParam);

    // the response key may be overridden via local params, else it is the field name
    this.key = localParams.get(CommonParams.OUTPUT_KEY, fieldName);

    this.calcDistinct =
        requestParams.getFieldBool(fieldName, StatsParams.STATS_CALC_DISTINCT, false);

    final String[] requestedFacets = requestParams.getFieldParams(key, StatsParams.STATS_FACET);
    if (null == requestedFacets) {
      this.facets = new String[0];
    } else {
      this.facets = requestedFacets;
    }

    // tags named here identify filters to leave out of the base DocSet
    final String excludeStr = localParams.get(CommonParams.EXCLUDE);
    if (null == excludeStr) {
      this.excludeTagList = Collections.<String>emptyList();
    } else {
      this.excludeTagList = StrUtils.splitSmart(excludeStr,',');
    }

    this.sf = searcher.getSchema().getField(fieldName);
  }

  /**
   * @return the key under which this instance's stats are reported to clients
   */
  public String getOutputKey() {
    return key;
  }

  /**
   * @return a fresh, empty {@link StatsValues} suitable for accumulating the
   *         stats described by this instance
   */
  public StatsValues buildNewStatsValues() {
    return StatsValuesFactory.createStatsValues(sf, calcDistinct);
  }

  /**
   * Determines the {@link DocSet} over which global (local-index) stats are computed.
   *
   * The main DocSet of the {@link ResponseBuilder} is returned as-is unless
   * {@link CommonParams#TAG tag}ged filter queries were excluded through the
   * {@link CommonParams#EXCLUDE ex} local param, in which case a new DocSet is
   * built from the main query and filters that were not excluded.
   */
  public DocSet computeBaseDocSet() throws IOException {
    final DocSet mainDocs = rb.getResults().docSet;
    final Map<?,?> taggedFilters = (Map<?,?>) rb.req.getContext().get("tags");

    if (null == taggedFilters || excludeTagList.isEmpty()) {
      // nothing was tagged, or nothing was asked to be excluded
      return mainDocs;
    }

    // identity semantics: only the exact Query instances that were tagged are excluded
    final IdentityHashMap<Query,Boolean> excluded = new IdentityHashMap<Query,Boolean>();
    for (String tag : excludeTagList) {
      final Object tagged = taggedFilters.get(tag);
      // entries are expected to be collections of QParser; anything else is ignored
      if (tagged instanceof Collection) {
        for (Object candidate : (Collection<?>) tagged) {
          if (candidate instanceof QParser) {
            try {
              excluded.put(((QParser) candidate).getQuery(), Boolean.TRUE);
            } catch (SyntaxError e) {
              // should have failed earlier when the query was executed, but just in case...
              throw new SolrException(ErrorCode.BAD_REQUEST, "Excluded query can't be parsed: " +
                  originalParam + " due to: " + e.getMessage(), e);
            }
          }
        }
      }
    }
    if (excluded.isEmpty()) {
      return mainDocs;
    }

    final List<Query> remaining = new ArrayList<Query>();

    // main query first ...
    final Query mainQuery = rb.getQuery();
    if (!excluded.containsKey(mainQuery)) {
      remaining.add(mainQuery);
    }

    // ... then every filter that was not excluded
    final List<Query> filters = rb.getFilters();
    if (null != filters) {
      for (Query filter : filters) {
        if (!excluded.containsKey(filter)) {
          remaining.add(filter);
        }
      }
    }

    return searcher.getDocSet(remaining);
  }

  /**
   * Computes the {@link StatsValues} for this instance over the given documents.
   *
   * @param base the documents to compute stats over
   * @see #computeBaseDocSet
   */
  public StatsValues computeLocalStatsValues(DocSet base) throws IOException {
    final boolean multiValued = sf.multiValued() || sf.getType().multiValuedFieldCache();
    // TODO: should this also be used for single-valued string fields? (should work fine)
    return multiValued
        ? DocValuesStats.getCounts(searcher, fieldName, base, calcDistinct, facets)
        : getFieldCacheStats(base);
  }

  /** First top-level doc id that lies beyond the given segment. */
  private static int segmentEnd(AtomicReaderContext segment) {
    return segment.docBase + segment.reader().maxDoc();
  }

  /**
   * Walks the documents of <code>base</code> segment by segment, feeding each one
   * to the overall accumulator and to one accumulator per facet field.
   */
  private StatsValues getFieldCacheStats(DocSet base) throws IOException {
    final IndexSchema schema = searcher.getSchema();
    final StatsValues overall = StatsValuesFactory.createStatsValues(sf, calcDistinct);

    final List<FieldFacetStats> perFacet = new ArrayList<>();
    for (String facetField : facets) {
      final SchemaField facetSchemaField = schema.getField(facetField);
      if (facetSchemaField.multiValued()) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
            "Stats can only facet on single-valued fields, not: " + facetField );
      }
      perFacet.add(new FieldFacetStats(searcher, facetField, sf, facetSchemaField, calcDistinct));
    }

    final Iterator<AtomicReaderContext> segments = searcher.getIndexReader().leaves().iterator();
    AtomicReaderContext current = null;
    final DocIterator docs = base.iterator();
    while (docs.hasNext()) {
      final int doc = docs.nextDoc();

      if (current == null || doc >= segmentEnd(current)) {
        // move forward to the segment that contains this doc
        current = segments.next();
        while (current == null || doc >= segmentEnd(current)) {
          current = segments.next();
        }
        assert doc >= current.docBase;

        // every accumulator must be told about the new segment
        overall.setNextReader(current);
        for (FieldFacetStats facetAccumulator : perFacet) {
          facetAccumulator.setNextReader(current);
        }
      }

      // accumulators work with segment-relative doc ids
      final int segmentDoc = doc - current.docBase;
      overall.accumulate(segmentDoc);
      for (FieldFacetStats facetAccumulator : perFacet) {
        facetAccumulator.facet(segmentDoc);
      }
    }

    for (FieldFacetStats facetAccumulator : perFacet) {
      overall.addFacet(facetAccumulator.name, facetAccumulator.facetStatsValues);
    }
    return overall;
  }
}

View File

@ -0,0 +1,372 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.component;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Collection;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang.StringUtils;
import org.apache.lucene.search.*;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.queries.function.FunctionQuery;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.QueryValueSource;
import org.apache.lucene.queries.function.valuesource.FieldCacheSource;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.StatsParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.request.SolrQueryRequest; // jdocs
import org.apache.solr.request.DocValuesStats;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.FunctionQParserPlugin;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QParserPlugin;
import org.apache.solr.search.QueryParsing;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SyntaxError;
/**
 * Models all of the information associated with a single {@link StatsParams#STATS_FIELD}
 * instance.
 *
 * <p>
 * An instance computes stats either directly over a {@link SchemaField}, or over the
 * results of a function/query exposed as a {@link ValueSource}. Exactly one of
 * {@link #getSchemaField} and {@link #getValueSource} returns a non-null value.
 * </p>
 *
 * @see StatsComponent
 */
public class StatsField {

  private final SolrIndexSearcher searcher;
  private final ResponseBuilder rb;
  private final String originalParam; // for error messages
  private final SolrParams localParams;
  private final ValueSource valueSource; // may be null if simple field stats
  private final SchemaField schemaField; // may be null if function/query stats
  private final String key;
  private final boolean calcDistinct; // TODO: put this inside localParams ? SOLR-6349 ?
  private final String[] facets;
  private final List<String> excludeTagList;

  /**
   * @param rb the current request/response
   * @param statsParam the raw {@link StatsParams#STATS_FIELD} string
   * @throws SolrException with {@link ErrorCode#BAD_REQUEST} if the param can not be parsed
   */
  public StatsField(ResponseBuilder rb, String statsParam) {
    this.rb = rb;
    this.searcher = rb.req.getSearcher();
    this.originalParam = statsParam;

    SolrParams params = rb.req.getParams();
    try {
      // deliberately not named "localParams" so that it does not shadow the field
      SolrParams parsedParams = QueryParsing.getLocalParams(originalParam, params);
      if (null == parsedParams) {
        // simplest possible input: bare string (field name)
        ModifiableSolrParams customParams = new ModifiableSolrParams();
        customParams.add(QueryParsing.V, originalParam);
        parsedParams = customParams;
      }
      this.localParams = parsedParams;

      String parserName = parsedParams.get(QueryParsing.TYPE);

      SchemaField sf = null;
      ValueSource vs = null;

      if ( StringUtils.isBlank(parserName) ) {
        // basic request for field stats
        sf = searcher.getSchema().getField(parsedParams.get(QueryParsing.V));
      } else {
        // we have a non trivial request to compute stats over a query (or function)

        // NOTE we could use QParser.getParser(...) here, but that would redundantly
        // reparse everything. ( TODO: refactor a common method in QParser ?)
        QParserPlugin qplug = rb.req.getCore().getQueryPlugin(parserName);
        QParser qp = qplug.createParser(parsedParams.get(QueryParsing.V),
                                        parsedParams, params, rb.req);

        // figure out what type of query we are dealing with, get the most direct ValueSource
        vs = extractValueSource(qp.parse());

        // if this ValueSource directly corresponds to a SchemaField, act as if
        // we were asked to compute stats on it directly
        // ie: "stats.field={!func key=foo}field(foo)" == "stats.field=foo"
        sf = extractSchemaField(vs, searcher.getSchema());
        if (null != sf) {
          vs = null;
        }
      }

      assert ( (null == vs) ^ (null == sf) ) : "exactly one of vs & sf must be null";

      this.schemaField = sf;
      this.valueSource = vs;

    } catch (SyntaxError e) {
      throw new SolrException(ErrorCode.BAD_REQUEST, "Unable to parse " +
                              StatsParams.STATS_FIELD + ": " + originalParam + " due to: "
                              + e.getMessage(), e);
    }

    // allow explicit setting of the response key via localparams...
    this.key = localParams.get(CommonParams.OUTPUT_KEY,
                               // default to the main param value...
                               localParams.get(CommonParams.VALUE,
                                               // default to entire original param str.
                                               originalParam));

    // function/query stats have no field name to scope a per-field override with,
    // so only the top level param is consulted for them
    this.calcDistinct = null == schemaField
      ? params.getBool(StatsParams.STATS_CALC_DISTINCT, false)
      : params.getFieldBool(schemaField.getName(), StatsParams.STATS_CALC_DISTINCT, false);

    String[] facets = params.getFieldParams(key, StatsParams.STATS_FACET);
    this.facets = (null == facets) ? new String[0] : facets;

    // figure out if we need a special base DocSet
    String excludeStr = localParams.get(CommonParams.EXCLUDE);
    this.excludeTagList = (null == excludeStr)
      ? Collections.<String>emptyList()
      : StrUtils.splitSmart(excludeStr,',');

    assert ( (null == this.valueSource) ^ (null == this.schemaField) )
      : "exactly one of valueSource & schemaField must be null";
  }

  /**
   * Inspects a {@link Query} to see if it directly maps to a {@link ValueSource},
   * and if so returns it -- otherwise wraps it as needed.
   *
   * @param q Query whose scores we have been asked to compute stats of
   * @return a ValueSource to use for computing the stats
   */
  private static ValueSource extractValueSource(Query q) {
    return (q instanceof FunctionQuery) ?
      // Common case: we're wrapping a func, so we can directly pull out ValueSource
      ((FunctionQuery) q).getValueSource() :
      // asked to compute stats over a query, wrap it up as a ValueSource
      new QueryValueSource(q, 0.0F);
  }

  /**
   * Inspects a {@link ValueSource} to see if it directly maps to a {@link SchemaField},
   * and if so returns it.
   *
   * @param vs ValueSource we've been asked to compute stats of
   * @param schema The Schema to use
   * @return Corresponding {@link SchemaField} or null if the ValueSource is more complex
   * @see FieldCacheSource
   */
  private static SchemaField extractSchemaField(ValueSource vs, IndexSchema schema) {
    if (vs instanceof FieldCacheSource) {
      String fieldName = ((FieldCacheSource)vs).getField();
      return schema.getField(fieldName);
    }
    return null;
  }

  /**
   * The key to be used when referring to this {@link StatsField} instance in the
   * response to clients.
   */
  public String getOutputKey() {
    return key;
  }

  /**
   * Computes a base {@link DocSet} for the current request to be used
   * when computing global stats for the local index.
   *
   * This is typically the same as the main DocSet for the {@link ResponseBuilder}
   * unless {@link CommonParams#TAG tag}ged filter queries have been excluded using
   * the {@link CommonParams#EXCLUDE ex} local param
   */
  public DocSet computeBaseDocSet() throws IOException {

    DocSet docs = rb.getResults().docSet;
    Map<?,?> tagMap = (Map<?,?>) rb.req.getContext().get("tags");

    if (excludeTagList.isEmpty() || null == tagMap) {
      // either the exclude list is empty, or there
      // aren't any tagged filters to exclude anyway.
      return docs;
    }

    // keyed by identity: only the exact Query instances produced by the tagged
    // parsers are excluded
    IdentityHashMap<Query,Boolean> excludeSet = new IdentityHashMap<>();
    for (String excludeTag : excludeTagList) {
      Object olst = tagMap.get(excludeTag);
      // tagMap has entries of List<String,List<QParser>>, but subject to change in the future
      if (!(olst instanceof Collection)) continue;
      for (Object o : (Collection<?>)olst) {
        if (!(o instanceof QParser)) continue;
        QParser qp = (QParser)o;
        try {
          excludeSet.put(qp.getQuery(), Boolean.TRUE);
        } catch (SyntaxError e) {
          // this shouldn't be possible since the request should have already
          // failed when attempting to execute the query, but just in case...
          throw new SolrException(ErrorCode.BAD_REQUEST, "Excluded query can't be parsed: " +
                                  originalParam + " due to: " + e.getMessage(), e);
        }
      }
    }
    if (excludeSet.isEmpty()) return docs;

    List<Query> qlist = new ArrayList<>();

    // add the base query
    if (!excludeSet.containsKey(rb.getQuery())) {
      qlist.add(rb.getQuery());
    }

    // add the filters
    if (rb.getFilters() != null) {
      for (Query q : rb.getFilters()) {
        if (!excludeSet.containsKey(q)) {
          qlist.add(q);
        }
      }
    }

    // get the new base docset for this facet
    return searcher.getDocSet(qlist);
  }

  /**
   * Computes the {@link StatsValues} for this {@link StatsField} relative to the
   * specified {@link DocSet}
   * @see #computeBaseDocSet
   */
  public StatsValues computeLocalStatsValues(DocSet base) throws IOException {

    if (null != schemaField
        && (schemaField.multiValued() || schemaField.getType().multiValuedFieldCache())) {

      // TODO: should this also be used for single-valued string fields? (should work fine)
      return DocValuesStats.getCounts(searcher, this, base, facets);
    } else {
      // either a single valued field we pull from FieldCache, or an explicit
      // function ValueSource
      return computeLocalValueSourceStats(base);
    }
  }

  /**
   * Iterates over the documents in <code>base</code>, segment by segment, accumulating
   * the overall stats as well as the stats for each requested facet field.
   *
   * @throws SolrException with {@link ErrorCode#BAD_REQUEST} if a facet field is multi-valued
   */
  private StatsValues computeLocalValueSourceStats(DocSet base) throws IOException {

    IndexSchema schema = searcher.getSchema();

    final StatsValues allstats = StatsValuesFactory.createStatsValues(this);

    List<FieldFacetStats> facetStats = new ArrayList<>();
    for( String facetField : facets ) {
      SchemaField fsf = schema.getField(facetField);

      if ( fsf.multiValued()) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
          "Stats can only facet on single-valued fields, not: " + facetField );
      }

      facetStats.add(new FieldFacetStats(searcher, fsf, this));
    }

    final Iterator<AtomicReaderContext> ctxIt = searcher.getIndexReader().leaves().iterator();
    AtomicReaderContext ctx = null;
    for (DocIterator docsIt = base.iterator(); docsIt.hasNext(); ) {
      final int doc = docsIt.nextDoc();
      if (ctx == null || doc >= ctx.docBase + ctx.reader().maxDoc()) {
        // advance
        do {
          ctx = ctxIt.next();
        } while (ctx == null || doc >= ctx.docBase + ctx.reader().maxDoc());
        assert doc >= ctx.docBase;

        // propagate the context among accumulators.
        allstats.setNextReader(ctx);
        for (FieldFacetStats f : facetStats) {
          f.setNextReader(ctx);
        }
      }

      // accumulate (the accumulators are given segment-relative doc ids)
      allstats.accumulate(doc - ctx.docBase);
      for (FieldFacetStats f : facetStats) {
        f.facet(doc - ctx.docBase);
      }
    }

    for (FieldFacetStats f : facetStats) {
      allstats.addFacet(f.name, f.facetStatsValues);
    }
    return allstats;
  }

  /**
   * The searcher that should be used for processing local stats
   * @see SolrQueryRequest#getSearcher
   */
  public SolrIndexSearcher getSearcher() {
    // see AbstractStatsValues.setNextReader
    return searcher;
  }

  /**
   * The {@link SchemaField} whose results these stats are computed over, may be null
   * if the stats are computed over the results of a function or query
   *
   * @see #getValueSource
   */
  public SchemaField getSchemaField() {
    return schemaField;
  }

  /**
   * The {@link ValueSource} of a function or query whose results these stats are computed
   * over, may be null if the stats are directly over a {@link SchemaField}
   *
   * @see #getSchemaField
   */
  public ValueSource getValueSource() {
    return valueSource;
  }

  /**
   * Whether or not the effective value of the {@link StatsParams#STATS_CALC_DISTINCT} param
   * is true or false for this StatsField
   */
  public boolean getCalcDistinct() {
    return calcDistinct;
  }

  /**
   * Returns a debugging representation containing the original, unparsed param string.
   */
  @Override
  public String toString() {
    return "StatsField<" + originalParam + ">";
  }
}

View File

@ -36,22 +36,30 @@ import org.apache.solr.schema.*;
public class StatsValuesFactory { public class StatsValuesFactory {
/** /**
* Creates an instance of StatsValues which supports values from a field of the given FieldType * Creates an instance of StatsValues which supports values from the specified {@link StatsField}
* *
* @param sf SchemaField for the field whose statistics will be created by the resulting StatsValues * @param statsField {@link StatsField} whose statistics will be created by the resulting {@link StatsValues}
* @return Instance of StatsValues that will create statistics from values from a field of the given type * @return Instance of {@link StatsValues} that will create statistics from values from the specified {@link StatsField}
*/ */
public static StatsValues createStatsValues(SchemaField sf, boolean calcDistinct) { public static StatsValues createStatsValues(StatsField statsField) {
// TODO: allow for custom field types
FieldType fieldType = sf.getType(); final SchemaField sf = statsField.getSchemaField();
if (null == sf) {
// function stats
return new NumericStatsValues(statsField);
}
final FieldType fieldType = sf.getType(); // TODO: allow FieldType to provide impl.
if (TrieDateField.class.isInstance(fieldType)) { if (TrieDateField.class.isInstance(fieldType)) {
return new DateStatsValues(sf, calcDistinct); return new DateStatsValues(statsField);
} else if (TrieField.class.isInstance(fieldType)) { } else if (TrieField.class.isInstance(fieldType)) {
return new NumericStatsValues(sf, calcDistinct); return new NumericStatsValues(statsField);
} else if (StrField.class.isInstance(fieldType)) { } else if (StrField.class.isInstance(fieldType)) {
return new StringStatsValues(sf, calcDistinct); return new StringStatsValues(statsField);
} else if (sf.getType().getClass().equals(EnumField.class)) { } else if (sf.getType().getClass().equals(EnumField.class)) {
return new EnumStatsValues(sf, calcDistinct); return new EnumStatsValues(statsField);
} else { } else {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Field type " + fieldType + " is not currently supported"); throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Field type " + fieldType + " is not currently supported");
} }
@ -59,34 +67,81 @@ public class StatsValuesFactory {
} }
/** /**
* Abstract implementation of {@link org.apache.solr.handler.component.StatsValues} that provides the default behavior * Abstract implementation of {@link org.apache.solr.handler.component.StatsValues}
* for most StatsValues implementations. * that provides the default behavior for most StatsValues implementations.
* *
* There are very few requirements placed on what statistics concrete implementations should collect, with the only required * There are very few requirements placed on what statistics concrete implementations
* statistics being the minimum and maximum values. * should collect, with the only required statistics being the minimum and maximum values.
*/ */
abstract class AbstractStatsValues<T> implements StatsValues { abstract class AbstractStatsValues<T> implements StatsValues {
private static final String FACETS = "facets"; private static final String FACETS = "facets";
/** Tracks all data about the stats we need to collect */
final protected StatsField statsField;
/**
* local copy to save method dispatch in tight loops
* @see StatsField#getCalcDistinct
*/
final protected boolean calcDistinct;
/** may be null if we are collecting stats directly from a function ValueSource */
final protected SchemaField sf; final protected SchemaField sf;
/** may be null if we are collecting stats directly from a function ValueSource */
final protected FieldType ft; final protected FieldType ft;
/**
* Either a function value source to collect from, or the ValueSource associated
* with a single valued field we are collecting from. Will be null until/unless
* {@link #setNextReader} is called at least once
*/
private ValueSource valueSource;
/**
* Context to use when retrieving FunctionValues, will be null until/unless
* {@link #setNextReader} is called at least once
*/
private Map vsContext;
/**
* Values to collect, will be null until/unless {@link #setNextReader} is called
* at least once
*/
protected FunctionValues values;
protected T max; protected T max;
protected T min; protected T min;
protected long missing; protected long missing;
protected long count; protected long count;
protected long countDistinct; protected long countDistinct;
protected Set<T> distinctValues; protected Set<T> distinctValues;
private ValueSource valueSource;
protected FunctionValues values;
protected boolean calcDistinct = false;
// facetField facetValue // facetField facetValue
protected Map<String, Map<String, StatsValues>> facets = new HashMap<>(); protected Map<String, Map<String, StatsValues>> facets = new HashMap<>();
protected AbstractStatsValues(SchemaField sf, boolean calcDistinct) { protected AbstractStatsValues(StatsField statsField) {
this.sf = sf; this.statsField = statsField;
this.ft = sf.getType(); this.calcDistinct = statsField.getCalcDistinct();
this.distinctValues = new TreeSet<>(); this.distinctValues = new TreeSet<>();
this.calcDistinct = calcDistinct;
// alternatively, we could refactor a common base class that doesn't know/care
// about either SchemaField or ValueSource - but then there would be a lot of
// duplicate code between "NumericSchemaFieldStatsValues" and
// "NumericValueSourceStatsValues" which would have diff parent classes
//
// part of the complexity here being that the StatsValues API serves two
// masters: collecting concrete Values from things like DocValuesStats and
// the distributed aggregation logic, but also collecting docIds which it then
// uses to go out and pull concrete values from the ValueSource
// (from a func, or single valued field)
if (null != statsField.getSchemaField()) {
assert null == statsField.getValueSource();
this.sf = statsField.getSchemaField();
this.ft = sf.getType();
} else {
assert null != statsField.getValueSource();
assert null == statsField.getSchemaField();
this.sf = null;
this.ft = null;
}
} }
/** /**
@ -121,7 +176,7 @@ abstract class AbstractStatsValues<T> implements StatsValues {
String val = vals.getName(j); String val = vals.getName(j);
StatsValues vvals = addTo.get(val); StatsValues vvals = addTo.get(val);
if (vvals == null) { if (vvals == null) {
vvals = StatsValuesFactory.createStatsValues(sf, calcDistinct); vvals = StatsValuesFactory.createStatsValues(statsField);
addTo.put(val, vvals); addTo.put(val, vvals);
} }
vvals.accumulate((NamedList) vals.getVal(j)); vvals.accumulate((NamedList) vals.getVal(j));
@ -134,11 +189,14 @@ abstract class AbstractStatsValues<T> implements StatsValues {
*/ */
@Override @Override
public void accumulate(BytesRef value, int count) { public void accumulate(BytesRef value, int count) {
if (null == ft) {
throw new IllegalStateException("Can't collect & convert BytesRefs on stats that do't use a a FieldType: " + statsField);
}
T typedValue = (T)ft.toObject(sf, value); T typedValue = (T)ft.toObject(sf, value);
accumulate(typedValue, count); accumulate(typedValue, count);
} }
public void accumulate(T value, int count) { public void accumulate(T value, int count) {
this.count += count; this.count += count;
if (calcDistinct) { if (calcDistinct) {
distinctValues.add(value); distinctValues.add(value);
@ -203,11 +261,18 @@ abstract class AbstractStatsValues<T> implements StatsValues {
return res; return res;
} }
public void setNextReader(AtomicReaderContext ctx) throws IOException { /**
* {@inheritDoc}
*/
public void setNextReader(AtomicReaderContext ctx) throws IOException {
if (valueSource == null) { if (valueSource == null) {
valueSource = ft.getValueSource(sf, null); // first time we've collected local values, get the right ValueSource
valueSource = (null == ft)
? statsField.getValueSource()
: ft.getValueSource(sf, null);
vsContext = ValueSource.newContext(statsField.getSearcher());
} }
values = valueSource.getValues(Collections.emptyMap(), ctx); values = valueSource.getValues(vsContext, ctx);
} }
/** /**
@ -249,8 +314,8 @@ class NumericStatsValues extends AbstractStatsValues<Number> {
double sum; double sum;
double sumOfSquares; double sumOfSquares;
public NumericStatsValues(SchemaField sf, boolean calcDistinct) { public NumericStatsValues(StatsField statsField) {
super(sf, calcDistinct); super(statsField);
min = Double.POSITIVE_INFINITY; min = Double.POSITIVE_INFINITY;
max = Double.NEGATIVE_INFINITY; max = Double.NEGATIVE_INFINITY;
} }
@ -324,8 +389,8 @@ class NumericStatsValues extends AbstractStatsValues<Number> {
*/ */
class EnumStatsValues extends AbstractStatsValues<EnumFieldValue> { class EnumStatsValues extends AbstractStatsValues<EnumFieldValue> {
public EnumStatsValues(SchemaField sf, boolean calcDistinct) { public EnumStatsValues(StatsField statsField) {
super(sf, calcDistinct); super(statsField);
} }
/** /**
@ -393,8 +458,8 @@ class DateStatsValues extends AbstractStatsValues<Date> {
private long sum = 0; private long sum = 0;
double sumOfSquares = 0; double sumOfSquares = 0;
public DateStatsValues(SchemaField sf, boolean calcDistinct) { public DateStatsValues(StatsField statsField) {
super(sf, calcDistinct); super(statsField);
} }
@Override @Override
@ -479,8 +544,8 @@ class DateStatsValues extends AbstractStatsValues<Date> {
*/ */
class StringStatsValues extends AbstractStatsValues<String> { class StringStatsValues extends AbstractStatsValues<String> {
public StringStatsValues(SchemaField sf, boolean calcDistinct) { public StringStatsValues(StatsField statsField) {
super(sf, calcDistinct); super(statsField);
} }
@Override @Override

View File

@ -35,6 +35,7 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LongValues; import org.apache.lucene.util.LongValues;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.handler.component.FieldFacetStats; import org.apache.solr.handler.component.FieldFacetStats;
import org.apache.solr.handler.component.StatsField;
import org.apache.solr.handler.component.StatsValues; import org.apache.solr.handler.component.StatsValues;
import org.apache.solr.handler.component.StatsValuesFactory; import org.apache.solr.handler.component.StatsValuesFactory;
import org.apache.solr.schema.FieldType; import org.apache.solr.schema.FieldType;
@ -52,10 +53,17 @@ import org.apache.solr.search.SolrIndexSearcher;
public class DocValuesStats { public class DocValuesStats {
private DocValuesStats() {} private DocValuesStats() {}
public static StatsValues getCounts(SolrIndexSearcher searcher, String fieldName, DocSet docs, boolean calcDistinct, String[] facet) throws IOException { public static StatsValues getCounts(SolrIndexSearcher searcher, StatsField statsField, DocSet docs, String[] facet) throws IOException {
SchemaField schemaField = searcher.getSchema().getField(fieldName);
FieldType ft = schemaField.getType(); final SchemaField schemaField = statsField.getSchemaField();
StatsValues res = StatsValuesFactory.createStatsValues(schemaField, calcDistinct); final boolean calcDistinct = statsField.getCalcDistinct();
assert null != statsField.getSchemaField()
: "DocValuesStats requires a StatsField using a SchemaField";
final String fieldName = schemaField.getName();
final FieldType ft = schemaField.getType();
final StatsValues res = StatsValuesFactory.createStatsValues(statsField);
//Initialize facetstats, if facets have been passed in //Initialize facetstats, if facets have been passed in
final FieldFacetStats[] facetStats = new FieldFacetStats[facet.length]; final FieldFacetStats[] facetStats = new FieldFacetStats[facet.length];
@ -69,7 +77,7 @@ public class DocValuesStats {
} }
SchemaField facetSchemaField = searcher.getSchema().getField(facetField); SchemaField facetSchemaField = searcher.getSchema().getField(facetField);
facetStats[upto++] = new FieldFacetStats(searcher, facetField, schemaField, facetSchemaField, calcDistinct); facetStats[upto++] = new FieldFacetStats(searcher, facetSchemaField, statsField);
} }
// TODO: remove multiValuedFieldCache(), check dv type / uninversion type? // TODO: remove multiValuedFieldCache(), check dv type / uninversion type?
final boolean multiValued = schemaField.multiValued() || ft.multiValuedFieldCache(); final boolean multiValued = schemaField.multiValued() || ft.multiValuedFieldCache();

View File

@ -40,6 +40,7 @@ import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.component.FieldFacetStats; import org.apache.solr.handler.component.FieldFacetStats;
import org.apache.solr.handler.component.StatsField;
import org.apache.solr.handler.component.StatsValues; import org.apache.solr.handler.component.StatsValues;
import org.apache.solr.handler.component.StatsValuesFactory; import org.apache.solr.handler.component.StatsValuesFactory;
import org.apache.solr.schema.FieldType; import org.apache.solr.schema.FieldType;
@ -467,22 +468,24 @@ public class UnInvertedField extends DocTermOrds {
* *
* @param searcher The Searcher to use to gather the statistics * @param searcher The Searcher to use to gather the statistics
* @param baseDocs The {@link org.apache.solr.search.DocSet} to gather the stats on * @param baseDocs The {@link org.apache.solr.search.DocSet} to gather the stats on
* @param calcDistinct whether distinct values should be collected and counted * @param statsField the {@link StatsField} param corresponding to a real {@link SchemaField} to compute stats over
* @param facet One or more fields to facet on. * @param facet One or more fields to facet on.
* @return The {@link org.apache.solr.handler.component.StatsValues} collected * @return The {@link org.apache.solr.handler.component.StatsValues} collected
* @throws IOException If there is a low-level I/O error. * @throws IOException If there is a low-level I/O error.
*/ */
public StatsValues getStats(SolrIndexSearcher searcher, DocSet baseDocs, boolean calcDistinct, String[] facet) throws IOException { public StatsValues getStats(SolrIndexSearcher searcher, DocSet baseDocs, StatsField statsField, String[] facet) throws IOException {
//this function is ripped off nearly wholesale from the getCounts function to use //this function is ripped off nearly wholesale from the getCounts function to use
//for multiValued fields within the StatsComponent. may be useful to find common //for multiValued fields within the StatsComponent. may be useful to find common
//functionality between the two and refactor code somewhat //functionality between the two and refactor code somewhat
use.incrementAndGet(); use.incrementAndGet();
SchemaField sf = searcher.getSchema().getField(field); assert null != statsField.getSchemaField()
// FieldType ft = sf.getType(); : "DocValuesStats requires a StatsField using a SchemaField";
StatsValues allstats = StatsValuesFactory.createStatsValues(sf, calcDistinct); SchemaField sf = statsField.getSchemaField();
// FieldType ft = sf.getType();
StatsValues allstats = StatsValuesFactory.createStatsValues(statsField);
DocSet docs = baseDocs; DocSet docs = baseDocs;
int baseSize = docs.size(); int baseSize = docs.size();
@ -498,7 +501,7 @@ public class UnInvertedField extends DocTermOrds {
SortedDocValues si; SortedDocValues si;
for (String f : facet) { for (String f : facet) {
SchemaField facet_sf = searcher.getSchema().getField(f); SchemaField facet_sf = searcher.getSchema().getField(f);
finfo[i] = new FieldFacetStats(searcher, f, sf, facet_sf, calcDistinct); finfo[i] = new FieldFacetStats(searcher, facet_sf, statsField);
i++; i++;
} }

View File

@ -374,6 +374,12 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
"stats.field", "{!key=special_key}stats_dt", "stats.field", "{!key=special_key}stats_dt",
"stats.field", "{!ex=xxx}stats_dt"); "stats.field", "{!ex=xxx}stats_dt");
query("q","*:*", "sort",i1+" desc", "stats", "true",
// do a really simple query so distributed IDF doesn't cause problems
// when comparing with control collection
"stats.field", "{!lucene key=q_key}" + i1 + "foo_b:true",
"stats.field", "{!func key=f_key}sum(" + tlong +","+i1+")");
query("q","*:*", "sort",i1+" desc", "stats", "true", query("q","*:*", "sort",i1+" desc", "stats", "true",
"stats.field", "stats_dt", "stats.field", "stats_dt",
"stats.field", i1, "stats.field", i1,

View File

@ -27,7 +27,11 @@ import java.util.Locale;
import java.util.Map; import java.util.Map;
import java.util.TimeZone; import java.util.TimeZone;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.queries.function.valuesource.QueryValueSource;
import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.MapSolrParams; import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.SolrParams;
@ -35,6 +39,7 @@ import org.apache.solr.common.params.StatsParams;
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
import org.apache.solr.request.LocalSolrQueryRequest; import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.SchemaField; import org.apache.solr.schema.SchemaField;
import org.apache.solr.util.AbstractSolrTestCase; import org.apache.solr.util.AbstractSolrTestCase;
import org.junit.BeforeClass; import org.junit.BeforeClass;
@ -183,15 +188,72 @@ public class StatsComponentTest extends AbstractSolrTestCase {
, kpre + "double[@name='mean'][.='-25.0']" , kpre + "double[@name='mean'][.='-25.0']"
, kpre + "double[@name='stddev'][.='12.909944487358056']" , kpre + "double[@name='stddev'][.='12.909944487358056']"
); );
} }
} }
// we should be able to compute the exact same stats for a field even
// when we specify it using the "field()" function, or use other
// identity-equivalent functions
for (String param : new String[] {
// bare
"{!key="+key+" ex=key_ex_tag}" + f,
"{!key="+key+" ex=key_ex_tag v="+f+"}",
// field func
"{!lucene key="+key+" ex=key_ex_tag}_val_:\"field("+f+")\"",
"{!func key="+key+" ex=key_ex_tag}field("+f+")",
"{!type=func key="+key+" ex=key_ex_tag}field("+f+")",
"{!type=func key="+key+" ex=key_ex_tag v=field("+f+")}",
"{!type=func key="+key+" ex=key_ex_tag v='field("+f+")'}",
// identity math functions
"{!type=func key="+key+" ex=key_ex_tag v='sum(0,"+f+")'}",
"{!type=func key="+key+" ex=key_ex_tag v='product(1,"+f+")'}",
}) {
assertQ("test statistics over field specified as a function: " + param,
// NOTE: baseParams aren't used, we're looking at the function
req("q", "*:*", "stats", "true", "stats.calcdistinct", "true",
"fq", "{!tag=key_ex_tag}-id:4",
"stats.field", param)
, kpre + "double[@name='min'][.='-40.0']"
, kpre + "double[@name='max'][.='-10.0']"
, kpre + "double[@name='sum'][.='-100.0']"
, kpre + "long[@name='count'][.='4']"
, kpre + "long[@name='missing'][.='0']"
, kpre + "long[@name='countDistinct'][.='4']"
, "count(" + kpre + "arr[@name='distinctValues']/*)=4"
, kpre + "double[@name='sumOfSquares'][.='3000.0']"
, kpre + "double[@name='mean'][.='-25.0']"
, kpre + "double[@name='stddev'][.='12.909944487358056']"
);
}
// now get stats over a non-trivial function on our (single) field
String func = "product(2, " + f + ")";
assertQ("test function statistics & key override",
// NOTE: baseParams aren't used, we're looking at the function
req("q", "*:*", "stats", "true", "stats.calcdistinct", "true",
"fq", "{!tag=key_ex_tag}-id:4",
"stats.field", "{!func key="+key+" ex=key_ex_tag}"+func)
, kpre + "double[@name='min'][.='-80.0']"
, kpre + "double[@name='max'][.='-20.0']"
, kpre + "double[@name='sum'][.='-200.0']"
, kpre + "long[@name='count'][.='4']"
, kpre + "long[@name='missing'][.='0']"
, kpre + "long[@name='countDistinct'][.='4']"
, "count(" + kpre + "arr[@name='distinctValues']/*)=4"
, kpre + "double[@name='sumOfSquares'][.='12000.0']"
, kpre + "double[@name='mean'][.='-50.0']"
, kpre + "double[@name='stddev'][.='25.81988897471611']"
);
} }
public void doTestMVFieldStatisticsResult(String f) throws Exception { public void doTestMVFieldStatisticsResult(String f) throws Exception {
assertU(adoc("id", "1", f, "-10", f, "-100", "active_s", "true")); assertU(adoc("id", "1", f, "-10", f, "-100", "active_s", "true"));
@ -288,6 +350,7 @@ public class StatsComponentTest extends AbstractSolrTestCase {
, "//lst[@name='false']/double[@name='stddev'][.='23.59908190304586']" , "//lst[@name='false']/double[@name='stddev'][.='23.59908190304586']"
); );
} }
} }
public void testFieldStatisticsResultsStringField() throws Exception { public void testFieldStatisticsResultsStringField() throws Exception {
@ -313,6 +376,20 @@ public class StatsComponentTest extends AbstractSolrTestCase {
"//long[@name='missing'][.='1']", "//long[@name='missing'][.='1']",
"//long[@name='countDistinct'][.='3']", "//long[@name='countDistinct'][.='3']",
"count(//arr[@name='distinctValues']/str)=3"); "count(//arr[@name='distinctValues']/str)=3");
// stats over a string function
assertQ("strdist func stats",
req("q", "*:*",
"fq", "-id:4", // SOLR-6540
"stats","true",
"stats.field","{!func}strdist('string22',active_s,edit)")
, "//double[@name='min'][.='0.75']"
, "//double[@name='max'][.='0.875']"
, "//double[@name='sum'][.='2.375']"
, "//long[@name='count'][.='3']"
,"//long[@name='missing'][.='0']" // SOLR-6540 ==> '1'
);
} }
public void testFieldStatisticsResultsDateField() throws Exception { public void testFieldStatisticsResultsDateField() throws Exception {
@ -358,6 +435,10 @@ public class StatsComponentTest extends AbstractSolrTestCase {
assertU(adoc("id", "4", f, "-40")); assertU(adoc("id", "4", f, "-40"));
assertU(commit()); assertU(commit());
final String fpre = XPRE + "lst[@name='stats_fields']/lst[@name='"+f+"']/";
final String key = "key_key";
final String kpre = XPRE + "lst[@name='stats_fields']/lst[@name='"+key+"']/";
// status should be the same regardless of baseParams // status should be the same regardless of baseParams
for (SolrParams baseParams : baseParamsSet) { for (SolrParams baseParams : baseParamsSet) {
@ -376,6 +457,46 @@ public class StatsComponentTest extends AbstractSolrTestCase {
, "//double[@name='stddev'][.='15.275252316519467']" , "//double[@name='stddev'][.='15.275252316519467']"
); );
} }
// we should be able to compute the exact same stats for a field even
// when we specify it using the "field()" function, or use other
// identity-equivalent functions
for (String param : new String[] {
// bare
"{!key="+key+" ex=key_ex_tag}" + f,
"{!key="+key+" ex=key_ex_tag v="+f+"}",
// field func
"{!lucene key="+key+" ex=key_ex_tag}_val_:\"field("+f+")\"",
"{!func key="+key+" ex=key_ex_tag}field("+f+")",
"{!type=func key="+key+" ex=key_ex_tag}field("+f+")",
"{!type=func key="+key+" ex=key_ex_tag v=field("+f+")}",
"{!type=func key="+key+" ex=key_ex_tag v='field("+f+")'}",
// identity math functions don't work as expected due to LUCENE-5961
// "{!type=func key="+key+" ex=key_ex_tag v='sum(0,"+f+")'}",
// "{!type=func key="+key+" ex=key_ex_tag v='product(1,"+f+")'}",
}) {
assertQ("test statistics over field specified as a function: " + param,
// NOTE: baseParams aren't used, we're looking at the function
req("q", "*:*", "stats", "true", "stats.calcdistinct", "true",
"fq", "{!tag=key_ex_tag}-id:4",
"stats.field", param)
, kpre + "double[@name='min'][.='-40.0']"
, kpre + "double[@name='max'][.='-10.0']"
, kpre + "double[@name='sum'][.='-70.0']"
, kpre + "long[@name='count'][.='3']"
, kpre + "long[@name='missing'][.='1']"
, kpre + "long[@name='countDistinct'][.='3']"
, "count(" + kpre + "arr[@name='distinctValues']/*)=3"
, kpre + "double[@name='sumOfSquares'][.='2100.0']"
, kpre + "double[@name='mean'][.='-23.333333333333332']"
, kpre + "double[@name='stddev'][.='15.275252316519467']"
);
}
} }
public void doTestFacetStatisticsResult(String f, SolrParams[] baseParamsSet) throws Exception { public void doTestFacetStatisticsResult(String f, SolrParams[] baseParamsSet) throws Exception {
@ -422,6 +543,54 @@ public class StatsComponentTest extends AbstractSolrTestCase {
, pre+"/lst[@name='false']/double[@name='stddev'][.='7.0710678118654755']" , pre+"/lst[@name='false']/double[@name='stddev'][.='7.0710678118654755']"
); );
} }
// we should be able to compute the exact same stats & stats.facet for a field even
// when we specify it using the "field()" function, or use other
// identity-equivalent functions
for (String param : new String[] {
    // bare
    "{!key="+f+" ex=key_ex_tag}" + f,
    "{!key="+f+" ex=key_ex_tag v="+f+"}",
    // field func
    "{!lucene key="+f+" ex=key_ex_tag}_val_:\"field("+f+")\"",
    "{!func key="+f+" ex=key_ex_tag}field("+f+")",
    "{!type=func key="+f+" ex=key_ex_tag}field("+f+")",
    "{!type=func key="+f+" ex=key_ex_tag v=field("+f+")}",
    "{!type=func key="+f+" ex=key_ex_tag v='field("+f+")'}",

    // identity math functions don't work as expected due to LUCENE-5961
    // "{!type=func key="+f+" ex=key_ex_tag v='sum(0,"+f+")'}",
    // "{!type=func key="+f+" ex=key_ex_tag v='product(1,"+f+")'}",
  }) {
  // every spelling uses key=<field name>, so the same xpath prefix applies to all of them;
  // the fq is tagged and excluded via ex=key_ex_tag so it must not affect the stats
  assertQ("test statis & stats.facet over field specified as a function: " + param,
          req("q", "*:*", "stats", "true", "stats.calcdistinct", "true",
              "fq", "{!tag=key_ex_tag}-id:4",
              "stats.field", param,
              "stats.facet", "active_s", "stats.facet", "other_s")
          , "*[count("+pre+")=1]"
          // facet bucket 'true'
          , pre+"/lst[@name='true']/double[@name='min'][.='10.0']"
          , pre+"/lst[@name='true']/double[@name='max'][.='20.0']"
          , pre+"/lst[@name='true']/double[@name='sum'][.='30.0']"
          , pre+"/lst[@name='true']/long[@name='count'][.='2']"
          , pre+"/lst[@name='true']/long[@name='missing'][.='0']"
          , pre + "/lst[@name='true']/long[@name='countDistinct'][.='2']"
          , "count(" + pre + "/lst[@name='true']/arr[@name='distinctValues']/*)=2"
          , pre+"/lst[@name='true']/double[@name='sumOfSquares'][.='500.0']"
          , pre+"/lst[@name='true']/double[@name='mean'][.='15.0']"
          , pre+"/lst[@name='true']/double[@name='stddev'][.='7.0710678118654755']"
          // facet bucket 'false' (the distinct-value checks previously re-tested 'true')
          , pre+"/lst[@name='false']/double[@name='min'][.='30.0']"
          , pre+"/lst[@name='false']/double[@name='max'][.='40.0']"
          , pre+"/lst[@name='false']/double[@name='sum'][.='70.0']"
          , pre+"/lst[@name='false']/long[@name='count'][.='2']"
          , pre+"/lst[@name='false']/long[@name='missing'][.='0']"
          , pre + "/lst[@name='false']/long[@name='countDistinct'][.='2']"
          , "count(" + pre + "/lst[@name='false']/arr[@name='distinctValues']/*)=2"
          , pre+"/lst[@name='false']/double[@name='sumOfSquares'][.='2500.0']"
          , pre+"/lst[@name='false']/double[@name='mean'][.='35.0']"
          , pre+"/lst[@name='false']/double[@name='stddev'][.='7.0710678118654755']"
  );
}
} }
public void doTestFacetStatisticsMissingResult(String f, SolrParams[] baseParamsSet) throws Exception { public void doTestFacetStatisticsMissingResult(String f, SolrParams[] baseParamsSet) throws Exception {
@ -463,6 +632,7 @@ public class StatsComponentTest extends AbstractSolrTestCase {
, "//lst[@name='false']/double[@name='stddev'][.='0.0']" , "//lst[@name='false']/double[@name='stddev'][.='0.0']"
); );
} }
} }
public void testFieldStatisticsResultsNumericFieldAlwaysMissing() throws Exception { public void testFieldStatisticsResultsNumericFieldAlwaysMissing() throws Exception {
@ -704,6 +874,98 @@ public class StatsComponentTest extends AbstractSolrTestCase {
} }
/**
 * Checks stats.field over things that are not plain fields: a function
 * combining two float fields, and the scores of a lucene query.
 * Both requests use the response key "k".
 */
public void testMiscQueryStats() throws Exception {
  final String statsXPath = XPRE + "lst[@name='stats_fields']/lst[@name='k']/";

  // one row per document: id, a_f, b_f, foo_t
  final String[][] docRows = {
    { "1", "2.3", "9.7", "how now brown cow" },
    { "2", "4.5", "8.6", "cow cow cow cow" },
    { "3", "5.6", "7.5", "red fox" },
    { "4", "6.7", "6.3", "red cow" },
  };
  for (String[] row : docRows) {
    assertU(adoc("id", row[0], "a_f", row[1], "b_f", row[2], "foo_t", row[3]));
  }
  assertU(commit());

  // stats over product(a_f,b_f), restricted to the docs matching foo_t:cow
  final String[] expectedProductStats = {
    statsXPath + "double[@name='min'][.='22.309999465942383']",
    statsXPath + "double[@name='max'][.='42.209999084472656']",
    statsXPath + "double[@name='sum'][.='103.21999931335449']",
    statsXPath + "long[@name='count'][.='3']",
    statsXPath + "long[@name='missing'][.='0']",
    statsXPath + "double[@name='sumOfSquares'][.='3777.110157933046']",
    statsXPath + "double[@name='mean'][.='34.40666643778483']",
    statsXPath + "double[@name='stddev'][.='10.622007151430441']",
  };
  assertQ("functions over multiple fields",
          req("q","foo_t:cow", "stats", "true",
              "stats.field", "{!func key=k}product(a_f,b_f)"),
          expectedProductStats);

  // stats over the query itself, computed across all docs
  // scores are: 1.0, 0.625, 0.5, & "missing"
  final String[] expectedScoreStats = {
    statsXPath + "double[@name='min'][.='0.5']",
    statsXPath + "double[@name='max'][.='1.0']",
    statsXPath + "double[@name='sum'][.='2.125']",
    statsXPath + "long[@name='count'][.='3']",
    statsXPath + "long[@name='missing'][.='1']",
    statsXPath + "double[@name='sumOfSquares'][.='1.640625']",
    statsXPath + "double[@name='mean'][.='0.7083333333333334']",
    statsXPath + "double[@name='stddev'][.='0.2602082499332666']",
  };
  assertQ("functions over a query",
          req("q","*:*", "stats", "true",
              "stats.field", "{!lucene key=k}foo_t:cow"),
          expectedScoreStats);
}
/**
 * Whitebox test of {@link StatsField} parsing: different spellings of the
 * same stats.field param are expected to parse into equivalent StatsField
 * instances (either SchemaField based or QueryValueSource based).
 */
public void testStatsFieldWhitebox() throws Exception {
  final StatsComponent statsComponent = new StatsComponent();
  final List<SearchComponent> components = new ArrayList<>(1);
  components.add(statsComponent);
  final SolrParams common = params("stats", "true", "q", "*:*", "nested","foo_t:cow");

  // all of these should produce the same SchemaField based StatsField
  final String[] schemaFieldSpellings = {
    "foo_i", "{!func}field(\"foo_i\")", "{!lucene}_val_:\"field(foo_i)\""
  };
  for (String param : schemaFieldSpellings) {
    final SolrQueryRequest request = req(common);
    try {
      final StatsField parsed = new StatsField
        (new ResponseBuilder(request, new SolrQueryResponse(), components), param);

      assertNull("value source of: " + param, parsed.getValueSource());
      assertNotNull("schema field of: " + param, parsed.getSchemaField());
      assertEquals("field name of: " + param,
                   "foo_i", parsed.getSchemaField().getName());
    } finally {
      // always release the request, even when an assertion fails
      request.close();
    }
  }

  // all of these should produce the same QueryValueSource based StatsField
  final String[] querySpellings = {
    "{!lucene}foo_t:cow", "{!func}query($nested)", "{!field f=foo_t}cow",
  };
  for (String param : querySpellings) {
    final SolrQueryRequest request = req(common);
    try {
      final StatsField parsed = new StatsField
        (new ResponseBuilder(request, new SolrQueryResponse(), components), param);

      assertNull("schema field of: " + param, parsed.getSchemaField());
      assertNotNull("value source of: " + param, parsed.getValueSource());
      assertTrue(parsed.getValueSource().getClass() + " is vs type of: " + param,
                 parsed.getValueSource() instanceof QueryValueSource);

      final QueryValueSource queryValueSource = (QueryValueSource) parsed.getValueSource();
      final TermQuery expectedQuery = new TermQuery(new Term("foo_t","cow"));
      assertEquals("query of :" + param, expectedQuery, queryValueSource.getQuery());
    } finally {
      // always release the request, even when an assertion fails
      request.close();
    }
  }
}
public void testFieldStatisticsDocValuesAndMultiValuedDouble() throws Exception { public void testFieldStatisticsDocValuesAndMultiValuedDouble() throws Exception {
SolrCore core = h.getCore(); SolrCore core = h.getCore();