SOLR-6351: Stats can now be nested under pivot values by adding a 'stats' local param

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1636772 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Chris M. Hostetter 2014-11-04 23:09:41 +00:00
parent 2f1493339e
commit 7e07a9c578
20 changed files with 1978 additions and 118 deletions

View File

@ -201,6 +201,10 @@ New Features
* SOLR-6670: change BALANCESLICEUNIQUE to BALANCESHARDUNIQUE. Also, the parameter * SOLR-6670: change BALANCESLICEUNIQUE to BALANCESHARDUNIQUE. Also, the parameter
for ADDREPLICAPROP that used to be sliceUnique is now shardUnique. (Erick Erickson) for ADDREPLICAPROP that used to be sliceUnique is now shardUnique. (Erick Erickson)
* SOLR-6351: Stats can now be nested under pivot values by adding a 'stats' local param to
facet.pivot which refers to a 'tag' local param in one or more stats.field params.
(hossman, Vitaliy Zhovtyuk)
Bug Fixes Bug Fixes
---------------------- ----------------------

View File

@ -18,15 +18,21 @@
package org.apache.solr.handler.component; package org.apache.solr.handler.component;
import org.apache.solr.util.PivotListEntry; import org.apache.solr.util.PivotListEntry;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.FacetParams; import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.StrUtils; import org.apache.solr.common.util.StrUtils;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List; import java.util.List;
import java.util.Collections; import java.util.Collections;
import java.util.Map;
import java.util.Map.Entry;
public class PivotFacetHelper { public class PivotFacetHelper {
@ -91,31 +97,63 @@ public class PivotFacetHelper {
/** @see PivotListEntry#VALUE */ /** @see PivotListEntry#VALUE */
public static Comparable getValue(NamedList<Object> pivotList) { public static Comparable getValue(NamedList<Object> pivotList) {
return (Comparable) PivotFacetHelper.retrieve(PivotListEntry.VALUE, return (Comparable) PivotListEntry.VALUE.extract(pivotList);
pivotList);
} }
/** @see PivotListEntry#FIELD */ /** @see PivotListEntry#FIELD */
public static String getField(NamedList<Object> pivotList) { public static String getField(NamedList<Object> pivotList) {
return (String) PivotFacetHelper.retrieve(PivotListEntry.FIELD, pivotList); return (String) PivotListEntry.FIELD.extract(pivotList);
} }
/** @see PivotListEntry#COUNT */ /** @see PivotListEntry#COUNT */
public static Integer getCount(NamedList<Object> pivotList) { public static Integer getCount(NamedList<Object> pivotList) {
return (Integer) PivotFacetHelper.retrieve(PivotListEntry.COUNT, pivotList); return (Integer) PivotListEntry.COUNT.extract(pivotList);
} }
/** @see PivotListEntry#PIVOT */ /** @see PivotListEntry#PIVOT */
public static List<NamedList<Object>> getPivots(NamedList<Object> pivotList) { public static List<NamedList<Object>> getPivots(NamedList<Object> pivotList) {
int pivotIdx = pivotList.indexOf(PivotListEntry.PIVOT.getName(), 0); return (List<NamedList<Object>>) PivotListEntry.PIVOT.extract(pivotList);
if (pivotIdx > -1) {
return (List<NamedList<Object>>) pivotList.getVal(pivotIdx);
}
return null;
} }
private static Object retrieve(PivotListEntry entryToGet, NamedList<Object> pivotList) { /** @see PivotListEntry#STATS */
return pivotList.get(entryToGet.getName(), entryToGet.getIndex()); public static NamedList<NamedList<NamedList<?>>> getStats(NamedList<Object> pivotList) {
return (NamedList<NamedList<NamedList<?>>>) PivotListEntry.STATS.extract(pivotList);
}
/**
* Given a mapping of keys to {@link StatsValues} representing the currently
* known "merged" stats (which may be null if none exist yet), and a
* {@link NamedList} containing the "stats" response block returned by an individual
* shard, this method accumulates the stasts for each {@link StatsField} found in
* the shard response with the existing mergeStats
*
* @return the original <code>merged</code> Map after modifying, or a new Map if the <code>merged</code> param was originally null.
* @see StatsInfo#getStatsField
* @see StatsValuesFactory#createStatsValues
* @see StatsValues#accumulate(NamedList)
*/
public static Map<String,StatsValues> mergeStats
(Map<String,StatsValues> merged,
NamedList<NamedList<NamedList<?>>> remoteWrapper,
StatsInfo statsInfo) {
if (null == merged) merged = new LinkedHashMap<String,StatsValues>();
NamedList<NamedList<?>> remoteStats = StatsComponent.unwrapStats(remoteWrapper);
for (Entry<String,NamedList<?>> entry : remoteStats) {
StatsValues receivingStatsValues = merged.get(entry.getKey());
if (receivingStatsValues == null) {
StatsField recievingStatsField = statsInfo.getStatsField(entry.getKey());
if (null == recievingStatsField) {
throw new SolrException(ErrorCode.SERVER_ERROR , "No stats.field found corrisponding to pivot stats recieved from shard: "+entry.getKey());
}
receivingStatsValues = StatsValuesFactory.createStatsValues(recievingStatsField);
merged.put(entry.getKey(), receivingStatsValues);
}
receivingStatsValues.accumulate(entry.getValue());
}
return merged;
} }
} }

View File

@ -23,20 +23,26 @@ import org.apache.solr.schema.FieldType;
import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.DocSet; import org.apache.solr.search.DocSet;
import org.apache.solr.search.SyntaxError; import org.apache.solr.search.SyntaxError;
import org.apache.solr.util.PivotListEntry;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.StrUtils; import org.apache.solr.common.util.StrUtils;
import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.ShardParams;
import org.apache.solr.common.params.FacetParams; import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.StatsParams;
import org.apache.solr.request.SimpleFacets; import org.apache.solr.request.SimpleFacets;
import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrQueryRequest;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections;
import java.util.Deque; import java.util.Deque;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
@ -63,9 +69,15 @@ public class PivotFacetProcessor extends SimpleFacets
if (!rb.doFacets || pivots == null) if (!rb.doFacets || pivots == null)
return null; return null;
// rb._statsInfo may be null if stats=false, ie: refine requests
// if that's the case, but we need to refine w/stats, then we'll lazy init our
// own instance of StatsInfo
StatsInfo statsInfo = rb._statsInfo;
SimpleOrderedMap<List<NamedList<Object>>> pivotResponse = new SimpleOrderedMap<>(); SimpleOrderedMap<List<NamedList<Object>>> pivotResponse = new SimpleOrderedMap<>();
for (String pivotList : pivots) { for (String pivotList : pivots) {
try { try {
// NOTE: this sets localParams (SimpleFacets is stateful)
this.parseParams(FacetParams.FACET_PIVOT, pivotList); this.parseParams(FacetParams.FACET_PIVOT, pivotList);
} catch (SyntaxError e) { } catch (SyntaxError e) {
throw new SolrException(ErrorCode.BAD_REQUEST, e); throw new SolrException(ErrorCode.BAD_REQUEST, e);
@ -84,15 +96,37 @@ public class PivotFacetProcessor extends SimpleFacets
} }
} }
//REFINEMENT // start by assuing no local params...
String fieldValueKey = localParams == null ? null : localParams.get(PivotFacet.REFINE_PARAM);
if(fieldValueKey != null ){ String refineKey = null; // no local => no refinement
String[] refinementValuesByField = params.getParams(PivotFacet.REFINE_PARAM+fieldValueKey); List<StatsField> statsFields = Collections.emptyList(); // no local => no stats
if (null != localParams) {
// we might be refining..
refineKey = localParams.get(PivotFacet.REFINE_PARAM);
String statsLocalParam = localParams.get(StatsParams.STATS);
if (null != refineKey
&& null != statsLocalParam
&& null == statsInfo) {
// we are refining and need to compute stats,
// but stats component hasn't inited StatsInfo (because we
// don't need/want top level stats when refining) so we lazy init
// our own copy of StatsInfo
statsInfo = new StatsInfo(rb);
}
statsFields = getTaggedStatsFields(statsInfo, statsLocalParam);
}
if (null != refineKey) {
String[] refinementValuesByField
= params.getParams(PivotFacet.REFINE_PARAM + refineKey);
for(String refinements : refinementValuesByField){ for(String refinements : refinementValuesByField){
pivotResponse.addAll(processSingle(pivotFields, refinements)); pivotResponse.addAll(processSingle(pivotFields, refinements, statsFields));
} }
} else{ } else{
pivotResponse.addAll(processSingle(pivotFields, null)); pivotResponse.addAll(processSingle(pivotFields, null, statsFields));
} }
} }
return pivotResponse; return pivotResponse;
@ -102,9 +136,13 @@ public class PivotFacetProcessor extends SimpleFacets
* Process a single branch of refinement values for a specific pivot * Process a single branch of refinement values for a specific pivot
* @param pivotFields the ordered list of fields in this pivot * @param pivotFields the ordered list of fields in this pivot
* @param refinements the comma seperate list of refinement values corrisponding to each field in the pivot, or null if there are no refinements * @param refinements the comma seperate list of refinement values corrisponding to each field in the pivot, or null if there are no refinements
* @param statsFields List of {@link StatsField} instances to compute for each pivot value
*/ */
private SimpleOrderedMap<List<NamedList<Object>>> processSingle(List<String> pivotFields, private SimpleOrderedMap<List<NamedList<Object>>> processSingle
String refinements) throws IOException { (List<String> pivotFields,
String refinements,
List<StatsField> statsFields) throws IOException {
SolrIndexSearcher searcher = rb.req.getSearcher(); SolrIndexSearcher searcher = rb.req.getSearcher();
SimpleOrderedMap<List<NamedList<Object>>> pivotResponse = new SimpleOrderedMap<>(); SimpleOrderedMap<List<NamedList<Object>>> pivotResponse = new SimpleOrderedMap<>();
@ -141,18 +179,54 @@ public class PivotFacetProcessor extends SimpleFacets
if(pivotFields.size() > 1) { if(pivotFields.size() > 1) {
String subField = pivotFields.get(1); String subField = pivotFields.get(1);
pivotResponse.add(key, pivotResponse.add(key,
doPivots(facetCounts, field, subField, fnames, vnames, this.docs)); doPivots(facetCounts, field, subField, fnames, vnames, this.docs, statsFields));
} else { } else {
pivotResponse.add(key, doPivots(facetCounts, field, null, fnames, vnames, this.docs)); pivotResponse.add(key, doPivots(facetCounts, field, null, fnames, vnames, this.docs, statsFields));
} }
return pivotResponse; return pivotResponse;
} }
/**
 * Returns the {@link StatsField} instances that should be computed for a pivot
 * based on the 'stats' local param used.
 *
 * @param statsInfo the stats configuration to look tags up in, may be null
 * @param statsLocalParam the value of the 'stats' local param on the pivot, may be null
 * @return A list of StatsFields to compute for this pivot, or the empty list if none
 * @throws SolrException (BAD_REQUEST) if the local param contains more than one comma separated tag
 */
private static List<StatsField> getTaggedStatsFields(StatsInfo statsInfo,
                                                     String statsLocalParam) {
  if (null == statsLocalParam || null == statsInfo) {
    return Collections.emptyList();
  }

  List<StatsField> fields = new ArrayList<>(7);
  List<String> statsAr = StrUtils.splitSmart(statsLocalParam, ',');

  // TODO: for now, we only support a single tag name - we reserve using
  // ',' as a possible delimiter for logic related to only computing stats
  // at certain levels -- see SOLR-6663
  if (1 < statsAr.size()) {
    // note the leading space before "may": without it the pivot param name
    // runs straight into the rest of the sentence
    String msg = StatsParams.STATS + " local param of " + FacetParams.FACET_PIVOT +
      " may not include tags separated by a comma - please use a common tag on all " +
      StatsParams.STATS_FIELD + " params you wish to compute under this pivot";
    throw new SolrException(ErrorCode.BAD_REQUEST, msg);
  }

  for (String stat : statsAr) {
    fields.addAll(statsInfo.getStatsFieldsByTag(stat));
  }
  return fields;
}
/** /**
* Recursive function to compute all the pivot counts for the values under teh specified field * Recursive function to compute all the pivot counts for the values under teh specified field
*/ */
protected List<NamedList<Object>> doPivots(NamedList<Integer> superFacets, protected List<NamedList<Object>> doPivots(NamedList<Integer> superFacets,
String field, String subField, Deque<String> fnames,Deque<String> vnames,DocSet docs) throws IOException { String field, String subField,
Deque<String> fnames, Deque<String> vnames,
DocSet docs, List<StatsField> statsFields)
throws IOException {
boolean isShard = rb.req.getParams().getBool(ShardParams.IS_SHARD, false);
SolrIndexSearcher searcher = rb.req.getSearcher(); SolrIndexSearcher searcher = rb.req.getSearcher();
// TODO: optimize to avoid converting to an external string and then having to convert back to internal below // TODO: optimize to avoid converting to an external string and then having to convert back to internal below
@ -169,6 +243,7 @@ public class PivotFacetProcessor extends SimpleFacets
// Only sub-facet if parent facet has positive count - still may not be any values for the sub-field though // Only sub-facet if parent facet has positive count - still may not be any values for the sub-field though
if (kv.getValue() >= getMinCountForField(field)) { if (kv.getValue() >= getMinCountForField(field)) {
final String fieldValue = kv.getKey(); final String fieldValue = kv.getKey();
final int pivotCount = kv.getValue();
SimpleOrderedMap<Object> pivot = new SimpleOrderedMap<>(); SimpleOrderedMap<Object> pivot = new SimpleOrderedMap<>();
pivot.add( "field", field ); pivot.add( "field", field );
@ -178,7 +253,7 @@ public class PivotFacetProcessor extends SimpleFacets
ftype.readableToIndexed(fieldValue, termval); ftype.readableToIndexed(fieldValue, termval);
pivot.add( "value", ftype.toObject(sfield, termval.get()) ); pivot.add( "value", ftype.toObject(sfield, termval.get()) );
} }
pivot.add( "count", kv.getValue() ); pivot.add( "count", pivotCount );
DocSet subset = getSubset(docs, sfield, fieldValue); DocSet subset = getSubset(docs, sfield, fieldValue);
@ -195,9 +270,17 @@ public class PivotFacetProcessor extends SimpleFacets
} }
if (facetCounts.size() >= 1) { if (facetCounts.size() >= 1) {
pivot.add( "pivot", doPivots( facetCounts, subField, nextField, fnames, vnames, subset) ); pivot.add( "pivot", doPivots( facetCounts, subField, nextField, fnames, vnames, subset, statsFields ) );
} }
} }
if ((isShard || 0 < pivotCount) && ! statsFields.isEmpty()) {
Map<String, StatsValues> stv = new LinkedHashMap<>();
for (StatsField statsField : statsFields) {
stv.put(statsField.getOutputKey(), statsField.computeLocalStatsValues(subset));
}
// for pivots, we *always* include requested stats - even if 'empty'
pivot.add("stats", StatsComponent.convertToResponse(true, stv));
}
values.add( pivot ); values.add( pivot );
} }

View File

@ -21,11 +21,13 @@ import java.util.BitSet;
import java.util.Date; import java.util.Date;
import java.util.List; import java.util.List;
import java.util.Locale; import java.util.Locale;
import java.util.Map;
import org.apache.solr.common.params.FacetParams; import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.schema.TrieDateField; import org.apache.solr.schema.TrieDateField;
import org.apache.solr.search.QueryParsing;
import org.apache.solr.util.PivotListEntry; import org.apache.solr.util.PivotListEntry;
/** /**
@ -45,6 +47,7 @@ public class PivotFacetValue {
// child can't be final, circular ref on construction // child can't be final, circular ref on construction
private PivotFacetField childPivot = null; private PivotFacetField childPivot = null;
private int count; // mutable private int count; // mutable
private Map<String, StatsValues> statsValues = null;
private PivotFacetValue(PivotFacetField parent, Comparable val) { private PivotFacetValue(PivotFacetField parent, Comparable val) {
this.parentPivot = parent; this.parentPivot = parent;
@ -114,6 +117,7 @@ public class PivotFacetValue {
Comparable pivotVal = null; Comparable pivotVal = null;
int pivotCount = 0; int pivotCount = 0;
List<NamedList<Object>> childPivotData = null; List<NamedList<Object>> childPivotData = null;
NamedList<NamedList<NamedList<?>>> statsValues = null;
for (int i = 0; i < pivotData.size(); i++) { for (int i = 0; i < pivotData.size(); i++) {
String key = pivotData.getName(i); String key = pivotData.getName(i);
@ -135,6 +139,9 @@ public class PivotFacetValue {
case PIVOT: case PIVOT:
childPivotData = (List<NamedList<Object>>)value; childPivotData = (List<NamedList<Object>>)value;
break; break;
case STATS:
statsValues = (NamedList<NamedList<NamedList<?>>>) value;
break;
default: default:
throw new RuntimeException("PivotListEntry contains unaccounted for item: " + entry); throw new RuntimeException("PivotListEntry contains unaccounted for item: " + entry);
} }
@ -143,6 +150,9 @@ public class PivotFacetValue {
PivotFacetValue newPivotFacet = new PivotFacetValue(parentField, pivotVal); PivotFacetValue newPivotFacet = new PivotFacetValue(parentField, pivotVal);
newPivotFacet.count = pivotCount; newPivotFacet.count = pivotCount;
newPivotFacet.sourceShards.set(shardNumber); newPivotFacet.sourceShards.set(shardNumber);
if(statsValues != null) {
newPivotFacet.statsValues = PivotFacetHelper.mergeStats(null, statsValues, rb._statsInfo);
}
newPivotFacet.childPivot = PivotFacetField.createFromListOfNamedLists(shardNumber, rb, newPivotFacet, childPivotData); newPivotFacet.childPivot = PivotFacetField.createFromListOfNamedLists(shardNumber, rb, newPivotFacet, childPivotData);
@ -171,6 +181,11 @@ public class PivotFacetValue {
if (childPivot != null && childPivot.convertToListOfNamedLists() != null) { if (childPivot != null && childPivot.convertToListOfNamedLists() != null) {
newList.add(PivotListEntry.PIVOT.getName(), childPivot.convertToListOfNamedLists()); newList.add(PivotListEntry.PIVOT.getName(), childPivot.convertToListOfNamedLists());
} }
if (null != statsValues) {
newList.add(PivotListEntry.STATS.getName(),
// for pivots, we *always* include requested stats - even if 'empty'
StatsComponent.convertToResponse(true, statsValues));
}
return newList; return newList;
} }
@ -187,6 +202,10 @@ public class PivotFacetValue {
if (!shardHasContributed(shardNumber)) { if (!shardHasContributed(shardNumber)) {
sourceShards.set(shardNumber); sourceShards.set(shardNumber);
count += PivotFacetHelper.getCount(value); count += PivotFacetHelper.getCount(value);
NamedList<NamedList<NamedList<?>>> stats = PivotFacetHelper.getStats(value);
if (stats != null) {
statsValues = PivotFacetHelper.mergeStats(statsValues, stats, rb._statsInfo);
}
} }
List<NamedList<Object>> shardChildPivots = PivotFacetHelper.getPivots(value); List<NamedList<Object>> shardChildPivots = PivotFacetHelper.getPivots(value);

View File

@ -25,8 +25,6 @@ import java.util.List;
import java.util.Map; import java.util.Map;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ShardParams; import org.apache.solr.common.params.ShardParams;
import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.StatsParams; import org.apache.solr.common.params.StatsParams;
@ -56,22 +54,14 @@ public class StatsComponent extends SearchComponent {
if (!rb.doStats) return; if (!rb.doStats) return;
boolean isShard = rb.req.getParams().getBool(ShardParams.IS_SHARD, false); boolean isShard = rb.req.getParams().getBool(ShardParams.IS_SHARD, false);
NamedList<Object> out = new SimpleOrderedMap<>(); Map<String, StatsValues> statsValues = new LinkedHashMap<>();
NamedList<Object> stats_fields = new SimpleOrderedMap<>();
for (StatsField statsField : rb._statsInfo.getStatsFields()) { for (StatsField statsField : rb._statsInfo.getStatsFields()) {
DocSet docs = statsField.computeBaseDocSet(); DocSet docs = statsField.computeBaseDocSet();
NamedList<?> stv = statsField.computeLocalStatsValues(docs).getStatsValues(); statsValues.put(statsField.getOutputKey(), statsField.computeLocalStatsValues(docs));
if (isShard == true || (Long) stv.get("count") > 0) {
stats_fields.add(statsField.getOutputKey(), stv);
} else {
stats_fields.add(statsField.getOutputKey(), null);
}
} }
out.add("stats_fields", stats_fields); rb.rsp.add( "stats", convertToResponse(isShard, statsValues) );
rb.rsp.add( "stats", out );
} }
@Override @Override
@ -86,6 +76,8 @@ public class StatsComponent extends SearchComponent {
if ((sreq.purpose & ShardRequest.PURPOSE_GET_TOP_IDS) != 0) { if ((sreq.purpose & ShardRequest.PURPOSE_GET_TOP_IDS) != 0) {
sreq.purpose |= ShardRequest.PURPOSE_GET_STATS; sreq.purpose |= ShardRequest.PURPOSE_GET_STATS;
} else { } else {
// turn off stats on other requests // turn off stats on other requests
sreq.params.set(StatsParams.STATS, "false"); sreq.params.set(StatsParams.STATS, "false");
// we could optionally remove stats params // we could optionally remove stats params
@ -101,7 +93,8 @@ public class StatsComponent extends SearchComponent {
for (ShardResponse srsp : sreq.responses) { for (ShardResponse srsp : sreq.responses) {
NamedList stats = null; NamedList stats = null;
try { try {
stats = (NamedList) srsp.getSolrResponse().getResponse().get("stats"); stats = (NamedList<NamedList<NamedList<?>>>)
srsp.getSolrResponse().getResponse().get("stats");
} catch (Exception e) { } catch (Exception e) {
if (rb.req.getParams().getBool(ShardParams.SHARDS_TOLERANT, false)) { if (rb.req.getParams().getBool(ShardParams.SHARDS_TOLERANT, false)) {
continue; // looks like a shard did not return anything continue; // looks like a shard did not return anything
@ -110,7 +103,7 @@ public class StatsComponent extends SearchComponent {
"Unable to read stats info for shard: " + srsp.getShard(), e); "Unable to read stats info for shard: " + srsp.getShard(), e);
} }
NamedList stats_fields = (NamedList) stats.get("stats_fields"); NamedList stats_fields = unwrapStats(stats);
if (stats_fields != null) { if (stats_fields != null) {
for (int i = 0; i < stats_fields.size(); i++) { for (int i = 0; i < stats_fields.size(); i++) {
String key = stats_fields.getName(i); String key = stats_fields.getName(i);
@ -129,26 +122,44 @@ public class StatsComponent extends SearchComponent {
// so that "result" is already stored in the response (for aesthetics) // so that "result" is already stored in the response (for aesthetics)
Map<String, StatsValues> allStatsValues = rb._statsInfo.getAggregateStatsValues(); Map<String, StatsValues> allStatsValues = rb._statsInfo.getAggregateStatsValues();
rb.rsp.add("stats", convertToResponse(false, allStatsValues));
NamedList<NamedList<Object>> stats = new SimpleOrderedMap<>(); rb._statsInfo = null; // free some objects
NamedList<Object> stats_fields = new SimpleOrderedMap<>(); }
/**
 * Strips the outer "stats" wrapper from a stats response block and hands back
 * the "stats_fields" section it contains.
 *
 * @param stats the wrapped stats block, may be null
 * @return the value stored under "stats_fields", or null if <code>stats</code> is null
 */
public static NamedList<NamedList<?>> unwrapStats(NamedList<NamedList<NamedList<?>>> stats) {
  return (null == stats) ? null : stats.get("stats_fields");
}
/**
* Given a map of {@link StatsValues} using the appropriate response key,
* builds up the neccessary "stats" data structure for including in the response --
* including the esoteric "stats_fields" wrapper.
*/
public static NamedList<NamedList<NamedList<?>>> convertToResponse
(boolean force, Map<String,StatsValues> statsValues) {
NamedList<NamedList<NamedList<?>>> stats = new SimpleOrderedMap<>();
NamedList<NamedList<?>> stats_fields = new SimpleOrderedMap<>();
stats.add("stats_fields", stats_fields); stats.add("stats_fields", stats_fields);
for (Map.Entry<String,StatsValues> entry : allStatsValues.entrySet()) { for (Map.Entry<String,StatsValues> entry : statsValues.entrySet()) {
String key = entry.getKey(); String key = entry.getKey();
NamedList stv = entry.getValue().getStatsValues(); NamedList stv = entry.getValue().getStatsValues();
if ((Long) stv.get("count") != 0) { if (force || ((Long) stv.get("count") != 0)) {
stats_fields.add(key, stv); stats_fields.add(key, stv);
} else { } else {
stats_fields.add(key, null); stats_fields.add(key, null);
} }
} }
return stats;
rb.rsp.add("stats", stats);
rb._statsInfo = null; // free some objects
} }
///////////////////////////////////////////// /////////////////////////////////////////////
/// SolrInfoMBean /// SolrInfoMBean
//////////////////////////////////////////// ////////////////////////////////////////////
@ -168,6 +179,8 @@ class StatsInfo {
private final ResponseBuilder rb; private final ResponseBuilder rb;
private final List<StatsField> statsFields = new ArrayList<>(7); private final List<StatsField> statsFields = new ArrayList<>(7);
private final Map<String, StatsValues> distribStatsValues = new LinkedHashMap<>(); private final Map<String, StatsValues> distribStatsValues = new LinkedHashMap<>();
private final Map<String, StatsField> statsFieldMap = new LinkedHashMap<>();
private final Map<String, List<StatsField>> tagToStatsFields = new LinkedHashMap<>();
public StatsInfo(ResponseBuilder rb) { public StatsInfo(ResponseBuilder rb) {
this.rb = rb; this.rb = rb;
@ -181,6 +194,15 @@ class StatsInfo {
for (String paramValue : statsParams) { for (String paramValue : statsParams) {
StatsField current = new StatsField(rb, paramValue); StatsField current = new StatsField(rb, paramValue);
statsFields.add(current); statsFields.add(current);
for (String tag : current.getTagList()) {
List<StatsField> fieldList = tagToStatsFields.get(tag);
if (fieldList == null) {
fieldList = new ArrayList<>();
}
fieldList.add(current);
tagToStatsFields.put(tag, fieldList);
}
statsFieldMap.put(current.getOutputKey(), current);
distribStatsValues.put(current.getOutputKey(), distribStatsValues.put(current.getOutputKey(),
StatsValuesFactory.createStatsValues(current)); StatsValuesFactory.createStatsValues(current));
} }
@ -192,7 +214,31 @@ class StatsInfo {
* as part of this request * as part of this request
*/ */
public List<StatsField> getStatsFields() { public List<StatsField> getStatsFields() {
return Collections.<StatsField>unmodifiableList(statsFields); return Collections.unmodifiableList(statsFields);
}
/**
 * Looks up a {@link StatsField} by its (effective) output key.
 *
 * @param outputKey the key to look up
 * @return the matching {@link StatsField}, or null if there was no
 *         {@link StatsParams#STATS_FIELD} param that would correspond
 *         with that key.
 */
public StatsField getStatsField(String outputKey) {
  final StatsField match = statsFieldMap.get(outputKey);
  return match;
}
/**
* Return immutable list of {@link StatsField} instances by string tag local parameter.
*
* @param tag tag local parameter
* @return list of stats fields
*/
public List<StatsField> getStatsFieldsByTag(String tag) {
List<StatsField> raw = tagToStatsFields.get(tag);
if (null == raw) {
return Collections.emptyList();
} else {
return Collections.unmodifiableList(raw);
}
} }
/** /**
@ -203,7 +249,7 @@ class StatsInfo {
* will never be null. * will never be null.
*/ */
public Map<String, StatsValues> getAggregateStatsValues() { public Map<String, StatsValues> getAggregateStatsValues() {
return Collections.<String, StatsValues>unmodifiableMap(distribStatsValues); return Collections.unmodifiableMap(distribStatsValues);
} }
} }

View File

@ -70,6 +70,7 @@ public class StatsField {
private final String key; private final String key;
private final boolean calcDistinct; // TODO: put this inside localParams ? SOLR-6349 ? private final boolean calcDistinct; // TODO: put this inside localParams ? SOLR-6349 ?
private final String[] facets; private final String[] facets;
private final List<String> tagList;
private final List<String> excludeTagList; private final List<String> excludeTagList;
/** /**
@ -147,6 +148,10 @@ public class StatsField {
String[] facets = params.getFieldParams(key, StatsParams.STATS_FACET); String[] facets = params.getFieldParams(key, StatsParams.STATS_FACET);
this.facets = (null == facets) ? new String[0] : facets; this.facets = (null == facets) ? new String[0] : facets;
String tagStr = localParams.get(CommonParams.TAG);
this.tagList = (null == tagStr)
? Collections.<String>emptyList()
: StrUtils.splitSmart(tagStr,',');
// figure out if we need a special base DocSet // figure out if we need a special base DocSet
String excludeStr = localParams.get(CommonParams.EXCLUDE); String excludeStr = localParams.get(CommonParams.EXCLUDE);
@ -363,6 +368,11 @@ public class StatsField {
return calcDistinct; return calcDistinct;
} }
/**
 * The tags parsed from this field's "tag" local param, or the empty list
 * if that local param was not specified.
 */
public List<String> getTagList() {
  return this.tagList;
}
public String toString() { public String toString() {
return "StatsField<" + originalParam + ">"; return "StatsField<" + originalParam + ">";
} }

View File

@ -316,8 +316,6 @@ class NumericStatsValues extends AbstractStatsValues<Number> {
public NumericStatsValues(StatsField statsField) { public NumericStatsValues(StatsField statsField) {
super(statsField); super(statsField);
min = Double.POSITIVE_INFINITY;
max = Double.NEGATIVE_INFINITY;
} }
@Override @Override
@ -353,8 +351,22 @@ class NumericStatsValues extends AbstractStatsValues<Number> {
*/ */
@Override @Override
protected void updateMinMax(Number min, Number max) { protected void updateMinMax(Number min, Number max) {
this.min = Math.min(this.min.doubleValue(), min.doubleValue()); if (null == min) {
this.max = Math.max(this.max.doubleValue(), max.doubleValue()); assert null == max : "min is null but max isn't ? ==> " + max;
return; // No-Op
}
assert null != max : "max is null but min isn't ? ==> " + min;
// we always use the double value, because that way the response Object class is
// consistent regardless of whether we only have 1 value or many that we min/max
//
// TODO: would be nice to have subclasses for each type of Number ... breaks backcompat
double minD = min.doubleValue();
double maxD = max.doubleValue();
this.min = (null == this.min) ? minD : Math.min(this.min.doubleValue(), minD);
this.max = (null == this.max) ? maxD : Math.max(this.max.doubleValue(), maxD);
} }
/** /**

View File

@ -17,6 +17,10 @@ package org.apache.solr.util;
* limitations under the License. * limitations under the License.
*/ */
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.util.NamedList;
import java.util.Locale; import java.util.Locale;
/** /**
@ -24,16 +28,28 @@ import java.util.Locale;
*/ */
public enum PivotListEntry { public enum PivotListEntry {
// mandatory entries with exact indexes
FIELD(0), FIELD(0),
VALUE(1), VALUE(1),
COUNT(2), COUNT(2),
PIVOT(3); // optional entries
PIVOT,
STATS;
// we could just use the ordinal(), but safer to be very explicit private static final int MIN_INDEX_OF_OPTIONAL = 3;
private final int index;
private PivotListEntry(int index) { /**
this.index = index; * Given a NamedList representing a Pivot Value, this is Minimum Index at
* which this PivotListEntry may exist
*/
private final int minIndex;
private PivotListEntry() {
this.minIndex = MIN_INDEX_OF_OPTIONAL;
}
private PivotListEntry(int minIndex) {
assert minIndex < MIN_INDEX_OF_OPTIONAL;
this.minIndex = minIndex;
} }
/** /**
@ -53,10 +69,19 @@ public enum PivotListEntry {
} }
/** /**
* Indec of this entry when used in response * Given a {@link NamedList} representing a Pivot Value, extracts the Object
which corresponds to this {@link PivotListEntry}, or returns null if not found.
*/ */
public int getIndex() { public Object extract(NamedList<Object> pivotList) {
return index; if (this.minIndex < MIN_INDEX_OF_OPTIONAL) {
// a mandatory entry at an exact index.
assert this.getName().equals(pivotList.getName(this.minIndex));
assert this.minIndex < pivotList.size();
return pivotList.getVal(this.minIndex);
}
// otherwise...
// scan starting at the min/optional index
return pivotList.get(this.getName(), this.minIndex);
} }
} }

View File

@ -16,17 +16,22 @@
*/ */
package org.apache.solr.cloud; package org.apache.solr.cloud;
import org.apache.commons.collections.CollectionUtils;
import org.apache.lucene.util.CollectionUtil;
import org.apache.lucene.util.TestUtil; import org.apache.lucene.util.TestUtil;
import org.apache.solr.SolrTestCaseJ4.SuppressSSL; import org.apache.solr.SolrTestCaseJ4.SuppressSSL;
import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.FieldStatsInfo;
import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.PivotField; import org.apache.solr.client.solrj.response.PivotField;
import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.StatsParams;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.schema.TrieDateField; import org.apache.solr.schema.TrieDateField;
import org.apache.solr.common.params.FacetParams; // jdoc lint
import static org.apache.solr.common.params.FacetParams.*; import static org.apache.solr.common.params.FacetParams.*;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.StringUtils;
@ -92,6 +97,9 @@ public class TestCloudPivotFacet extends AbstractFullDistribZkTestBase {
@Override @Override
public void doTest() throws Exception { public void doTest() throws Exception {
sanityCheckAssertDoubles();
waitForThingsToLevelOut(30000); // TODO: why whould we have to wait? waitForThingsToLevelOut(30000); // TODO: why whould we have to wait?
// //
handle.clear(); handle.clear();
@ -107,7 +115,7 @@ public class TestCloudPivotFacet extends AbstractFullDistribZkTestBase {
for (int i = 1; i <= numDocs; i++) { for (int i = 1; i <= numDocs; i++) {
SolrInputDocument doc = buildRandomDocument(i); SolrInputDocument doc = buildRandomDocument(i);
// not efficient, but it garuntees that even if people change buildRandomDocument // not efficient, but it guarantees that even if people change buildRandomDocument
// we'll always have the full list of fields w/o needing to keep code in sync // we'll always have the full list of fields w/o needing to keep code in sync
fieldNameSet.addAll(doc.getFieldNames()); fieldNameSet.addAll(doc.getFieldNames());
@ -119,7 +127,7 @@ public class TestCloudPivotFacet extends AbstractFullDistribZkTestBase {
assertTrue("WTF, bogus field exists?", fieldNameSet.add("bogus_not_in_any_doc_s")); assertTrue("WTF, bogus field exists?", fieldNameSet.add("bogus_not_in_any_doc_s"));
final String[] fieldNames = fieldNameSet.toArray(new String[fieldNameSet.size()]); final String[] fieldNames = fieldNameSet.toArray(new String[fieldNameSet.size()]);
Arrays.sort(fieldNames); // need determinism for buildRandomPivot calls Arrays.sort(fieldNames); // need determinism when picking random fields
for (int i = 0; i < 5; i++) { for (int i = 0; i < 5; i++) {
@ -134,10 +142,28 @@ public class TestCloudPivotFacet extends AbstractFullDistribZkTestBase {
baseP.add("fq", "id:[* TO " + TestUtil.nextInt(random(),200,numDocs) + "]"); baseP.add("fq", "id:[* TO " + TestUtil.nextInt(random(),200,numDocs) + "]");
} }
ModifiableSolrParams pivotP = params(FACET,"true", final boolean stats = random().nextBoolean();
FACET_PIVOT, buildRandomPivot(fieldNames)); if (stats) {
baseP.add(StatsParams.STATS, "true");
// if we are doing stats, then always generate the same # of STATS_FIELD
// params, using multiple tags from a fixed set, but with diff fieldName values.
// later, each pivot will randomly pick a tag.
baseP.add(StatsParams.STATS_FIELD, "{!key=sk1 tag=st1,st2}" +
pickRandomStatsFields(fieldNames));
baseP.add(StatsParams.STATS_FIELD, "{!key=sk2 tag=st2,st3}" +
pickRandomStatsFields(fieldNames));
baseP.add(StatsParams.STATS_FIELD, "{!key=sk3 tag=st3,st4}" +
pickRandomStatsFields(fieldNames));
// NOTE: there's a chance that some of those stats field names
// will be the same, but if so, all the better to test that edge case
}
ModifiableSolrParams pivotP = params(FACET,"true");
pivotP.add(FACET_PIVOT, buildPivotParamValue(buildRandomPivot(fieldNames)));
if (random().nextBoolean()) { if (random().nextBoolean()) {
pivotP.add(FACET_PIVOT, buildRandomPivot(fieldNames)); pivotP.add(FACET_PIVOT, buildPivotParamValue(buildRandomPivot(fieldNames)));
} }
// keep limit low - lots of unique values, and lots of depth in pivots // keep limit low - lots of unique values, and lots of depth in pivots
@ -268,7 +294,7 @@ public class TestCloudPivotFacet extends AbstractFullDistribZkTestBase {
params("fq", buildFilter(constraint))); params("fq", buildFilter(constraint)));
List<PivotField> subPivots = null; List<PivotField> subPivots = null;
try { try {
assertNumFound(pivotName, constraint.getCount(), p); assertPivotData(pivotName, constraint, p);
subPivots = constraint.getPivot(); subPivots = constraint.getPivot();
} catch (Exception e) { } catch (Exception e) {
throw new RuntimeException(pivotName + ": count query failed: " + p + ": " + throw new RuntimeException(pivotName + ": count query failed: " + p + ": " +
@ -285,6 +311,97 @@ public class TestCloudPivotFacet extends AbstractFullDistribZkTestBase {
return depth + 1; return depth + 1;
} }
/**
 * Runs a single verification query and checks its response against the data
 * carried by one {@link PivotField} constraint.  Only the constraint itself is
 * checked: this method does not recurse, so any sub-pivots are ignored here.
 *
 * @param pivotName name of the pivot, used as the prefix of assertion messages
 * @param constraint the pivot value whose count (and stats, if any) are verified
 * @param params the query params; they are combined with <code>rows=0</code>
 *        via {@link SolrParams#wrapDefaults} before being executed
 */
private void assertPivotData(String pivotName, PivotField constraint, SolrParams params)
  throws SolrServerException {

  final SolrParams query = SolrParams.wrapDefaults(params("rows","0"), params);
  final QueryResponse response = cloudClient.query(query);
  final String label = pivotName + ": " + query;

  assertNumFound(label, constraint.getCount(), response);

  if ( ! query.getBool(StatsParams.STATS, false) ) {
    // stats were not requested, so there is nothing more to compare
    return;
  }
  assertPivotStats(label, constraint, response);
}
/**
 * Compares the stats attached to a pivot constraint with the top level stats
 * found in the response of a verification query.
 *
 * @param message context appended to every assertion message
 * @param constraint the pivot value whose stats (if any) are being verified
 * @param response response of the verification query
 */
private void assertPivotStats(String message, PivotField constraint, QueryResponse response) throws SolrServerException {

  final Map<String, FieldStatsInfo> pivotStatsMap = constraint.getFieldStatsInfo();
  if (null == pivotStatsMap) {
    // no stats for this pivot, nothing to check
    // TODO: use a trace param to know if/how-many to expect ?
    log.info("No stats to check for => " + message);
    return;
  }

  final Map<String, FieldStatsInfo> topLevelStatsMap = response.getFieldStatsInfo();

  for (FieldStatsInfo pivotStats : pivotStatsMap.values()) {
    final String statsKey = pivotStats.getName();
    final FieldStatsInfo actualStats = topLevelStatsMap.get(statsKey);

    if (null != actualStats) {
      // regular stats, compare everything...
      final String msg = " of " + statsKey + " => " + message;

      assertEquals("Min" + msg, pivotStats.getMin(), actualStats.getMin());
      assertEquals("Max" + msg, pivotStats.getMax(), actualStats.getMax());
      assertEquals("Mean" + msg, pivotStats.getMean(), actualStats.getMean());
      assertEquals("Sum" + msg, pivotStats.getSum(), actualStats.getSum());
      assertEquals("Count" + msg, pivotStats.getCount(), actualStats.getCount());
      assertEquals("Missing" + msg, pivotStats.getMissing(), actualStats.getMissing());

      // these two are compared with a relative epsilon rather than exactly
      assertDoubles("Stddev" + msg, pivotStats.getStddev(), actualStats.getStddev());
      assertDoubles("SumOfSquares" + msg,
                    pivotStats.getSumOfSquares(), actualStats.getSumOfSquares());
      continue;
    }

    // stats were not found in the verification query
    //
    // this has to be a special case check due to the legacy behavior of "top level"
    // StatsComponent results being "null" (and not even included in the
    // getFieldStatsInfo() Map due to special SolrJ logic)
    log.info("Requested stats missing in verification query, pivot stats: " + pivotStats);
    assertEquals("Special Count", 0L, pivotStats.getCount().longValue());
    assertEquals("Special Missing",
                 constraint.getCount(), pivotStats.getMissing().longValue());
  }

  // cheeseball hack: the expected stat keys are tied to the fixed key/tag
  // combinations (sk1, sk2, sk3) that this test's doTest() puts on its stats.field params
  final boolean sawSk2 = pivotStatsMap.containsKey("sk2");
  if (sawSk2) {
    // if "sk2" was one of the stats we computed, then we must have also seen
    // sk1 or sk3 because of the way the tags are fixed
    assertEquals("had stats sk2, but not another stat?",
                 2, pivotStatsMap.size());
  } else {
    // if we did not see "sk2", then 1 of the others must be alone
    assertEquals("only expected 1 stat",
                 1, pivotStatsMap.size());
    assertTrue("not sk1 or sk3",
               pivotStatsMap.containsKey("sk1") ||
               pivotStatsMap.containsKey("sk3"));
  }
}
/** /**
* Verify that the PivotFields we're lookin at doesn't violate any of the expected * Verify that the PivotFields we're lookin at doesn't violate any of the expected
* behaviors based on the <code>TRACE_*</code> params found in the base params * behaviors based on the <code>TRACE_*</code> params found in the base params
@ -364,6 +481,39 @@ public class TestCloudPivotFacet extends AbstractFullDistribZkTestBase {
return StringUtils.join(fields, ","); return StringUtils.join(fields, ",");
} }
/**
 * Randomly selects one of the specified field names for use in a stats.field param.
 * Names ending in <code>_b</code> or <code>_b1</code> are never returned: boolean
 * fields need to be skipped when computing stats, so the selection is simply
 * retried until some other name comes up.
 *
 * @param fieldNames the candidate field names
 * @return a randomly chosen name that does not end in <code>_b</code> or <code>_b1</code>
 */
private static String pickRandomStatsFields(String[] fieldNames) {
  while (true) {
    final String candidate = fieldNames[TestUtil.nextInt(random(),0,fieldNames.length-1)];
    final boolean looksBoolean = candidate.endsWith("_b") || candidate.endsWith("_b1");
    if ( ! looksBoolean ) {
      return candidate;
    }
  }
}
/**
 * Builds a {@link FacetParams#FACET_PIVOT} param value from the specified
 * pivotValue, randomly prefixing it with a <code>stats</code> local param.
 *
 * @param pivotValue the comma separated list of pivot fields
 * @return the pivotValue, either unchanged or prefixed with
 *         <code>{!stats=stN}</code> where N is between 0 and 4 (inclusive)
 */
private static String buildPivotParamValue(String pivotValue) {
  // randomly decide which stat tag to use (a negative value means "none")
  final int statTag = TestUtil.nextInt(random(), -1, 4);

  if (statTag < 0) {
    // sanity check the case of a pivot w/o any stats
    return pivotValue;
  }

  // NOTE: if statTag is 0, or stats aren't enabled, we'll be asking for a tag that
  // doesn't exist ...which should be fine (just like excluding a tagged fq that
  // doesn't exist)
  //
  // only use 1 tag name in the 'stats' localparam - see SOLR-6663
  return "{!stats=st"+statTag+"}" + pivotValue;
}
/** /**
* Creates a document with randomized field values, some of which be missing values, * Creates a document with randomized field values, some of which be missing values,
@ -512,16 +662,80 @@ public class TestCloudPivotFacet extends AbstractFullDistribZkTestBase {
} }
/** /**
* Asserts the number of docs matching the SolrParams aganst the cloudClient * Asserts the number of docs found in the response
*/ */
private void assertNumFound(String msg, int expected, SolrParams p) private void assertNumFound(String msg, int expected, QueryResponse response)
throws SolrServerException { throws SolrServerException {
countNumFoundChecks++; countNumFoundChecks++;
SolrParams params = SolrParams.wrapDefaults(params("rows","0"), p); assertEquals(msg, expected, response.getResults().getNumFound());
assertEquals(msg + ": " + params, }
expected, cloudClient.query(params).getResults().getNumFound());
/**
 * Given two objects, asserts that they are either both null, or both Numbers
 * with double values that are equally-ish with a "small" epsilon (relative to the
 * scale of the expected value)
 *
 * @param msg prefix used for the assertion failure messages
 * @param expected the expected value: null, or a {@link Number}
 * @param actual the actual value: null, or a {@link Number}
 * @see Number#doubleValue
 */
private void assertDoubles(String msg, Object expected, Object actual) {
  if (null == expected || null == actual) {
    // at least one side is null: only passes if both are
    assertEquals(msg, expected, actual);
  } else {
    assertTrue(msg + " ... expected not a double: " +
               expected + "=>" + expected.getClass(),
               expected instanceof Number);
    assertTrue(msg + " ... actual not a double: " +
               actual + "=>" + actual.getClass(),
               actual instanceof Number);

    // compute an epsilon relative to the size of the expected value
    //
    // NOTE: the epsilon must be an absolute (non-negative) value -- with a
    // negative expected value the raw product would be a negative delta, which
    // can only be satisfied by values that are exactly equal
    double expect = ((Number)expected).doubleValue();
    double epsilon = Math.abs(expect * 0.1E-7D);

    assertEquals(msg, expect, ((Number)actual).doubleValue(), epsilon);
  }
}
/**
* test the test
*/
private void sanityCheckAssertDoubles() {
assertDoubles("Null?", null, null);
assertDoubles("big",
new Double(2.3005390038169265E9),
new Double(2.300539003816927E9));
assertDoubles("small",
new Double(2.3005390038169265E-9),
new Double(2.300539003816927E-9));
try {
assertDoubles("non-null", null, 42);
fail("expected was null");
} catch (AssertionError e) {}
try {
assertDoubles("non-null", 42, null);
fail("actual was null");
} catch (AssertionError e) {}
try {
assertDoubles("non-number", 42, "foo");
fail("actual was non-number");
} catch (AssertionError e) {}
try {
assertDoubles("diff",
new Double(2.3005390038169265E9),
new Double(2.267272520100462E9));
fail("big & diff");
} catch (AssertionError e) {}
try {
assertDoubles("diff",
new Double(2.3005390038169265E-9),
new Double(2.267272520100462E-9));
fail("small & diff");
} catch (AssertionError e) {}
} }
/** /**
@ -529,4 +743,5 @@ public class TestCloudPivotFacet extends AbstractFullDistribZkTestBase {
* @see #assertPivotCountsAreCorrect(SolrParams,SolrParams) * @see #assertPivotCountsAreCorrect(SolrParams,SolrParams)
*/ */
private int countNumFoundChecks = 0; private int countNumFoundChecks = 0;
} }

View File

@ -24,6 +24,7 @@ import java.io.IOException;
import org.apache.solr.BaseDistributedSearchTestCase; import org.apache.solr.BaseDistributedSearchTestCase;
import org.apache.solr.client.solrj.SolrServer; import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.FieldStatsInfo;
import org.apache.solr.client.solrj.response.PivotField; import org.apache.solr.client.solrj.response.PivotField;
import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.SolrInputDocument;
@ -666,6 +667,79 @@ public class DistributedFacetPivotLargeTest extends BaseDistributedSearchTestCas
FacetParams.FACET_OVERREQUEST_RATIO, "0", FacetParams.FACET_OVERREQUEST_RATIO, "0",
FacetParams.FACET_OVERREQUEST_COUNT, "0"); FacetParams.FACET_OVERREQUEST_COUNT, "0");
doTestDeepPivotStats();
}
private void doTestDeepPivotStats() throws Exception {
QueryResponse rsp = query("q", "*:*",
"rows", "0",
"facet", "true",
"facet.pivot","{!stats=s1}place_s,company_t",
"stats", "true",
"stats.field", "{!key=avg_price tag=s1}pay_i");
List<PivotField> pivots = rsp.getFacetPivot().get("place_s,company_t");
PivotField cardiffPivotField = pivots.get(0);
assertEquals("cardiff", cardiffPivotField.getValue());
assertEquals(257, cardiffPivotField.getCount());
FieldStatsInfo cardiffStatsInfo = cardiffPivotField.getFieldStatsInfo().get("avg_price");
assertEquals("avg_price", cardiffStatsInfo.getName());
assertEquals(0.0, cardiffStatsInfo.getMin());
assertEquals(8742.0, cardiffStatsInfo.getMax());
assertEquals(257, (long) cardiffStatsInfo.getCount());
assertEquals(0, (long) cardiffStatsInfo.getMissing());
assertEquals(347554.0, cardiffStatsInfo.getSum());
assertEquals(8.20968772E8, cardiffStatsInfo.getSumOfSquares(), 0.1E-7);
assertEquals(1352.35019455253, (double) cardiffStatsInfo.getMean(), 0.1E-7);
assertEquals(1170.86048165857, cardiffStatsInfo.getStddev(), 0.1E-7);
PivotField bbcCardifftPivotField = cardiffPivotField.getPivot().get(0);
assertEquals("bbc", bbcCardifftPivotField.getValue());
assertEquals(101, bbcCardifftPivotField.getCount());
FieldStatsInfo bbcCardifftPivotFieldStatsInfo = bbcCardifftPivotField.getFieldStatsInfo().get("avg_price");
assertEquals(2400.0, bbcCardifftPivotFieldStatsInfo.getMin());
assertEquals(8742.0, bbcCardifftPivotFieldStatsInfo.getMax());
assertEquals(101, (long) bbcCardifftPivotFieldStatsInfo.getCount());
assertEquals(0, (long) bbcCardifftPivotFieldStatsInfo.getMissing());
assertEquals(248742.0, bbcCardifftPivotFieldStatsInfo.getSum());
assertEquals(6.52422564E8, bbcCardifftPivotFieldStatsInfo.getSumOfSquares(), 0.1E-7);
assertEquals(2462.792079208, (double) bbcCardifftPivotFieldStatsInfo.getMean(), 0.1E-7);
assertEquals(631.0525860312, bbcCardifftPivotFieldStatsInfo.getStddev(), 0.1E-7);
PivotField placeholder0PivotField = pivots.get(2);
assertEquals("0placeholder", placeholder0PivotField.getValue());
assertEquals(6, placeholder0PivotField.getCount());
FieldStatsInfo placeholder0PivotFieldStatsInfo = placeholder0PivotField.getFieldStatsInfo().get("avg_price");
assertEquals("avg_price", placeholder0PivotFieldStatsInfo.getName());
assertEquals(2000.0, placeholder0PivotFieldStatsInfo.getMin());
assertEquals(6400.0, placeholder0PivotFieldStatsInfo.getMax());
assertEquals(6, (long) placeholder0PivotFieldStatsInfo.getCount());
assertEquals(0, (long) placeholder0PivotFieldStatsInfo.getMissing());
assertEquals(22700.0, placeholder0PivotFieldStatsInfo.getSum());
assertEquals(1.0105E8, placeholder0PivotFieldStatsInfo.getSumOfSquares(), 0.1E-7);
assertEquals(3783.333333333, (double) placeholder0PivotFieldStatsInfo.getMean(), 0.1E-7);
assertEquals(1741.742422595, placeholder0PivotFieldStatsInfo.getStddev(), 0.1E-7);
PivotField microsoftPlaceholder0PivotField = placeholder0PivotField.getPivot().get(1);
assertEquals("microsoft", microsoftPlaceholder0PivotField.getValue());
assertEquals(6, microsoftPlaceholder0PivotField.getCount());
FieldStatsInfo microsoftPlaceholder0PivotFieldStatsInfo = microsoftPlaceholder0PivotField.getFieldStatsInfo().get("avg_price");
assertEquals("avg_price", microsoftPlaceholder0PivotFieldStatsInfo.getName());
assertEquals(2000.0, microsoftPlaceholder0PivotFieldStatsInfo.getMin());
assertEquals(6400.0, microsoftPlaceholder0PivotFieldStatsInfo.getMax());
assertEquals(6, (long) microsoftPlaceholder0PivotFieldStatsInfo.getCount());
assertEquals(0, (long) microsoftPlaceholder0PivotFieldStatsInfo.getMissing());
assertEquals(22700.0, microsoftPlaceholder0PivotFieldStatsInfo.getSum());
assertEquals(1.0105E8, microsoftPlaceholder0PivotFieldStatsInfo.getSumOfSquares(), 0.1E-7);
assertEquals(3783.333333333, (double) microsoftPlaceholder0PivotFieldStatsInfo.getMean(), 0.1E-7);
assertEquals(1741.742422595, microsoftPlaceholder0PivotFieldStatsInfo.getStddev(), 0.1E-7);
} }
/** /**

View File

@ -27,9 +27,8 @@ import java.io.IOException;
import org.apache.solr.BaseDistributedSearchTestCase; import org.apache.solr.BaseDistributedSearchTestCase;
import org.apache.solr.client.solrj.SolrServer; import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.FieldStatsInfo;
import org.apache.solr.client.solrj.response.PivotField; import org.apache.solr.client.solrj.response.PivotField;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.FacetParams; import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.ModifiableSolrParams;
@ -68,9 +67,9 @@ public class DistributedFacetPivotLongTailTest extends BaseDistributedSearchTest
// the 5 top foo_s terms have 100 docs each on every shard // the 5 top foo_s terms have 100 docs each on every shard
for (int i = 0; i < 100; i++) { for (int i = 0; i < 100; i++) {
for (int j = 0; j < 5; j++) { for (int j = 0; j < 5; j++) {
shard0.add(sdoc("id", getDocNum(), "foo_s", "aaa"+j)); shard0.add(sdoc("id", getDocNum(), "foo_s", "aaa"+j, "stat_i", j * 13 - i));
shard1.add(sdoc("id", getDocNum(), "foo_s", "aaa"+j)); shard1.add(sdoc("id", getDocNum(), "foo_s", "aaa"+j, "stat_i", j * 3 + i));
shard2.add(sdoc("id", getDocNum(), "foo_s", "aaa"+j)); shard2.add(sdoc("id", getDocNum(), "foo_s", "aaa"+j, "stat_i", i * 7 + j));
} }
} }
@ -78,14 +77,14 @@ public class DistributedFacetPivotLongTailTest extends BaseDistributedSearchTest
// on both shard0 & shard1 ("bbb_") // on both shard0 & shard1 ("bbb_")
for (int i = 0; i < 50; i++) { for (int i = 0; i < 50; i++) {
for (int j = 0; j < 20; j++) { for (int j = 0; j < 20; j++) {
shard0.add(sdoc("id", getDocNum(), "foo_s", "bbb"+j)); shard0.add(sdoc("id", getDocNum(), "foo_s", "bbb"+j, "stat_i", 0));
shard1.add(sdoc("id", getDocNum(), "foo_s", "bbb"+j)); shard1.add(sdoc("id", getDocNum(), "foo_s", "bbb"+j, "stat_i", 1));
} }
// distracting term appears on only on shard2 50 times // distracting term appears on only on shard2 50 times
shard2.add(sdoc("id", getDocNum(), "foo_s", "junkA")); shard2.add(sdoc("id", getDocNum(), "foo_s", "junkA"));
} }
// put "bbb0" on shard2 exactly once to sanity check refinement // put "bbb0" on shard2 exactly once to sanity check refinement
shard2.add(sdoc("id", getDocNum(), "foo_s", "bbb0")); shard2.add(sdoc("id", getDocNum(), "foo_s", "bbb0", "stat_i", -2));
// long 'tail' foo_s term appears in 45 docs on every shard // long 'tail' foo_s term appears in 45 docs on every shard
// foo_s:tail is the only term with bar_s sub-pivot terms // foo_s:tail is the only term with bar_s sub-pivot terms
@ -95,11 +94,12 @@ public class DistributedFacetPivotLongTailTest extends BaseDistributedSearchTest
// but the top 5 terms are ccc(0-4) -- 7 on each shard // but the top 5 terms are ccc(0-4) -- 7 on each shard
// (4 docs each have junk terms) // (4 docs each have junk terms)
String sub_term = (i < 35) ? "ccc"+(i % 5) : ((i < 41) ? "tailB" : "junkA"); String sub_term = (i < 35) ? "ccc"+(i % 5) : ((i < 41) ? "tailB" : "junkA");
shard0.add(sdoc("id", getDocNum(), "foo_s", "tail", "bar_s", sub_term)); shard0.add(sdoc("id", getDocNum(), "foo_s", "tail", "bar_s", sub_term, "stat_i", i));
shard1.add(sdoc("id", getDocNum(), "foo_s", "tail", "bar_s", sub_term)); shard1.add(sdoc("id", getDocNum(), "foo_s", "tail", "bar_s", sub_term, "stat_i", i));
// shard2's top 5 sub-pivot terms are junk only it has with 8 docs each // shard2's top 5 sub-pivot terms are junk only it has with 8 docs each
// and 5 docs that use "tailB" // and 5 docs that use "tailB"
// NOTE: none of these get stat_i ! !
sub_term = (i < 40) ? "junkB"+(i % 5) : "tailB"; sub_term = (i < 40) ? "junkB"+(i % 5) : "tailB";
shard2.add(sdoc("id", getDocNum(), "foo_s", "tail", "bar_s", sub_term)); shard2.add(sdoc("id", getDocNum(), "foo_s", "tail", "bar_s", sub_term));
} }
@ -175,7 +175,9 @@ public class DistributedFacetPivotLongTailTest extends BaseDistributedSearchTest
FacetParams.FACET_OVERREQUEST_RATIO, "0", FacetParams.FACET_OVERREQUEST_RATIO, "0",
"facet", "true", "facet", "true",
"facet.limit", "6", "facet.limit", "6",
"facet.pivot", "foo_s,bar_s" ) "facet.pivot", "{!stats=sxy}foo_s,bar_s",
"stats", "true",
"stats.field", "{!tag=sxy}stat_i")
).getFacetPivot().get("foo_s,bar_s"); ).getFacetPivot().get("foo_s,bar_s");
assertEquals(6, pivots.size()); assertEquals(6, pivots.size());
for (int i = 0; i < 5; i++) { for (int i = 0; i < 5; i++) {
@ -183,9 +185,23 @@ public class DistributedFacetPivotLongTailTest extends BaseDistributedSearchTest
assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("aaa")); assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("aaa"));
assertEquals(pivot.toString(), 300, pivot.getCount()); assertEquals(pivot.toString(), 300, pivot.getCount());
} }
// even w/o the long tail, we should have still asked shard2 to refine bbb0 { // even w/o the long tail, we should have still asked shard2 to refine bbb0
assertTrue(pivots.get(5).toString(), pivots.get(5).getValue().equals("bbb0")); pivot = pivots.get(5);
assertEquals(pivots.get(5).toString(), 101, pivots.get(5).getCount()); assertTrue(pivot.toString(), pivot.getValue().equals("bbb0"));
assertEquals(pivot.toString(), 101, pivot.getCount());
// basic check of refined stats
FieldStatsInfo bbb0Stats = pivot.getFieldStatsInfo().get("stat_i");
assertEquals("stat_i", bbb0Stats.getName());
assertEquals(-2.0, bbb0Stats.getMin());
assertEquals(1.0, bbb0Stats.getMax());
assertEquals(101, (long) bbb0Stats.getCount());
assertEquals(0, (long) bbb0Stats.getMissing());
assertEquals(48.0, bbb0Stats.getSum());
assertEquals(0.475247524752475, (double) bbb0Stats.getMean(), 0.1E-7);
assertEquals(54.0, bbb0Stats.getSumOfSquares(), 0.1E-7);
assertEquals(0.55846323792, bbb0Stats.getStddev(), 0.1E-7);
}
// with default overrequesting, we should find the correct top 6 including // with default overrequesting, we should find the correct top 6 including
// long tail and top sub-pivots // long tail and top sub-pivots
@ -284,6 +300,65 @@ public class DistributedFacetPivotLongTailTest extends BaseDistributedSearchTest
assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("ccc")); assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("ccc"));
assertEquals(pivot.toString(), 14, pivot.getCount()); assertEquals(pivot.toString(), 14, pivot.getCount());
} }
doTestDeepPivotStats();
}
public void doTestDeepPivotStats() throws Exception {
// Deep checking of some Facet stats - no refinement involved here
List<PivotField> pivots =
query("q", "*:*",
"shards", getShardsString(),
"facet", "true",
"rows" , "0",
"facet.pivot","{!stats=s1}foo_s,bar_s",
"stats", "true",
"stats.field", "{!key=avg_price tag=s1}stat_i").getFacetPivot().get("foo_s,bar_s");
PivotField aaa0PivotField = pivots.get(0);
assertEquals("aaa0", aaa0PivotField.getValue());
assertEquals(300, aaa0PivotField.getCount());
FieldStatsInfo aaa0StatsInfo = aaa0PivotField.getFieldStatsInfo().get("avg_price");
assertEquals("avg_price", aaa0StatsInfo.getName());
assertEquals(-99.0, aaa0StatsInfo.getMin());
assertEquals(693.0, aaa0StatsInfo.getMax());
assertEquals(300, (long) aaa0StatsInfo.getCount());
assertEquals(0, (long) aaa0StatsInfo.getMissing());
assertEquals(34650.0, aaa0StatsInfo.getSum());
assertEquals(1.674585E7, aaa0StatsInfo.getSumOfSquares(), 0.1E-7);
assertEquals(115.5, (double) aaa0StatsInfo.getMean(), 0.1E-7);
assertEquals(206.4493184076, aaa0StatsInfo.getStddev(), 0.1E-7);
PivotField tailPivotField = pivots.get(5);
assertEquals("tail", tailPivotField.getValue());
assertEquals(135, tailPivotField.getCount());
FieldStatsInfo tailPivotFieldStatsInfo = tailPivotField.getFieldStatsInfo().get("avg_price");
assertEquals("avg_price", tailPivotFieldStatsInfo.getName());
assertEquals(0.0, tailPivotFieldStatsInfo.getMin());
assertEquals(44.0, tailPivotFieldStatsInfo.getMax());
assertEquals(90, (long) tailPivotFieldStatsInfo.getCount());
assertEquals(45, (long) tailPivotFieldStatsInfo.getMissing());
assertEquals(1980.0, tailPivotFieldStatsInfo.getSum());
assertEquals(22.0, (double) tailPivotFieldStatsInfo.getMean(), 0.1E-7);
assertEquals(58740.0, tailPivotFieldStatsInfo.getSumOfSquares(), 0.1E-7);
assertEquals(13.0599310011, tailPivotFieldStatsInfo.getStddev(), 0.1E-7);
PivotField tailBPivotField = tailPivotField.getPivot().get(0);
assertEquals("tailB", tailBPivotField.getValue());
assertEquals(17, tailBPivotField.getCount());
FieldStatsInfo tailBPivotFieldStatsInfo = tailBPivotField.getFieldStatsInfo().get("avg_price");
assertEquals("avg_price", tailBPivotFieldStatsInfo.getName());
assertEquals(35.0, tailBPivotFieldStatsInfo.getMin());
assertEquals(40.0, tailBPivotFieldStatsInfo.getMax());
assertEquals(12, (long) tailBPivotFieldStatsInfo.getCount());
assertEquals(5, (long) tailBPivotFieldStatsInfo.getMissing());
assertEquals(450.0, tailBPivotFieldStatsInfo.getSum());
assertEquals(37.5, (double) tailBPivotFieldStatsInfo.getMean(), 0.1E-7);
assertEquals(16910.0, tailBPivotFieldStatsInfo.getSumOfSquares(), 0.1E-7);
assertEquals(1.78376517, tailBPivotFieldStatsInfo.getStddev(), 0.1E-7);
} }
} }

View File

@ -0,0 +1,234 @@
package org.apache.solr.handler.component;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.solr.BaseDistributedSearchTestCase;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.FieldStatsInfo;
import org.apache.solr.client.solrj.response.PivotField;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
/**
* tests some edge cases of pivot faceting with stats
*
* NOTE: This test ignores the control collection (in single node mode, there is no
* need for the overrequesting, all the data is local -- so comparisons with it wouldn't
* be valid in some cases we are testing here)
*/
public class DistributedFacetPivotSmallAdvancedTest extends BaseDistributedSearchTestCase {
/**
 * Configures the test to use a fixed shard count of 2
 */
public DistributedFacetPivotSmallAdvancedTest() {
  shardCount = 2;
  fixShardCount = true;
}
/**
 * Indexes a small fixed set of documents -- ids 19-23 are sent directly to the
 * first client, ids 24-28 directly to the second -- and then runs each of the
 * individual checks of this test against that data.
 */
@Override
public void doTest() throws Exception {
// NOTE(review): presumably clears out any existing docs -- del() is inherited, confirm
del("*:*");
final SolrServer shard0 = clients.get(0);
final SolrServer shard1 = clients.get(1);
// NOTE: we use the literal (4 character) string "null" as a company name
// to help ensure there aren't any bugs where the literal string is treated as if it
// were a true NULL value.
// shard0 -- every doc has a price_ti value
shard0.add(sdoc(id, 19, "place_t", "cardiff dublin",
"company_t", "microsoft polecat",
"price_ti", "15", "foo_s", "aaa", "foo_i", 10));
shard0.add(sdoc(id, 20, "place_t", "dublin",
"company_t", "polecat microsoft null",
"price_ti", "19", "foo_s", "bbb", "foo_i", 4));
shard0.add(sdoc(id, 21, "place_t", "london la dublin",
"company_t", "microsoft fujitsu null polecat",
"price_ti", "29", "foo_s", "bbb", "foo_i", 3));
shard0.add(sdoc(id, 22, "place_t", "krakow london cardiff",
"company_t", "polecat null bbc",
"price_ti", "39", "foo_s", "bbb", "foo_i", 6));
shard0.add(sdoc(id, 23, "place_t", "london",
"company_t", "",
"price_ti", "29", "foo_s", "bbb", "foo_i", 9));
// shard1 -- only doc 25 has a price_ti value, and it is also the only doc w/o a place_t
shard1.add(sdoc(id, 24, "place_t", "la",
"company_t", "",
"foo_s", "aaa", "foo_i", 21));
shard1.add(sdoc(id, 25,
"company_t", "microsoft polecat null fujitsu null bbc",
"price_ti", "59", "foo_s", "aaa", "foo_i", 5));
shard1.add(sdoc(id, 26, "place_t", "krakow",
"company_t", "null",
"foo_s", "aaa", "foo_i", 23));
shard1.add(sdoc(id, 27, "place_t", "krakow cardiff dublin london la",
"company_t", "null microsoft polecat bbc fujitsu",
"foo_s", "aaa", "foo_i", 91));
shard1.add(sdoc(id, 28, "place_t", "cork",
"company_t", "fujitsu rte", "foo_s", "aaa", "foo_i", 76));
commit();
// NOTE(review): presumably these mark response values to be skipped when responses
// are compared by the base class -- confirm against BaseDistributedSearchTestCase
handle.clear();
handle.put("QTime", SKIPVAL);
handle.put("timestamp", SKIPVAL);
handle.put("maxScore", SKIPVAL);
// the actual checks, all run against the data indexed above
doTestDeepPivotStatsOnString();
doTestTopStatsWithRefinement();
}
/**
 * We need to ensure that stats never "overcount" the values from a single shard,
 * even if we hit that shard with a refinement request.
 *
 * <p>Three requests are issued against the distributed collection: stats only,
 * stats + pivot facet with {@code facet.limit=1}, and the same facet request with
 * overrequesting disabled (count and ratio set to 0). The top level {@code foo_i}
 * stats must be identical for all three. For the two facet requests the single
 * top pivot (dublin / microsoft) and its nested stats are verified as well.
 * Finally each shard is queried on its own to confirm that the shards disagree
 * on the top constraint, i.e. that refinement really was needed.</p>
 */
private void doTestTopStatsWithRefinement() throws Exception {

  ModifiableSolrParams coreParams = params("q", "*:*", "rows", "0",
                                           "stats", "true",
                                           "stats.field", "{!tag=s1}foo_i" );

  ModifiableSolrParams facetParams = new ModifiableSolrParams(coreParams);
  facetParams.add(params("facet", "true",
                         "facet.limit", "1",
                         "facet.pivot", "{!stats=s1}place_t,company_t"));

  ModifiableSolrParams facetForceRefineParams = new ModifiableSolrParams(facetParams);
  facetForceRefineParams.add(params(FacetParams.FACET_OVERREQUEST_COUNT, "0",
                                    FacetParams.FACET_OVERREQUEST_RATIO, "0"));

  for (ModifiableSolrParams params : new ModifiableSolrParams[] {
      coreParams, facetParams, facetForceRefineParams }) {

    // for all three sets of these params, the "top level"
    // stats in the response of a distributed query should be the same
    ModifiableSolrParams q = new ModifiableSolrParams(params);
    q.set("shards", getShardsString());

    QueryResponse rsp = queryServer(q);
    FieldStatsInfo fieldStatsInfo = rsp.getFieldStatsInfo().get("foo_i");

    // every assertion in this loop carries the request params, so that a failure
    // identifies which of the param sets produced it
    String msg = q.toString();

    assertEquals(msg, 3.0, fieldStatsInfo.getMin());
    assertEquals(msg, 91.0, fieldStatsInfo.getMax());
    assertEquals(msg, 10, (long) fieldStatsInfo.getCount());
    assertEquals(msg, 0, (long) fieldStatsInfo.getMissing());
    assertEquals(msg, 248.0, fieldStatsInfo.getSum());
    assertEquals(msg, 15294.0, fieldStatsInfo.getSumOfSquares(), 0.1E-7);
    assertEquals(msg, 24.8, (double) fieldStatsInfo.getMean(), 0.1E-7);
    assertEquals(msg, 31.87405772027709, fieldStatsInfo.getStddev(), 0.1E-7);

    if (params.getBool("facet", false)) {
      // if this was a facet request, then the top pivot constraint and pivot
      // stats should match what we expect - regardless of whether refine
      // was used, or if the query was initially satisfied by the default overrequest

      List<PivotField> placePivots = rsp.getFacetPivot().get("place_t,company_t");
      assertEquals(msg, 1, placePivots.size());

      PivotField dublinPivotField = placePivots.get(0);
      assertEquals(msg, "dublin", dublinPivotField.getValue());
      assertEquals(msg, 4, dublinPivotField.getCount());
      assertEquals(msg, 1, dublinPivotField.getPivot().size());

      PivotField microsoftPivotField = dublinPivotField.getPivot().get(0);
      assertEquals(msg, "microsoft", microsoftPivotField.getValue());
      assertEquals(msg, 4, microsoftPivotField.getCount());

      FieldStatsInfo dublinMicrosoftStatsInfo = microsoftPivotField.getFieldStatsInfo().get("foo_i");
      assertEquals(msg, 3.0D, dublinMicrosoftStatsInfo.getMin());
      assertEquals(msg, 91.0D, dublinMicrosoftStatsInfo.getMax());
      assertEquals(msg, 4, (long) dublinMicrosoftStatsInfo.getCount());
      assertEquals(msg, 0, (long) dublinMicrosoftStatsInfo.getMissing());
    }
  }

  // sanity check that the top pivot from each shard is diff, to prove to
  // ourselves that the above queries really must have involved refinement.
  Object s0pivValue = clients.get(0)
    .query(facetParams).getFacetPivot().get("place_t,company_t").get(0).getValue();
  Object s1pivValue = clients.get(1)
    .query(facetParams).getFacetPivot().get("place_t,company_t").get(0).getValue();
  assertFalse("both shards have same top constraint, test is invalid " +
              "(did someone change the test data?) ==> " +
              s0pivValue + "==" + s1pivValue, s0pivValue.equals(s1pivValue));
}
/**
 * Checks stats on the string field {@code foo_s} nested under a two level pivot
 * (place_t, company_t) for three (place, company) pairs.
 * Note that the stats key is {@code avg_price} even though the underlying field is
 * {@code foo_s}; the assertions below look the stats up under that key.
 */
private void doTestDeepPivotStatsOnString() throws Exception {
  SolrParams request = params("q", "*:*", "rows", "0",
                              "shards", getShardsString(),
                              "facet", "true", "stats", "true",
                              "facet.pivot", "{!stats=s1}place_t,company_t",
                              "stats.field", "{!key=avg_price tag=s1}foo_s");
  QueryResponse response = queryServer(new ModifiableSolrParams(request));
  List<PivotField> topLevel = response.getFacetPivot().get("place_t,company_t");

  // Expectations, one column per (place, company) pair that gets verified:
  // position + value + count of the place, position + value + count of the company
  // nested beneath it, and the min / max / count of the nested string stats.
  final int[] placeIndex = { 0, 2, 3 };
  final String[] placeValue = { "dublin", "cardiff", "krakow" };
  final int[] placeCount = { 4, 3, 3 };
  final int[] companyIndex = { 0, 0, 3 };
  final String[] companyValue = { "microsoft", "polecat", "fujitsu" };
  final int[] companyCount = { 4, 3, 1 };
  final String[] statsMin = { "aaa", "aaa", "aaa" };
  final String[] statsMax = { "bbb", "bbb", "aaa" };
  final long[] statsCount = { 4, 3, 1 };

  for (int i = 0; i < placeIndex.length; i++) {
    PivotField place = topLevel.get(placeIndex[i]);
    assertEquals(placeValue[i], place.getValue());
    assertEquals(placeCount[i], place.getCount());

    PivotField company = place.getPivot().get(companyIndex[i]);
    assertEquals(companyValue[i], company.getValue());
    assertEquals(companyCount[i], company.getCount());

    FieldStatsInfo stats = company.getFieldStatsInfo().get("avg_price");
    assertEquals(statsMin[i], stats.getMin());
    assertEquals(statsMax[i], stats.getMax());
    assertEquals(statsCount[i], (long) stats.getCount());
    // none of the checked buckets is expected to have a doc missing foo_s
    assertEquals(0, (long) stats.getMissing());
  }
}
}

View File

@ -20,9 +20,11 @@ package org.apache.solr.handler.component;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.Comparator; import java.util.Comparator;
import java.util.Date;
import java.util.List; import java.util.List;
import org.apache.solr.BaseDistributedSearchTestCase; import org.apache.solr.BaseDistributedSearchTestCase;
import org.apache.solr.client.solrj.response.FieldStatsInfo;
import org.apache.solr.client.solrj.response.PivotField; import org.apache.solr.client.solrj.response.PivotField;
import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.params.FacetParams; import org.apache.solr.common.params.FacetParams;
@ -46,20 +48,22 @@ public class DistributedFacetPivotSmallTest extends BaseDistributedSearchTestCas
// NOTE: we use the literal (4 character) string "null" as a company name // NOTE: we use the literal (4 character) string "null" as a company name
// to help ensure there isn't any bugs where the literal string is treated as if it // to help ensure there isn't any bugs where the literal string is treated as if it
// were a true NULL value. // were a true NULL value.
index(id, 19, "place_t", "cardiff dublin", "company_t", "microsoft polecat"); index(id, 19, "place_t", "cardiff dublin", "company_t", "microsoft polecat", "price_ti", "15");
index(id, 20, "place_t", "dublin", "company_t", "polecat microsoft null"); index(id, 20, "place_t", "dublin", "company_t", "polecat microsoft null", "price_ti", "19",
// this is the only doc to have solo_* fields, therefore only 1 shard has them
// TODO: add enum field - blocked by SOLR-6682
"solo_i", 42, "solo_s", "lonely", "solo_dt", "1976-03-06T01:23:45Z");
index(id, 21, "place_t", "london la dublin", "company_t", index(id, 21, "place_t", "london la dublin", "company_t",
"microsoft fujitsu null polecat"); "microsoft fujitsu null polecat", "price_ti", "29");
index(id, 22, "place_t", "krakow london cardiff", "company_t", index(id, 22, "place_t", "krakow london cardiff", "company_t",
"polecat null bbc"); "polecat null bbc", "price_ti", "39");
index(id, 23, "place_t", "london", "company_t", ""); index(id, 23, "place_t", "london", "company_t", "", "price_ti", "29");
index(id, 24, "place_t", "la", "company_t", ""); index(id, 24, "place_t", "la", "company_t", "");
index(id, 25, "company_t", "microsoft polecat null fujitsu null bbc"); index(id, 25, "company_t", "microsoft polecat null fujitsu null bbc", "price_ti", "59");
index(id, 26, "place_t", "krakow", "company_t", "null"); index(id, 26, "place_t", "krakow", "company_t", "null");
index(id, 27, "place_t", "krakow cardiff dublin london la", "company_t", index(id, 27, "place_t", "krakow cardiff dublin london la",
"null microsoft polecat bbc fujitsu"); "company_t", "null microsoft polecat bbc fujitsu");
index(id, 28, "place_t", "cork", "company_t", index(id, 28, "place_t", "cork", "company_t", "fujitsu rte");
"fujitsu rte");
commit(); commit();
handle.clear(); handle.clear();
@ -332,6 +336,76 @@ public class DistributedFacetPivotSmallTest extends BaseDistributedSearchTestCas
throw new AssertionError(ae.getMessage() + " <== " + p.toString(), ae); throw new AssertionError(ae.getMessage() + " <== " + p.toString(), ae);
} }
} }
doTestDeepPivotStats();
doTestPivotStatsFromOneShard();
}
/**
 * Checks numeric stats on price_ti (returned under the key "avg_price") nested beneath a
 * two level pivot on place_t,company_t. Three (place, company) buckets are verified:
 * dublin/microsoft, cardiff/polecat and krakow/fujitsu -- the last of which is expected
 * to contain no price_ti values at all (count 0, missing 1).
 */
private void doTestDeepPivotStats() throws Exception {
SolrParams params = params("q", "*:*", "rows", "0",
"facet", "true", "stats", "true",
"facet.pivot", "{!stats=s1}place_t,company_t",
"stats.field", "{!key=avg_price tag=s1}price_ti");
QueryResponse rsp = query(params);
List<PivotField> placePivots = rsp.getFacetPivot().get("place_t,company_t");
// first top level constraint: dublin, and its first nested company: microsoft
PivotField dublinPivotField = placePivots.get(0);
assertEquals("dublin", dublinPivotField.getValue());
assertEquals(4, dublinPivotField.getCount());
PivotField microsoftPivotField = dublinPivotField.getPivot().get(0);
assertEquals("microsoft", microsoftPivotField.getValue());
assertEquals(4, microsoftPivotField.getCount());
// 4 docs in the bucket, but only 3 price values are expected (1 missing)
FieldStatsInfo dublinMicrosoftStatsInfo = microsoftPivotField.getFieldStatsInfo().get("avg_price");
assertEquals(15.0, dublinMicrosoftStatsInfo.getMin());
assertEquals(29.0, dublinMicrosoftStatsInfo.getMax());
assertEquals(3, (long) dublinMicrosoftStatsInfo.getCount());
assertEquals(1, (long) dublinMicrosoftStatsInfo.getMissing());
assertEquals(63.0, dublinMicrosoftStatsInfo.getSum());
assertEquals(1427.0, dublinMicrosoftStatsInfo.getSumOfSquares(), 0.1E-7);
assertEquals(21.0, (double) dublinMicrosoftStatsInfo.getMean(), 0.1E-7);
assertEquals(7.211102550927978, dublinMicrosoftStatsInfo.getStddev(), 0.1E-7);
// third top level constraint: cardiff, and its first nested company: polecat
PivotField cardiffPivotField = placePivots.get(2);
assertEquals("cardiff", cardiffPivotField.getValue());
assertEquals(3, cardiffPivotField.getCount());
PivotField polecatPivotField = cardiffPivotField.getPivot().get(0);
assertEquals("polecat", polecatPivotField.getValue());
assertEquals(3, polecatPivotField.getCount());
FieldStatsInfo cardiffPolecatStatsInfo = polecatPivotField.getFieldStatsInfo().get("avg_price");
assertEquals(15.0, cardiffPolecatStatsInfo.getMin());
assertEquals(39.0, cardiffPolecatStatsInfo.getMax());
assertEquals(2, (long) cardiffPolecatStatsInfo.getCount());
assertEquals(1, (long) cardiffPolecatStatsInfo.getMissing());
assertEquals(54.0, cardiffPolecatStatsInfo.getSum());
assertEquals(1746.0, cardiffPolecatStatsInfo.getSumOfSquares(), 0.1E-7);
assertEquals(27.0, (double) cardiffPolecatStatsInfo.getMean(), 0.1E-7);
assertEquals(16.97056274847714, cardiffPolecatStatsInfo.getStddev(), 0.1E-7);
// fourth top level constraint: krakow, and its fourth nested company: fujitsu
PivotField krakowPivotField = placePivots.get(3);
assertEquals("krakow", krakowPivotField.getValue());
assertEquals(3, krakowPivotField.getCount());
PivotField fujitsuPivotField = krakowPivotField.getPivot().get(3);
assertEquals("fujitsu", fujitsuPivotField.getValue());
assertEquals(1, fujitsuPivotField.getCount());
// the only doc in this bucket has no price: min/max are null, sums are 0 and the mean is NaN
FieldStatsInfo krakowFujitsuStatsInfo = fujitsuPivotField.getFieldStatsInfo().get("avg_price");
assertEquals(null, krakowFujitsuStatsInfo.getMin());
assertEquals(null, krakowFujitsuStatsInfo.getMax());
assertEquals(0, (long) krakowFujitsuStatsInfo.getCount());
assertEquals(1, (long) krakowFujitsuStatsInfo.getMissing());
assertEquals(0.0, krakowFujitsuStatsInfo.getSum());
assertEquals(0.0, krakowFujitsuStatsInfo.getSumOfSquares(), 0.1E-7);
assertEquals(Double.NaN, (double) krakowFujitsuStatsInfo.getMean(), 0.1E-7);
assertEquals(0.0, krakowFujitsuStatsInfo.getStddev(), 0.1E-7);
} }
// Useful to check for errors, orders lists and does toString() equality check // Useful to check for errors, orders lists and does toString() equality check
@ -352,6 +426,46 @@ public class DistributedFacetPivotSmallTest extends BaseDistributedSearchTestCas
assertEquals(expectedPlacePivots.toString(), placePivots.toString()); assertEquals(expectedPlacePivots.toString(), placePivots.toString());
} }
/**
 * Sanity checks the stat values nested under a pivot when at least one shard
 * has nothing but missing values for the stat: the {@code solo_*} fields are
 * requested as stats, and both the "dublin" place pivot and the "null" company
 * pivot beneath it must report exactly one value for each of them.
 */
private void doTestPivotStatsFromOneShard() throws Exception {
  SolrParams request = params("q", "*:*", "rows", "0",
                              "facet", "true", "stats", "true",
                              "facet.pivot", "{!stats=s1}place_t,company_t",
                              "stats.field", "{!tag=s1}solo_i",
                              "stats.field", "{!tag=s1}solo_s",
                              "stats.field", "{!tag=s1}solo_dt");
  QueryResponse response = query(request);
  List<PivotField> topLevel = response.getFacetPivot().get("place_t,company_t");

  PivotField dublin = topLevel.get(0);
  assertEquals("dublin", dublin.getValue());
  assertEquals(4, dublin.getCount());

  PivotField nullCompany = dublin.getPivot().get(2);
  assertEquals("null", nullCompany.getValue());
  assertEquals(3, nullCompany.getCount());

  // 1976-03-06T01:23:45Z
  final Object soloDate = new Date(194923425000L);

  PivotField[] pivotsToCheck = { dublin, nullCompany };
  for (int i = 0; i < pivotsToCheck.length; i++) {
    PivotField pf = pivotsToCheck[i];

    assertThereCanBeOnlyOne(pf, pf.getFieldStatsInfo().get("solo_s"), "lonely");

    assertThereCanBeOnlyOne(pf, pf.getFieldStatsInfo().get("solo_i"), 42.0D);
    assertEquals(pf.getField() + ":" + pf.getValue() + ": int mean",
                 42.0D, pf.getFieldStatsInfo().get("solo_i").getMean());

    assertThereCanBeOnlyOne(pf, pf.getFieldStatsInfo().get("solo_dt"), soloDate);
    assertEquals(pf.getField() + ":" + pf.getValue() + ": date mean",
                 soloDate, pf.getFieldStatsInfo().get("solo_dt").getMean());

    // TODO: add enum field asserts - blocked by SOLR-6682
  }
}
private void testCountSorting(List<PivotField> pivots) { private void testCountSorting(List<PivotField> pivots) {
Integer lastCount = null; Integer lastCount = null;
for (PivotField pivot : pivots) { for (PivotField pivot : pivots) {
@ -365,12 +479,27 @@ public class DistributedFacetPivotSmallTest extends BaseDistributedSearchTestCas
} }
} }
/**
 * Asserts that the given stats describe exactly one value found among the
 * documents counted by the given pivot:
 * <ul>
 *  <li>stat count == 1</li>
 *  <li>stat missing == pivot count - 1</li>
 *  <li>stat min == stat max == the expected value</li>
 * </ul>
 *
 * @param pf the pivot the stats were nested under (its field and value label failures)
 * @param stats the stats to inspect
 * @param val the single value expected as both min and max
 */
private void assertThereCanBeOnlyOne(PivotField pf, FieldStatsInfo stats, Object val) {
  final String label = pf.getField() + ":" + pf.getValue();

  assertEquals(label + " stats count", 1L, stats.getCount().longValue());

  final long expectedMissing = pf.getCount() - 1L;
  assertEquals(label + " stats missing", expectedMissing, stats.getMissing().longValue());

  assertEquals(label + " stats min", val, stats.getMin());
  assertEquals(label + " stats max", val, stats.getMax());
}
public static class ComparablePivotField extends PivotField { public static class ComparablePivotField extends PivotField {
public ComparablePivotField(String f, Object v, int count, public ComparablePivotField(String f, Object v, int count, List<PivotField> pivot) {
List<PivotField> pivot) { super(f,v,count,pivot, null);
super(f,v,count,pivot);
} }
@Override @Override

View File

@ -0,0 +1,138 @@
package org.apache.solr.handler.component;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.solr.BaseDistributedSearchTestCase;
import org.apache.solr.client.solrj.response.PivotField;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import java.util.List;
public class DistributedFacetPivotWhiteBoxTest extends BaseDistributedSearchTestCase {
/**
 * Configures the test for four shards.
 * NOTE(review): presumably {@code fixShardCount} stops the base class from varying the
 * number of shards -- confirm against BaseDistributedSearchTestCase.
 */
public DistributedFacetPivotWhiteBoxTest() {
  shardCount = 4;
  fixShardCount = true;
}
/**
 * Wipes the index, indexes the fixed document set, commits, and then runs the two
 * shard-level request checks.
 */
@Override
public void doTest() throws Exception {
  del("*:*");

  // NOTE: the literal (4 character) string "null" is used as a company name
  // to help ensure there aren't any bugs where that literal string is treated
  // as if it were a true NULL value.
  index(id, 19,
        "place_t", "cardiff dublin",
        "company_t", "microsoft polecat",
        "price_ti", "15");
  index(id, 20,
        "place_t", "dublin",
        "company_t", "polecat microsoft null",
        "price_ti", "19",
        // this is the only doc to have solo_* fields, therefore only 1 shard has them
        // TODO: add enum field - blocked by SOLR-6682
        "solo_i", 42,
        "solo_s", "lonely",
        "solo_dt", "1976-03-06T01:23:45Z");
  index(id, 21,
        "place_t", "krakow london la dublin",
        "company_t", "microsoft fujitsu null polecat",
        "price_ti", "29");
  index(id, 22,
        "place_t", "krakow london cardiff",
        "company_t", "polecat null bbc",
        "price_ti", "39");
  index(id, 23,
        "place_t", "krakow london",
        "company_t", "",
        "price_ti", "29");
  index(id, 24,
        "place_t", "krakow la",
        "company_t", "");
  index(id, 25,
        "company_t", "microsoft polecat null fujitsu null bbc",
        "price_ti", "59");
  index(id, 26,
        "place_t", "krakow",
        "company_t", "null");
  index(id, 27,
        "place_t", "krakow cardiff dublin london la",
        "company_t", "null microsoft polecat bbc fujitsu");
  index(id, 28,
        "place_t", "krakow cork",
        "company_t", "fujitsu rte");
  commit();

  // response entries registered with SKIPVAL
  handle.clear();
  for (String skippedKey : new String[] { "QTime", "timestamp", "maxScore" }) {
    handle.put(skippedKey, SKIPVAL);
  }

  doShardTestTopStats();
  doTestRefinementRequest();
}
/**
 * Recreates the initial request sent to a shard as part of a distributed query,
 * and confirms that both the top level stats and the per-pivot stats come back.
 */
private void doShardTestTopStats() throws Exception {
  // NOTE: the "wt" and "version" params are not set on this request
  SolrParams initialShardRequest = params(
      "facet", "true",
      "q", "*:*",
      "facet.pivot", "{!stats=s1}place_t,company_t",
      "start", "0", "rows", "0",
      "fsv", "true",
      "fl", "id,score",
      "stats", "true",
      "stats.field", "{!key=avg_price tag=s1}price_ti",
      "f.place_t.facet.limit", "160",
      "f.place_t.facet.pivot.mincount", "0",
      "f.company_t.facet.limit", "160",
      "f.company_t.facet.pivot.mincount", "0",
      "isShard", "true", "distrib", "false");
  QueryResponse response = queryServer(new ModifiableSolrParams(initialShardRequest));

  // top level stats must be present ...
  assertNotNull("initial shard request should include non-null top level stats",
                response.getFieldStatsInfo());
  assertFalse("initial shard request should include top level stats",
              response.getFieldStatsInfo().isEmpty());

  // ... and so must the stats of every top level pivot constraint
  List<PivotField> topLevel = response.getFacetPivot().get("place_t,company_t");
  for (int i = 0; i < topLevel.size(); i++) {
    assertFalse("pivot stats should not be empty in initial request",
                topLevel.get(i).getFieldStatsInfo().isEmpty());
  }
}
/**
 * Recreates a pivot refinement request sent to a shard as part of a distributed
 * query, and confirms that the per-pivot stats come back while the top level stats
 * do not (they must not be overcounted).
 */
private void doTestRefinementRequest() throws Exception {
  // NOTE: the "wt" and "version" params are not set on this request
  SolrParams refinementRequest = params(
      "facet.missing", "true",
      "facet", "true",
      "facet.limit", "4",
      "distrib", "false",
      "rows", "0",
      "facet.sort", "index",
      "fpt0", "~krakow",
      "facet.pivot.mincount", "-1",
      "isShard", "true",
      "facet.pivot", "{!fpt=0 stats=st1}place_t,company_t",
      "stats", "false",
      "stats.field", "{!key=sk1 tag=st1,st2}price_ti");

  // sent straight to the first shard client
  QueryResponse response = clients.get(0).query(new ModifiableSolrParams(refinementRequest));

  assertNull("pivot refine request should *NOT* include top level stats",
             response.getFieldStatsInfo());

  List<PivotField> refined = response.getFacetPivot().get("place_t,company_t");
  assertEquals("asked to refine exactly one place",
               1, refined.size());

  PivotField onlyPlace = refined.get(0);
  assertFalse("pivot stats should not be empty in refinement request",
              onlyPlace.getFieldStatsInfo().isEmpty());
}
}

View File

@ -0,0 +1,504 @@
package org.apache.solr.handler.component;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.request.SolrQueryRequest;
import org.junit.BeforeClass;
/**
* Single node testing of pivot facets
*/
public class FacetPivotSmallTest extends SolrTestCaseJ4 {
/** Initializes the test core from solrconfig.xml and schema11.xml once for the class. */
@BeforeClass
public static void beforeClass() throws Exception {
  final String configFile = "solrconfig.xml";
  final String schemaFile = "schema11.xml";
  initCore(configFile, schemaFile);
}
/**
 * Per-test setup: after the superclass setup, clears the index and commits,
 * then replaces the request factory used by the test.
 */
@Override
public void setUp() throws Exception {
super.setUp();
// each test method begins with no documents
clearIndex();
assertU(commit());
// NOTE(review): presumably handler "standard" with start=0 and a limit of 20 -- confirm against TestHarness
lrf = h.getRequestFactory("standard", 0, 20);
}
/**
 * Commas are not supported in the "stats" local param ... yet: SOLR-6663.
 * A facet.pivot whose stats local param names two tags must be rejected with a 400.
 */
public void testStatsTagHasComma() throws Exception {
  // behavior should be the same whether or not any documents are indexed
  final boolean withDocs = random().nextBoolean();
  if (withDocs) {
    index();
  }

  final SolrQueryRequest multiTagRequest =
      req("q","*:*", "facet", "true",
          "stats", "true",
          "stats.field", "{!tag=foo}price_ti",
          "stats.field", "{!tag=bar}id",
          "facet.pivot", "{!stats=foo,bar}place_t,company_t");

  assertQEx("Can't use multiple tags in stats local param until SOLR-6663 is decided",
            multiTagRequest,
            400);
}
/**
 * If the stats local param refers to a tag that no stats.field uses,
 * the pivots should still be returned -- just without any nested stats.
 */
public void testBogusStatsTag() throws Exception {
  index();

  // xpath selecting the "dublin" constraint of the pivot
  final String dublinPivot = "//arr[@name='place_t,company_t']/lst[str[@name='value'][.='dublin']]";

  final SolrQueryRequest bogusTagRequest =
      req("q","*:*", "facet", "true",
          "facet.pivot", "{!stats=bogus}place_t,company_t");

  assertQ(bogusTagRequest,
          // check we still get pivots...
          dublinPivot,
          // .. but sanity check we don't have any stats
          "count(" + dublinPivot + "/lst[@name='stats'])=0");
}
/**
 * Pivots place_t by company_t without specifying any sort and verifies, for every
 * place, the position, value and count of each company constraint nested under it.
 */
public void testPivotFacetUnsorted() throws Exception {
  index();

  final ModifiableSolrParams queryParams = new ModifiableSolrParams();
  queryParams.add("q", "*:*");
  queryParams.add("facet", "true");
  queryParams.add("facet.pivot", "place_t,company_t");
  final SolrQueryRequest request = req(queryParams);

  final String pivotRoot = "//lst[@name='facet_counts']/lst[@name='facet_pivot']/arr[@name='place_t,company_t']/lst";

  // One row per place: the place value, followed by (company, count) pairs
  // listed in the order the nested constraints are expected in the response.
  final String[][] expectedByPlace = {
    { "dublin",  "microsoft", "4", "polecat", "4", "null", "3", "fujitsu", "2", "bbc", "1" },
    { "london",  "null", "3", "polecat", "3", "bbc", "2", "fujitsu", "2", "microsoft", "2" },
    { "cardiff", "polecat", "3", "bbc", "2", "microsoft", "2", "null", "2", "fujitsu", "1" },
    { "krakow",  "null", "3", "bbc", "2", "polecat", "2", "fujitsu", "1", "microsoft", "1" },
    { "la",      "fujitsu", "2", "microsoft", "2", "null", "2", "polecat", "2", "bbc", "1" },
    { "cork",    "fujitsu", "1", "rte", "1" },
  };

  // one xpath for the top level field name, plus a value xpath and a count xpath
  // for every (company, count) pair
  int xpathCount = 1;
  for (String[] row : expectedByPlace) {
    xpathCount += row.length - 1;
  }
  final String[] xpaths = new String[xpathCount];

  int next = 0;
  xpaths[next++] = pivotRoot + "/str[@name='field'][.='place_t']";
  for (String[] row : expectedByPlace) {
    final String nestedEntry =
        pivotRoot + "[str[@name='value'][.='" + row[0] + "']]/arr[@name='pivot']/lst[";
    for (int i = 1; i < row.length; i += 2) {
      final int position = (i + 1) / 2; // xpath positions are 1-based
      xpaths[next++] = nestedEntry + position + "]/str[@name='value'][.='" + row[i] + "']";
      xpaths[next++] = nestedEntry + position + "]/int[@name='count'][.=" + row[i + 1] + "]";
    }
  }

  assertQ(request, xpaths);
}
/**
 * Requests price_ti stats (key "avg_price", tag "s1") nested under the
 * place_t,company_t pivot and verifies every stat reported for three
 * (place, company) buckets, including one bucket with no price values at all.
 */
public void testPivotFacetStatsUnsortedTagged() throws Exception {
  index();

  final ModifiableSolrParams queryParams = new ModifiableSolrParams();
  queryParams.add("q", "*:*");
  queryParams.add("facet", "true");
  queryParams.add("facet.pivot", "{!stats=s1}place_t,company_t");
  queryParams.add("stats", "true");
  queryParams.add("stats.field", "{!key=avg_price tag=s1 mean=true}price_ti");
  final SolrQueryRequest request = req(queryParams);

  // building blocks of the xpath leading to the stats of one (place, company) bucket
  final String pivotRoot = "//lst[@name='facet_counts']/lst[@name='facet_pivot']/arr[@name='place_t,company_t']/lst";
  final String valueIs = "[str[@name='value'][.='";
  final String endValue = "']]";
  final String nestedPivot = "/arr[@name='pivot']/lst";
  final String statsTail = "/lst[@name='stats']/lst[@name='stats_fields']/lst[@name='avg_price']";

  final String dublinMicrosoft =
      pivotRoot + valueIs + "dublin" + endValue + nestedPivot + valueIs + "microsoft" + endValue + statsTail;
  final String cardiffPolecat =
      pivotRoot + valueIs + "cardiff" + endValue + nestedPivot + valueIs + "polecat" + endValue + statsTail;
  final String krakowFujitsu =
      pivotRoot + valueIs + "krakow" + endValue + nestedPivot + valueIs + "fujitsu" + endValue + statsTail;

  assertQ(request,
          // dublin / microsoft: 3 prices, 1 missing
          dublinMicrosoft + "/double[@name='min'][.=15.0]",
          dublinMicrosoft + "/double[@name='max'][.=29.0]",
          dublinMicrosoft + "/long[@name='count'][.=3]",
          dublinMicrosoft + "/long[@name='missing'][.=1]",
          dublinMicrosoft + "/double[@name='sum'][.=63.0]",
          dublinMicrosoft + "/double[@name='sumOfSquares'][.=1427.0]",
          dublinMicrosoft + "/double[@name='mean'][.=21.0]",
          dublinMicrosoft + "/double[@name='stddev'][.=7.211102550927978]",

          // cardiff / polecat: 2 prices, 1 missing
          cardiffPolecat + "/double[@name='min'][.=15.0]",
          cardiffPolecat + "/double[@name='max'][.=39.0]",
          cardiffPolecat + "/long[@name='count'][.=2]",
          cardiffPolecat + "/long[@name='missing'][.=1]",
          cardiffPolecat + "/double[@name='sum'][.=54.0]",
          cardiffPolecat + "/double[@name='sumOfSquares'][.=1746.0]",
          cardiffPolecat + "/double[@name='mean'][.=27.0]",
          cardiffPolecat + "/double[@name='stddev'][.=16.97056274847714]",

          // krakow / fujitsu: no prices at all
          krakowFujitsu + "/null[@name='min']",
          krakowFujitsu + "/null[@name='max']",
          krakowFujitsu + "/long[@name='count'][.=0]",
          krakowFujitsu + "/long[@name='missing'][.=1]",
          krakowFujitsu + "/double[@name='sum'][.=0.0]",
          krakowFujitsu + "/double[@name='sumOfSquares'][.=0.0]",
          krakowFujitsu + "/double[@name='mean'][.='NaN']",
          krakowFujitsu + "/double[@name='stddev'][.=0.0]"
  );
}
/**
 * Pivots place_t,company_t with an explicit facet.sort=count and checks, for every place,
 * the position and count of each nested company value.
 */
public void testPivotFacetSortedCount() throws Exception {
  index();

  final ModifiableSolrParams params = new ModifiableSolrParams();
  params.add("q", "*:*");
  params.add("facet", "true");
  params.add("facet.pivot", "place_t,company_t");
  // Count ordering is requested explicitly.
  // TODO (carried over from the original author): clarify why count ordering is active by
  // default -- the default is "count" if facet.limit is greater than 0 and "index" otherwise,
  // but facet.limit is not defined in this request.
  params.set(FacetParams.FACET_SORT, FacetParams.FACET_SORT_COUNT);

  final String facetPivotPrefix = "//lst[@name='facet_counts']/lst[@name='facet_pivot']/arr[@name='place_t,company_t']/lst";
  // One xpath stem per top-level place, pointing at that place's list of nested company entries
  final String dublin = facetPivotPrefix + "[str[@name='value'][.='dublin']]/arr[@name='pivot']/lst";
  final String london = facetPivotPrefix + "[str[@name='value'][.='london']]/arr[@name='pivot']/lst";
  final String cardiff = facetPivotPrefix + "[str[@name='value'][.='cardiff']]/arr[@name='pivot']/lst";
  final String krakow = facetPivotPrefix + "[str[@name='value'][.='krakow']]/arr[@name='pivot']/lst";
  final String la = facetPivotPrefix + "[str[@name='value'][.='la']]/arr[@name='pivot']/lst";
  final String cork = facetPivotPrefix + "[str[@name='value'][.='cork']]/arr[@name='pivot']/lst";

  SolrQueryRequest request = req(params);
  assertQ(request,
      facetPivotPrefix + "/str[@name='field'][.='place_t']",
      // dublin
      dublin + "[1]/str[@name='value'][.='microsoft']",
      dublin + "[1]/int[@name='count'][.=4]",
      dublin + "[2]/str[@name='value'][.='polecat']",
      dublin + "[2]/int[@name='count'][.=4]",
      dublin + "[3]/str[@name='value'][.='null']",
      dublin + "[3]/int[@name='count'][.=3]",
      dublin + "[4]/str[@name='value'][.='fujitsu']",
      dublin + "[4]/int[@name='count'][.=2]",
      dublin + "[5]/str[@name='value'][.='bbc']",
      dublin + "[5]/int[@name='count'][.=1]",
      // london
      london + "[1]/str[@name='value'][.='null']",
      london + "[1]/int[@name='count'][.=3]",
      london + "[2]/str[@name='value'][.='polecat']",
      london + "[2]/int[@name='count'][.=3]",
      london + "[3]/str[@name='value'][.='bbc']",
      london + "[3]/int[@name='count'][.=2]",
      london + "[4]/str[@name='value'][.='fujitsu']",
      london + "[4]/int[@name='count'][.=2]",
      london + "[5]/str[@name='value'][.='microsoft']",
      london + "[5]/int[@name='count'][.=2]",
      // cardiff
      cardiff + "[1]/str[@name='value'][.='polecat']",
      cardiff + "[1]/int[@name='count'][.=3]",
      cardiff + "[2]/str[@name='value'][.='bbc']",
      cardiff + "[2]/int[@name='count'][.=2]",
      cardiff + "[3]/str[@name='value'][.='microsoft']",
      cardiff + "[3]/int[@name='count'][.=2]",
      cardiff + "[4]/str[@name='value'][.='null']",
      cardiff + "[4]/int[@name='count'][.=2]",
      cardiff + "[5]/str[@name='value'][.='fujitsu']",
      cardiff + "[5]/int[@name='count'][.=1]",
      // krakow
      krakow + "[1]/str[@name='value'][.='null']",
      krakow + "[1]/int[@name='count'][.=3]",
      krakow + "[2]/str[@name='value'][.='bbc']",
      krakow + "[2]/int[@name='count'][.=2]",
      krakow + "[3]/str[@name='value'][.='polecat']",
      krakow + "[3]/int[@name='count'][.=2]",
      krakow + "[4]/str[@name='value'][.='fujitsu']",
      krakow + "[4]/int[@name='count'][.=1]",
      krakow + "[5]/str[@name='value'][.='microsoft']",
      krakow + "[5]/int[@name='count'][.=1]",
      // la
      la + "[1]/str[@name='value'][.='fujitsu']",
      la + "[1]/int[@name='count'][.=2]",
      la + "[2]/str[@name='value'][.='microsoft']",
      la + "[2]/int[@name='count'][.=2]",
      la + "[3]/str[@name='value'][.='null']",
      la + "[3]/int[@name='count'][.=2]",
      la + "[4]/str[@name='value'][.='polecat']",
      la + "[4]/int[@name='count'][.=2]",
      la + "[5]/str[@name='value'][.='bbc']",
      la + "[5]/int[@name='count'][.=1]",
      // cork
      cork + "[1]/str[@name='value'][.='fujitsu']",
      cork + "[1]/int[@name='count'][.=1]",
      cork + "[2]/str[@name='value'][.='rte']",
      cork + "[2]/int[@name='count'][.=1]"
  );
}
/**
 * Pivots place_t,company_t sorted by count with a global facet.limit of 2 and checks the
 * first two nested company entries under dublin and london.
 */
public void testPivotFacetLimit() throws Exception {
  index();

  final ModifiableSolrParams params = new ModifiableSolrParams();
  params.add("q", "*:*");
  params.add("facet", "true");
  params.add("facet.pivot", "place_t,company_t");
  params.set(FacetParams.FACET_SORT, FacetParams.FACET_SORT_COUNT);
  params.set(FacetParams.FACET_LIMIT, 2);

  final String facetPivotPrefix = "//lst[@name='facet_counts']/lst[@name='facet_pivot']/arr[@name='place_t,company_t']/lst";
  // xpath stems for the nested company entries of each place that is checked
  final String dublin = facetPivotPrefix + "[str[@name='value'][.='dublin']]/arr[@name='pivot']/lst";
  final String london = facetPivotPrefix + "[str[@name='value'][.='london']]/arr[@name='pivot']/lst";

  SolrQueryRequest request = req(params);
  assertQ(request,
      facetPivotPrefix + "/str[@name='field'][.='place_t']",
      // dublin
      dublin + "[1]/str[@name='value'][.='microsoft']",
      dublin + "[1]/int[@name='count'][.=4]",
      dublin + "[2]/str[@name='value'][.='polecat']",
      dublin + "[2]/int[@name='count'][.=4]",
      // london
      london + "[1]/str[@name='value'][.='null']",
      london + "[1]/int[@name='count'][.=3]",
      london + "[2]/str[@name='value'][.='polecat']",
      london + "[2]/int[@name='count'][.=3]"
  );
}
/**
 * Pivots place_t,company_t sorted by count with per-field limits (f.place_t limit 1,
 * f.company_t limit 4) and checks the four nested company entries under dublin.
 */
public void testPivotIndividualFacetLimit() throws Exception {
  index();

  final ModifiableSolrParams params = new ModifiableSolrParams();
  params.add("q", "*:*");
  params.add("facet", "true");
  params.add("facet.pivot", "place_t,company_t");
  params.set(FacetParams.FACET_SORT, FacetParams.FACET_SORT_COUNT);
  // per-field overrides of facet.limit
  params.set("f.place_t." + FacetParams.FACET_LIMIT, 1);
  params.set("f.company_t." + FacetParams.FACET_LIMIT, 4);

  final String facetPivotPrefix = "//lst[@name='facet_counts']/lst[@name='facet_pivot']/arr[@name='place_t,company_t']/lst";
  final String dublin = facetPivotPrefix + "[str[@name='value'][.='dublin']]/arr[@name='pivot']/lst";

  SolrQueryRequest request = req(params);
  assertQ(request,
      facetPivotPrefix + "/str[@name='field'][.='place_t']",
      // dublin
      dublin + "[1]/str[@name='value'][.='microsoft']",
      dublin + "[1]/int[@name='count'][.=4]",
      dublin + "[2]/str[@name='value'][.='polecat']",
      dublin + "[2]/int[@name='count'][.=4]",
      dublin + "[3]/str[@name='value'][.='null']",
      dublin + "[3]/int[@name='count'][.=3]",
      dublin + "[4]/str[@name='value'][.='fujitsu']",
      dublin + "[4]/int[@name='count'][.=2]"
  );
}
/**
 * Exercises facet.missing=true on the place_t,company_t pivot under two different sorts:
 * first with the default facet.sort, then with facet.limit=4 and facet.sort=index supplied
 * as defaults behind the first parameter set.
 */
public void testPivotFacetMissing() throws Exception {
  index();
  indexMissing();

  final String facetPivotPrefix = "//lst[@name='facet_counts']/lst[@name='facet_pivot']/arr[@name='place_t,company_t']/lst";

  // --- default facet.sort ---
  SolrParams missingA = params("q", "*:*",
      "rows", "0",
      "facet", "true",
      "facet.pivot", "place_t,company_t",
      FacetParams.FACET_MISSING, "true");
  final String missingPlaceA = facetPivotPrefix + "[7]";
  final String missingCompanyA = missingPlaceA + "/arr[@name='pivot']/lst[6]";
  SolrQueryRequest requestA = req(missingA);
  assertQ(requestA,
      facetPivotPrefix + "/arr[@name='pivot'][count(.) > 0]", // fails if the pivot has too few values
      missingPlaceA + "/null[@name='value'][.='']",           // fails if this is not the missing-place entry
      missingPlaceA + "/int[@name='count'][.=2]",             // fails on a wrong missing-place count
      missingPlaceA + "/arr[@name='pivot'][count(.) > 0]",    // fails if the missing place has too few sub-pivots
      missingCompanyA + "/null[@name='value'][.='']",         // fails if this is not the missing-company entry
      missingCompanyA + "/int[@name='count'][.=1]",           // fails on a wrong missing-company count
      missingCompanyA + "[not(arr[@name='pivot'])]"           // the company level must not have sub-pivots
  );

  // --- facet.limit=4 + facet.sort=index ---
  SolrParams missingB = SolrParams.wrapDefaults(missingA,
      params(FacetParams.FACET_LIMIT, "4",
          "facet.sort", "index"));
  final String missingPlaceB = facetPivotPrefix + "[5]";
  final String missingCompanyB = missingPlaceB + "/arr[@name='pivot']/lst[5]";
  SolrQueryRequest requestB = req(missingB);
  assertQ(requestB,
      facetPivotPrefix + "/arr[@name='pivot'][count(.) > 0]", // fails if the pivot has too few values
      missingPlaceB + "/null[@name='value'][.='']",           // fails if this is not the missing-place entry
      missingPlaceB + "/int[@name='count'][.=2]",             // fails on a wrong missing-place count
      missingPlaceB + "/arr[@name='pivot'][count(.) > 0]",    // fails if the missing place has too few sub-pivots
      missingCompanyB + "/null[@name='value'][.='']",         // fails if this is not the missing-company entry
      missingCompanyB + "/int[@name='count'][.=1]",           // fails on a wrong missing-company count
      missingCompanyB + "[not(arr[@name='pivot'])]"           // the company level must not have sub-pivots
  );
}
/**
 * Single-field pivot on company_t with facet.sort=index, facet.pivot.mincount=4 and
 * facet.limit=4; exactly four values are expected, in index order.
 */
public void testPivotFacetIndexSortMincountAndLimit() throws Exception {
  index();
  indexMissing();

  final String facetPivotPrefix = "//lst[@name='facet_counts']/lst[@name='facet_pivot']/arr[@name='company_t']";

  // The results are expected to be the same regardless of overrequest.
  // NOTE(review): both entries are an empty params(), so the two passes issue identical
  // requests; presumably one was meant to carry overrequest settings -- confirm.
  final SolrParams[] variations = {params(), params()};
  for (int i = 0; i < variations.length; i++) {
    SolrParams combined = SolrParams.wrapDefaults(
        params("q", "*:*",
            "rows", "0",
            "facet", "true",
            "facet.pivot", "company_t",
            "facet.sort", "index",
            "facet.pivot.mincount", "4",
            "facet.limit", "4"),
        variations[i]);
    SolrQueryRequest request = req(combined);
    assertQ(request,
        facetPivotPrefix + "[count(./lst) = 4]", // fails if the pivot does not hold exactly four values
        facetPivotPrefix + "/lst[1]/str[@name='value'][.='fujitsu']",
        facetPivotPrefix + "/lst[1]/int[@name='count'][.=4]",
        facetPivotPrefix + "/lst[2]/str[@name='value'][.='microsoft']",
        facetPivotPrefix + "/lst[2]/int[@name='count'][.=5]",
        facetPivotPrefix + "/lst[3]/str[@name='value'][.='null']",
        facetPivotPrefix + "/lst[3]/int[@name='count'][.=6]",
        facetPivotPrefix + "/lst[4]/str[@name='value'][.='polecat']",
        facetPivotPrefix + "/lst[4]/int[@name='count'][.=6]"
    );
  }
}
/**
 * Single-field pivot on company_t with facet.sort=index, facet.pivot.mincount=4,
 * facet.offset=1 and facet.limit=4; only three values are expected back.
 */
public void testPivotFacetIndexSortMincountLimitAndOffset() throws Exception {
  index();
  indexMissing();

  final String facetPivotPrefix = "//lst[@name='facet_counts']/lst[@name='facet_pivot']/arr[@name='company_t']";

  // The results are expected to be the same regardless of overrequest.
  // NOTE(review): both entries are an empty params(), so the two passes issue identical
  // requests; presumably one was meant to carry overrequest settings -- confirm.
  final SolrParams[] variations = {params(), params()};
  for (int i = 0; i < variations.length; i++) {
    SolrParams combined = SolrParams.wrapDefaults(
        params("q", "*:*",
            "rows", "0",
            "facet", "true",
            "facet.pivot", "company_t",
            "facet.sort", "index",
            "facet.pivot.mincount", "4",
            "facet.offset", "1",
            "facet.limit", "4"),
        variations[i]);
    SolrQueryRequest request = req(combined);
    assertQ(request,
        facetPivotPrefix + "[count(./lst) = 3]", // limit is 4, but only three values are left that meet the mincount
        facetPivotPrefix + "/lst[1]/str[@name='value'][.='microsoft']",
        facetPivotPrefix + "/lst[1]/int[@name='count'][.=5]",
        facetPivotPrefix + "/lst[2]/str[@name='value'][.='null']",
        facetPivotPrefix + "/lst[2]/int[@name='count'][.=6]",
        facetPivotPrefix + "/lst[3]/str[@name='value'][.='polecat']",
        facetPivotPrefix + "/lst[3]/int[@name='count'][.=6]"
    );
  }
}
/**
 * Further permutations of facet.sort=index with mincount, limit and offset: three different
 * (facet.pivot.mincount, facet.offset) pairs, each with facet.limit=1, are all expected to
 * return the same single company_t value.
 */
public void testPivotFacetIndexSortMincountLimitAndOffsetPermutations() throws Exception {
  index();
  indexMissing();

  final String facetPivotPrefix = "//lst[@name='facet_counts']/lst[@name='facet_pivot']/arr[@name='company_t']";

  // every one of these combinations should yield the same first value
  final SolrParams[] variations = {
      params("facet.pivot.mincount", "4",
          "facet.offset", "2"),
      params("facet.pivot.mincount", "5",
          "facet.offset", "1"),
      params("facet.pivot.mincount", "6",
          "facet.offset", "0")};
  for (int i = 0; i < variations.length; i++) {
    SolrParams combined = SolrParams.wrapDefaults(
        params("q", "*:*",
            "rows", "0",
            "facet", "true",
            "facet.limit", "1",
            "facet.sort", "index",
            "facet.overrequest.ratio", "0",
            "facet.pivot", "company_t"),
        variations[i]);
    SolrQueryRequest request = req(combined);
    assertQ(request,
        facetPivotPrefix + "[count(./lst) = 1]", // facet.limit is 1, so exactly one value is expected
        facetPivotPrefix + "/lst[1]/str[@name='value'][.='null']",
        facetPivotPrefix + "/lst[1]/int[@name='count'][.=6]"
    );
  }
}
/**
 * Adds and commits one extra document (id 777) that has neither a place_t nor a company_t
 * value.
 */
private void indexMissing() {
  assertU(adoc(new String[] {"id", "777"}));
  assertU(commit());
}
/**
 * Adds the ten fixture documents (ids 19 through 28) used by the pivot tests and commits.
 */
private void index() {
  // NOTE: the literal (4 character) string "null" is used as a company name on purpose, to
  // help ensure there are no bugs where that literal string is treated as a true NULL value.
  final String[][] fixtures = {
      {"id", "19", "place_t", "cardiff dublin", "company_t", "microsoft polecat", "price_ti", "15"},
      {"id", "20", "place_t", "dublin", "company_t", "polecat microsoft null", "price_ti", "19"},
      {"id", "21", "place_t", "london la dublin", "company_t",
          "microsoft fujitsu null polecat", "price_ti", "29"},
      {"id", "22", "place_t", "krakow london cardiff", "company_t",
          "polecat null bbc", "price_ti", "39"},
      {"id", "23", "place_t", "london", "company_t", "", "price_ti", "29"},
      {"id", "24", "place_t", "la", "company_t", ""},
      {"id", "25", "company_t", "microsoft polecat null fujitsu null bbc", "price_ti", "59"},
      {"id", "26", "place_t", "krakow", "company_t", "null"},
      {"id", "27", "place_t", "krakow cardiff dublin london la", "company_t",
          "null microsoft polecat bbc fujitsu"},
      {"id", "28", "place_t", "cork", "company_t",
          "fujitsu rte"},
  };
  for (String[] fieldsAndValues : fixtures) {
    assertU(adoc(fieldsAndValues));
  }
  assertU(commit());
}
}

View File

@ -806,6 +806,13 @@ public class SolrQuery extends ModifiableSolrParams
this.add( StatsParams.STATS_FIELD, field ); this.add( StatsParams.STATS_FIELD, field );
} }
/**
 * Sets {@link StatsParams#STATS} to <code>true</code> and adds every supplied value under
 * {@link StatsParams#STATS_FIELD}.
 *
 * @param field one or more values to add as {@link StatsParams#STATS_FIELD} parameters
 */
public void addGetFieldStatistics( String ... field )
{
  set( StatsParams.STATS, true );
  add( StatsParams.STATS_FIELD, field );
}
public void addStatsFieldFacets( String field, String ... facets ) public void addStatsFieldFacets( String field, String ... facets )
{ {
if( field == null ) { if( field == null ) {

View File

@ -180,6 +180,10 @@ public class FieldStatsInfo implements Serializable {
return stddev; return stddev;
} }
/**
 * @return the <code>sumOfSquares</code> value held by this object
 */
public Double getSumOfSquares() {
  return this.sumOfSquares;
}
public Map<String, List<FieldStatsInfo>> getFacets() { public Map<String, List<FieldStatsInfo>> getFacets() {
return facets; return facets;
} }

View File

@ -20,6 +20,7 @@ package org.apache.solr.client.solrj.response;
import java.io.PrintStream; import java.io.PrintStream;
import java.io.Serializable; import java.io.Serializable;
import java.util.List; import java.util.List;
import java.util.Map;
public class PivotField implements Serializable public class PivotField implements Serializable
{ {
@ -27,13 +28,23 @@ public class PivotField implements Serializable
final Object _value; final Object _value;
final int _count; final int _count;
final List<PivotField> _pivot; final List<PivotField> _pivot;
final Map<String,FieldStatsInfo> _statsInfo;
public PivotField( String f, Object v, int count, List<PivotField> pivot ) /**
* @deprecated Use {@link #PivotField(String,Object,int,List,Map)} with a null <code>statsInfo</code>
*/
@Deprecated
public PivotField( String f, Object v, int count, List<PivotField> pivot) {
this(f, v, count, pivot, null);
}
public PivotField( String f, Object v, int count, List<PivotField> pivot, Map<String,FieldStatsInfo> statsInfo)
{ {
_field = f; _field = f;
_value = v; _value = v;
_count = count; _count = count;
_pivot = pivot; _pivot = pivot;
_statsInfo = statsInfo;
} }
public String getField() { public String getField() {
@ -52,6 +63,10 @@ public class PivotField implements Serializable
return _pivot; return _pivot;
} }
/**
 * @return the <code>statsInfo</code> map this pivot was constructed with; <code>null</code>
 *         if it was constructed without one
 */
public Map<String,FieldStatsInfo> getFieldStatsInfo() {
  return this._statsInfo;
}
@Override @Override
public String toString() public String toString()
{ {
@ -63,7 +78,16 @@ public class PivotField implements Serializable
for( int i=0; i<indent; i++ ) { for( int i=0; i<indent; i++ ) {
out.print( " " ); out.print( " " );
} }
out.println( _field + "=" + _value + " ("+_count+")" ); out.print( _field + "=" + _value + " ("+_count+")" );
if (null != _statsInfo) {
out.print( "->stats:[" );
for( FieldStatsInfo fieldStatsInfo : _statsInfo.values() ) {
out.print(fieldStatsInfo.toString());
out.print(",");
}
out.print("]");
}
out.println();
if( _pivot != null ) { if( _pivot != null ) {
for( PivotField p : _pivot ) { for( PivotField p : _pivot ) {
p.write( out, indent+1 ); p.write( out, indent+1 );

View File

@ -23,6 +23,7 @@ import java.util.HashMap;
import java.util.LinkedHashMap; import java.util.LinkedHashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.TreeMap;
import org.apache.solr.client.solrj.SolrServer; import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.beans.DocumentObjectBinder; import org.apache.solr.client.solrj.beans.DocumentObjectBinder;
@ -163,19 +164,25 @@ public class QueryResponse extends SolrResponseBase
} }
private void extractStatsInfo(NamedList<Object> info) { private void extractStatsInfo(NamedList<Object> info) {
_fieldStatsInfo = extractFieldStatsInfo(info);
}
private Map<String, FieldStatsInfo> extractFieldStatsInfo(NamedList<Object> info) {
if( info != null ) { if( info != null ) {
_fieldStatsInfo = new HashMap<>(); Map<String, FieldStatsInfo> fieldStatsInfoMap = new TreeMap<>();
NamedList<NamedList<Object>> ff = (NamedList<NamedList<Object>>) info.get( "stats_fields" ); NamedList<NamedList<Object>> ff = (NamedList<NamedList<Object>>) info.get( "stats_fields" );
if( ff != null ) { if( ff != null ) {
for( Map.Entry<String,NamedList<Object>> entry : ff ) { for( Map.Entry<String,NamedList<Object>> entry : ff ) {
NamedList<Object> v = entry.getValue(); NamedList<Object> v = entry.getValue();
if( v != null ) { if( v != null ) {
_fieldStatsInfo.put( entry.getKey(), fieldStatsInfoMap.put( entry.getKey(),
new FieldStatsInfo( v, entry.getKey() ) ); new FieldStatsInfo( v, entry.getKey() ) );
} }
} }
} }
return fieldStatsInfoMap;
} }
return null;
} }
private void extractDebugInfo( NamedList<Object> debug ) private void extractDebugInfo( NamedList<Object> debug )
@ -396,14 +403,38 @@ public class QueryResponse extends SolrResponseBase
Object v = nl.getVal( 1 ); Object v = nl.getVal( 1 );
assert "count".equals(nl.getName(2)); assert "count".equals(nl.getName(2));
int cnt = ((Integer)nl.getVal( 2 )).intValue(); int cnt = ((Integer)nl.getVal( 2 )).intValue();
List<PivotField> p = null;
List<PivotField> subPivots = null;
Map<String,FieldStatsInfo> fieldStatsInfos = null;
if (4 <= nl.size()) { if (4 <= nl.size()) {
assert "pivot".equals(nl.getName(3)); for(int index = 3; index < nl.size(); index++) {
Object subPiv = nl.getVal(3); final String key = nl.getName(index);
assert null != subPiv : "Server sent back 'null' for sub pivots?"; final Object val = nl.getVal(index);
p = readPivots( (List<NamedList>) subPiv ); switch (key) {
case "pivot": {
assert null != val : "Server sent back 'null' for sub pivots?";
assert val instanceof List : "Server sent non-List for sub pivots?";
subPivots = readPivots( (List<NamedList>) val );
break;
}
case "stats": {
assert null != val : "Server sent back 'null' for stats?";
assert val instanceof NamedList : "Server sent non-NamedList for stats?";
fieldStatsInfos = extractFieldStatsInfo((NamedList<Object>) val);
break;
}
default:
throw new RuntimeException( "unknown key in pivot: "+ key+ " ["+val+"]");
}
}
} }
values.add( new PivotField( f, v, cnt, p ) );
values.add( new PivotField( f, v, cnt, subPivots, fieldStatsInfos ) );
} }
return values; return values;
} }

View File

@ -57,12 +57,9 @@ import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collection; import java.util.Collection;
import java.util.HashMap; import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Random; import java.util.Random;
import java.util.Set;
/** /**
* This should include tests against the example solr config * This should include tests against the example solr config
@ -814,6 +811,197 @@ abstract public class SolrExampleTests extends SolrExampleTestsBase
doPivotFacetTest(false); doPivotFacetTest(false);
} }
/**
 * Indexes a small product catalogue, then for several pivot depths checks both the top-level
 * stats and the stats nested under the first "features" pivot value and its first "manu"
 * sub-pivot value.
 */
@Test
public void testPivotFacetsStats() throws Exception {
  SolrServer solr = getSolrServer();

  // Start from an empty index
  solr.deleteByQuery("*:*");
  solr.commit();
  assertNumFound("*:*", 0); // confirm that nothing is left after the delete

  int nextId = 1;
  ArrayList<SolrInputDocument> batch = new ArrayList<>();
  batch.add(makeTestDoc("id", nextId++, "features", "aaa", "manu", "apple", "cat", "a", "inStock", true, "popularity", 12, "price", .017));
  batch.add(makeTestDoc("id", nextId++, "features", "aaa", "manu", "lg", "cat", "a", "inStock", false, "popularity", 13, "price", 16.04));
  batch.add(makeTestDoc("id", nextId++, "features", "aaa", "manu", "samsung", "cat", "a", "inStock", true, "popularity", 14, "price", 12.34));
  batch.add(makeTestDoc("id", nextId++, "features", "aaa", "manu", "lg", "cat", "b", "inStock", false, "popularity", 24, "price", 51.39));
  batch.add(makeTestDoc("id", nextId++, "features", "aaa", "manu", "nokia", "cat", "b", "inStock", true, "popularity", 28, "price", 131.39));
  batch.add(makeTestDoc("id", nextId++, "features", "bbb", "manu", "ztc", "cat", "a", "inStock", false, "popularity", 32));
  batch.add(makeTestDoc("id", nextId++, "features", "bbb", "manu", "htc", "cat", "a", "inStock", true, "popularity", 31, "price", 131.39));
  batch.add(makeTestDoc("id", nextId++, "features", "bbb", "manu", "apple", "cat", "b", "inStock", false, "popularity", 36));
  batch.add(makeTestDoc("id", nextId++, "features", "bbb", "manu", "lg", "cat", "b", "inStock", true, "popularity", 37, "price", 1.39));
  batch.add(makeTestDoc("id", nextId++, "features", "bbb", "manu", "ztc", "cat", "b", "inStock", false, "popularity", 38, "price", 47.98));
  batch.add(makeTestDoc("id", nextId++, "features", "bbb", "manu", "ztc", "cat", "b", "inStock", true, "popularity", -38));
  batch.add(makeTestDoc("id", nextId++, "cat", "b")); // a doc that lacks most of the fields
  solr.add(batch);
  solr.commit();

  // Whichever of these pivot params is used, the assertions below are the same, because
  // nothing deeper than the "manu" level is asserted.
  final String[] pivotSpecs = {
      "{!key=pivot_key stats=s1}features,manu",
      "{!key=pivot_key stats=s1}features,manu,cat",
      "{!key=pivot_key stats=s1}features,manu,cat,inStock"
  };
  for (String pivotSpec : pivotSpecs) {
    SolrQuery q = new SolrQuery("*:*");
    q.addFacetPivotField(pivotSpec);
    q.setFacetLimit(1);
    q.addGetFieldStatistics("{!key=foo_price tag=s1}price", "{!tag=s1}popularity");
    q.setFacetMinCount(0);
    q.setRows(0);
    QueryResponse response = solr.query(q);

    // ---- top level (ie: non-pivot) stats ----
    Map<String, FieldStatsInfo> topStats = response.getFieldStatsInfo();

    FieldStatsInfo topPopularity = topStats.get("popularity");
    assertEquals(-38.0d, topPopularity.getMin());
    assertEquals(38.0d, topPopularity.getMax());
    assertEquals(11L, topPopularity.getCount().longValue());
    assertEquals(1L, topPopularity.getMissing().longValue());
    assertEquals(227.0d, topPopularity.getSum());
    assertEquals(20.636363636363637d, topPopularity.getMean());

    FieldStatsInfo topPrice = topStats.get("foo_price");
    assertEquals(.017d, (double) topPrice.getMin(), .01d);
    assertEquals(131.39d, (double) topPrice.getMax(), .01d);
    assertEquals(8L, topPrice.getCount().longValue());
    assertEquals(4L, topPrice.getMissing().longValue());
    assertEquals(391.93d, (double) topPrice.getSum(), .01d);
    assertEquals(48.99d, (double) topPrice.getMean(), .01d);

    // ---- first level of the pivot: features=bbb ----
    NamedList<List<PivotField>> allPivots = response.getFacetPivot();
    assertTrue( ! allPivots.get("pivot_key").isEmpty() );

    List<PivotField> topPivots = allPivots.get("pivot_key");
    PivotField bbb = topPivots.get(0);
    assertEquals("features", bbb.getField());
    assertEquals("bbb", bbb.getValue());
    assertNotNull(bbb.getFieldStatsInfo());
    assertEquals(2, bbb.getFieldStatsInfo().size());

    FieldStatsInfo bbbPrice = bbb.getFieldStatsInfo().get("foo_price");
    assertEquals("foo_price", bbbPrice.getName());
    assertEquals(131.39d, (double) bbbPrice.getMax(), .01d);
    assertEquals(1.38d, (double) bbbPrice.getMin(), .01d);
    assertEquals(180.75d, (double) bbbPrice.getSum(), .01d);
    assertEquals(3, (long) bbbPrice.getCount());
    assertEquals(3, (long) bbbPrice.getMissing());
    assertEquals(60.25d, (double) bbbPrice.getMean(), .01d);
    assertEquals(65.86d, bbbPrice.getStddev(), .01d);
    assertEquals(19567.34d, bbbPrice.getSumOfSquares(), .01d);

    FieldStatsInfo bbbPopularity = bbb.getFieldStatsInfo().get("popularity");
    assertEquals("popularity", bbbPopularity.getName());
    assertEquals(38.0d, (double) bbbPopularity.getMax(), .01d);
    assertEquals(-38.0d, (double) bbbPopularity.getMin(), .01d);
    assertEquals(136.0d, (double) bbbPopularity.getSum(), .01d);
    assertEquals(6, (long) bbbPopularity.getCount());
    assertEquals(0, (long) bbbPopularity.getMissing());
    assertEquals(22.66d, (double) bbbPopularity.getMean(), .01d);
    assertEquals(29.85d, bbbPopularity.getStddev(), .01d);
    assertEquals(7538.0d, bbbPopularity.getSumOfSquares(), .01d);

    // ---- second level of the pivot: features=bbb, manu=ztc ----
    List<PivotField> bbbChildren = bbb.getPivot();
    PivotField bbbZtc = bbbChildren.get(0);
    assertEquals("manu", bbbZtc.getField());
    assertEquals("ztc", bbbZtc.getValue());
    assertNotNull(bbbZtc.getFieldStatsInfo());
    assertEquals(2, bbbZtc.getFieldStatsInfo().size());

    FieldStatsInfo bbbZtcPrice = bbbZtc.getFieldStatsInfo().get("foo_price");
    assertEquals("foo_price", bbbZtcPrice.getName());
    assertEquals(47.97d, (double) bbbZtcPrice.getMax(), .01d);
    assertEquals(47.97d, (double) bbbZtcPrice.getMin(), .01d);
    assertEquals(47.97d, (double) bbbZtcPrice.getSum(), .01d);
    assertEquals(1, (long) bbbZtcPrice.getCount());
    assertEquals(2, (long) bbbZtcPrice.getMissing());
    assertEquals(47.97d, (double) bbbZtcPrice.getMean(), .01d);
    assertEquals(0.0d, bbbZtcPrice.getStddev(), .01d);
    assertEquals(2302.08d, bbbZtcPrice.getSumOfSquares(), .01d);

    FieldStatsInfo bbbZtcPopularity = bbbZtc.getFieldStatsInfo().get("popularity");
    assertEquals("popularity", bbbZtcPopularity.getName());
    assertEquals(38.0d, (double) bbbZtcPopularity.getMax(), .01d);
    assertEquals(-38.0d, (double) bbbZtcPopularity.getMin(), .01d);
    assertEquals(32.0, (double) bbbZtcPopularity.getSum(), .01d);
    assertEquals(3, (long) bbbZtcPopularity.getCount());
    assertEquals(0, (long) bbbZtcPopularity.getMissing());
    assertEquals(10.66d, (double) bbbZtcPopularity.getMean(), .01d);
    assertEquals(42.25d, bbbZtcPopularity.getStddev(), .01d);
    assertEquals(3912.0d, bbbZtcPopularity.getSumOfSquares(), .01d);
  }
}
/**
 * Checks three requests that combine pivot facets with stats and are each expected to be
 * rejected with a 400 {@link SolrException}: stats on a boolean field, a comma separated
 * list of stats tags (see SOLR-6663), and stats on a text field.
 */
@Test
public void testPivotFacetsStatsNotSupported() throws Exception {
  SolrServer solr = getSolrServer();

  // Start from an empty index
  solr.deleteByQuery("*:*");
  solr.commit();
  assertNumFound("*:*", 0); // confirm that nothing is left after the delete

  // The outcome should be the same whether or not the index holds any documents,
  // so randomly add one.
  if (random().nextBoolean()) {
    solr.add(makeTestDoc("id", 1, "features", "aaa", "cat", "a", "inStock", true, "popularity", 12, "price", .017));
    solr.commit();
  }

  ignoreException("is not currently supported");

  // Scenario 1: stats requested on a boolean field
  SolrQuery booleanStatsQuery = new SolrQuery("*:*");
  booleanStatsQuery.addFacetPivotField("{!stats=s1}features,manu");
  booleanStatsQuery.addGetFieldStatistics("{!key=inStock_val tag=s1}inStock");
  try {
    solr.query(booleanStatsQuery);
    fail("SolrException should be thrown on query");
  } catch (SolrException expected) {
    assertEquals("Pivot facet on boolean is not currently supported, bad request returned", 400, expected.code());
    assertTrue(expected.getMessage().contains("is not currently supported"));
    assertTrue(expected.getMessage().contains("boolean"));
  }

  // Scenario 2: more than one stats tag on a single pivot -- see SOLR-6663
  SolrQuery multiTagQuery = new SolrQuery("*:*");
  multiTagQuery.addFacetPivotField("{!stats=tag1,tag2}features,manu");
  multiTagQuery.addGetFieldStatistics("{!tag=tag1}price", "{!tag=tag2}popularity");
  multiTagQuery.setFacetMinCount(0);
  multiTagQuery.setRows(0);
  try {
    solr.query(multiTagQuery);
    fail("SolrException should be thrown on query");
  } catch (SolrException expected) {
    assertEquals(400, expected.code());
    assertTrue(expected.getMessage().contains("stats"));
    assertTrue(expected.getMessage().contains("comma"));
    assertTrue(expected.getMessage().contains("tag"));
  }

  // Scenario 3: stats requested on a text field
  SolrQuery textStatsQuery = new SolrQuery("*:*");
  textStatsQuery.addFacetPivotField("{!stats=s1}features,manu");
  textStatsQuery.addGetFieldStatistics("{!tag=s1}features");
  textStatsQuery.setFacetMinCount(0);
  textStatsQuery.setRows(0);
  try {
    solr.query(textStatsQuery);
    fail("SolrException should be thrown on query");
  } catch (SolrException expected) {
    assertEquals("Pivot facet on string is not currently supported, bad request returned", 400, expected.code());
    assertTrue(expected.getMessage().contains("is not currently supported"));
    assertTrue(expected.getMessage().contains("text_general"));
  }
}
public void testPivotFacetsMissing() throws Exception { public void testPivotFacetsMissing() throws Exception {
doPivotFacetTest(true); doPivotFacetTest(true);
} }