From 7e07a9c578a4d0182f214497c4f828b11d1432b2 Mon Sep 17 00:00:00 2001 From: "Chris M. Hostetter" Date: Tue, 4 Nov 2014 23:09:41 +0000 Subject: [PATCH] SOLR-6351: Stats can now be nested under pivot values by adding a 'stats' local param git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1636772 13f79535-47bb-0310-9956-ffa450edef68 --- solr/CHANGES.txt | 4 + .../handler/component/PivotFacetHelper.java | 60 ++- .../component/PivotFacetProcessor.java | 109 +++- .../handler/component/PivotFacetValue.java | 21 +- .../handler/component/StatsComponent.java | 98 +++- .../solr/handler/component/StatsField.java | 10 + .../handler/component/StatsValuesFactory.java | 22 +- .../org/apache/solr/util/PivotListEntry.java | 43 +- .../solr/cloud/TestCloudPivotFacet.java | 237 +++++++- .../DistributedFacetPivotLargeTest.java | 76 ++- .../DistributedFacetPivotLongTailTest.java | 103 +++- ...istributedFacetPivotSmallAdvancedTest.java | 234 ++++++++ .../DistributedFacetPivotSmallTest.java | 155 +++++- .../DistributedFacetPivotWhiteBoxTest.java | 138 +++++ .../component/FacetPivotSmallTest.java | 504 ++++++++++++++++++ .../apache/solr/client/solrj/SolrQuery.java | 7 + .../client/solrj/response/FieldStatsInfo.java | 4 + .../client/solrj/response/PivotField.java | 30 +- .../client/solrj/response/QueryResponse.java | 47 +- .../solr/client/solrj/SolrExampleTests.java | 194 ++++++- 20 files changed, 1978 insertions(+), 118 deletions(-) create mode 100644 solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotSmallAdvancedTest.java create mode 100644 solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotWhiteBoxTest.java create mode 100644 solr/core/src/test/org/apache/solr/handler/component/FacetPivotSmallTest.java diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index e342b0c266c..82d022d8a75 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -201,6 +201,10 @@ New Features * SOLR-6670: change BALANCESLICEUNIQUE to BALANCESHARDUNIQUE. 
Also, the parameter for ADDREPLICAPROP that used to be sliceUnique is now shardUnique. (Erick Erickson) +* SOLR-6351: Stats can now be nested under pivot values by adding a 'stats' local param to + facet.pivot which refers to a 'tag' local param in one or more stats.field params. + (hossman, Vitaliy Zhovtyuk) + Bug Fixes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/handler/component/PivotFacetHelper.java b/solr/core/src/java/org/apache/solr/handler/component/PivotFacetHelper.java index 94ade7a0b19..4123a0754b5 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/PivotFacetHelper.java +++ b/solr/core/src/java/org/apache/solr/handler/component/PivotFacetHelper.java @@ -18,15 +18,21 @@ package org.apache.solr.handler.component; import org.apache.solr.util.PivotListEntry; +import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.params.FacetParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.common.util.StrUtils; import java.util.ArrayList; +import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.List; import java.util.Collections; +import java.util.Map; +import java.util.Map.Entry; public class PivotFacetHelper { @@ -91,31 +97,63 @@ public class PivotFacetHelper { /** @see PivotListEntry#VALUE */ public static Comparable getValue(NamedList pivotList) { - return (Comparable) PivotFacetHelper.retrieve(PivotListEntry.VALUE, - pivotList); + return (Comparable) PivotListEntry.VALUE.extract(pivotList); } /** @see PivotListEntry#FIELD */ public static String getField(NamedList pivotList) { - return (String) PivotFacetHelper.retrieve(PivotListEntry.FIELD, pivotList); + return (String) PivotListEntry.FIELD.extract(pivotList); } /** @see PivotListEntry#COUNT */ public static Integer getCount(NamedList pivotList) { - return 
(Integer) PivotFacetHelper.retrieve(PivotListEntry.COUNT, pivotList); + return (Integer) PivotListEntry.COUNT.extract(pivotList); } /** @see PivotListEntry#PIVOT */ public static List> getPivots(NamedList pivotList) { - int pivotIdx = pivotList.indexOf(PivotListEntry.PIVOT.getName(), 0); - if (pivotIdx > -1) { - return (List>) pivotList.getVal(pivotIdx); - } - return null; + return (List>) PivotListEntry.PIVOT.extract(pivotList); } - private static Object retrieve(PivotListEntry entryToGet, NamedList pivotList) { - return pivotList.get(entryToGet.getName(), entryToGet.getIndex()); + /** @see PivotListEntry#STATS */ + public static NamedList>> getStats(NamedList pivotList) { + return (NamedList>>) PivotListEntry.STATS.extract(pivotList); + } + + /** + * Given a mapping of keys to {@link StatsValues} representing the currently + * known "merged" stats (which may be null if none exist yet), and a + * {@link NamedList} containing the "stats" response block returned by an individual + * shard, this method accumulates the stats for each {@link StatsField} found in + * the shard response with the existing mergeStats + * + * @return the original merged Map after modifying, or a new Map if the merged param was originally null. 
+ * @see StatsInfo#getStatsField + * @see StatsValuesFactory#createStatsValues + * @see StatsValues#accumulate(NamedList) + */ + public static Map mergeStats + (Map merged, + NamedList>> remoteWrapper, + StatsInfo statsInfo) { + + if (null == merged) merged = new LinkedHashMap(); + + NamedList> remoteStats = StatsComponent.unwrapStats(remoteWrapper); + + for (Entry> entry : remoteStats) { + StatsValues receivingStatsValues = merged.get(entry.getKey()); + if (receivingStatsValues == null) { + StatsField recievingStatsField = statsInfo.getStatsField(entry.getKey()); + if (null == recievingStatsField) { + throw new SolrException(ErrorCode.SERVER_ERROR , "No stats.field found corrisponding to pivot stats recieved from shard: "+entry.getKey()); + } + receivingStatsValues = StatsValuesFactory.createStatsValues(recievingStatsField); + merged.put(entry.getKey(), receivingStatsValues); + } + receivingStatsValues.accumulate(entry.getValue()); + } + return merged; } } diff --git a/solr/core/src/java/org/apache/solr/handler/component/PivotFacetProcessor.java b/solr/core/src/java/org/apache/solr/handler/component/PivotFacetProcessor.java index 8ccbc199595..72c14a7f18c 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/PivotFacetProcessor.java +++ b/solr/core/src/java/org/apache/solr/handler/component/PivotFacetProcessor.java @@ -23,20 +23,26 @@ import org.apache.solr.schema.FieldType; import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.search.DocSet; import org.apache.solr.search.SyntaxError; +import org.apache.solr.util.PivotListEntry; import org.apache.solr.common.SolrException; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.common.util.StrUtils; import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.params.ShardParams; import org.apache.solr.common.params.FacetParams; +import 
org.apache.solr.common.params.StatsParams; import org.apache.solr.request.SimpleFacets; import org.apache.solr.request.SolrQueryRequest; import org.apache.lucene.search.Query; import java.io.IOException; import java.util.ArrayList; +import java.util.Collections; import java.util.Deque; +import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; @@ -63,9 +69,15 @@ public class PivotFacetProcessor extends SimpleFacets if (!rb.doFacets || pivots == null) return null; + // rb._statsInfo may be null if stats=false, ie: refine requests + // if that's the case, but we need to refine w/stats, then we'll lazy init our + // own instance of StatsInfo + StatsInfo statsInfo = rb._statsInfo; + SimpleOrderedMap>> pivotResponse = new SimpleOrderedMap<>(); for (String pivotList : pivots) { try { + // NOTE: this sets localParams (SimpleFacets is stateful) this.parseParams(FacetParams.FACET_PIVOT, pivotList); } catch (SyntaxError e) { throw new SolrException(ErrorCode.BAD_REQUEST, e); @@ -84,15 +96,37 @@ public class PivotFacetProcessor extends SimpleFacets } } - //REFINEMENT - String fieldValueKey = localParams == null ? null : localParams.get(PivotFacet.REFINE_PARAM); - if(fieldValueKey != null ){ - String[] refinementValuesByField = params.getParams(PivotFacet.REFINE_PARAM+fieldValueKey); + // start by assuing no local params... + + String refineKey = null; // no local => no refinement + List statsFields = Collections.emptyList(); // no local => no stats + + if (null != localParams) { + // we might be refining.. 
+ refineKey = localParams.get(PivotFacet.REFINE_PARAM); + + String statsLocalParam = localParams.get(StatsParams.STATS); + if (null != refineKey + && null != statsLocalParam + && null == statsInfo) { + // we are refining and need to compute stats, + // but stats component hasn't inited StatsInfo (because we + // don't need/want top level stats when refining) so we lazy init + // our own copy of StatsInfo + statsInfo = new StatsInfo(rb); + } + statsFields = getTaggedStatsFields(statsInfo, statsLocalParam); + } + + if (null != refineKey) { + String[] refinementValuesByField + = params.getParams(PivotFacet.REFINE_PARAM + refineKey); + for(String refinements : refinementValuesByField){ - pivotResponse.addAll(processSingle(pivotFields, refinements)); + pivotResponse.addAll(processSingle(pivotFields, refinements, statsFields)); } } else{ - pivotResponse.addAll(processSingle(pivotFields, null)); + pivotResponse.addAll(processSingle(pivotFields, null, statsFields)); } } return pivotResponse; @@ -102,9 +136,13 @@ public class PivotFacetProcessor extends SimpleFacets * Process a single branch of refinement values for a specific pivot * @param pivotFields the ordered list of fields in this pivot * @param refinements the comma seperate list of refinement values corrisponding to each field in the pivot, or null if there are no refinements + * @param statsFields List of {@link StatsField} instances to compute for each pivot value */ - private SimpleOrderedMap>> processSingle(List pivotFields, - String refinements) throws IOException { + private SimpleOrderedMap>> processSingle + (List pivotFields, + String refinements, + List statsFields) throws IOException { + SolrIndexSearcher searcher = rb.req.getSearcher(); SimpleOrderedMap>> pivotResponse = new SimpleOrderedMap<>(); @@ -141,18 +179,54 @@ public class PivotFacetProcessor extends SimpleFacets if(pivotFields.size() > 1) { String subField = pivotFields.get(1); pivotResponse.add(key, - doPivots(facetCounts, field, subField, 
fnames, vnames, this.docs)); + doPivots(facetCounts, field, subField, fnames, vnames, this.docs, statsFields)); } else { - pivotResponse.add(key, doPivots(facetCounts, field, null, fnames, vnames, this.docs)); + pivotResponse.add(key, doPivots(facetCounts, field, null, fnames, vnames, this.docs, statsFields)); } return pivotResponse; } + /** + * returns the {@link StatsField} instances that should be computed for a pivot + * based on the 'stats' local params used. + * + * @return A list of StatsFields to comput for this pivot, or the empty list if none + */ + private static List getTaggedStatsFields(StatsInfo statsInfo, + String statsLocalParam) { + if (null == statsLocalParam || null == statsInfo) { + return Collections.emptyList(); + } + + List fields = new ArrayList<>(7); + List statsAr = StrUtils.splitSmart(statsLocalParam, ','); + + // TODO: for now, we only support a single tag name - we reserve using + // ',' as a possible delimeter for logic related to only computing stats + // at certain levels -- see SOLR-6663 + if (1 < statsAr.size()) { + String msg = StatsParams.STATS + " local param of " + FacetParams.FACET_PIVOT + + "may not include tags separated by a comma - please use a common tag on all " + + StatsParams.STATS_FIELD + " params you wish to compute under this pivot"; + throw new SolrException(ErrorCode.BAD_REQUEST, msg); + } + + for(String stat : statsAr) { + fields.addAll(statsInfo.getStatsFieldsByTag(stat)); + } + return fields; + } + /** * Recursive function to compute all the pivot counts for the values under teh specified field */ protected List> doPivots(NamedList superFacets, - String field, String subField, Deque fnames,Deque vnames,DocSet docs) throws IOException { + String field, String subField, + Deque fnames, Deque vnames, + DocSet docs, List statsFields) + throws IOException { + + boolean isShard = rb.req.getParams().getBool(ShardParams.IS_SHARD, false); SolrIndexSearcher searcher = rb.req.getSearcher(); // TODO: optimize to avoid 
converting to an external string and then having to convert back to internal below @@ -169,6 +243,7 @@ public class PivotFacetProcessor extends SimpleFacets // Only sub-facet if parent facet has positive count - still may not be any values for the sub-field though if (kv.getValue() >= getMinCountForField(field)) { final String fieldValue = kv.getKey(); + final int pivotCount = kv.getValue(); SimpleOrderedMap pivot = new SimpleOrderedMap<>(); pivot.add( "field", field ); @@ -178,7 +253,7 @@ public class PivotFacetProcessor extends SimpleFacets ftype.readableToIndexed(fieldValue, termval); pivot.add( "value", ftype.toObject(sfield, termval.get()) ); } - pivot.add( "count", kv.getValue() ); + pivot.add( "count", pivotCount ); DocSet subset = getSubset(docs, sfield, fieldValue); @@ -195,9 +270,17 @@ public class PivotFacetProcessor extends SimpleFacets } if (facetCounts.size() >= 1) { - pivot.add( "pivot", doPivots( facetCounts, subField, nextField, fnames, vnames, subset) ); + pivot.add( "pivot", doPivots( facetCounts, subField, nextField, fnames, vnames, subset, statsFields ) ); } } + if ((isShard || 0 < pivotCount) && ! 
statsFields.isEmpty()) { + Map stv = new LinkedHashMap<>(); + for (StatsField statsField : statsFields) { + stv.put(statsField.getOutputKey(), statsField.computeLocalStatsValues(subset)); + } + // for pivots, we *always* include requested stats - even if 'empty' + pivot.add("stats", StatsComponent.convertToResponse(true, stv)); + } values.add( pivot ); } diff --git a/solr/core/src/java/org/apache/solr/handler/component/PivotFacetValue.java b/solr/core/src/java/org/apache/solr/handler/component/PivotFacetValue.java index 69e5de6375a..062464f9653 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/PivotFacetValue.java +++ b/solr/core/src/java/org/apache/solr/handler/component/PivotFacetValue.java @@ -21,11 +21,13 @@ import java.util.BitSet; import java.util.Date; import java.util.List; import java.util.Locale; +import java.util.Map; import org.apache.solr.common.params.FacetParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.schema.TrieDateField; +import org.apache.solr.search.QueryParsing; import org.apache.solr.util.PivotListEntry; /** @@ -45,6 +47,7 @@ public class PivotFacetValue { // child can't be final, circular ref on construction private PivotFacetField childPivot = null; private int count; // mutable + private Map statsValues = null; private PivotFacetValue(PivotFacetField parent, Comparable val) { this.parentPivot = parent; @@ -114,6 +117,7 @@ public class PivotFacetValue { Comparable pivotVal = null; int pivotCount = 0; List> childPivotData = null; + NamedList>> statsValues = null; for (int i = 0; i < pivotData.size(); i++) { String key = pivotData.getName(i); @@ -135,6 +139,9 @@ public class PivotFacetValue { case PIVOT: childPivotData = (List>)value; break; + case STATS: + statsValues = (NamedList>>) value; + break; default: throw new RuntimeException("PivotListEntry contains unaccounted for item: " + entry); } @@ -143,6 +150,9 @@ public class PivotFacetValue { 
PivotFacetValue newPivotFacet = new PivotFacetValue(parentField, pivotVal); newPivotFacet.count = pivotCount; newPivotFacet.sourceShards.set(shardNumber); + if(statsValues != null) { + newPivotFacet.statsValues = PivotFacetHelper.mergeStats(null, statsValues, rb._statsInfo); + } newPivotFacet.childPivot = PivotFacetField.createFromListOfNamedLists(shardNumber, rb, newPivotFacet, childPivotData); @@ -171,6 +181,11 @@ public class PivotFacetValue { if (childPivot != null && childPivot.convertToListOfNamedLists() != null) { newList.add(PivotListEntry.PIVOT.getName(), childPivot.convertToListOfNamedLists()); } + if (null != statsValues) { + newList.add(PivotListEntry.STATS.getName(), + // for pivots, we *always* include requested stats - even if 'empty' + StatsComponent.convertToResponse(true, statsValues)); + } return newList; } @@ -187,6 +202,10 @@ public class PivotFacetValue { if (!shardHasContributed(shardNumber)) { sourceShards.set(shardNumber); count += PivotFacetHelper.getCount(value); + NamedList>> stats = PivotFacetHelper.getStats(value); + if (stats != null) { + statsValues = PivotFacetHelper.mergeStats(statsValues, stats, rb._statsInfo); + } } List> shardChildPivots = PivotFacetHelper.getPivots(value); @@ -197,7 +216,7 @@ public class PivotFacetValue { childPivot.contributeFromShard(shardNumber, rb, shardChildPivots); } } - + public String toString(){ return String.format(Locale.ROOT, "F:%s V:%s Co:%d Ch?:%s", parentPivot.field, value, count, (this.childPivot !=null)); diff --git a/solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java b/solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java index c252812e789..2d07ea7a74d 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java @@ -25,8 +25,6 @@ import java.util.List; import java.util.Map; import org.apache.solr.common.SolrException; -import 
org.apache.solr.common.SolrException.ErrorCode; -import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.ShardParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.StatsParams; @@ -56,22 +54,14 @@ public class StatsComponent extends SearchComponent { if (!rb.doStats) return; boolean isShard = rb.req.getParams().getBool(ShardParams.IS_SHARD, false); - NamedList out = new SimpleOrderedMap<>(); - NamedList stats_fields = new SimpleOrderedMap<>(); + Map statsValues = new LinkedHashMap<>(); for (StatsField statsField : rb._statsInfo.getStatsFields()) { DocSet docs = statsField.computeBaseDocSet(); - NamedList stv = statsField.computeLocalStatsValues(docs).getStatsValues(); - - if (isShard == true || (Long) stv.get("count") > 0) { - stats_fields.add(statsField.getOutputKey(), stv); - } else { - stats_fields.add(statsField.getOutputKey(), null); - } + statsValues.put(statsField.getOutputKey(), statsField.computeLocalStatsValues(docs)); } - out.add("stats_fields", stats_fields); - rb.rsp.add( "stats", out ); + rb.rsp.add( "stats", convertToResponse(isShard, statsValues) ); } @Override @@ -86,6 +76,8 @@ public class StatsComponent extends SearchComponent { if ((sreq.purpose & ShardRequest.PURPOSE_GET_TOP_IDS) != 0) { sreq.purpose |= ShardRequest.PURPOSE_GET_STATS; } else { + + // turn off stats on other requests sreq.params.set(StatsParams.STATS, "false"); // we could optionally remove stats params @@ -101,7 +93,8 @@ public class StatsComponent extends SearchComponent { for (ShardResponse srsp : sreq.responses) { NamedList stats = null; try { - stats = (NamedList) srsp.getSolrResponse().getResponse().get("stats"); + stats = (NamedList>>) + srsp.getSolrResponse().getResponse().get("stats"); } catch (Exception e) { if (rb.req.getParams().getBool(ShardParams.SHARDS_TOLERANT, false)) { continue; // looks like a shard did not return anything @@ -110,7 +103,7 @@ public class StatsComponent extends 
SearchComponent { "Unable to read stats info for shard: " + srsp.getShard(), e); } - NamedList stats_fields = (NamedList) stats.get("stats_fields"); + NamedList stats_fields = unwrapStats(stats); if (stats_fields != null) { for (int i = 0; i < stats_fields.size(); i++) { String key = stats_fields.getName(i); @@ -129,26 +122,44 @@ public class StatsComponent extends SearchComponent { // so that "result" is already stored in the response (for aesthetics) Map allStatsValues = rb._statsInfo.getAggregateStatsValues(); + rb.rsp.add("stats", convertToResponse(false, allStatsValues)); - NamedList> stats = new SimpleOrderedMap<>(); - NamedList stats_fields = new SimpleOrderedMap<>(); + rb._statsInfo = null; // free some objects + } + + /** + * Helper to pull the "stats_fields" out of the extra "stats" wrapper + */ + public static NamedList> unwrapStats(NamedList>> stats) { + if (null == stats) return null; + + return stats.get("stats_fields"); + } + + /** + * Given a map of {@link StatsValues} using the appropriate response key, + * builds up the neccessary "stats" data structure for including in the response -- + * including the esoteric "stats_fields" wrapper. 
+ */ + public static NamedList>> convertToResponse + (boolean force, Map statsValues) { + + NamedList>> stats = new SimpleOrderedMap<>(); + NamedList> stats_fields = new SimpleOrderedMap<>(); stats.add("stats_fields", stats_fields); - for (Map.Entry entry : allStatsValues.entrySet()) { + for (Map.Entry entry : statsValues.entrySet()) { String key = entry.getKey(); NamedList stv = entry.getValue().getStatsValues(); - if ((Long) stv.get("count") != 0) { + if (force || ((Long) stv.get("count") != 0)) { stats_fields.add(key, stv); } else { stats_fields.add(key, null); } } - - rb.rsp.add("stats", stats); - rb._statsInfo = null; // free some objects + return stats; } - ///////////////////////////////////////////// /// SolrInfoMBean //////////////////////////////////////////// @@ -168,6 +179,8 @@ class StatsInfo { private final ResponseBuilder rb; private final List statsFields = new ArrayList<>(7); private final Map distribStatsValues = new LinkedHashMap<>(); + private final Map statsFieldMap = new LinkedHashMap<>(); + private final Map> tagToStatsFields = new LinkedHashMap<>(); public StatsInfo(ResponseBuilder rb) { this.rb = rb; @@ -177,10 +190,19 @@ class StatsInfo { // no stats.field params, nothing to parse. 
+ return; } - + for (String paramValue : statsParams) { StatsField current = new StatsField(rb, paramValue); statsFields.add(current); + for (String tag : current.getTagList()) { + List fieldList = tagToStatsFields.get(tag); + if (fieldList == null) { + fieldList = new ArrayList<>(); + } + fieldList.add(current); + tagToStatsFields.put(tag, fieldList); + } + statsFieldMap.put(current.getOutputKey(), current); distribStatsValues.put(current.getOutputKey(), StatsValuesFactory.createStatsValues(current)); } @@ -192,7 +214,31 @@ class StatsInfo { * as part of this request */ public List getStatsFields() { - return Collections.unmodifiableList(statsFields); + return Collections.unmodifiableList(statsFields); + } + + /** + * Returns the {@link StatsField} associated with the specified (effective) + * outputKey, or null if there was no {@link StatsParams#STATS_FIELD} param + * that would correspond with that key. + */ + public StatsField getStatsField(String outputKey) { + return statsFieldMap.get(outputKey); + } + + /** + * Return immutable list of {@link StatsField} instances by string tag local parameter. + * + * @param tag tag local parameter + * @return list of stats fields + */ + public List getStatsFieldsByTag(String tag) { + List raw = tagToStatsFields.get(tag); + if (null == raw) { + return Collections.emptyList(); + } else { + return Collections.unmodifiableList(raw); + } } /** @@ -203,7 +249,7 @@ class StatsInfo { * will never be null. 
*/ public Map getAggregateStatsValues() { - return Collections.unmodifiableMap(distribStatsValues); + return Collections.unmodifiableMap(distribStatsValues); } } diff --git a/solr/core/src/java/org/apache/solr/handler/component/StatsField.java b/solr/core/src/java/org/apache/solr/handler/component/StatsField.java index 115f288c977..9d6042b2415 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/StatsField.java +++ b/solr/core/src/java/org/apache/solr/handler/component/StatsField.java @@ -70,6 +70,7 @@ public class StatsField { private final String key; private final boolean calcDistinct; // TODO: put this inside localParams ? SOLR-6349 ? private final String[] facets; + private final List tagList; private final List excludeTagList; /** @@ -147,6 +148,10 @@ public class StatsField { String[] facets = params.getFieldParams(key, StatsParams.STATS_FACET); this.facets = (null == facets) ? new String[0] : facets; + String tagStr = localParams.get(CommonParams.TAG); + this.tagList = (null == tagStr) + ? 
Collections.emptyList() + : StrUtils.splitSmart(tagStr,','); // figure out if we need a special base DocSet String excludeStr = localParams.get(CommonParams.EXCLUDE); @@ -363,6 +368,11 @@ public class StatsField { return calcDistinct; } + + public List getTagList() { + return tagList; + } + public String toString() { return "StatsField<" + originalParam + ">"; } diff --git a/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java b/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java index 322d02a1730..445b57443dc 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java +++ b/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java @@ -316,8 +316,6 @@ class NumericStatsValues extends AbstractStatsValues { public NumericStatsValues(StatsField statsField) { super(statsField); - min = Double.POSITIVE_INFINITY; - max = Double.NEGATIVE_INFINITY; } @Override @@ -353,8 +351,22 @@ class NumericStatsValues extends AbstractStatsValues { */ @Override protected void updateMinMax(Number min, Number max) { - this.min = Math.min(this.min.doubleValue(), min.doubleValue()); - this.max = Math.max(this.max.doubleValue(), max.doubleValue()); + if (null == min) { + assert null == max : "min is null but max isn't ? ==> " + max; + return; // No-Op + } + + assert null != max : "max is null but min isn't ? ==> " + min; + + // we always use the double value, because that way the response Object class is + // consistent regardless of wether we only have 1 value or many that we min/max + // + // TODO: would be nice to have subclasses for each type of Number ... breaks backcompat + double minD = min.doubleValue(); + double maxD = max.doubleValue(); + + this.min = (null == this.min) ? minD : Math.min(this.min.doubleValue(), minD); + this.max = (null == this.max) ? 
maxD : Math.max(this.max.doubleValue(), maxD); } /** @@ -594,7 +606,7 @@ class StringStatsValues extends AbstractStatsValues { // Add no statistics } - /** + /** * Determines which of the given Strings is the maximum, as computed by {@link String#compareTo(String)} * * @param str1 String to compare against b diff --git a/solr/core/src/java/org/apache/solr/util/PivotListEntry.java b/solr/core/src/java/org/apache/solr/util/PivotListEntry.java index 4fd2b1768d5..770da599a88 100644 --- a/solr/core/src/java/org/apache/solr/util/PivotListEntry.java +++ b/solr/core/src/java/org/apache/solr/util/PivotListEntry.java @@ -17,6 +17,10 @@ package org.apache.solr.util; * limitations under the License. */ +import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrException.ErrorCode; +import org.apache.solr.common.util.NamedList; + import java.util.Locale; /** @@ -24,16 +28,28 @@ import java.util.Locale; */ public enum PivotListEntry { - FIELD(0), + // mandatory entries with exact indexes + FIELD(0), VALUE(1), COUNT(2), - PIVOT(3); + // optional entries + PIVOT, + STATS; - // we could just use the ordinal(), but safer to be very explicit - private final int index; + private static final int MIN_INDEX_OF_OPTIONAL = 3; + + /** + * Given a NamedList representing a Pivot Value, this is Minimum Index at + * which this PivotListEntry may exist + */ + private final int minIndex; - private PivotListEntry(int index) { - this.index = index; + private PivotListEntry() { + this.minIndex = MIN_INDEX_OF_OPTIONAL; + } + private PivotListEntry(int minIndex) { + assert minIndex < MIN_INDEX_OF_OPTIONAL; + this.minIndex = minIndex; } /** @@ -53,10 +69,19 @@ public enum PivotListEntry { } /** - * Indec of this entry when used in response + * Given a {@link NamedList} representing a Pivot Value, extracts the Object + * which corrisponds to this {@link PivotListEntry}, or returns null if not found. 
*/ - public int getIndex() { - return index; + public Object extract(NamedList pivotList) { + if (this.minIndex < MIN_INDEX_OF_OPTIONAL) { + // a mandatory entry at an exact index. + assert this.getName().equals(pivotList.getName(this.minIndex)); + assert this.minIndex < pivotList.size(); + return pivotList.getVal(this.minIndex); + } + // otherweise... + // scan starting at the min/optional index + return pivotList.get(this.getName(), this.minIndex); } } diff --git a/solr/core/src/test/org/apache/solr/cloud/TestCloudPivotFacet.java b/solr/core/src/test/org/apache/solr/cloud/TestCloudPivotFacet.java index 12ae89d72cf..5c0d4b77c7e 100644 --- a/solr/core/src/test/org/apache/solr/cloud/TestCloudPivotFacet.java +++ b/solr/core/src/test/org/apache/solr/cloud/TestCloudPivotFacet.java @@ -16,17 +16,22 @@ */ package org.apache.solr.cloud; +import org.apache.commons.collections.CollectionUtils; +import org.apache.lucene.util.CollectionUtil; import org.apache.lucene.util.TestUtil; import org.apache.solr.SolrTestCaseJ4.SuppressSSL; import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.response.FieldStatsInfo; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.client.solrj.response.PivotField; import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.params.StatsParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.schema.TrieDateField; +import org.apache.solr.common.params.FacetParams; // jdoc lint import static org.apache.solr.common.params.FacetParams.*; import org.apache.commons.lang.StringUtils; @@ -92,6 +97,9 @@ public class TestCloudPivotFacet extends AbstractFullDistribZkTestBase { @Override public void doTest() throws Exception { + + sanityCheckAssertDoubles(); + waitForThingsToLevelOut(30000); // TODO: why whould we have to wait? 
// handle.clear(); @@ -107,7 +115,7 @@ public class TestCloudPivotFacet extends AbstractFullDistribZkTestBase { for (int i = 1; i <= numDocs; i++) { SolrInputDocument doc = buildRandomDocument(i); - // not efficient, but it garuntees that even if people change buildRandomDocument + // not efficient, but it guarantees that even if people change buildRandomDocument // we'll always have the full list of fields w/o needing to keep code in sync fieldNameSet.addAll(doc.getFieldNames()); @@ -119,7 +127,7 @@ public class TestCloudPivotFacet extends AbstractFullDistribZkTestBase { assertTrue("WTF, bogus field exists?", fieldNameSet.add("bogus_not_in_any_doc_s")); final String[] fieldNames = fieldNameSet.toArray(new String[fieldNameSet.size()]); - Arrays.sort(fieldNames); // need determinism for buildRandomPivot calls + Arrays.sort(fieldNames); // need determinism when picking random fields for (int i = 0; i < 5; i++) { @@ -134,10 +142,28 @@ public class TestCloudPivotFacet extends AbstractFullDistribZkTestBase { baseP.add("fq", "id:[* TO " + TestUtil.nextInt(random(),200,numDocs) + "]"); } - ModifiableSolrParams pivotP = params(FACET,"true", - FACET_PIVOT, buildRandomPivot(fieldNames)); + final boolean stats = random().nextBoolean(); + if (stats) { + baseP.add(StatsParams.STATS, "true"); + + // if we are doing stats, then always generated the same # of STATS_FIELD + // params, using multiple tags from a fixed set, but with diff fieldName values. + // later, each pivot will randomly pick a tag. 
+ baseP.add(StatsParams.STATS_FIELD, "{!key=sk1 tag=st1,st2}" + + pickRandomStatsFields(fieldNames)); + baseP.add(StatsParams.STATS_FIELD, "{!key=sk2 tag=st2,st3}" + + pickRandomStatsFields(fieldNames)); + baseP.add(StatsParams.STATS_FIELD, "{!key=sk3 tag=st3,st4}" + + pickRandomStatsFields(fieldNames)); + // NOTE: there's a chance that some of those stats field names + // will be the same, but if so, all the better to test that edge case + } + + ModifiableSolrParams pivotP = params(FACET,"true"); + pivotP.add(FACET_PIVOT, buildPivotParamValue(buildRandomPivot(fieldNames))); + if (random().nextBoolean()) { - pivotP.add(FACET_PIVOT, buildRandomPivot(fieldNames)); + pivotP.add(FACET_PIVOT, buildPivotParamValue(buildRandomPivot(fieldNames))); } // keep limit low - lots of unique values, and lots of depth in pivots @@ -268,7 +294,7 @@ public class TestCloudPivotFacet extends AbstractFullDistribZkTestBase { params("fq", buildFilter(constraint))); List subPivots = null; try { - assertNumFound(pivotName, constraint.getCount(), p); + assertPivotData(pivotName, constraint, p); subPivots = constraint.getPivot(); } catch (Exception e) { throw new RuntimeException(pivotName + ": count query failed: " + p + ": " + @@ -285,6 +311,97 @@ public class TestCloudPivotFacet extends AbstractFullDistribZkTestBase { return depth + 1; } + /** + * Executes a query and compares the results with the data available in the + * {@link PivotField} constraint -- this method is not recursive, and doesn't + * check anything about the sub-pivots (if any). 
+ * + * @param pivotName pivot name + * @param constraint filters on pivot + * @param params base solr parameters + */ + private void assertPivotData(String pivotName, PivotField constraint, SolrParams params) + throws SolrServerException { + + SolrParams p = SolrParams.wrapDefaults(params("rows","0"), params); + QueryResponse res = cloudClient.query(p); + String msg = pivotName + ": " + p; + + assertNumFound(msg, constraint.getCount(), res); + + if ( p.getBool(StatsParams.STATS, false) ) { + // only check stats if stats expected + assertPivotStats(msg, constraint, res); + } + } + + /** + * Compare top level stats in response with stats from pivot constraint + */ + private void assertPivotStats(String message, PivotField constraint, QueryResponse response) throws SolrServerException { + + if (null == constraint.getFieldStatsInfo()) { + // no stats for this pivot, nothing to check + + // TODO: use a trace param to know if/how-many to expect ? + log.info("No stats to check for => " + message); + return; + } + + Map actualFieldStatsInfoMap = response.getFieldStatsInfo(); + + for (FieldStatsInfo pivotStats : constraint.getFieldStatsInfo().values()) { + String statsKey = pivotStats.getName(); + + FieldStatsInfo actualStats = actualFieldStatsInfoMap.get(statsKey); + + if (actualStats == null) { + // handle case for not found stats (using stats query) + // + // this has to be a special case check due to the legacy behavior of "top level" + // StatsComponent results being "null" (and not even included in the + // getFieldStatsInfo() Map due to special SolrJ logic) + + log.info("Requested stats missing in verification query, pivot stats: " + pivotStats); + assertEquals("Special Count", 0L, pivotStats.getCount().longValue()); + assertEquals("Special Missing", + constraint.getCount(), pivotStats.getMissing().longValue()); + + } else { + // regular stats, compare everything...
+ + assert actualStats != null; + String msg = " of " + statsKey + " => " + message; + + assertEquals("Min" + msg, pivotStats.getMin(), actualStats.getMin()); + assertEquals("Max" + msg, pivotStats.getMax(), actualStats.getMax()); + assertEquals("Mean" + msg, pivotStats.getMean(), actualStats.getMean()); + assertEquals("Sum" + msg, pivotStats.getSum(), actualStats.getSum()); + assertEquals("Count" + msg, pivotStats.getCount(), actualStats.getCount()); + assertEquals("Missing" + msg, pivotStats.getMissing(), actualStats.getMissing()); + + assertDoubles("Stddev" + msg, pivotStats.getStddev(), actualStats.getStddev()); + assertDoubles("SumOfSquares" + msg, + pivotStats.getSumOfSquares(), actualStats.getSumOfSquares()); + } + } + + if (constraint.getFieldStatsInfo().containsKey("sk2")) { // cheeseball hack + // if "sk2" was one of the stats we computed, then we must have also seen + // sk1 or sk3 because of the way the tags are fixed + assertEquals("had stats sk2, but not another stat?", + 2, constraint.getFieldStatsInfo().size()); + } else { + // if we did not see "sk2", then 1 of the others must be alone + assertEquals("only expected 1 stat", + 1, constraint.getFieldStatsInfo().size()); + assertTrue("not sk1 or sk3", + constraint.getFieldStatsInfo().containsKey("sk1") || + constraint.getFieldStatsInfo().containsKey("sk3")); + } + + } + /** * Verify that the PivotFields we're lookin at doesn't violate any of the expected * behaviors based on the TRACE_* params found in the base params @@ -364,6 +481,39 @@ public class TestCloudPivotFacet extends AbstractFullDistribZkTestBase { return StringUtils.join(fields, ","); } + /** + * Picks a random field to use for Stats + */ + private static String pickRandomStatsFields(String[] fieldNames) { + // we need to skip boolean fields when computing stats + String fieldName; + do { + fieldName = fieldNames[TestUtil.nextInt(random(),0,fieldNames.length-1)]; + } + while(fieldName.endsWith("_b") || fieldName.endsWith("_b1")) ; + +
return fieldName; + } + + /** + * Generates a random {@link FacetParams#FACET_PIVOT} value w/ local params + * using the specified pivotValue. + */ + private static String buildPivotParamValue(String pivotValue) { + // randomly decide which stat tag to use + + // if this is 0, or stats aren't enabled, we'll be asking for a tag that doesn't exist + // ...which should be fine (just like excluding a taged fq that doesn't exist) + final int statTag = TestUtil.nextInt(random(), -1, 4); + + if (0 <= statTag) { + // only use 1 tag name in the 'stats' localparam - see SOLR-6663 + return "{!stats=st"+statTag+"}" + pivotValue; + } else { + // statTag < 0 == sanity check the case of a pivot w/o any stats + return pivotValue; + } + } /** * Creates a document with randomized field values, some of which be missing values, @@ -512,16 +662,80 @@ public class TestCloudPivotFacet extends AbstractFullDistribZkTestBase { } /** - * Asserts the number of docs matching the SolrParams aganst the cloudClient + * Asserts the number of docs found in the response */ - private void assertNumFound(String msg, int expected, SolrParams p) + private void assertNumFound(String msg, int expected, QueryResponse response) throws SolrServerException { countNumFoundChecks++; - SolrParams params = SolrParams.wrapDefaults(params("rows","0"), p); - assertEquals(msg + ": " + params, - expected, cloudClient.query(params).getResults().getNumFound()); + assertEquals(msg, expected, response.getResults().getNumFound()); + } + + /** + * Given two objects, asserts that they are either both null, or both Numbers + * with double values that are equally-ish with a "small" epsilon (relative to the + * scale of the expected value) + * + * @see Number#doubleValue + */ + private void assertDoubles(String msg, Object expected, Object actual) { + if (null == expected || null == actual) { + assertEquals(msg, expected, actual); + } else { + assertTrue(msg + " ... 
expected not a double: " + + expected + "=>" + expected.getClass(), + expected instanceof Number); + assertTrue(msg + " ... actual not a double: " + + actual + "=>" + actual.getClass(), + actual instanceof Number); + + // compute a non-negative epsilon relative to the magnitude of the expected value + double expect = ((Number)expected).doubleValue(); + double epsilon = Math.abs(expect * 0.1E-7D); + + assertEquals(msg, expect, ((Number)actual).doubleValue(), epsilon); + + } + } + + /** + * test the test: negative cases signal a missed failure with a non-AssertionError so the catch blocks cannot swallow it + */ + private void sanityCheckAssertDoubles() { + assertDoubles("Null?", null, null); + assertDoubles("big", + new Double(2.3005390038169265E9), + new Double(2.300539003816927E9)); + assertDoubles("small", + new Double(2.3005390038169265E-9), + new Double(2.300539003816927E-9)); + try { + assertDoubles("non-null", null, 42); + throw new IllegalStateException("assertDoubles did not fail: expected was null"); + } catch (AssertionError e) { /* expected */ } + try { + assertDoubles("non-null", 42, null); + throw new IllegalStateException("assertDoubles did not fail: actual was null"); + } catch (AssertionError e) { /* expected */ } + try { + assertDoubles("non-number", 42, "foo"); + throw new IllegalStateException("assertDoubles did not fail: actual was non-number"); + } catch (AssertionError e) { /* expected */ } + try { + assertDoubles("diff", + new Double(2.3005390038169265E9), + new Double(2.267272520100462E9)); + throw new IllegalStateException("assertDoubles did not fail: big & diff"); + } catch (AssertionError e) { /* expected */ } + try { + assertDoubles("diff", + new Double(2.3005390038169265E-9), + new Double(2.267272520100462E-9)); + throw new IllegalStateException("assertDoubles did not fail: small & diff"); + } catch (AssertionError e) { /* expected */ } + + } /** @@ -529,4 +743,5 @@ public class TestCloudPivotFacet extends AbstractFullDistribZkTestBase { * @see #assertPivotCountsAreCorrect(SolrParams,SolrParams) */ private int countNumFoundChecks = 0; + } diff --git a/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotLargeTest.java b/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotLargeTest.java index 31e23339fa0..6e48f6d71da 100644 --- a/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotLargeTest.java +++
b/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotLargeTest.java @@ -24,6 +24,7 @@ import java.io.IOException; import org.apache.solr.BaseDistributedSearchTestCase; import org.apache.solr.client.solrj.SolrServer; import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.response.FieldStatsInfo; import org.apache.solr.client.solrj.response.PivotField; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.common.SolrInputDocument; @@ -665,7 +666,80 @@ public class DistributedFacetPivotLargeTest extends BaseDistributedSearchTestCas "facet.pivot","place_s,company_t", FacetParams.FACET_OVERREQUEST_RATIO, "0", FacetParams.FACET_OVERREQUEST_COUNT, "0"); - + + doTestDeepPivotStats(); + } + + private void doTestDeepPivotStats() throws Exception { + + QueryResponse rsp = query("q", "*:*", + "rows", "0", + "facet", "true", + "facet.pivot","{!stats=s1}place_s,company_t", + "stats", "true", + "stats.field", "{!key=avg_price tag=s1}pay_i"); + + List pivots = rsp.getFacetPivot().get("place_s,company_t"); + + PivotField cardiffPivotField = pivots.get(0); + assertEquals("cardiff", cardiffPivotField.getValue()); + assertEquals(257, cardiffPivotField.getCount()); + + FieldStatsInfo cardiffStatsInfo = cardiffPivotField.getFieldStatsInfo().get("avg_price"); + assertEquals("avg_price", cardiffStatsInfo.getName()); + assertEquals(0.0, cardiffStatsInfo.getMin()); + assertEquals(8742.0, cardiffStatsInfo.getMax()); + assertEquals(257, (long) cardiffStatsInfo.getCount()); + assertEquals(0, (long) cardiffStatsInfo.getMissing()); + assertEquals(347554.0, cardiffStatsInfo.getSum()); + assertEquals(8.20968772E8, cardiffStatsInfo.getSumOfSquares(), 0.1E-7); + assertEquals(1352.35019455253, (double) cardiffStatsInfo.getMean(), 0.1E-7); + assertEquals(1170.86048165857, cardiffStatsInfo.getStddev(), 0.1E-7); + + PivotField bbcCardifftPivotField = cardiffPivotField.getPivot().get(0); + assertEquals("bbc", 
bbcCardifftPivotField.getValue()); + assertEquals(101, bbcCardifftPivotField.getCount()); + + FieldStatsInfo bbcCardifftPivotFieldStatsInfo = bbcCardifftPivotField.getFieldStatsInfo().get("avg_price"); + assertEquals(2400.0, bbcCardifftPivotFieldStatsInfo.getMin()); + assertEquals(8742.0, bbcCardifftPivotFieldStatsInfo.getMax()); + assertEquals(101, (long) bbcCardifftPivotFieldStatsInfo.getCount()); + assertEquals(0, (long) bbcCardifftPivotFieldStatsInfo.getMissing()); + assertEquals(248742.0, bbcCardifftPivotFieldStatsInfo.getSum()); + assertEquals(6.52422564E8, bbcCardifftPivotFieldStatsInfo.getSumOfSquares(), 0.1E-7); + assertEquals(2462.792079208, (double) bbcCardifftPivotFieldStatsInfo.getMean(), 0.1E-7); + assertEquals(631.0525860312, bbcCardifftPivotFieldStatsInfo.getStddev(), 0.1E-7); + + + PivotField placeholder0PivotField = pivots.get(2); + assertEquals("0placeholder", placeholder0PivotField.getValue()); + assertEquals(6, placeholder0PivotField.getCount()); + + FieldStatsInfo placeholder0PivotFieldStatsInfo = placeholder0PivotField.getFieldStatsInfo().get("avg_price"); + assertEquals("avg_price", placeholder0PivotFieldStatsInfo.getName()); + assertEquals(2000.0, placeholder0PivotFieldStatsInfo.getMin()); + assertEquals(6400.0, placeholder0PivotFieldStatsInfo.getMax()); + assertEquals(6, (long) placeholder0PivotFieldStatsInfo.getCount()); + assertEquals(0, (long) placeholder0PivotFieldStatsInfo.getMissing()); + assertEquals(22700.0, placeholder0PivotFieldStatsInfo.getSum()); + assertEquals(1.0105E8, placeholder0PivotFieldStatsInfo.getSumOfSquares(), 0.1E-7); + assertEquals(3783.333333333, (double) placeholder0PivotFieldStatsInfo.getMean(), 0.1E-7); + assertEquals(1741.742422595, placeholder0PivotFieldStatsInfo.getStddev(), 0.1E-7); + + PivotField microsoftPlaceholder0PivotField = placeholder0PivotField.getPivot().get(1); + assertEquals("microsoft", microsoftPlaceholder0PivotField.getValue()); + assertEquals(6, microsoftPlaceholder0PivotField.getCount()); + 
+ FieldStatsInfo microsoftPlaceholder0PivotFieldStatsInfo = microsoftPlaceholder0PivotField.getFieldStatsInfo().get("avg_price"); + assertEquals("avg_price", microsoftPlaceholder0PivotFieldStatsInfo.getName()); + assertEquals(2000.0, microsoftPlaceholder0PivotFieldStatsInfo.getMin()); + assertEquals(6400.0, microsoftPlaceholder0PivotFieldStatsInfo.getMax()); + assertEquals(6, (long) microsoftPlaceholder0PivotFieldStatsInfo.getCount()); + assertEquals(0, (long) microsoftPlaceholder0PivotFieldStatsInfo.getMissing()); + assertEquals(22700.0, microsoftPlaceholder0PivotFieldStatsInfo.getSum()); + assertEquals(1.0105E8, microsoftPlaceholder0PivotFieldStatsInfo.getSumOfSquares(), 0.1E-7); + assertEquals(3783.333333333, (double) microsoftPlaceholder0PivotFieldStatsInfo.getMean(), 0.1E-7); + assertEquals(1741.742422595, microsoftPlaceholder0PivotFieldStatsInfo.getStddev(), 0.1E-7); } /** diff --git a/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotLongTailTest.java b/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotLongTailTest.java index c080b174f4b..54cb064c54d 100644 --- a/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotLongTailTest.java +++ b/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotLongTailTest.java @@ -27,9 +27,8 @@ import java.io.IOException; import org.apache.solr.BaseDistributedSearchTestCase; import org.apache.solr.client.solrj.SolrServer; import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.response.FieldStatsInfo; import org.apache.solr.client.solrj.response.PivotField; -import org.apache.solr.client.solrj.response.QueryResponse; -import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.params.FacetParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.ModifiableSolrParams; @@ -68,9 +67,9 @@ public class DistributedFacetPivotLongTailTest extends 
BaseDistributedSearchTest // the 5 top foo_s terms have 100 docs each on every shard for (int i = 0; i < 100; i++) { for (int j = 0; j < 5; j++) { - shard0.add(sdoc("id", getDocNum(), "foo_s", "aaa"+j)); - shard1.add(sdoc("id", getDocNum(), "foo_s", "aaa"+j)); - shard2.add(sdoc("id", getDocNum(), "foo_s", "aaa"+j)); + shard0.add(sdoc("id", getDocNum(), "foo_s", "aaa"+j, "stat_i", j * 13 - i)); + shard1.add(sdoc("id", getDocNum(), "foo_s", "aaa"+j, "stat_i", j * 3 + i)); + shard2.add(sdoc("id", getDocNum(), "foo_s", "aaa"+j, "stat_i", i * 7 + j)); } } @@ -78,14 +77,14 @@ public class DistributedFacetPivotLongTailTest extends BaseDistributedSearchTest // on both shard0 & shard1 ("bbb_") for (int i = 0; i < 50; i++) { for (int j = 0; j < 20; j++) { - shard0.add(sdoc("id", getDocNum(), "foo_s", "bbb"+j)); - shard1.add(sdoc("id", getDocNum(), "foo_s", "bbb"+j)); + shard0.add(sdoc("id", getDocNum(), "foo_s", "bbb"+j, "stat_i", 0)); + shard1.add(sdoc("id", getDocNum(), "foo_s", "bbb"+j, "stat_i", 1)); } // distracting term appears on only on shard2 50 times shard2.add(sdoc("id", getDocNum(), "foo_s", "junkA")); } // put "bbb0" on shard2 exactly once to sanity check refinement - shard2.add(sdoc("id", getDocNum(), "foo_s", "bbb0")); + shard2.add(sdoc("id", getDocNum(), "foo_s", "bbb0", "stat_i", -2)); // long 'tail' foo_s term appears in 45 docs on every shard // foo_s:tail is the only term with bar_s sub-pivot terms @@ -95,11 +94,12 @@ public class DistributedFacetPivotLongTailTest extends BaseDistributedSearchTest // but the top 5 terms are ccc(0-4) -- 7 on each shard // (4 docs each have junk terms) String sub_term = (i < 35) ? "ccc"+(i % 5) : ((i < 41) ? 
"tailB" : "junkA"); - shard0.add(sdoc("id", getDocNum(), "foo_s", "tail", "bar_s", sub_term)); - shard1.add(sdoc("id", getDocNum(), "foo_s", "tail", "bar_s", sub_term)); + shard0.add(sdoc("id", getDocNum(), "foo_s", "tail", "bar_s", sub_term, "stat_i", i)); + shard1.add(sdoc("id", getDocNum(), "foo_s", "tail", "bar_s", sub_term, "stat_i", i)); // shard2's top 5 sub-pivot terms are junk only it has with 8 docs each // and 5 docs that use "tailB" + // NOTE: none of these get stat_i ! ! sub_term = (i < 40) ? "junkB"+(i % 5) : "tailB"; shard2.add(sdoc("id", getDocNum(), "foo_s", "tail", "bar_s", sub_term)); } @@ -175,7 +175,9 @@ public class DistributedFacetPivotLongTailTest extends BaseDistributedSearchTest FacetParams.FACET_OVERREQUEST_RATIO, "0", "facet", "true", "facet.limit", "6", - "facet.pivot", "foo_s,bar_s" ) + "facet.pivot", "{!stats=sxy}foo_s,bar_s", + "stats", "true", + "stats.field", "{!tag=sxy}stat_i") ).getFacetPivot().get("foo_s,bar_s"); assertEquals(6, pivots.size()); for (int i = 0; i < 5; i++) { @@ -183,9 +185,23 @@ public class DistributedFacetPivotLongTailTest extends BaseDistributedSearchTest assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("aaa")); assertEquals(pivot.toString(), 300, pivot.getCount()); } - // even w/o the long tail, we should have still asked shard2 to refine bbb0 - assertTrue(pivots.get(5).toString(), pivots.get(5).getValue().equals("bbb0")); - assertEquals(pivots.get(5).toString(), 101, pivots.get(5).getCount()); + { // even w/o the long tail, we should have still asked shard2 to refine bbb0 + pivot = pivots.get(5); + assertTrue(pivot.toString(), pivot.getValue().equals("bbb0")); + assertEquals(pivot.toString(), 101, pivot.getCount()); + // basic check of refined stats + FieldStatsInfo bbb0Stats = pivot.getFieldStatsInfo().get("stat_i"); + assertEquals("stat_i", bbb0Stats.getName()); + assertEquals(-2.0, bbb0Stats.getMin()); + assertEquals(1.0, bbb0Stats.getMax()); + assertEquals(101, (long) 
bbb0Stats.getCount()); + assertEquals(0, (long) bbb0Stats.getMissing()); + assertEquals(48.0, bbb0Stats.getSum()); + assertEquals(0.475247524752475, (double) bbb0Stats.getMean(), 0.1E-7); + assertEquals(54.0, bbb0Stats.getSumOfSquares(), 0.1E-7); + assertEquals(0.55846323792, bbb0Stats.getStddev(), 0.1E-7); + } + // with default overrequesting, we should find the correct top 6 including // long tail and top sub-pivots @@ -284,6 +300,65 @@ public class DistributedFacetPivotLongTailTest extends BaseDistributedSearchTest assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("ccc")); assertEquals(pivot.toString(), 14, pivot.getCount()); } + + doTestDeepPivotStats(); + } + + public void doTestDeepPivotStats() throws Exception { + // Deep checking of some Facet stats - no refinement involved here + + List pivots = + query("q", "*:*", + "shards", getShardsString(), + "facet", "true", + "rows" , "0", + "facet.pivot","{!stats=s1}foo_s,bar_s", + "stats", "true", + "stats.field", "{!key=avg_price tag=s1}stat_i").getFacetPivot().get("foo_s,bar_s"); + PivotField aaa0PivotField = pivots.get(0); + assertEquals("aaa0", aaa0PivotField.getValue()); + assertEquals(300, aaa0PivotField.getCount()); + + FieldStatsInfo aaa0StatsInfo = aaa0PivotField.getFieldStatsInfo().get("avg_price"); + assertEquals("avg_price", aaa0StatsInfo.getName()); + assertEquals(-99.0, aaa0StatsInfo.getMin()); + assertEquals(693.0, aaa0StatsInfo.getMax()); + assertEquals(300, (long) aaa0StatsInfo.getCount()); + assertEquals(0, (long) aaa0StatsInfo.getMissing()); + assertEquals(34650.0, aaa0StatsInfo.getSum()); + assertEquals(1.674585E7, aaa0StatsInfo.getSumOfSquares(), 0.1E-7); + assertEquals(115.5, (double) aaa0StatsInfo.getMean(), 0.1E-7); + assertEquals(206.4493184076, aaa0StatsInfo.getStddev(), 0.1E-7); + + PivotField tailPivotField = pivots.get(5); + assertEquals("tail", tailPivotField.getValue()); + assertEquals(135, tailPivotField.getCount()); + + FieldStatsInfo tailPivotFieldStatsInfo = 
tailPivotField.getFieldStatsInfo().get("avg_price"); + assertEquals("avg_price", tailPivotFieldStatsInfo.getName()); + assertEquals(0.0, tailPivotFieldStatsInfo.getMin()); + assertEquals(44.0, tailPivotFieldStatsInfo.getMax()); + assertEquals(90, (long) tailPivotFieldStatsInfo.getCount()); + assertEquals(45, (long) tailPivotFieldStatsInfo.getMissing()); + assertEquals(1980.0, tailPivotFieldStatsInfo.getSum()); + assertEquals(22.0, (double) tailPivotFieldStatsInfo.getMean(), 0.1E-7); + assertEquals(58740.0, tailPivotFieldStatsInfo.getSumOfSquares(), 0.1E-7); + assertEquals(13.0599310011, tailPivotFieldStatsInfo.getStddev(), 0.1E-7); + + PivotField tailBPivotField = tailPivotField.getPivot().get(0); + assertEquals("tailB", tailBPivotField.getValue()); + assertEquals(17, tailBPivotField.getCount()); + + FieldStatsInfo tailBPivotFieldStatsInfo = tailBPivotField.getFieldStatsInfo().get("avg_price"); + assertEquals("avg_price", tailBPivotFieldStatsInfo.getName()); + assertEquals(35.0, tailBPivotFieldStatsInfo.getMin()); + assertEquals(40.0, tailBPivotFieldStatsInfo.getMax()); + assertEquals(12, (long) tailBPivotFieldStatsInfo.getCount()); + assertEquals(5, (long) tailBPivotFieldStatsInfo.getMissing()); + assertEquals(450.0, tailBPivotFieldStatsInfo.getSum()); + assertEquals(37.5, (double) tailBPivotFieldStatsInfo.getMean(), 0.1E-7); + assertEquals(16910.0, tailBPivotFieldStatsInfo.getSumOfSquares(), 0.1E-7); + assertEquals(1.78376517, tailBPivotFieldStatsInfo.getStddev(), 0.1E-7); } } diff --git a/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotSmallAdvancedTest.java b/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotSmallAdvancedTest.java new file mode 100644 index 00000000000..d428f068c2d --- /dev/null +++ b/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotSmallAdvancedTest.java @@ -0,0 +1,234 @@ +package org.apache.solr.handler.component; + +/* + * Licensed to the Apache Software Foundation 
(ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.solr.BaseDistributedSearchTestCase; +import org.apache.solr.client.solrj.SolrServer; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.response.FieldStatsInfo; +import org.apache.solr.client.solrj.response.PivotField; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.common.params.FacetParams; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.common.params.SolrParams; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; + +/** + * tests some edge cases of pivot faceting with stats + * + * NOTE: This test ignores the control collection (in single node mode, there is no + * need for the overrequesting, all the data is local -- so comparisons with it wouldn't + * be valid in some cases we are testing here) + */ +public class DistributedFacetPivotSmallAdvancedTest extends BaseDistributedSearchTestCase { + + public DistributedFacetPivotSmallAdvancedTest() { + this.fixShardCount = true; + this.shardCount = 2; + } + + @Override + public void doTest() throws Exception { + + del("*:*"); + final SolrServer shard0 = clients.get(0); + 
final SolrServer shard1 = clients.get(1); + + // NOTE: we use the literal (4 character) string "null" as a company name + // to help ensure there isn't any bugs where the literal string is treated as if it + // were a true NULL value. + + // shard0 + shard0.add(sdoc(id, 19, "place_t", "cardiff dublin", + "company_t", "microsoft polecat", + "price_ti", "15", "foo_s", "aaa", "foo_i", 10)); + shard0.add(sdoc(id, 20, "place_t", "dublin", + "company_t", "polecat microsoft null", + "price_ti", "19", "foo_s", "bbb", "foo_i", 4)); + shard0.add(sdoc(id, 21, "place_t", "london la dublin", + "company_t", "microsoft fujitsu null polecat", + "price_ti", "29", "foo_s", "bbb", "foo_i", 3)); + shard0.add(sdoc(id, 22, "place_t", "krakow london cardiff", + "company_t", "polecat null bbc", + "price_ti", "39", "foo_s", "bbb", "foo_i", 6)); + shard0.add(sdoc(id, 23, "place_t", "london", + "company_t", "", + "price_ti", "29", "foo_s", "bbb", "foo_i", 9)); + // shard1 + shard1.add(sdoc(id, 24, "place_t", "la", + "company_t", "", + "foo_s", "aaa", "foo_i", 21)); + shard1.add(sdoc(id, 25, + "company_t", "microsoft polecat null fujitsu null bbc", + "price_ti", "59", "foo_s", "aaa", "foo_i", 5)); + shard1.add(sdoc(id, 26, "place_t", "krakow", + "company_t", "null", + "foo_s", "aaa", "foo_i", 23)); + shard1.add(sdoc(id, 27, "place_t", "krakow cardiff dublin london la", + "company_t", "null microsoft polecat bbc fujitsu", + "foo_s", "aaa", "foo_i", 91)); + shard1.add(sdoc(id, 28, "place_t", "cork", + "company_t", "fujitsu rte", "foo_s", "aaa", "foo_i", 76)); + commit(); + + handle.clear(); + handle.put("QTime", SKIPVAL); + handle.put("timestamp", SKIPVAL); + handle.put("maxScore", SKIPVAL); + + doTestDeepPivotStatsOnString(); + doTestTopStatsWithRefinement(); + } + + /** + * we need to ensure that stats never "overcount" the values from a single shard + * even if we hit that shard with a refinement request + */ + private void doTestTopStatsWithRefinement() throws Exception { + + + 
ModifiableSolrParams coreParams = params("q", "*:*", "rows", "0", + "stats", "true", + "stats.field", "{!tag=s1}foo_i" ); + ModifiableSolrParams facetParams = new ModifiableSolrParams(coreParams); + facetParams.add(params("facet", "true", + "facet.limit", "1", + "facet.pivot", "{!stats=s1}place_t,company_t")); + + ModifiableSolrParams facetForceRefineParams = new ModifiableSolrParams(facetParams); + facetForceRefineParams.add(params(FacetParams.FACET_OVERREQUEST_COUNT, "0", + FacetParams.FACET_OVERREQUEST_RATIO, "0")); + + for (ModifiableSolrParams params : new ModifiableSolrParams[] { + coreParams, facetParams, facetForceRefineParams }) { + + // for all three sets of these params, the "top level" + // stats in the response of a distributed query should be the same + ModifiableSolrParams q = new ModifiableSolrParams(params); + q.set("shards", getShardsString()); + + QueryResponse rsp = queryServer(q); + FieldStatsInfo fieldStatsInfo = rsp.getFieldStatsInfo().get("foo_i"); + + String msg = q.toString(); + + assertEquals(msg, 3.0, fieldStatsInfo.getMin()); + assertEquals(msg, 91.0, fieldStatsInfo.getMax()); + assertEquals(msg, 10, (long) fieldStatsInfo.getCount()); + assertEquals(msg, 0, (long) fieldStatsInfo.getMissing()); + assertEquals(msg, 248.0, fieldStatsInfo.getSum()); + assertEquals(msg, 15294.0, fieldStatsInfo.getSumOfSquares(), 0.1E-7); + assertEquals(msg, 24.8, (double) fieldStatsInfo.getMean(), 0.1E-7); + assertEquals(msg, 31.87405772027709, fieldStatsInfo.getStddev(), 0.1E-7); + + if (params.getBool("facet", false)) { + // if this was a facet request, then the top pivot constraint and pivot + // stats should match what we expect - regardless of whether refine + // was used, or if the query was initially satisfied by the default overrequest + + List placePivots = rsp.getFacetPivot().get("place_t,company_t"); + assertEquals(1, placePivots.size()); + + PivotField dublinPivotField = placePivots.get(0); + assertEquals("dublin", dublinPivotField.getValue()); +
assertEquals(4, dublinPivotField.getCount()); + assertEquals(1, dublinPivotField.getPivot().size()); + + PivotField microsoftPivotField = dublinPivotField.getPivot().get(0); + assertEquals("microsoft", microsoftPivotField.getValue()); + assertEquals(4, microsoftPivotField.getCount()); + + FieldStatsInfo dublinMicrosoftStatsInfo = microsoftPivotField.getFieldStatsInfo().get("foo_i"); + assertEquals(3.0D, dublinMicrosoftStatsInfo.getMin()); + assertEquals(91.0D, dublinMicrosoftStatsInfo.getMax()); + assertEquals(4, (long) dublinMicrosoftStatsInfo.getCount()); + assertEquals(0, (long) dublinMicrosoftStatsInfo.getMissing()); + + } + } + + // sanity check that the top pivot from each shard is diff, to prove to + // ourselves that the above queries really must have involved refinement. + Object s0pivValue = clients.get(0) + .query(facetParams).getFacetPivot().get("place_t,company_t").get(0).getValue(); + Object s1pivValue = clients.get(1) + .query(facetParams).getFacetPivot().get("place_t,company_t").get(0).getValue(); + assertFalse("both shards have same top constraint, test is invalid" + + "(did someone change the test data?) 
==> " + + s0pivValue + "==" + s1pivValue, s0pivValue.equals(s1pivValue)); + + } + + private void doTestDeepPivotStatsOnString() throws Exception { + SolrParams params = params("q", "*:*", "rows", "0", + "shards", getShardsString(), + "facet", "true", "stats", "true", + "facet.pivot", "{!stats=s1}place_t,company_t", + "stats.field", "{!key=avg_price tag=s1}foo_s"); + QueryResponse rsp = queryServer(new ModifiableSolrParams(params)); + + List placePivots = rsp.getFacetPivot().get("place_t,company_t"); + + PivotField dublinPivotField = placePivots.get(0); + assertEquals("dublin", dublinPivotField.getValue()); + assertEquals(4, dublinPivotField.getCount()); + + PivotField microsoftPivotField = dublinPivotField.getPivot().get(0); + assertEquals("microsoft", microsoftPivotField.getValue()); + assertEquals(4, microsoftPivotField.getCount()); + + FieldStatsInfo dublinMicrosoftStatsInfo = microsoftPivotField.getFieldStatsInfo().get("avg_price"); + assertEquals("aaa", dublinMicrosoftStatsInfo.getMin()); + assertEquals("bbb", dublinMicrosoftStatsInfo.getMax()); + assertEquals(4, (long) dublinMicrosoftStatsInfo.getCount()); + assertEquals(0, (long) dublinMicrosoftStatsInfo.getMissing()); + + PivotField cardiffPivotField = placePivots.get(2); + assertEquals("cardiff", cardiffPivotField.getValue()); + assertEquals(3, cardiffPivotField.getCount()); + + PivotField polecatPivotField = cardiffPivotField.getPivot().get(0); + assertEquals("polecat", polecatPivotField.getValue()); + assertEquals(3, polecatPivotField.getCount()); + + FieldStatsInfo cardiffPolecatStatsInfo = polecatPivotField.getFieldStatsInfo().get("avg_price"); + assertEquals("aaa", cardiffPolecatStatsInfo.getMin()); + assertEquals("bbb", cardiffPolecatStatsInfo.getMax()); + assertEquals(3, (long) cardiffPolecatStatsInfo.getCount()); + assertEquals(0, (long) cardiffPolecatStatsInfo.getMissing()); + + PivotField krakowPivotField = placePivots.get(3); + assertEquals("krakow", krakowPivotField.getValue()); + 
assertEquals(3, krakowPivotField.getCount()); + + PivotField fujitsuPivotField = krakowPivotField.getPivot().get(3); + assertEquals("fujitsu", fujitsuPivotField.getValue()); + assertEquals(1, fujitsuPivotField.getCount()); + + FieldStatsInfo krakowFujitsuStatsInfo = fujitsuPivotField.getFieldStatsInfo().get("avg_price"); + assertEquals("aaa", krakowFujitsuStatsInfo.getMin()); + assertEquals("aaa", krakowFujitsuStatsInfo.getMax()); + assertEquals(1, (long) krakowFujitsuStatsInfo.getCount()); + assertEquals(0, (long) krakowFujitsuStatsInfo.getMissing()); + } + +} diff --git a/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotSmallTest.java b/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotSmallTest.java index 15a6f3d3217..1407a80fa42 100644 --- a/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotSmallTest.java +++ b/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotSmallTest.java @@ -20,9 +20,11 @@ package org.apache.solr.handler.component; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; +import java.util.Date; import java.util.List; import org.apache.solr.BaseDistributedSearchTestCase; +import org.apache.solr.client.solrj.response.FieldStatsInfo; import org.apache.solr.client.solrj.response.PivotField; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.common.params.FacetParams; @@ -46,20 +48,22 @@ public class DistributedFacetPivotSmallTest extends BaseDistributedSearchTestCas // NOTE: we use the literal (4 character) string "null" as a company name // to help ensure there isn't any bugs where the literal string is treated as if it // were a true NULL value. 
- index(id, 19, "place_t", "cardiff dublin", "company_t", "microsoft polecat"); - index(id, 20, "place_t", "dublin", "company_t", "polecat microsoft null"); + index(id, 19, "place_t", "cardiff dublin", "company_t", "microsoft polecat", "price_ti", "15"); + index(id, 20, "place_t", "dublin", "company_t", "polecat microsoft null", "price_ti", "19", + // this is the only doc to have solo_* fields, therefore only 1 shard has them + // TODO: add enum field - blocked by SOLR-6682 + "solo_i", 42, "solo_s", "lonely", "solo_dt", "1976-03-06T01:23:45Z"); index(id, 21, "place_t", "london la dublin", "company_t", - "microsoft fujitsu null polecat"); + "microsoft fujitsu null polecat", "price_ti", "29"); index(id, 22, "place_t", "krakow london cardiff", "company_t", - "polecat null bbc"); - index(id, 23, "place_t", "london", "company_t", ""); + "polecat null bbc", "price_ti", "39"); + index(id, 23, "place_t", "london", "company_t", "", "price_ti", "29"); index(id, 24, "place_t", "la", "company_t", ""); - index(id, 25, "company_t", "microsoft polecat null fujitsu null bbc"); + index(id, 25, "company_t", "microsoft polecat null fujitsu null bbc", "price_ti", "59"); index(id, 26, "place_t", "krakow", "company_t", "null"); - index(id, 27, "place_t", "krakow cardiff dublin london la", "company_t", - "null microsoft polecat bbc fujitsu"); - index(id, 28, "place_t", "cork", "company_t", - "fujitsu rte"); + index(id, 27, "place_t", "krakow cardiff dublin london la", + "company_t", "null microsoft polecat bbc fujitsu"); + index(id, 28, "place_t", "cork", "company_t", "fujitsu rte"); commit(); handle.clear(); @@ -332,6 +336,76 @@ public class DistributedFacetPivotSmallTest extends BaseDistributedSearchTestCas throw new AssertionError(ae.getMessage() + " <== " + p.toString(), ae); } } + + doTestDeepPivotStats(); + + doTestPivotStatsFromOneShard(); + } + + private void doTestDeepPivotStats() throws Exception { + SolrParams params = params("q", "*:*", "rows", "0", + "facet", "true", 
"stats", "true", + "facet.pivot", "{!stats=s1}place_t,company_t", + "stats.field", "{!key=avg_price tag=s1}price_ti"); + QueryResponse rsp = query(params); + + List placePivots = rsp.getFacetPivot().get("place_t,company_t"); + + PivotField dublinPivotField = placePivots.get(0); + assertEquals("dublin", dublinPivotField.getValue()); + assertEquals(4, dublinPivotField.getCount()); + + PivotField microsoftPivotField = dublinPivotField.getPivot().get(0); + assertEquals("microsoft", microsoftPivotField.getValue()); + assertEquals(4, microsoftPivotField.getCount()); + + FieldStatsInfo dublinMicrosoftStatsInfo = microsoftPivotField.getFieldStatsInfo().get("avg_price"); + assertEquals(15.0, dublinMicrosoftStatsInfo.getMin()); + assertEquals(29.0, dublinMicrosoftStatsInfo.getMax()); + assertEquals(3, (long) dublinMicrosoftStatsInfo.getCount()); + assertEquals(1, (long) dublinMicrosoftStatsInfo.getMissing()); + assertEquals(63.0, dublinMicrosoftStatsInfo.getSum()); + assertEquals(1427.0, dublinMicrosoftStatsInfo.getSumOfSquares(), 0.1E-7); + assertEquals(21.0, (double) dublinMicrosoftStatsInfo.getMean(), 0.1E-7); + assertEquals(7.211102550927978, dublinMicrosoftStatsInfo.getStddev(), 0.1E-7); + + + PivotField cardiffPivotField = placePivots.get(2); + assertEquals("cardiff", cardiffPivotField.getValue()); + assertEquals(3, cardiffPivotField.getCount()); + + PivotField polecatPivotField = cardiffPivotField.getPivot().get(0); + assertEquals("polecat", polecatPivotField.getValue()); + assertEquals(3, polecatPivotField.getCount()); + + FieldStatsInfo cardiffPolecatStatsInfo = polecatPivotField.getFieldStatsInfo().get("avg_price"); + assertEquals(15.0, cardiffPolecatStatsInfo.getMin()); + assertEquals(39.0, cardiffPolecatStatsInfo.getMax()); + assertEquals(2, (long) cardiffPolecatStatsInfo.getCount()); + assertEquals(1, (long) cardiffPolecatStatsInfo.getMissing()); + assertEquals(54.0, cardiffPolecatStatsInfo.getSum()); + assertEquals(1746.0, 
cardiffPolecatStatsInfo.getSumOfSquares(), 0.1E-7); + assertEquals(27.0, (double) cardiffPolecatStatsInfo.getMean(), 0.1E-7); + assertEquals(16.97056274847714, cardiffPolecatStatsInfo.getStddev(), 0.1E-7); + + + PivotField krakowPivotField = placePivots.get(3); + assertEquals("krakow", krakowPivotField.getValue()); + assertEquals(3, krakowPivotField.getCount()); + + PivotField fujitsuPivotField = krakowPivotField.getPivot().get(3); + assertEquals("fujitsu", fujitsuPivotField.getValue()); + assertEquals(1, fujitsuPivotField.getCount()); + + FieldStatsInfo krakowFujitsuStatsInfo = fujitsuPivotField.getFieldStatsInfo().get("avg_price"); + assertEquals(null, krakowFujitsuStatsInfo.getMin()); + assertEquals(null, krakowFujitsuStatsInfo.getMax()); + assertEquals(0, (long) krakowFujitsuStatsInfo.getCount()); + assertEquals(1, (long) krakowFujitsuStatsInfo.getMissing()); + assertEquals(0.0, krakowFujitsuStatsInfo.getSum()); + assertEquals(0.0, krakowFujitsuStatsInfo.getSumOfSquares(), 0.1E-7); + assertEquals(Double.NaN, (double) krakowFujitsuStatsInfo.getMean(), 0.1E-7); + assertEquals(0.0, krakowFujitsuStatsInfo.getStddev(), 0.1E-7); } // Useful to check for errors, orders lists and does toString() equality check @@ -351,6 +425,46 @@ public class DistributedFacetPivotSmallTest extends BaseDistributedSearchTestCas } assertEquals(expectedPlacePivots.toString(), placePivots.toString()); } + + /** + * sanity check the stat values nested under a pivot when at least one shard + * has nothing but missing values for the stat + */ + private void doTestPivotStatsFromOneShard() throws Exception { + SolrParams params = params("q", "*:*", "rows", "0", + "facet", "true", "stats", "true", + "facet.pivot", "{!stats=s1}place_t,company_t", + "stats.field", "{!tag=s1}solo_i", + "stats.field", "{!tag=s1}solo_s", + "stats.field", "{!tag=s1}solo_dt"); + + QueryResponse rsp = query(params); + + List placePivots = rsp.getFacetPivot().get("place_t,company_t"); + + PivotField placePivot = 
placePivots.get(0); + assertEquals("dublin", placePivot.getValue()); + assertEquals(4, placePivot.getCount()); + + PivotField companyPivot = placePivot.getPivot().get(2); + assertEquals("null", companyPivot.getValue()); + assertEquals(3, companyPivot.getCount()); + + for (PivotField pf : new PivotField[] { placePivot, companyPivot }) { + assertThereCanBeOnlyOne(pf, pf.getFieldStatsInfo().get("solo_s"), "lonely"); + + assertThereCanBeOnlyOne(pf, pf.getFieldStatsInfo().get("solo_i"), 42.0D); + assertEquals(pf.getField()+":"+pf.getValue()+": int mean", + 42.0D, pf.getFieldStatsInfo().get("solo_i").getMean()); + + Object expected = new Date(194923425000L); // 1976-03-06T01:23:45Z + assertThereCanBeOnlyOne(pf, pf.getFieldStatsInfo().get("solo_dt"), expected); + assertEquals(pf.getField()+":"+pf.getValue()+": date mean", + expected, pf.getFieldStatsInfo().get("solo_dt").getMean()); + + // TODO: add enum field asserts - blocked by SOLR-6682 + } + } private void testCountSorting(List pivots) { Integer lastCount = null; @@ -365,12 +479,27 @@ public class DistributedFacetPivotSmallTest extends BaseDistributedSearchTestCas } } + /** + * given a PivotField, a FieldStatsInfo, and a value; asserts that: + *
<ul>
+ * <li>stat count == 1</li>
+ * <li>stat missing == pivot count - 1</li>
+ * <li>stat min == stat max == value</li>
+ * </ul>
+ */ + private void assertThereCanBeOnlyOne(PivotField pf, FieldStatsInfo stats, Object val) { + String msg = pf.getField() + ":" + pf.getValue(); + assertEquals(msg + " stats count", 1L, (long) stats.getCount()); + assertEquals(msg + " stats missing", pf.getCount()-1L, (long) stats.getMissing()); + assertEquals(msg + " stats min", val, stats.getMin()); + assertEquals(msg + " stats max", val, stats.getMax()); + } + public static class ComparablePivotField extends PivotField { - public ComparablePivotField(String f, Object v, int count, - List pivot) { - super(f,v,count,pivot); + public ComparablePivotField(String f, Object v, int count, List pivot) { + super(f,v,count,pivot, null); } @Override diff --git a/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotWhiteBoxTest.java b/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotWhiteBoxTest.java new file mode 100644 index 00000000000..d17af16e13e --- /dev/null +++ b/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotWhiteBoxTest.java @@ -0,0 +1,138 @@ +package org.apache.solr.handler.component; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.solr.BaseDistributedSearchTestCase; +import org.apache.solr.client.solrj.response.PivotField; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.common.params.SolrParams; + +import java.util.List; + +public class DistributedFacetPivotWhiteBoxTest extends BaseDistributedSearchTestCase { + + public DistributedFacetPivotWhiteBoxTest() { + this.fixShardCount = true; + this.shardCount = 4; + } + + @Override + public void doTest() throws Exception { + + del("*:*"); + + // NOTE: we use the literal (4 character) string "null" as a company name + // to help ensure there isn't any bugs where the literal string is treated as if it + // were a true NULL value. + index(id, 19, "place_t", "cardiff dublin", "company_t", "microsoft polecat", "price_ti", "15"); + index(id, 20, "place_t", "dublin", "company_t", "polecat microsoft null", "price_ti", "19", + // this is the only doc to have solo_* fields, therefore only 1 shard has them + // TODO: add enum field - blocked by SOLR-6682 + "solo_i", 42, "solo_s", "lonely", "solo_dt", "1976-03-06T01:23:45Z"); + index(id, 21, "place_t", "krakow london la dublin", "company_t", + "microsoft fujitsu null polecat", "price_ti", "29"); + index(id, 22, "place_t", "krakow london cardiff", "company_t", + "polecat null bbc", "price_ti", "39"); + index(id, 23, "place_t", "krakow london", "company_t", "", "price_ti", "29"); + index(id, 24, "place_t", "krakow la", "company_t", ""); + index(id, 25, "company_t", "microsoft polecat null fujitsu null bbc", "price_ti", "59"); + index(id, 26, "place_t", "krakow", "company_t", "null"); + index(id, 27, "place_t", "krakow cardiff dublin london la", + "company_t", "null microsoft polecat bbc fujitsu"); + index(id, 28, "place_t", "krakow cork", "company_t", "fujitsu rte"); + commit(); + + handle.clear(); + handle.put("QTime", SKIPVAL); + handle.put("timestamp", SKIPVAL); + handle.put("maxScore", 
SKIPVAL); + + doShardTestTopStats(); + doTestRefinementRequest(); + } + + /** + * recreates the initial request to a shard in a distributed query + * confirming that both top level stats, and per-pivot stats are returned. + */ + private void doShardTestTopStats() throws Exception { + + SolrParams params = params("facet", "true", + "q", "*:*", + // "wt", "javabin", + "facet.pivot", "{!stats=s1}place_t,company_t", + // "version", "2", + "start", "0", "rows", "0", + "fsv", "true", + "fl", "id,score", + "stats", "true", + "stats.field", "{!key=avg_price tag=s1}price_ti", + "f.place_t.facet.limit", "160", + "f.place_t.facet.pivot.mincount", "0", + "f.company_t.facet.limit", "160", + "f.company_t.facet.pivot.mincount", "0", + "isShard", "true", "distrib", "false"); + QueryResponse rsp = queryServer(new ModifiableSolrParams(params)); + + assertNotNull("initial shard request should include non-null top level stats", + rsp.getFieldStatsInfo()); + assertFalse("initial shard request should include top level stats", + rsp.getFieldStatsInfo().isEmpty()); + + List placePivots = rsp.getFacetPivot().get("place_t,company_t"); + for (PivotField pivotField : placePivots) { + assertFalse("pivot stats should not be empty in initial request", + pivotField.getFieldStatsInfo().isEmpty()); + } + } + + /** + * recreates a pivot refinement request to a shard in a distributed query + * confirming that the per-pivot stats are returned, but not the top level stats + * because they shouldn't be overcounted. 
+ */ + private void doTestRefinementRequest() throws Exception { + SolrParams params = params("facet.missing", "true", + "facet", "true", + "facet.limit", "4", + "distrib", "false", + // "wt", "javabin", + // "version", "2", + "rows", "0", + "facet.sort", "index", + "fpt0", "~krakow", + "facet.pivot.mincount", "-1", + "isShard", "true", + "facet.pivot", "{!fpt=0 stats=st1}place_t,company_t", + "stats", "false", + "stats.field", "{!key=sk1 tag=st1,st2}price_ti"); + QueryResponse rsp = clients.get(0).query(new ModifiableSolrParams(params)); + + assertNull("pivot refine request should *NOT* include top level stats", + rsp.getFieldStatsInfo()); + + List placePivots = rsp.getFacetPivot().get("place_t,company_t"); + + assertEquals("asked to refine exactly one place", + 1, placePivots.size()); + assertFalse("pivot stats should not be empty in refinement request", + placePivots.get(0).getFieldStatsInfo().isEmpty()); + + } +} diff --git a/solr/core/src/test/org/apache/solr/handler/component/FacetPivotSmallTest.java b/solr/core/src/test/org/apache/solr/handler/component/FacetPivotSmallTest.java new file mode 100644 index 00000000000..0b56707195e --- /dev/null +++ b/solr/core/src/test/org/apache/solr/handler/component/FacetPivotSmallTest.java @@ -0,0 +1,504 @@ +package org.apache.solr.handler.component; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.common.params.FacetParams; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.request.SolrQueryRequest; +import org.junit.BeforeClass; + +/** + * Single node testing of pivot facets + */ +public class FacetPivotSmallTest extends SolrTestCaseJ4 { + + @BeforeClass + public static void beforeClass() throws Exception { + initCore("solrconfig.xml", "schema11.xml"); + } + + @Override + public void setUp() throws Exception { + super.setUp(); + clearIndex(); + assertU(commit()); + lrf = h.getRequestFactory("standard", 0, 20); + } + + /** + * we don't support comma's in the "stats" local param ... yet: SOLR-6663 + */ + public void testStatsTagHasComma() throws Exception { + + if (random().nextBoolean()) { + // behavior should be same either way + index(); + } + + assertQEx("Can't use multiple tags in stats local param until SOLR-6663 is decided", + req("q","*:*", "facet", "true", + "stats", "true", + "stats.field", "{!tag=foo}price_ti", + "stats.field", "{!tag=bar}id", + "facet.pivot", "{!stats=foo,bar}place_t,company_t"), + 400); + } + + /** + * if bogus stats are requested, the pivots should still work + */ + public void testBogusStatsTag() throws Exception { + index(); + + assertQ(req("q","*:*", "facet", "true", + "facet.pivot", "{!stats=bogus}place_t,company_t") + // check we still get pivots... + , "//arr[@name='place_t,company_t']/lst[str[@name='value'][.='dublin']]" + // .. 
but sanity check we don't have any stats + , "count(//arr[@name='place_t,company_t']/lst[str[@name='value'][.='dublin']]/lst[@name='stats'])=0"); + } + + public void testPivotFacetUnsorted() throws Exception { + index(); + + final ModifiableSolrParams params = new ModifiableSolrParams(); + params.add("q", "*:*"); + params.add("facet", "true"); + params.add("facet.pivot", "place_t,company_t"); + + SolrQueryRequest req = req(params); + final String facetPivotPrefix = "//lst[@name='facet_counts']/lst[@name='facet_pivot']/arr[@name='place_t,company_t']/lst"; + assertQ(req, facetPivotPrefix + "/str[@name='field'][.='place_t']", + // dublin + facetPivotPrefix + "[str[@name='value'][.='dublin']]/arr[@name='pivot']/lst[1]/str[@name='value'][.='microsoft']", + facetPivotPrefix + "[str[@name='value'][.='dublin']]/arr[@name='pivot']/lst[1]/int[@name='count'][.=4]", + facetPivotPrefix + "[str[@name='value'][.='dublin']]/arr[@name='pivot']/lst[2]/str[@name='value'][.='polecat']", + facetPivotPrefix + "[str[@name='value'][.='dublin']]/arr[@name='pivot']/lst[2]/int[@name='count'][.=4]", + facetPivotPrefix + "[str[@name='value'][.='dublin']]/arr[@name='pivot']/lst[3]/str[@name='value'][.='null']", + facetPivotPrefix + "[str[@name='value'][.='dublin']]/arr[@name='pivot']/lst[3]/int[@name='count'][.=3]", + facetPivotPrefix + "[str[@name='value'][.='dublin']]/arr[@name='pivot']/lst[4]/str[@name='value'][.='fujitsu']", + facetPivotPrefix + "[str[@name='value'][.='dublin']]/arr[@name='pivot']/lst[4]/int[@name='count'][.=2]", + facetPivotPrefix + "[str[@name='value'][.='dublin']]/arr[@name='pivot']/lst[5]/str[@name='value'][.='bbc']", + facetPivotPrefix + "[str[@name='value'][.='dublin']]/arr[@name='pivot']/lst[5]/int[@name='count'][.=1]", + // london + facetPivotPrefix + "[str[@name='value'][.='london']]/arr[@name='pivot']/lst[1]/str[@name='value'][.='null']", + facetPivotPrefix + "[str[@name='value'][.='london']]/arr[@name='pivot']/lst[1]/int[@name='count'][.=3]", + facetPivotPrefix + 
"[str[@name='value'][.='london']]/arr[@name='pivot']/lst[2]/str[@name='value'][.='polecat']", + facetPivotPrefix + "[str[@name='value'][.='london']]/arr[@name='pivot']/lst[2]/int[@name='count'][.=3]", + facetPivotPrefix + "[str[@name='value'][.='london']]/arr[@name='pivot']/lst[3]/str[@name='value'][.='bbc']", + facetPivotPrefix + "[str[@name='value'][.='london']]/arr[@name='pivot']/lst[3]/int[@name='count'][.=2]", + facetPivotPrefix + "[str[@name='value'][.='london']]/arr[@name='pivot']/lst[4]/str[@name='value'][.='fujitsu']", + facetPivotPrefix + "[str[@name='value'][.='london']]/arr[@name='pivot']/lst[4]/int[@name='count'][.=2]", + facetPivotPrefix + "[str[@name='value'][.='london']]/arr[@name='pivot']/lst[5]/str[@name='value'][.='microsoft']", + facetPivotPrefix + "[str[@name='value'][.='london']]/arr[@name='pivot']/lst[5]/int[@name='count'][.=2]", + // cardiff + facetPivotPrefix + "[str[@name='value'][.='cardiff']]/arr[@name='pivot']/lst[1]/str[@name='value'][.='polecat']", + facetPivotPrefix + "[str[@name='value'][.='cardiff']]/arr[@name='pivot']/lst[1]/int[@name='count'][.=3]", + facetPivotPrefix + "[str[@name='value'][.='cardiff']]/arr[@name='pivot']/lst[2]/str[@name='value'][.='bbc']", + facetPivotPrefix + "[str[@name='value'][.='cardiff']]/arr[@name='pivot']/lst[2]/int[@name='count'][.=2]", + facetPivotPrefix + "[str[@name='value'][.='cardiff']]/arr[@name='pivot']/lst[3]/str[@name='value'][.='microsoft']", + facetPivotPrefix + "[str[@name='value'][.='cardiff']]/arr[@name='pivot']/lst[3]/int[@name='count'][.=2]", + facetPivotPrefix + "[str[@name='value'][.='cardiff']]/arr[@name='pivot']/lst[4]/str[@name='value'][.='null']", + facetPivotPrefix + "[str[@name='value'][.='cardiff']]/arr[@name='pivot']/lst[4]/int[@name='count'][.=2]", + facetPivotPrefix + "[str[@name='value'][.='cardiff']]/arr[@name='pivot']/lst[5]/str[@name='value'][.='fujitsu']", + facetPivotPrefix + "[str[@name='value'][.='cardiff']]/arr[@name='pivot']/lst[5]/int[@name='count'][.=1]", + // 
krakow + facetPivotPrefix + "[str[@name='value'][.='krakow']]/arr[@name='pivot']/lst[1]/str[@name='value'][.='null']", + facetPivotPrefix + "[str[@name='value'][.='krakow']]/arr[@name='pivot']/lst[1]/int[@name='count'][.=3]", + facetPivotPrefix + "[str[@name='value'][.='krakow']]/arr[@name='pivot']/lst[2]/str[@name='value'][.='bbc']", + facetPivotPrefix + "[str[@name='value'][.='krakow']]/arr[@name='pivot']/lst[2]/int[@name='count'][.=2]", + facetPivotPrefix + "[str[@name='value'][.='krakow']]/arr[@name='pivot']/lst[3]/str[@name='value'][.='polecat']", + facetPivotPrefix + "[str[@name='value'][.='krakow']]/arr[@name='pivot']/lst[3]/int[@name='count'][.=2]", + facetPivotPrefix + "[str[@name='value'][.='krakow']]/arr[@name='pivot']/lst[4]/str[@name='value'][.='fujitsu']", + facetPivotPrefix + "[str[@name='value'][.='krakow']]/arr[@name='pivot']/lst[4]/int[@name='count'][.=1]", + facetPivotPrefix + "[str[@name='value'][.='krakow']]/arr[@name='pivot']/lst[5]/str[@name='value'][.='microsoft']", + facetPivotPrefix + "[str[@name='value'][.='krakow']]/arr[@name='pivot']/lst[5]/int[@name='count'][.=1]", + + // la + facetPivotPrefix + "[str[@name='value'][.='la']]/arr[@name='pivot']/lst[1]/str[@name='value'][.='fujitsu']", + facetPivotPrefix + "[str[@name='value'][.='la']]/arr[@name='pivot']/lst[1]/int[@name='count'][.=2]", + facetPivotPrefix + "[str[@name='value'][.='la']]/arr[@name='pivot']/lst[2]/str[@name='value'][.='microsoft']", + facetPivotPrefix + "[str[@name='value'][.='la']]/arr[@name='pivot']/lst[2]/int[@name='count'][.=2]", + facetPivotPrefix + "[str[@name='value'][.='la']]/arr[@name='pivot']/lst[3]/str[@name='value'][.='null']", + facetPivotPrefix + "[str[@name='value'][.='la']]/arr[@name='pivot']/lst[3]/int[@name='count'][.=2]", + facetPivotPrefix + "[str[@name='value'][.='la']]/arr[@name='pivot']/lst[4]/str[@name='value'][.='polecat']", + facetPivotPrefix + "[str[@name='value'][.='la']]/arr[@name='pivot']/lst[4]/int[@name='count'][.=2]", + facetPivotPrefix + 
"[str[@name='value'][.='la']]/arr[@name='pivot']/lst[5]/str[@name='value'][.='bbc']", + facetPivotPrefix + "[str[@name='value'][.='la']]/arr[@name='pivot']/lst[5]/int[@name='count'][.=1]", + // cork + facetPivotPrefix + "[str[@name='value'][.='cork']]/arr[@name='pivot']/lst[1]/str[@name='value'][.='fujitsu']", + facetPivotPrefix + "[str[@name='value'][.='cork']]/arr[@name='pivot']/lst[1]/int[@name='count'][.=1]", + facetPivotPrefix + "[str[@name='value'][.='cork']]/arr[@name='pivot']/lst[2]/str[@name='value'][.='rte']", + facetPivotPrefix + "[str[@name='value'][.='cork']]/arr[@name='pivot']/lst[2]/int[@name='count'][.=1]" + ); + } + + public void testPivotFacetStatsUnsortedTagged() throws Exception { + index(); + + final ModifiableSolrParams params = new ModifiableSolrParams(); + params.add("q", "*:*"); + params.add("facet", "true"); + params.add("facet.pivot", "{!stats=s1}place_t,company_t"); + params.add("stats", "true"); + params.add("stats.field", "{!key=avg_price tag=s1 mean=true}price_ti"); + + SolrQueryRequest req = req(params); + final String statsPrefix = "//lst[@name='facet_counts']/lst[@name='facet_pivot']/arr[@name='place_t,company_t']/lst"; + String dublinMicrosoftStats = statsPrefix + "[str[@name='value'][.='dublin']]/arr[@name='pivot']/lst[str[@name='value'][.='microsoft']]/lst[@name='stats']/lst[@name='stats_fields']/lst[@name='avg_price']"; + String cardiffPolecatStats = statsPrefix + "[str[@name='value'][.='cardiff']]/arr[@name='pivot']/lst[str[@name='value'][.='polecat']]/lst[@name='stats']/lst[@name='stats_fields']/lst[@name='avg_price']"; + String krakowFujitsuStats = statsPrefix + "[str[@name='value'][.='krakow']]/arr[@name='pivot']/lst[str[@name='value'][.='fujitsu']]/lst[@name='stats']/lst[@name='stats_fields']/lst[@name='avg_price']"; + assertQ(req, + dublinMicrosoftStats + "/double[@name='min'][.=15.0]", + dublinMicrosoftStats + "/double[@name='max'][.=29.0]", + dublinMicrosoftStats + "/long[@name='count'][.=3]", + dublinMicrosoftStats + 
"/long[@name='missing'][.=1]", + dublinMicrosoftStats + "/double[@name='sum'][.=63.0]", + dublinMicrosoftStats + "/double[@name='sumOfSquares'][.=1427.0]", + dublinMicrosoftStats + "/double[@name='mean'][.=21.0]", + dublinMicrosoftStats + "/double[@name='stddev'][.=7.211102550927978]", + + cardiffPolecatStats + "/double[@name='min'][.=15.0]", + cardiffPolecatStats + "/double[@name='max'][.=39.0]", + cardiffPolecatStats + "/long[@name='count'][.=2]", + cardiffPolecatStats + "/long[@name='missing'][.=1]", + cardiffPolecatStats + "/double[@name='sum'][.=54.0]", + cardiffPolecatStats + "/double[@name='sumOfSquares'][.=1746.0]", + cardiffPolecatStats + "/double[@name='mean'][.=27.0]", + cardiffPolecatStats + "/double[@name='stddev'][.=16.97056274847714]", + + krakowFujitsuStats + "/null[@name='min']", + krakowFujitsuStats + "/null[@name='max']", + krakowFujitsuStats + "/long[@name='count'][.=0]", + krakowFujitsuStats + "/long[@name='missing'][.=1]", + krakowFujitsuStats + "/double[@name='sum'][.=0.0]", + krakowFujitsuStats + "/double[@name='sumOfSquares'][.=0.0]", + krakowFujitsuStats + "/double[@name='mean'][.='NaN']", + krakowFujitsuStats + "/double[@name='stddev'][.=0.0]" + ); + } + + + public void testPivotFacetSortedCount() throws Exception { + index(); + + final ModifiableSolrParams params = new ModifiableSolrParams(); + params.add("q", "*:*"); + params.add("facet", "true"); + params.add("facet.pivot", "place_t,company_t"); + + // Test sorting by count + //TODO clarify why facet count active by default + // The default is count if facet.limit is greater than 0, index otherwise, but facet.limit was not defined + params.set(FacetParams.FACET_SORT, FacetParams.FACET_SORT_COUNT); + final String facetPivotPrefix = "//lst[@name='facet_counts']/lst[@name='facet_pivot']/arr[@name='place_t,company_t']/lst"; + SolrQueryRequest req = req(params); + assertQ(req, facetPivotPrefix + "/str[@name='field'][.='place_t']", + // dublin + facetPivotPrefix + 
"[str[@name='value'][.='dublin']]/arr[@name='pivot']/lst[1]/str[@name='value'][.='microsoft']", + facetPivotPrefix + "[str[@name='value'][.='dublin']]/arr[@name='pivot']/lst[1]/int[@name='count'][.=4]", + facetPivotPrefix + "[str[@name='value'][.='dublin']]/arr[@name='pivot']/lst[2]/str[@name='value'][.='polecat']", + facetPivotPrefix + "[str[@name='value'][.='dublin']]/arr[@name='pivot']/lst[2]/int[@name='count'][.=4]", + facetPivotPrefix + "[str[@name='value'][.='dublin']]/arr[@name='pivot']/lst[3]/str[@name='value'][.='null']", + facetPivotPrefix + "[str[@name='value'][.='dublin']]/arr[@name='pivot']/lst[3]/int[@name='count'][.=3]", + facetPivotPrefix + "[str[@name='value'][.='dublin']]/arr[@name='pivot']/lst[4]/str[@name='value'][.='fujitsu']", + facetPivotPrefix + "[str[@name='value'][.='dublin']]/arr[@name='pivot']/lst[4]/int[@name='count'][.=2]", + facetPivotPrefix + "[str[@name='value'][.='dublin']]/arr[@name='pivot']/lst[5]/str[@name='value'][.='bbc']", + facetPivotPrefix + "[str[@name='value'][.='dublin']]/arr[@name='pivot']/lst[5]/int[@name='count'][.=1]", + // london + facetPivotPrefix + "[str[@name='value'][.='london']]/arr[@name='pivot']/lst[1]/str[@name='value'][.='null']", + facetPivotPrefix + "[str[@name='value'][.='london']]/arr[@name='pivot']/lst[1]/int[@name='count'][.=3]", + facetPivotPrefix + "[str[@name='value'][.='london']]/arr[@name='pivot']/lst[2]/str[@name='value'][.='polecat']", + facetPivotPrefix + "[str[@name='value'][.='london']]/arr[@name='pivot']/lst[2]/int[@name='count'][.=3]", + facetPivotPrefix + "[str[@name='value'][.='london']]/arr[@name='pivot']/lst[3]/str[@name='value'][.='bbc']", + facetPivotPrefix + "[str[@name='value'][.='london']]/arr[@name='pivot']/lst[3]/int[@name='count'][.=2]", + facetPivotPrefix + "[str[@name='value'][.='london']]/arr[@name='pivot']/lst[4]/str[@name='value'][.='fujitsu']", + facetPivotPrefix + "[str[@name='value'][.='london']]/arr[@name='pivot']/lst[4]/int[@name='count'][.=2]", + facetPivotPrefix + 
"[str[@name='value'][.='london']]/arr[@name='pivot']/lst[5]/str[@name='value'][.='microsoft']", + facetPivotPrefix + "[str[@name='value'][.='london']]/arr[@name='pivot']/lst[5]/int[@name='count'][.=2]", + // cardiff + facetPivotPrefix + "[str[@name='value'][.='cardiff']]/arr[@name='pivot']/lst[1]/str[@name='value'][.='polecat']", + facetPivotPrefix + "[str[@name='value'][.='cardiff']]/arr[@name='pivot']/lst[1]/int[@name='count'][.=3]", + facetPivotPrefix + "[str[@name='value'][.='cardiff']]/arr[@name='pivot']/lst[2]/str[@name='value'][.='bbc']", + facetPivotPrefix + "[str[@name='value'][.='cardiff']]/arr[@name='pivot']/lst[2]/int[@name='count'][.=2]", + facetPivotPrefix + "[str[@name='value'][.='cardiff']]/arr[@name='pivot']/lst[3]/str[@name='value'][.='microsoft']", + facetPivotPrefix + "[str[@name='value'][.='cardiff']]/arr[@name='pivot']/lst[3]/int[@name='count'][.=2]", + facetPivotPrefix + "[str[@name='value'][.='cardiff']]/arr[@name='pivot']/lst[4]/str[@name='value'][.='null']", + facetPivotPrefix + "[str[@name='value'][.='cardiff']]/arr[@name='pivot']/lst[4]/int[@name='count'][.=2]", + facetPivotPrefix + "[str[@name='value'][.='cardiff']]/arr[@name='pivot']/lst[5]/str[@name='value'][.='fujitsu']", + facetPivotPrefix + "[str[@name='value'][.='cardiff']]/arr[@name='pivot']/lst[5]/int[@name='count'][.=1]", + // krakow + facetPivotPrefix + "[str[@name='value'][.='krakow']]/arr[@name='pivot']/lst[1]/str[@name='value'][.='null']", + facetPivotPrefix + "[str[@name='value'][.='krakow']]/arr[@name='pivot']/lst[1]/int[@name='count'][.=3]", + facetPivotPrefix + "[str[@name='value'][.='krakow']]/arr[@name='pivot']/lst[2]/str[@name='value'][.='bbc']", + facetPivotPrefix + "[str[@name='value'][.='krakow']]/arr[@name='pivot']/lst[2]/int[@name='count'][.=2]", + facetPivotPrefix + "[str[@name='value'][.='krakow']]/arr[@name='pivot']/lst[3]/str[@name='value'][.='polecat']", + facetPivotPrefix + 
"[str[@name='value'][.='krakow']]/arr[@name='pivot']/lst[3]/int[@name='count'][.=2]", + facetPivotPrefix + "[str[@name='value'][.='krakow']]/arr[@name='pivot']/lst[4]/str[@name='value'][.='fujitsu']", + facetPivotPrefix + "[str[@name='value'][.='krakow']]/arr[@name='pivot']/lst[4]/int[@name='count'][.=1]", + facetPivotPrefix + "[str[@name='value'][.='krakow']]/arr[@name='pivot']/lst[5]/str[@name='value'][.='microsoft']", + facetPivotPrefix + "[str[@name='value'][.='krakow']]/arr[@name='pivot']/lst[5]/int[@name='count'][.=1]", + + // la + facetPivotPrefix + "[str[@name='value'][.='la']]/arr[@name='pivot']/lst[1]/str[@name='value'][.='fujitsu']", + facetPivotPrefix + "[str[@name='value'][.='la']]/arr[@name='pivot']/lst[1]/int[@name='count'][.=2]", + facetPivotPrefix + "[str[@name='value'][.='la']]/arr[@name='pivot']/lst[2]/str[@name='value'][.='microsoft']", + facetPivotPrefix + "[str[@name='value'][.='la']]/arr[@name='pivot']/lst[2]/int[@name='count'][.=2]", + facetPivotPrefix + "[str[@name='value'][.='la']]/arr[@name='pivot']/lst[3]/str[@name='value'][.='null']", + facetPivotPrefix + "[str[@name='value'][.='la']]/arr[@name='pivot']/lst[3]/int[@name='count'][.=2]", + facetPivotPrefix + "[str[@name='value'][.='la']]/arr[@name='pivot']/lst[4]/str[@name='value'][.='polecat']", + facetPivotPrefix + "[str[@name='value'][.='la']]/arr[@name='pivot']/lst[4]/int[@name='count'][.=2]", + facetPivotPrefix + "[str[@name='value'][.='la']]/arr[@name='pivot']/lst[5]/str[@name='value'][.='bbc']", + facetPivotPrefix + "[str[@name='value'][.='la']]/arr[@name='pivot']/lst[5]/int[@name='count'][.=1]", + // cork + facetPivotPrefix + "[str[@name='value'][.='cork']]/arr[@name='pivot']/lst[1]/str[@name='value'][.='fujitsu']", + facetPivotPrefix + "[str[@name='value'][.='cork']]/arr[@name='pivot']/lst[1]/int[@name='count'][.=1]", + facetPivotPrefix + "[str[@name='value'][.='cork']]/arr[@name='pivot']/lst[2]/str[@name='value'][.='rte']", + facetPivotPrefix + 
"[str[@name='value'][.='cork']]/arr[@name='pivot']/lst[2]/int[@name='count'][.=1]" + ); + + + } + + + public void testPivotFacetLimit() throws Exception { + index(); + + final ModifiableSolrParams params = new ModifiableSolrParams(); + params.add("q", "*:*"); + params.add("facet", "true"); + params.add("facet.pivot", "place_t,company_t"); + + params.set(FacetParams.FACET_SORT, FacetParams.FACET_SORT_COUNT); + params.set(FacetParams.FACET_LIMIT, 2); + + final String facetPivotPrefix = "//lst[@name='facet_counts']/lst[@name='facet_pivot']/arr[@name='place_t,company_t']/lst"; + SolrQueryRequest req = req(params); + assertQ(req, facetPivotPrefix + "/str[@name='field'][.='place_t']", + // dublin + facetPivotPrefix + "[str[@name='value'][.='dublin']]/arr[@name='pivot']/lst[1]/str[@name='value'][.='microsoft']", + facetPivotPrefix + "[str[@name='value'][.='dublin']]/arr[@name='pivot']/lst[1]/int[@name='count'][.=4]", + facetPivotPrefix + "[str[@name='value'][.='dublin']]/arr[@name='pivot']/lst[2]/str[@name='value'][.='polecat']", + facetPivotPrefix + "[str[@name='value'][.='dublin']]/arr[@name='pivot']/lst[2]/int[@name='count'][.=4]", + // london + facetPivotPrefix + "[str[@name='value'][.='london']]/arr[@name='pivot']/lst[1]/str[@name='value'][.='null']", + facetPivotPrefix + "[str[@name='value'][.='london']]/arr[@name='pivot']/lst[1]/int[@name='count'][.=3]", + facetPivotPrefix + "[str[@name='value'][.='london']]/arr[@name='pivot']/lst[2]/str[@name='value'][.='polecat']", + facetPivotPrefix + "[str[@name='value'][.='london']]/arr[@name='pivot']/lst[2]/int[@name='count'][.=3]" + ); + } + + public void testPivotIndividualFacetLimit() throws Exception { + index(); + + final ModifiableSolrParams params = new ModifiableSolrParams(); + params.add("q", "*:*"); + params.add("facet", "true"); + params.add("facet.pivot", "place_t,company_t"); + + params.set(FacetParams.FACET_SORT, FacetParams.FACET_SORT_COUNT); + params.set("f.place_t." 
+ FacetParams.FACET_LIMIT, 1); + params.set("f.company_t." + FacetParams.FACET_LIMIT, 4); + + final String facetPivotPrefix = "//lst[@name='facet_counts']/lst[@name='facet_pivot']/arr[@name='place_t,company_t']/lst"; + SolrQueryRequest req = req(params); + assertQ(req, facetPivotPrefix + "/str[@name='field'][.='place_t']", + // dublin + facetPivotPrefix + "[str[@name='value'][.='dublin']]/arr[@name='pivot']/lst[1]/str[@name='value'][.='microsoft']", + facetPivotPrefix + "[str[@name='value'][.='dublin']]/arr[@name='pivot']/lst[1]/int[@name='count'][.=4]", + facetPivotPrefix + "[str[@name='value'][.='dublin']]/arr[@name='pivot']/lst[2]/str[@name='value'][.='polecat']", + facetPivotPrefix + "[str[@name='value'][.='dublin']]/arr[@name='pivot']/lst[2]/int[@name='count'][.=4]", + facetPivotPrefix + "[str[@name='value'][.='dublin']]/arr[@name='pivot']/lst[3]/str[@name='value'][.='null']", + facetPivotPrefix + "[str[@name='value'][.='dublin']]/arr[@name='pivot']/lst[3]/int[@name='count'][.=3]", + facetPivotPrefix + "[str[@name='value'][.='dublin']]/arr[@name='pivot']/lst[4]/str[@name='value'][.='fujitsu']", + facetPivotPrefix + "[str[@name='value'][.='dublin']]/arr[@name='pivot']/lst[4]/int[@name='count'][.=2]" + ); + } + + public void testPivotFacetMissing() throws Exception { + // Test facet.missing=true with diff sorts + index(); + indexMissing(); + + SolrParams missingA = params("q", "*:*", + "rows", "0", + "facet", "true", + "facet.pivot", "place_t,company_t", + // default facet.sort + FacetParams.FACET_MISSING, "true"); + + final String facetPivotPrefix = "//lst[@name='facet_counts']/lst[@name='facet_pivot']/arr[@name='place_t,company_t']/lst"; + SolrQueryRequest req = req(missingA); + assertQ(req, facetPivotPrefix + "/arr[@name='pivot'][count(.) 
> 0]", // not enough values for pivot + facetPivotPrefix + "[7]/null[@name='value'][.='']", // not the missing place value + facetPivotPrefix + "[7]/int[@name='count'][.=2]", // wrong missing place count + facetPivotPrefix + "[7]/arr[@name='pivot'][count(.) > 0]", // not enough sub-pivots for missing place + facetPivotPrefix + "[7]/arr[@name='pivot']/lst[6]/null[@name='value'][.='']", // not the missing company value + facetPivotPrefix + "[7]/arr[@name='pivot']/lst[6]/int[@name='count'][.=1]", // wrong missing company count + facetPivotPrefix + "[7]/arr[@name='pivot']/lst[6][not(arr[@name='pivot'])]" // company shouldn't have sub-pivots + ); + + SolrParams missingB = SolrParams.wrapDefaults(missingA, + params(FacetParams.FACET_LIMIT, "4", + "facet.sort", "index")); + + + req = req(missingB); + assertQ(req, facetPivotPrefix + "/arr[@name='pivot'][count(.) > 0]", // not enough values for pivot + facetPivotPrefix + "[5]/null[@name='value'][.='']", // not the missing place value + facetPivotPrefix + "[5]/int[@name='count'][.=2]", // wrong missing place count + facetPivotPrefix + "[5]/arr[@name='pivot'][count(.) 
> 0]", // not enough sub-pivots for missing place + facetPivotPrefix + "[5]/arr[@name='pivot']/lst[5]/null[@name='value'][.='']", // not the missing company value + facetPivotPrefix + "[5]/arr[@name='pivot']/lst[5]/int[@name='count'][.=1]", // wrong missing company count + facetPivotPrefix + "[5]/arr[@name='pivot']/lst[5][not(arr[@name='pivot'])]" // company shouldn't have sub-pivots + ); + } + + public void testPivotFacetIndexSortMincountAndLimit() throws Exception { + // sort=index + mincount + limit + index(); + indexMissing(); + + for (SolrParams variableParams : new SolrParams[]{ + // we should get the same results regardless of overrequest + params(), + params()}) { + SolrParams p = SolrParams.wrapDefaults(params("q", "*:*", + "rows", "0", + "facet", "true", + "facet.pivot", "company_t", + "facet.sort", "index", + "facet.pivot.mincount", "4", + "facet.limit", "4"), + variableParams); + final String facetPivotPrefix = "//lst[@name='facet_counts']/lst[@name='facet_pivot']/arr[@name='company_t']"; + SolrQueryRequest req = req(p); + assertQ(req, facetPivotPrefix + "[count(./lst) = 4]", // not enough values for pivot + facetPivotPrefix + "/lst[1]/str[@name='value'][.='fujitsu']", + facetPivotPrefix + "/lst[1]/int[@name='count'][.=4]", + facetPivotPrefix + "/lst[2]/str[@name='value'][.='microsoft']", + facetPivotPrefix + "/lst[2]/int[@name='count'][.=5]", + facetPivotPrefix + "/lst[3]/str[@name='value'][.='null']", + facetPivotPrefix + "/lst[3]/int[@name='count'][.=6]", + facetPivotPrefix + "/lst[4]/str[@name='value'][.='polecat']", + facetPivotPrefix + "/lst[4]/int[@name='count'][.=6]" + ); + } + } + + public void testPivotFacetIndexSortMincountLimitAndOffset() throws Exception { + // sort=index + mincount + limit + offset + index(); + indexMissing(); + + for (SolrParams variableParams : new SolrParams[]{ + // we should get the same results regardless of overrequest + params(), + params()}) { + SolrParams p = SolrParams.wrapDefaults(params("q", "*:*", + "rows", 
"0", + "facet", "true", + "facet.pivot", "company_t", + "facet.sort", "index", + "facet.pivot.mincount", "4", + "facet.offset", "1", + "facet.limit", "4"), + variableParams); + final String facetPivotPrefix = "//lst[@name='facet_counts']/lst[@name='facet_pivot']/arr[@name='company_t']"; + SolrQueryRequest req = req(p); + assertQ(req, facetPivotPrefix + "[count(./lst) = 3]", // asked for 4, but not enough meet the mincount + facetPivotPrefix + "/lst[1]/str[@name='value'][.='microsoft']", + facetPivotPrefix + "/lst[1]/int[@name='count'][.=5]", + facetPivotPrefix + "/lst[2]/str[@name='value'][.='null']", + facetPivotPrefix + "/lst[2]/int[@name='count'][.=6]", + facetPivotPrefix + "/lst[3]/str[@name='value'][.='polecat']", + facetPivotPrefix + "/lst[3]/int[@name='count'][.=6]" + ); + } + } + + + public void testPivotFacetIndexSortMincountLimitAndOffsetPermutations() throws Exception { + // sort=index + mincount + limit + offset (more permutations) + index(); + indexMissing(); + + for (SolrParams variableParams : new SolrParams[]{ + // all of these combinations should result in the same first value + params("facet.pivot.mincount", "4", + "facet.offset", "2"), + params("facet.pivot.mincount", "5", + "facet.offset", "1"), + params("facet.pivot.mincount", "6", + "facet.offset", "0")}) { + SolrParams p = SolrParams.wrapDefaults(params("q", "*:*", + "rows", "0", + "facet", "true", + "facet.limit", "1", + "facet.sort", "index", + "facet.overrequest.ratio", "0", + "facet.pivot", "company_t"), + variableParams); + final String facetPivotPrefix = "//lst[@name='facet_counts']/lst[@name='facet_pivot']/arr[@name='company_t']"; + SolrQueryRequest req = req(p); + assertQ(req, facetPivotPrefix + "[count(./lst) = 1]", // asked for 4, but not enough meet the mincount + facetPivotPrefix + "/lst[1]/str[@name='value'][.='null']", + facetPivotPrefix + "/lst[1]/int[@name='count'][.=6]" + ); + } + } + + private void indexMissing() { + String[] missingDoc = {"id", "777"}; + 
assertU(adoc(missingDoc)); + assertU(commit()); + } + + private void index() { + // NOTE: we use the literal (4 character) string "null" as a company name + // to help ensure there aren't any bugs where the literal string is treated as if it + // were a true NULL value. + String[] doc = {"id", "19", "place_t", "cardiff dublin", "company_t", "microsoft polecat", "price_ti", "15"}; + assertU(adoc(doc)); + String[] doc1 = {"id", "20", "place_t", "dublin", "company_t", "polecat microsoft null", "price_ti", "19"}; + assertU(adoc(doc1)); + String[] doc2 = {"id", "21", "place_t", "london la dublin", "company_t", + "microsoft fujitsu null polecat", "price_ti", "29"}; + assertU(adoc(doc2)); + String[] doc3 = {"id", "22", "place_t", "krakow london cardiff", "company_t", + "polecat null bbc", "price_ti", "39"}; + assertU(adoc(doc3)); + String[] doc4 = {"id", "23", "place_t", "london", "company_t", "", "price_ti", "29"}; + assertU(adoc(doc4)); + String[] doc5 = {"id", "24", "place_t", "la", "company_t", ""}; + assertU(adoc(doc5)); + String[] doc6 = {"id", "25", "company_t", "microsoft polecat null fujitsu null bbc", "price_ti", "59"}; + assertU(adoc(doc6)); + String[] doc7 = {"id", "26", "place_t", "krakow", "company_t", "null"}; + assertU(adoc(doc7)); + String[] doc8 = {"id", "27", "place_t", "krakow cardiff dublin london la", "company_t", + "null microsoft polecat bbc fujitsu"}; + assertU(adoc(doc8)); + String[] doc9 = {"id", "28", "place_t", "cork", "company_t", + "fujitsu rte"}; + assertU(adoc(doc9)); + assertU(commit()); + } +} diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/SolrQuery.java b/solr/solrj/src/java/org/apache/solr/client/solrj/SolrQuery.java index 19365055e0f..aa124d837aa 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/SolrQuery.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/SolrQuery.java @@ -806,6 +806,13 @@ public class SolrQuery extends ModifiableSolrParams this.add( StatsParams.STATS_FIELD, field ); } + + public
void addGetFieldStatistics( String ... field ) + { + this.set( StatsParams.STATS, true ); + this.add( StatsParams.STATS_FIELD, field ); + } + public void addStatsFieldFacets( String field, String ... facets ) { if( field == null ) { diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/response/FieldStatsInfo.java b/solr/solrj/src/java/org/apache/solr/client/solrj/response/FieldStatsInfo.java index 3c178481bbb..9685832aaf4 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/response/FieldStatsInfo.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/response/FieldStatsInfo.java @@ -180,6 +180,10 @@ public class FieldStatsInfo implements Serializable { return stddev; } + public Double getSumOfSquares() { + return sumOfSquares; + } + public Map> getFacets() { return facets; } diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/response/PivotField.java b/solr/solrj/src/java/org/apache/solr/client/solrj/response/PivotField.java index d7009154373..3b084f6bddf 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/response/PivotField.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/response/PivotField.java @@ -20,6 +20,7 @@ package org.apache.solr.client.solrj.response; import java.io.PrintStream; import java.io.Serializable; import java.util.List; +import java.util.Map; public class PivotField implements Serializable { @@ -27,13 +28,23 @@ public class PivotField implements Serializable final Object _value; final int _count; final List _pivot; - - public PivotField( String f, Object v, int count, List pivot ) + final Map _statsInfo; + + /** + * @deprecated Use {@link #PivotField(String,Object,int,List,Map)} with a null statsInfo + */ + @Deprecated + public PivotField( String f, Object v, int count, List pivot) { + this(f, v, count, pivot, null); + } + + public PivotField( String f, Object v, int count, List pivot, Map statsInfo) { _field = f; _value = v; _count = count; _pivot = pivot; + _statsInfo = statsInfo; } 
public String getField() { @@ -52,6 +63,10 @@ public class PivotField implements Serializable return _pivot; } + public Map getFieldStatsInfo() { + return _statsInfo; + } + @Override public String toString() { @@ -63,7 +78,16 @@ public class PivotField implements Serializable for( int i=0; istats:[" ); + for( FieldStatsInfo fieldStatsInfo : _statsInfo.values() ) { + out.print(fieldStatsInfo.toString()); + out.print(","); + } + out.print("]"); + } + out.println(); if( _pivot != null ) { for( PivotField p : _pivot ) { p.write( out, indent+1 ); diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/response/QueryResponse.java b/solr/solrj/src/java/org/apache/solr/client/solrj/response/QueryResponse.java index 20d3e7b52fa..cc165e5eccd 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/response/QueryResponse.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/response/QueryResponse.java @@ -23,6 +23,7 @@ import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.TreeMap; import org.apache.solr.client.solrj.SolrServer; import org.apache.solr.client.solrj.beans.DocumentObjectBinder; @@ -163,19 +164,25 @@ public class QueryResponse extends SolrResponseBase } private void extractStatsInfo(NamedList info) { + _fieldStatsInfo = extractFieldStatsInfo(info); + } + + private Map extractFieldStatsInfo(NamedList info) { if( info != null ) { - _fieldStatsInfo = new HashMap<>(); + Map fieldStatsInfoMap = new TreeMap<>(); NamedList> ff = (NamedList>) info.get( "stats_fields" ); if( ff != null ) { for( Map.Entry> entry : ff ) { NamedList v = entry.getValue(); if( v != null ) { - _fieldStatsInfo.put( entry.getKey(), + fieldStatsInfoMap.put( entry.getKey(), new FieldStatsInfo( v, entry.getKey() ) ); } } } + return fieldStatsInfoMap; } + return null; } private void extractDebugInfo( NamedList debug ) @@ -396,14 +403,38 @@ public class QueryResponse extends SolrResponseBase Object v = nl.getVal( 1 ); 
assert "count".equals(nl.getName(2)); int cnt = ((Integer)nl.getVal( 2 )).intValue(); - List p = null; + + List subPivots = null; + Map fieldStatsInfos = null; + if (4 <= nl.size()) { - assert "pivot".equals(nl.getName(3)); - Object subPiv = nl.getVal(3); - assert null != subPiv : "Server sent back 'null' for sub pivots?"; - p = readPivots( (List) subPiv ); + for(int index = 3; index < nl.size(); index++) { + final String key = nl.getName(index); + final Object val = nl.getVal(index); + switch (key) { + + case "pivot": { + assert null != val : "Server sent back 'null' for sub pivots?"; + assert val instanceof List : "Server sent non-List for sub pivots?"; + + subPivots = readPivots( (List) val ); + break; + } + case "stats": { + assert null != val : "Server sent back 'null' for stats?"; + assert val instanceof NamedList : "Server sent non-NamedList for stats?"; + + fieldStatsInfos = extractFieldStatsInfo((NamedList) val); + break; + } + default: + throw new RuntimeException( "unknown key in pivot: "+ key+ " ["+val+"]"); + + } + } } - values.add( new PivotField( f, v, cnt, p ) ); + + values.add( new PivotField( f, v, cnt, subPivots, fieldStatsInfos ) ); } return values; } diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleTests.java b/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleTests.java index d47698a462f..0f57a0a885b 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleTests.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleTests.java @@ -57,12 +57,9 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.HashMap; -import java.util.Iterator; -import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Random; -import java.util.Set; /** * This should include tests against the example solr config @@ -814,6 +811,197 @@ abstract public class SolrExampleTests extends SolrExampleTestsBase doPivotFacetTest(false); 
} + @Test + public void testPivotFacetsStats() throws Exception { + SolrServer server = getSolrServer(); + + // Empty the database... + server.deleteByQuery("*:*");// delete everything! + server.commit(); + assertNumFound("*:*", 0); // make sure it got in + + int id = 1; + ArrayList docs = new ArrayList<>(); + docs.add(makeTestDoc("id", id++, "features", "aaa", "manu", "apple", "cat", "a", "inStock", true, "popularity", 12, "price", .017)); + docs.add(makeTestDoc("id", id++, "features", "aaa", "manu", "lg", "cat", "a", "inStock", false, "popularity", 13, "price", 16.04)); + docs.add(makeTestDoc("id", id++, "features", "aaa", "manu", "samsung", "cat", "a", "inStock", true, "popularity", 14, "price", 12.34)); + docs.add(makeTestDoc("id", id++, "features", "aaa", "manu", "lg", "cat", "b", "inStock", false, "popularity", 24, "price", 51.39)); + docs.add(makeTestDoc("id", id++, "features", "aaa", "manu", "nokia", "cat", "b", "inStock", true, "popularity", 28, "price", 131.39)); + docs.add(makeTestDoc("id", id++, "features", "bbb", "manu", "ztc", "cat", "a", "inStock", false, "popularity", 32)); + docs.add(makeTestDoc("id", id++, "features", "bbb", "manu", "htc", "cat", "a", "inStock", true, "popularity", 31, "price", 131.39)); + docs.add(makeTestDoc("id", id++, "features", "bbb", "manu", "apple", "cat", "b", "inStock", false, "popularity", 36)); + docs.add(makeTestDoc("id", id++, "features", "bbb", "manu", "lg", "cat", "b", "inStock", true, "popularity", 37, "price", 1.39)); + docs.add(makeTestDoc("id", id++, "features", "bbb", "manu", "ztc", "cat", "b", "inStock", false, "popularity", 38, "price", 47.98)); + docs.add(makeTestDoc("id", id++, "features", "bbb", "manu", "ztc", "cat", "b", "inStock", true, "popularity", -38)); + docs.add(makeTestDoc("id", id++, "cat", "b")); // something not matching all fields + server.add(docs); + server.commit(); + + for (String pivot : new String[] { "{!key=pivot_key stats=s1}features,manu", + "{!key=pivot_key 
stats=s1}features,manu,cat", + "{!key=pivot_key stats=s1}features,manu,cat,inStock" + }) { + + // for any of these pivot params, the assertions we check should be the same + // (we stop asserting at the "manu" level) + + SolrQuery query = new SolrQuery("*:*"); + query.addFacetPivotField(pivot); + query.setFacetLimit(1); + query.addGetFieldStatistics("{!key=foo_price tag=s1}price", "{!tag=s1}popularity"); + query.setFacetMinCount(0); + query.setRows(0); + + QueryResponse rsp = server.query(query); + + // check top (ie: non-pivot) stats + Map map = rsp.getFieldStatsInfo(); + FieldStatsInfo intValueStatsInfo = map.get("popularity"); + assertEquals(-38.0d, intValueStatsInfo.getMin()); + assertEquals(38.0d, intValueStatsInfo.getMax()); + assertEquals(11l, intValueStatsInfo.getCount().longValue()); + assertEquals(1l, intValueStatsInfo.getMissing().longValue()); + assertEquals(227.0d, intValueStatsInfo.getSum()); + assertEquals(20.636363636363637d, intValueStatsInfo.getMean()); + + FieldStatsInfo doubleValueStatsInfo = map.get("foo_price"); + assertEquals(.017d, (double) doubleValueStatsInfo.getMin(), .01d); + assertEquals(131.39d, (double) doubleValueStatsInfo.getMax(), .01d); + assertEquals(8l, doubleValueStatsInfo.getCount().longValue()); + assertEquals(4l, doubleValueStatsInfo.getMissing().longValue()); + assertEquals(391.93d, (double) doubleValueStatsInfo.getSum(), .01d); + assertEquals(48.99d, (double) doubleValueStatsInfo.getMean(), .01d); + + // now get deeper and look at the pivots... + + NamedList> pivots = rsp.getFacetPivot(); + assertTrue( !
pivots.get("pivot_key").isEmpty() ); + + List list = pivots.get("pivot_key"); + PivotField featuresBBBPivot = list.get(0); + assertEquals("features", featuresBBBPivot.getField()); + assertEquals("bbb", featuresBBBPivot.getValue()); + assertNotNull(featuresBBBPivot.getFieldStatsInfo()); + assertEquals(2, featuresBBBPivot.getFieldStatsInfo().size()); + + FieldStatsInfo featuresBBBPivotStats1 = featuresBBBPivot.getFieldStatsInfo().get("foo_price"); + assertEquals("foo_price", featuresBBBPivotStats1.getName()); + assertEquals(131.39d, (double) featuresBBBPivotStats1.getMax(), .01d); + assertEquals(1.38d, (double) featuresBBBPivotStats1.getMin(), .01d); + assertEquals(180.75d, (double) featuresBBBPivotStats1.getSum(), .01d); + assertEquals(3, (long) featuresBBBPivotStats1.getCount()); + assertEquals(3, (long) featuresBBBPivotStats1.getMissing()); + assertEquals(60.25d, (double) featuresBBBPivotStats1.getMean(), .01d); + assertEquals(65.86d, featuresBBBPivotStats1.getStddev(), .01d); + assertEquals(19567.34d, featuresBBBPivotStats1.getSumOfSquares(), .01d); + + FieldStatsInfo featuresBBBPivotStats2 = featuresBBBPivot.getFieldStatsInfo().get("popularity"); + assertEquals("popularity", featuresBBBPivotStats2.getName()); + assertEquals(38.0d, (double) featuresBBBPivotStats2.getMax(), .01d); + assertEquals(-38.0d, (double) featuresBBBPivotStats2.getMin(), .01d); + assertEquals(136.0d, (double) featuresBBBPivotStats2.getSum(), .01d); + assertEquals(6, (long) featuresBBBPivotStats2.getCount()); + assertEquals(0, (long) featuresBBBPivotStats2.getMissing()); + assertEquals(22.66d, (double) featuresBBBPivotStats2.getMean(), .01d); + assertEquals(29.85d, featuresBBBPivotStats2.getStddev(), .01d); + assertEquals(7538.0d, featuresBBBPivotStats2.getSumOfSquares(), .01d); + + List nestedPivotList = featuresBBBPivot.getPivot(); + PivotField featuresBBBPivotPivot = nestedPivotList.get(0); + assertEquals("manu", featuresBBBPivotPivot.getField()); + assertEquals("ztc", 
featuresBBBPivotPivot.getValue()); + assertNotNull(featuresBBBPivotPivot.getFieldStatsInfo()); + assertEquals(2, featuresBBBPivotPivot.getFieldStatsInfo().size()); + + FieldStatsInfo featuresBBBManuZtcPivotStats1 = featuresBBBPivotPivot.getFieldStatsInfo().get("foo_price"); + assertEquals("foo_price", featuresBBBManuZtcPivotStats1.getName()); + assertEquals(47.97d, (double) featuresBBBManuZtcPivotStats1.getMax(), .01d); + assertEquals(47.97d, (double) featuresBBBManuZtcPivotStats1.getMin(), .01d); + assertEquals(47.97d, (double) featuresBBBManuZtcPivotStats1.getSum(), .01d); + assertEquals(1, (long) featuresBBBManuZtcPivotStats1.getCount()); + assertEquals(2, (long) featuresBBBManuZtcPivotStats1.getMissing()); + assertEquals(47.97d, (double) featuresBBBManuZtcPivotStats1.getMean(), .01d); + assertEquals(0.0d, featuresBBBManuZtcPivotStats1.getStddev(), .01d); + assertEquals(2302.08d, featuresBBBManuZtcPivotStats1.getSumOfSquares(), .01d); + + + FieldStatsInfo featuresBBBManuZtcPivotStats2 = featuresBBBPivotPivot.getFieldStatsInfo().get("popularity"); + assertEquals("popularity", featuresBBBManuZtcPivotStats2.getName()); + assertEquals(38.0d, (double) featuresBBBManuZtcPivotStats2.getMax(), .01d); + assertEquals(-38.0d, (double) featuresBBBManuZtcPivotStats2.getMin(), .01d); + assertEquals(32.0, (double) featuresBBBManuZtcPivotStats2.getSum(), .01d); + assertEquals(3, (long) featuresBBBManuZtcPivotStats2.getCount()); + assertEquals(0, (long) featuresBBBManuZtcPivotStats2.getMissing()); + assertEquals(10.66d, (double) featuresBBBManuZtcPivotStats2.getMean(), .01d); + assertEquals(42.25d, featuresBBBManuZtcPivotStats2.getStddev(), .01d); + assertEquals(3912.0d, featuresBBBManuZtcPivotStats2.getSumOfSquares(), .01d); + } + } + + @Test + public void testPivotFacetsStatsNotSupported() throws Exception { + SolrServer server = getSolrServer(); + + // Empty the database... + server.deleteByQuery("*:*");// delete everything! 
+ server.commit(); + assertNumFound("*:*", 0); // make sure it got in + + // results of this test should be the same regardless of whether any docs are in the index + if (random().nextBoolean()) { + server.add(makeTestDoc("id", 1, "features", "aaa", "cat", "a", "inStock", true, "popularity", 12, "price", .017)); + server.commit(); + } + + ignoreException("is not currently supported"); + + // boolean field + SolrQuery query = new SolrQuery("*:*"); + query.addFacetPivotField("{!stats=s1}features,manu"); + query.addGetFieldStatistics("{!key=inStock_val tag=s1}inStock"); + try { + server.query(query); + fail("SolrException should be thrown on query"); + } catch (SolrException e) { + assertEquals("Pivot facet on boolean is not currently supported, bad request returned", 400, e.code()); + assertTrue(e.getMessage().contains("is not currently supported")); + assertTrue(e.getMessage().contains("boolean")); + } + + // asking for multiple stat tags -- see SOLR-6663 + query = new SolrQuery("*:*"); + query.addFacetPivotField("{!stats=tag1,tag2}features,manu"); + query.addGetFieldStatistics("{!tag=tag1}price", "{!tag=tag2}popularity"); + query.setFacetMinCount(0); + query.setRows(0); + try { + server.query(query); + fail("SolrException should be thrown on query"); + } catch (SolrException e) { + assertEquals(400, e.code()); + assertTrue(e.getMessage().contains("stats")); + assertTrue(e.getMessage().contains("comma")); + assertTrue(e.getMessage().contains("tag")); + } + + // text field + query = new SolrQuery("*:*"); + query.addFacetPivotField("{!stats=s1}features,manu"); + query.addGetFieldStatistics("{!tag=s1}features"); + query.setFacetMinCount(0); + query.setRows(0); + try { + server.query(query); + fail("SolrException should be thrown on query"); + } catch (SolrException e) { + assertEquals("Pivot facet on string is not currently supported, bad request returned", 400, e.code()); + assertTrue(e.getMessage().contains("is not currently supported")); +
assertTrue(e.getMessage().contains("text_general")); + } + + + } + public void testPivotFacetsMissing() throws Exception { doPivotFacetTest(true); }