From 910d467a93398670c6a16ed79da5672da34514d5 Mon Sep 17 00:00:00 2001 From: "Chris M. Hostetter" Date: Wed, 13 Aug 2014 18:23:53 +0000 Subject: [PATCH] SOLR-2894: Distributed query support for facet.pivot git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1617789 13f79535-47bb-0310-9956-ffa450edef68 --- solr/CHANGES.txt | 2 + .../handler/component/FacetComponent.java | 890 ++++++++++++------ .../solr/handler/component/PivotFacet.java | 164 ++++ .../handler/component/PivotFacetField.java | 386 ++++++++ .../PivotFacetFieldValueCollection.java | 342 +++++++ .../handler/component/PivotFacetHelper.java | 311 ++---- .../component/PivotFacetProcessor.java | 252 +++++ .../handler/component/PivotFacetValue.java | 206 ++++ .../solr/handler/component/ShardRequest.java | 1 + .../org/apache/solr/request/SimpleFacets.java | 53 +- .../org/apache/solr/util/PivotListEntry.java | 62 ++ .../org/apache/solr/CursorPagingTest.java | 35 +- .../apache/solr/TestDistributedSearch.java | 10 +- .../solr/cloud/TestCloudPivotFacet.java | 530 +++++++++++ .../DistributedFacetPivotLargeTest.java | 762 +++++++++++++++ .../DistributedFacetPivotLongTailTest.java | 289 ++++++ .../DistributedFacetPivotSmallTest.java | 439 +++++++++ .../component/TestPivotHelperCode.java | 118 +++ .../test/org/apache/solr/util/TestUtils.java | 24 + .../client/solrj/response/QueryResponse.java | 11 +- .../solr/common/params/FacetParams.java | 18 + .../org/apache/solr/common/util/StrUtils.java | 40 +- .../java/org/apache/solr/SolrTestCaseJ4.java | 42 + 23 files changed, 4423 insertions(+), 564 deletions(-) create mode 100644 solr/core/src/java/org/apache/solr/handler/component/PivotFacet.java create mode 100644 solr/core/src/java/org/apache/solr/handler/component/PivotFacetField.java create mode 100644 solr/core/src/java/org/apache/solr/handler/component/PivotFacetFieldValueCollection.java create mode 100644 solr/core/src/java/org/apache/solr/handler/component/PivotFacetProcessor.java create mode 100644 
solr/core/src/java/org/apache/solr/handler/component/PivotFacetValue.java create mode 100644 solr/core/src/java/org/apache/solr/util/PivotListEntry.java create mode 100644 solr/core/src/test/org/apache/solr/cloud/TestCloudPivotFacet.java create mode 100644 solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotLargeTest.java create mode 100644 solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotLongTailTest.java create mode 100644 solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotSmallTest.java create mode 100644 solr/core/src/test/org/apache/solr/handler/component/TestPivotHelperCode.java diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 3128a5b99e8..bd5368183a0 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -188,6 +188,8 @@ New Features * SOLR-6304 : JsonLoader should be able to flatten an input JSON to multiple docs (Noble Paul) +* SOLR-2894: Distributed query support for facet.pivot (Dan Cooper, Erik Hatcher, Chris Russell, + Andrew Muldowney, Brett Lucey, Mark Miller, hossman) Bug Fixes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java b/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java index 6f571a0d08a..f386bc0d8b6 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java @@ -21,12 +21,15 @@ import java.io.IOException; import java.net.URL; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; +import java.util.Locale; import java.util.Map; +import java.util.Map.Entry; import org.apache.lucene.util.FixedBitSet; import org.apache.solr.common.SolrException; @@ -52,333 +55,515 @@ import org.slf4j.LoggerFactory; * * @since solr 1.3 */ 
-public class FacetComponent extends SearchComponent -{ +@SuppressWarnings("rawtypes") +public class FacetComponent extends SearchComponent { public static Logger log = LoggerFactory.getLogger(FacetComponent.class); - + public static final String COMPONENT_NAME = "facet"; + + private static final String PIVOT_KEY = "facet_pivot"; + private static final String PIVOT_REFINE_PREFIX = "{!"+PivotFacet.REFINE_PARAM+"="; - static final String PIVOT_KEY = "facet_pivot"; + /** + * incrememented counter used to track the values being refined in a given request. + * This counter is used in conjunction with {@link PivotFacet#REFINE_PARAM} to identify + * which refinement values are associated with which pivots + */ + int pivotRefinementCounter = 0; @Override - public void prepare(ResponseBuilder rb) throws IOException - { - if (rb.req.getParams().getBool(FacetParams.FACET,false)) { - rb.setNeedDocSet( true ); + public void prepare(ResponseBuilder rb) throws IOException { + if (rb.req.getParams().getBool(FacetParams.FACET, false)) { + rb.setNeedDocSet(true); rb.doFacets = true; } } - + /** * Actually run the query */ @Override - public void process(ResponseBuilder rb) throws IOException - { + public void process(ResponseBuilder rb) throws IOException { if (rb.doFacets) { SolrParams params = rb.req.getParams(); - SimpleFacets f = new SimpleFacets(rb.req, - rb.getResults().docSet, - params, - rb ); + SimpleFacets f = new SimpleFacets(rb.req, rb.getResults().docSet, params, rb); + NamedList counts = f.getFacetCounts(); - String[] pivots = params.getParams( FacetParams.FACET_PIVOT ); - if( pivots != null && pivots.length > 0 ) { - PivotFacetHelper pivotHelper = new PivotFacetHelper(rb.req, - rb.getResults().docSet, - params, - rb ); - NamedList v = pivotHelper.process(pivots); - if( v != null ) { - counts.add( PIVOT_KEY, v ); + String[] pivots = params.getParams(FacetParams.FACET_PIVOT); + if (pivots != null && pivots.length > 0) { + PivotFacetProcessor pivotProcessor + = new 
PivotFacetProcessor(rb.req, rb.getResults().docSet, params, rb); + SimpleOrderedMap>> v + = pivotProcessor.process(pivots); + if (v != null) { + counts.add(PIVOT_KEY, v); } } - // TODO ???? add this directly to the response, or to the builder? - rb.rsp.add( "facet_counts", counts ); + rb.rsp.add("facet_counts", counts); } } - + private static final String commandPrefix = "{!" + CommonParams.TERMS + "=$"; - + @Override public int distributedProcess(ResponseBuilder rb) throws IOException { if (!rb.doFacets) { return ResponseBuilder.STAGE_DONE; } - + if (rb.stage == ResponseBuilder.STAGE_GET_FIELDS) { - // overlap facet refinement requests (those shards that we need a count for - // particular facet values from), where possible, with + // overlap facet refinement requests (those shards that we need a count + // for particular facet values from), where possible, with // the requests to get fields (because we know that is the // only other required phase). // We do this in distributedProcess so we can look at all of the // requests in the outgoing queue at once. - - - - for (int shardNum=0; shardNum refinements = null; - + + for (int shardNum = 0; shardNum < rb.shards.length; shardNum++) { + List distribFieldFacetRefinements = null; + for (DistribFieldFacet dff : rb._facetInfo.facets.values()) { if (!dff.needRefinements) continue; List refList = dff._toRefine[shardNum]; - if (refList == null || refList.size()==0) continue; - - String key = dff.getKey(); // reuse the same key that was used for the main facet + if (refList == null || refList.size() == 0) continue; + + String key = dff.getKey(); // reuse the same key that was used for the + // main facet String termsKey = key + "__terms"; String termsVal = StrUtils.join(refList, ','); - + String facetCommand; // add terms into the original facet.field command // do it via parameter reference to avoid another layer of encoding. 
- + String termsKeyEncoded = QueryParsing.encodeLocalParamVal(termsKey); if (dff.localParams != null) { - facetCommand = commandPrefix+termsKeyEncoded + " " + dff.facetStr.substring(2); + facetCommand = commandPrefix + termsKeyEncoded + " " + + dff.facetStr.substring(2); } else { - facetCommand = commandPrefix+termsKeyEncoded+'}'+dff.field; + facetCommand = commandPrefix + termsKeyEncoded + '}' + dff.field; } - - if (refinements == null) { - refinements = new ArrayList<>(); + + if (distribFieldFacetRefinements == null) { + distribFieldFacetRefinements = new ArrayList<>(); } - - refinements.add(facetCommand); - refinements.add(termsKey); - refinements.add(termsVal); + + distribFieldFacetRefinements.add(facetCommand); + distribFieldFacetRefinements.add(termsKey); + distribFieldFacetRefinements.add(termsVal); } + + boolean pivotFacetRefinementRequestsExistForShard = + doAnyPivotFacetRefinementRequestsExistForShard(rb._facetInfo, shardNum); - if (refinements == null) continue; - - + if (distribFieldFacetRefinements == null + && !pivotFacetRefinementRequestsExistForShard) { + // nothing to refine, short circut out + continue; + } + String shard = rb.shards[shardNum]; - ShardRequest refine = null; + ShardRequest shardsRefineRequest = null; boolean newRequest = false; - + // try to find a request that is already going out to that shard. - // If nshards becomes to great, we way want to move to hashing for better - // scalability. + // If nshards becomes to great, we way want to move to hashing for + // better scalability. 
for (ShardRequest sreq : rb.outgoing) { - if ((sreq.purpose & ShardRequest.PURPOSE_GET_FIELDS)!=0 - && sreq.shards != null - && sreq.shards.length==1 - && sreq.shards[0].equals(shard)) - { - refine = sreq; + if ((sreq.purpose & ShardRequest.PURPOSE_GET_FIELDS) != 0 + && sreq.shards != null + && sreq.shards.length == 1 + && sreq.shards[0].equals(shard)) { + shardsRefineRequest = sreq; break; } } - - if (refine == null) { - // we didn't find any other suitable requests going out to that shard, so - // create one ourselves. + + if (shardsRefineRequest == null) { + // we didn't find any other suitable requests going out to that shard, + // so create one ourselves. newRequest = true; - refine = new ShardRequest(); - refine.shards = new String[]{rb.shards[shardNum]}; - refine.params = new ModifiableSolrParams(rb.req.getParams()); + shardsRefineRequest = new ShardRequest(); + shardsRefineRequest.shards = new String[] { rb.shards[shardNum] }; + shardsRefineRequest.params = new ModifiableSolrParams(rb.req.getParams()); // don't request any documents - refine.params.remove(CommonParams.START); - refine.params.set(CommonParams.ROWS,"0"); + shardsRefineRequest.params.remove(CommonParams.START); + shardsRefineRequest.params.set(CommonParams.ROWS, "0"); } - - refine.purpose |= ShardRequest.PURPOSE_REFINE_FACETS; - refine.params.set(FacetParams.FACET, "true"); - refine.params.remove(FacetParams.FACET_FIELD); - refine.params.remove(FacetParams.FACET_QUERY); - - for (int i=0; i> pivotFacetRefinements, + ResponseBuilder rb, int shardNum) { + + FacetInfo fi = rb._facetInfo; + + ShardRequest shardsRefineRequestPivot = new ShardRequest(); + shardsRefineRequestPivot.shards = new String[] {rb.shards[shardNum]}; + shardsRefineRequestPivot.params = new ModifiableSolrParams(rb.req.getParams()); - @Override - public void modifyRequest(ResponseBuilder rb, SearchComponent who, ShardRequest sreq) { - if (!rb.doFacets) return; + // don't request any documents + 
shardsRefineRequestPivot.params.remove(CommonParams.START); + shardsRefineRequestPivot.params.set(CommonParams.ROWS, "0"); + + shardsRefineRequestPivot.purpose |= ShardRequest.PURPOSE_REFINE_PIVOT_FACETS; + shardsRefineRequestPivot.params.set(FacetParams.FACET, "true"); + shardsRefineRequestPivot.params.remove(FacetParams.FACET_PIVOT_MINCOUNT); + shardsRefineRequestPivot.params.set(FacetParams.FACET_PIVOT_MINCOUNT, -1); + shardsRefineRequestPivot.params.remove(FacetParams.FACET_PIVOT); + shardsRefineRequestPivot.params.remove(FacetParams.FACET_OFFSET); + + for (int pivotIndex = 0; pivotIndex < fi.pivotFacets.size(); pivotIndex++) { + String pivotFacetKey = fi.pivotFacets.getName(pivotIndex); + PivotFacet pivotFacet = fi.pivotFacets.getVal(pivotIndex); - if ((sreq.purpose & ShardRequest.PURPOSE_GET_TOP_IDS) != 0) { - sreq.purpose |= ShardRequest.PURPOSE_GET_FACETS; + List queuedRefinementsForShard = + pivotFacet.getQueuedRefinements(shardNum); - FacetInfo fi = rb._facetInfo; - if (fi == null) { - rb._facetInfo = fi = new FacetInfo(); - fi.parse(rb.req.getParams(), rb); - // should already be true... - // sreq.params.set(FacetParams.FACET, "true"); + if ( ! 
queuedRefinementsForShard.isEmpty() ) { + + String fieldsKey = PivotFacet.REFINE_PARAM + pivotRefinementCounter; + String command; + + if (pivotFacet.localParams != null) { + command = PIVOT_REFINE_PREFIX + pivotRefinementCounter + " " + + pivotFacet.facetStr.substring(2); + } else { + command = PIVOT_REFINE_PREFIX + pivotRefinementCounter + "}" + + pivotFacet.getKey(); + } + + shardsRefineRequestPivot.params.add(FacetParams.FACET_PIVOT, command); + for (PivotFacetValue refinementValue : queuedRefinementsForShard) { + String refinementStr = PivotFacetHelper + .encodeRefinementValuePath(refinementValue.getValuePath()); + shardsRefineRequestPivot.params.add(fieldsKey, refinementStr); + } - - sreq.params.remove(FacetParams.FACET_MINCOUNT); - sreq.params.remove(FacetParams.FACET_OFFSET); - sreq.params.remove(FacetParams.FACET_LIMIT); - - for (DistribFieldFacet dff : fi.facets.values()) { - String paramStart = "f." + dff.field + '.'; - sreq.params.remove(paramStart + FacetParams.FACET_MINCOUNT); - sreq.params.remove(paramStart + FacetParams.FACET_OFFSET); - - dff.initialLimit = dff.limit <= 0 ? dff.limit : dff.offset + dff.limit; - - if (dff.sort.equals(FacetParams.FACET_SORT_COUNT)) { - if (dff.limit > 0) { - // set the initial limit higher to increase accuracy - dff.initialLimit = (int)(dff.initialLimit * 1.5) + 10; - dff.initialMincount = 0; // TODO: we could change this to 1, but would then need more refinement for small facet result sets? - } else { - // if limit==-1, then no need to artificially lower mincount to 0 if it's 1 - dff.initialMincount = Math.min(dff.minCount, 1); - } - } else { - // we're sorting by index order. - // if minCount==0, we should always be able to get accurate results w/o over-requesting or refining - // if minCount==1, we should be able to get accurate results w/o over-requesting, but we'll need to refine - // if minCount==n (>1), we can set the initialMincount to minCount/nShards, rounded up. 
- // For example, we know that if minCount=10 and we have 3 shards, then at least one shard must have a count of 4 for the term - // For the minCount>1 case, we can generate too short of a list (miss terms at the end of the list) unless limit==-1 - // For example: each shard could produce a list of top 10, but some of those could fail to make it into the combined list (i.e. - // we needed to go beyond the top 10 to generate the top 10 combined). Overrequesting can help a little here, but not as - // much as when sorting by count. - if (dff.minCount <= 1) { - dff.initialMincount = dff.minCount; - } else { - dff.initialMincount = (int)Math.ceil((double)dff.minCount / rb.slices.length); - // dff.initialMincount = 1; - } - } - - if (dff.initialMincount != 0) { - sreq.params.set(paramStart + FacetParams.FACET_MINCOUNT, dff.initialMincount); - } - - // Currently this is for testing only and allows overriding of the - // facet.limit set to the shards - dff.initialLimit = rb.req.getParams().getInt("facet.shard.limit", dff.initialLimit); - - sreq.params.set(paramStart + FacetParams.FACET_LIMIT, dff.initialLimit); } + pivotRefinementCounter++; + } + + rb.addRequest(this, shardsRefineRequestPivot); + } + + public void modifyRequest(ResponseBuilder rb, SearchComponent who,ShardRequest sreq) { + + if (!rb.doFacets) return; + + if ((sreq.purpose & ShardRequest.PURPOSE_GET_TOP_IDS) != 0) { + sreq.purpose |= ShardRequest.PURPOSE_GET_FACETS; + + FacetInfo fi = rb._facetInfo; + if (fi == null) { + rb._facetInfo = fi = new FacetInfo(); + fi.parse(rb.req.getParams(), rb); + } + + modifyRequestForFieldFacets(rb, sreq, fi); + + modifyRequestForPivotFacets(rb, sreq, fi.pivotFacets); + + sreq.params.remove(FacetParams.FACET_MINCOUNT); + sreq.params.remove(FacetParams.FACET_OFFSET); + } else { // turn off faceting on other requests sreq.params.set(FacetParams.FACET, "false"); // we could optionally remove faceting params } } + + private void modifyRequestForFieldFacets(ResponseBuilder rb, 
ShardRequest sreq, FacetInfo fi) { + for (DistribFieldFacet dff : fi.facets.values()) { + + String paramStart = "f." + dff.field + '.'; + sreq.params.remove(paramStart + FacetParams.FACET_MINCOUNT); + sreq.params.remove(paramStart + FacetParams.FACET_OFFSET); + + dff.initialLimit = dff.limit <= 0 ? dff.limit : dff.offset + dff.limit; + + if (dff.sort.equals(FacetParams.FACET_SORT_COUNT)) { + if (dff.limit > 0) { + // set the initial limit higher to increase accuracy + dff.initialLimit = doOverRequestMath(dff.initialLimit, dff.overrequestRatio, + dff.overrequestCount); + dff.initialMincount = 0; // TODO: we could change this to 1, but would + // then need more refinement for small facet + // result sets? + } else { + // if limit==-1, then no need to artificially lower mincount to 0 if + // it's 1 + dff.initialMincount = Math.min(dff.minCount, 1); + } + } else { + // we're sorting by index order. + // if minCount==0, we should always be able to get accurate results w/o + // over-requesting or refining + // if minCount==1, we should be able to get accurate results w/o + // over-requesting, but we'll need to refine + // if minCount==n (>1), we can set the initialMincount to + // minCount/nShards, rounded up. + // For example, we know that if minCount=10 and we have 3 shards, then + // at least one shard must have a count of 4 for the term + // For the minCount>1 case, we can generate too short of a list (miss + // terms at the end of the list) unless limit==-1 + // For example: each shard could produce a list of top 10, but some of + // those could fail to make it into the combined list (i.e. + // we needed to go beyond the top 10 to generate the top 10 combined). + // Overrequesting can help a little here, but not as + // much as when sorting by count. 
+ if (dff.minCount <= 1) { + dff.initialMincount = dff.minCount; + } else { + dff.initialMincount = (int) Math.ceil((double) dff.minCount / rb.slices.length); + } + } + + // Currently this is for testing only and allows overriding of the + // facet.limit set to the shards + dff.initialLimit = rb.req.getParams().getInt("facet.shard.limit", dff.initialLimit); + + sreq.params.set(paramStart + FacetParams.FACET_LIMIT, dff.initialLimit); + sreq.params.set(paramStart + FacetParams.FACET_MINCOUNT, dff.initialMincount); + } + } + + private void modifyRequestForPivotFacets(ResponseBuilder rb, + ShardRequest sreq, + SimpleOrderedMap pivotFacets) { + for (Entry pfwEntry : pivotFacets) { + PivotFacet pivot = pfwEntry.getValue(); + for (String pivotField : StrUtils.splitSmart(pivot.getKey(), ',')) { + modifyRequestForIndividualPivotFacets(rb, sreq, pivotField); + } + } + } + + private void modifyRequestForIndividualPivotFacets(ResponseBuilder rb, ShardRequest sreq, + String fieldToOverRequest) { + + final SolrParams originalParams = rb.req.getParams(); + final String paramStart = "f." 
+ fieldToOverRequest + "."; + + final int requestedLimit = originalParams.getFieldInt(fieldToOverRequest, + FacetParams.FACET_LIMIT, 100); + sreq.params.remove(paramStart + FacetParams.FACET_LIMIT); + + final int offset = originalParams.getFieldInt(fieldToOverRequest, + FacetParams.FACET_OFFSET, 0); + sreq.params.remove(paramStart + FacetParams.FACET_OFFSET); + + final double overRequestRatio = originalParams.getFieldDouble + (fieldToOverRequest, FacetParams.FACET_OVERREQUEST_RATIO, 1.5); + sreq.params.remove(paramStart + FacetParams.FACET_OVERREQUEST_RATIO); + + final int overRequestCount = originalParams.getFieldInt + (fieldToOverRequest, FacetParams.FACET_OVERREQUEST_COUNT, 10); + sreq.params.remove(paramStart + FacetParams.FACET_OVERREQUEST_COUNT); + + final int requestedMinCount = originalParams.getFieldInt + (fieldToOverRequest, FacetParams.FACET_PIVOT_MINCOUNT, 1); + sreq.params.remove(paramStart + FacetParams.FACET_PIVOT_MINCOUNT); + + final String defaultSort = (requestedLimit > 0) + ? FacetParams.FACET_SORT_COUNT : FacetParams.FACET_SORT_INDEX; + final String sort = originalParams.getFieldParam + (fieldToOverRequest, FacetParams.FACET_SORT, defaultSort); + + int shardLimit = requestedLimit + offset; + int shardMinCount = requestedMinCount; + + // per-shard mincount & overrequest + if ( FacetParams.FACET_SORT_INDEX.equals(sort) && + 1 < requestedMinCount && + 0 < requestedLimit) { + + // We can divide the mincount by num shards rounded up, because unless + // a single shard has at least that many it can't compete... 
+ shardMinCount = (int) Math.ceil((double) requestedMinCount / rb.slices.length); + + // ...but we still need to overrequest to reduce chances of missing something + shardLimit = doOverRequestMath(shardLimit, overRequestRatio, overRequestCount); + + // (for mincount <= 1, no overrequest needed) + + } else if ( FacetParams.FACET_SORT_COUNT.equals(sort) ) { + if ( 0 < requestedLimit ) { + shardLimit = doOverRequestMath(shardLimit, overRequestRatio, overRequestCount); + shardMinCount = 0; + } else { + shardMinCount = Math.min(requestedMinCount, 1); + } + } + sreq.params.set(paramStart + FacetParams.FACET_LIMIT, shardLimit); + sreq.params.set(paramStart + FacetParams.FACET_PIVOT_MINCOUNT, shardMinCount); + } + + private int doOverRequestMath(int limit, double ratio, int count) { + // NOTE: normally, "1.0F < ratio" + // + // if the user chooses a ratio < 1, we allow it and don't "bottom out" at + // the original limit until *after* we've also added the count. + int adjustedLimit = (int) (limit * ratio) + count; + return Math.max(limit, adjustedLimit); + } + @Override public void handleResponses(ResponseBuilder rb, ShardRequest sreq) { if (!rb.doFacets) return; - - if ((sreq.purpose & ShardRequest.PURPOSE_GET_FACETS)!=0) { + + if ((sreq.purpose & ShardRequest.PURPOSE_GET_FACETS) != 0) { countFacets(rb, sreq); - } else if ((sreq.purpose & ShardRequest.PURPOSE_REFINE_FACETS)!=0) { - refineFacets(rb, sreq); + } else { + // at present PURPOSE_REFINE_FACETS and PURPOSE_REFINE_PIVOT_FACETS + // don't co-exist in individual requests, but don't assume that + // will always be the case + if ((sreq.purpose & ShardRequest.PURPOSE_REFINE_FACETS) != 0) { + refineFacets(rb, sreq); + } + if ((sreq.purpose & ShardRequest.PURPOSE_REFINE_PIVOT_FACETS) != 0) { + refinePivotFacets(rb, sreq); + } } } - - - - + private void countFacets(ResponseBuilder rb, ShardRequest sreq) { FacetInfo fi = rb._facetInfo; - - for (ShardResponse srsp: sreq.responses) { + + for (ShardResponse srsp : 
sreq.responses) { int shardNum = rb.getShardNum(srsp.getShard()); NamedList facet_counts = null; try { - facet_counts = (NamedList)srsp.getSolrResponse().getResponse().get("facet_counts"); - } - catch(Exception ex) { - if(rb.req.getParams().getBool(ShardParams.SHARDS_TOLERANT, false)) { + facet_counts = (NamedList) srsp.getSolrResponse().getResponse().get("facet_counts"); + } catch (Exception ex) { + if (rb.req.getParams().getBool(ShardParams.SHARDS_TOLERANT, false)) { continue; // looks like a shard did not return anything } - throw new SolrException(ErrorCode.SERVER_ERROR, "Unable to read facet info for shard: "+srsp.getShard(), ex); + throw new SolrException(ErrorCode.SERVER_ERROR, + "Unable to read facet info for shard: " + srsp.getShard(), ex); } - + // handle facet queries - NamedList facet_queries = (NamedList)facet_counts.get("facet_queries"); + NamedList facet_queries = (NamedList) facet_counts.get("facet_queries"); if (facet_queries != null) { - for (int i=0; i pivotFacet : fi.pivotFacets) { + pivotFacet.getValue().queuePivotRefinementRequests(); } - + // // This code currently assumes that there will be only a single // request ((with responses from all shards) sent out to get facets... // otherwise we would need to wait until all facet responses were received. // - for (DistribFieldFacet dff : fi.facets.values()) { - // no need to check these facets for refinement + // no need to check these facets for refinement if (dff.initialLimit <= 0 && dff.initialMincount <= 1) continue; - + // only other case where index-sort doesn't need refinement is if minCount==0 if (dff.minCount <= 1 && dff.sort.equals(FacetParams.FACET_SORT_INDEX)) continue; - + @SuppressWarnings("unchecked") // generic array's are annoying List[] tmp = (List[]) new List[rb.shards.length]; dff._toRefine = tmp; - + ShardFacetCount[] counts = dff.getCountSorted(); - int ntop = Math.min(counts.length, dff.limit >= 0 ? 
dff.offset + dff.limit : Integer.MAX_VALUE); - long smallestCount = counts.length == 0 ? 0 : counts[ntop-1].count; - - for (int i=0; i= 0 ? dff.offset + dff.limit : Integer.MAX_VALUE); + long smallestCount = counts.length == 0 ? 0 : counts[ntop - 1].count; + + for (int i = 0; i < counts.length; i++) { ShardFacetCount sfc = counts[i]; boolean needRefinement = false; - - if (i= smallestCount, then flag for refinement long maxCount = sfc.count; - for (int shardNum=0; shardNum= fbs.length() || !fbs.get(sfc.termNum))) { // fbs can be null if a shard request failed + // fbs can be null if a shard request failed + if (fbs != null && (sfc.termNum >= fbs.length() || !fbs.get(sfc.termNum))) { // if missing from this shard, add the max it could be - maxCount += dff.maxPossible(sfc,shardNum); + maxCount += dff.maxPossible(sfc, shardNum); } } if (maxCount >= smallestCount) { @@ -386,12 +571,16 @@ public class FacetComponent extends SearchComponent needRefinement = true; } } - + if (needRefinement) { // add a query for each shard missing the term that needs refinement - for (int shardNum=0; shardNum= fbs.length() || !fbs.get(sfc.termNum)) && dff.maxPossible(sfc,shardNum)>0) { + // fbs can be null if a shard request failed + if (fbs != null && + (sfc.termNum >= fbs.length() || !fbs.get(sfc.termNum)) && + dff.maxPossible(sfc, shardNum) > 0) { + dff.needRefinements = true; List lst = dff._toRefine[shardNum]; if (lst == null) { @@ -405,7 +594,6 @@ public class FacetComponent extends SearchComponent } } - // // The implementation below uses the first encountered shard's // facet_intervals as the basis for subsequent shards' data to be merged. private void doDistribIntervals(FacetInfo fi, NamedList facet_counts) { @@ -456,7 +644,6 @@ public class FacetComponent extends SearchComponent // // The implementation below uses the first encountered shard's // facet_ranges as the basis for subsequent shards' data to be merged. 
- private void doDistribRanges(FacetInfo fi, NamedList facet_counts) { @SuppressWarnings("unchecked") SimpleOrderedMap> facet_ranges = @@ -544,35 +731,48 @@ public class FacetComponent extends SearchComponent } } + private void doDistribPivots(ResponseBuilder rb, int shardNum, NamedList facet_counts) { + @SuppressWarnings("unchecked") + SimpleOrderedMap>> facet_pivot + = (SimpleOrderedMap>>) facet_counts.get(PIVOT_KEY); + + if (facet_pivot != null) { + for (Map.Entry>> pivot : facet_pivot) { + final String pivotName = pivot.getKey(); + PivotFacet facet = rb._facetInfo.pivotFacets.get(pivotName); + facet.mergeResponseFromShard(shardNum, rb, pivot.getValue()); + } + } + } + private void refineFacets(ResponseBuilder rb, ShardRequest sreq) { FacetInfo fi = rb._facetInfo; - - for (ShardResponse srsp: sreq.responses) { + + for (ShardResponse srsp : sreq.responses) { // int shardNum = rb.getShardNum(srsp.shard); - NamedList facet_counts = (NamedList)srsp.getSolrResponse().getResponse().get("facet_counts"); - NamedList facet_fields = (NamedList)facet_counts.get("facet_fields"); - - if (facet_fields == null) continue; // this can happen when there's an exception - - for (int i=0; i>> pivotFacetResponsesFromShard + = (NamedList>>) facetCounts.get(PIVOT_KEY); + if (null == pivotFacetResponsesFromShard) { + throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, + "No pivot refinement response from shard: " + srsp.getShard()); + } + + for (Entry>> pivotFacetResponseFromShard : pivotFacetResponsesFromShard) { + PivotFacet masterPivotFacet = fi.pivotFacets.get(pivotFacetResponseFromShard.getKey()); + masterPivotFacet.mergeResponseFromShard(shardNumber, rb, pivotFacetResponseFromShard.getValue()); + masterPivotFacet.removeAllRefinementsForShard(shardNumber); + } + } + + if (allPivotFacetsAreFullyRefined(fi)) { + for (Entry pf : fi.pivotFacets) { + pf.getValue().queuePivotRefinementRequests(); + } + reQueuePivotFacetShardRequests(rb); + } + } + + private boolean 
allPivotFacetsAreFullyRefined(FacetInfo fi) { + + for (Entry pf : fi.pivotFacets) { + if (pf.getValue().isRefinementsRequired()) { + return false; + } + } + return true; + } + + private boolean doAnyPivotFacetRefinementRequestsExistForShard(FacetInfo fi, + int shardNum) { + for (int i = 0; i < fi.pivotFacets.size(); i++) { + PivotFacet pf = fi.pivotFacets.getVal(i); + if ( ! pf.getQueuedRefinements(shardNum).isEmpty() ) { + return true; + } + } + return false; + } + + private void reQueuePivotFacetShardRequests(ResponseBuilder rb) { + for (int shardNum = 0; shardNum < rb.shards.length; shardNum++) { + if (doAnyPivotFacetRefinementRequestsExistForShard(rb._facetInfo, shardNum)) { + enqueuePivotFacetShardRequests(null, rb, shardNum); + } + } + } + @Override public void finishStage(ResponseBuilder rb) { + pivotRefinementCounter = 0; if (!rb.doFacets || rb.stage != ResponseBuilder.STAGE_GET_FIELDS) return; // wait until STAGE_GET_FIELDS // so that "result" is already stored in the response (for aesthetics) - - + FacetInfo fi = rb._facetInfo; - + NamedList facet_counts = new SimpleOrderedMap<>(); - + NamedList facet_queries = new SimpleOrderedMap<>(); - facet_counts.add("facet_queries",facet_queries); + facet_counts.add("facet_queries", facet_queries); for (QueryFacet qf : fi.queryFacets.values()) { facet_queries.add(qf.getKey(), num(qf.count)); } - + NamedList facet_fields = new SimpleOrderedMap<>(); facet_counts.add("facet_fields", facet_fields); - + for (DistribFieldFacet dff : fi.facets.values()) { - NamedList fieldCounts = new NamedList<>(); // order is more important for facets + // order is important for facet values, so use NamedList + NamedList fieldCounts = new NamedList<>(); facet_fields.add(dff.getKey(), fieldCounts); - + ShardFacetCount[] counts; boolean countSorted = dff.sort.equals(FacetParams.FACET_SORT_COUNT); if (countSorted) { @@ -613,14 +876,15 @@ public class FacetComponent extends SearchComponent counts = dff.getCountSorted(); } } else if 
(dff.sort.equals(FacetParams.FACET_SORT_INDEX)) { - counts = dff.getLexSorted(); + counts = dff.getLexSorted(); } else { // TODO: log error or throw exception? - counts = dff.getLexSorted(); + counts = dff.getLexSorted(); } - + if (countSorted) { - int end = dff.limit < 0 ? counts.length : Math.min(dff.offset + dff.limit, counts.length); - for (int i=dff.offset; i= 0 ? dff.limit : Integer.MAX_VALUE; - + // index order... - for (int i=0; i 0) { @@ -655,11 +919,30 @@ public class FacetComponent extends SearchComponent facet_counts.add("facet_ranges", fi.rangeFacets); facet_counts.add("facet_intervals", fi.intervalFacets); + if (fi.pivotFacets != null && fi.pivotFacets.size() > 0) { + facet_counts.add(PIVOT_KEY, createPivotFacetOutput(rb)); + } + rb.rsp.add("facet_counts", facet_counts); rb._facetInfo = null; // could be big, so release asap } + private SimpleOrderedMap>> createPivotFacetOutput(ResponseBuilder rb) { + + SimpleOrderedMap>> combinedPivotFacets = new SimpleOrderedMap<>(); + for (Entry entry : rb._facetInfo.pivotFacets) { + String key = entry.getKey(); + PivotFacet pivot = entry.getValue(); + List> trimmedPivots = pivot.getTrimmedPivotsAsListOfNamedLists(rb); + if (null == trimmedPivots) { + trimmedPivots = Collections.>emptyList(); + } + + combinedPivotFacets.add(key, trimmedPivots); + } + return combinedPivotFacets; + } // use tags for smaller facet counts (better back compatibility) private Number num(long val) { @@ -699,6 +982,8 @@ public class FacetComponent extends SearchComponent = new SimpleOrderedMap<>(); public SimpleOrderedMap> intervalFacets = new SimpleOrderedMap<>(); + public SimpleOrderedMap pivotFacets + = new SimpleOrderedMap<>(); void parse(SolrParams params, ResponseBuilder rb) { queryFacets = new LinkedHashMap<>(); @@ -711,71 +996,83 @@ public class FacetComponent extends SearchComponent queryFacets.put(queryFacet.getKey(), queryFacet); } } - + String[] facetFs = params.getParams(FacetParams.FACET_FIELD); if (facetFs != null) { - + for 
(String field : facetFs) { DistribFieldFacet ff = new DistribFieldFacet(rb, field); facets.put(ff.getKey(), ff); } } + + // Develop Pivot Facet Information + String[] facetPFs = params.getParams(FacetParams.FACET_PIVOT); + if (facetPFs != null) { + for (String fieldGroup : facetPFs) { + PivotFacet pf = new PivotFacet(rb, fieldGroup); + pivotFacets.add(pf.getKey(), pf); + } + } } } - + /** * This API is experimental and subject to change */ public static class FacetBase { - String facetType; // facet.field, facet.query, etc (make enum?) - String facetStr; // original parameter value of facetStr - String facetOn; // the field or query, absent localParams if appropriate - private String key; // label in the response for the result... "foo" for {!key=foo}myfield - SolrParams localParams; // any local params for the facet - + String facetType; // facet.field, facet.query, etc (make enum?) + String facetStr; // original parameter value of facetStr + String facetOn; // the field or query, absent localParams if appropriate + private String key; // label in the response for the result... 
+ // "foo" for {!key=foo}myfield + SolrParams localParams; // any local params for the facet + public FacetBase(ResponseBuilder rb, String facetType, String facetStr) { this.facetType = facetType; this.facetStr = facetStr; try { - this.localParams = QueryParsing.getLocalParams(facetStr, rb.req.getParams()); + this.localParams = QueryParsing.getLocalParams(facetStr, + rb.req.getParams()); } catch (SyntaxError e) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e); } this.facetOn = facetStr; this.key = facetStr; - + if (localParams != null) { // remove local params unless it's a query if (!facetType.equals(FacetParams.FACET_QUERY)) { facetOn = localParams.get(CommonParams.VALUE); key = facetOn; } - + key = localParams.get(CommonParams.OUTPUT_KEY, key); } } - + /** returns the key in the response that this facet will be under */ public String getKey() { return key; } public String getType() { return facetType; } } - + /** * This API is experimental and subject to change */ public static class QueryFacet extends FacetBase { public long count; - + public QueryFacet(ResponseBuilder rb, String facetStr) { super(rb, FacetParams.FACET_QUERY, facetStr); } } - + /** * This API is experimental and subject to change */ public static class FieldFacet extends FacetBase { - public String field; // the field to facet on... "myfield" for {!key=foo}myfield + public String field; // the field to facet on... 
"myfield" for + // {!key=foo}myfield public FieldType ftype; public int offset; public int limit; @@ -784,76 +1081,94 @@ public class FacetComponent extends SearchComponent public boolean missing; public String prefix; public long missingCount; - + public FieldFacet(ResponseBuilder rb, String facetStr) { super(rb, FacetParams.FACET_FIELD, facetStr); fillParams(rb, rb.req.getParams(), facetOn); } - - private void fillParams(ResponseBuilder rb, SolrParams params, String field) { + + protected void fillParams(ResponseBuilder rb, SolrParams params, String field) { this.field = field; this.ftype = rb.req.getSchema().getFieldTypeNoEx(this.field); this.offset = params.getFieldInt(field, FacetParams.FACET_OFFSET, 0); this.limit = params.getFieldInt(field, FacetParams.FACET_LIMIT, 100); Integer mincount = params.getFieldInt(field, FacetParams.FACET_MINCOUNT); - if (mincount==null) { + if (mincount == null) { Boolean zeros = params.getFieldBool(field, FacetParams.FACET_ZEROS); // mincount = (zeros!=null && zeros) ? 0 : 1; - mincount = (zeros!=null && !zeros) ? 1 : 0; + mincount = (zeros != null && !zeros) ? 1 : 0; // current default is to include zeros. } this.minCount = mincount; this.missing = params.getFieldBool(field, FacetParams.FACET_MISSING, false); // default to sorting by count if there is a limit. - this.sort = params.getFieldParam(field, FacetParams.FACET_SORT, limit>0 ? FacetParams.FACET_SORT_COUNT : FacetParams.FACET_SORT_INDEX); + this.sort = params.getFieldParam(field, FacetParams.FACET_SORT, + (limit > 0 ? 
+ FacetParams.FACET_SORT_COUNT + : FacetParams.FACET_SORT_INDEX)); if (this.sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) { this.sort = FacetParams.FACET_SORT_COUNT; } else if (this.sort.equals(FacetParams.FACET_SORT_INDEX_LEGACY)) { this.sort = FacetParams.FACET_SORT_INDEX; } - this.prefix = params.getFieldParam(field,FacetParams.FACET_PREFIX); + this.prefix = params.getFieldParam(field, FacetParams.FACET_PREFIX); } } - + /** * This API is experimental and subject to change */ + @SuppressWarnings("rawtypes") public static class DistribFieldFacet extends FieldFacet { - public List[] _toRefine; // a List of refinements needed, one for each shard. - - // SchemaField sf; // currently unneeded - + public List[] _toRefine; // a List of refinements needed, + // one for each shard. + + // SchemaField sf; // currently unneeded + // the max possible count for a term appearing on no list public long missingMaxPossible; - // the max possible count for a missing term for each shard (indexed by shardNum) + // the max possible count for a missing term for each shard (indexed by + // shardNum) public long[] missingMax; - public FixedBitSet[] counted; // a bitset for each shard, keeping track of which terms seen + // a bitset for each shard, keeping track of which terms seen + public FixedBitSet[] counted; public HashMap counts = new HashMap<>(128); public int termNum; - - public int initialLimit; // how many terms requested in first phase - public int initialMincount; // mincount param sent to each shard + + public int initialLimit; // how many terms requested in first phase + public int initialMincount; // mincount param sent to each shard + public double overrequestRatio; + public int overrequestCount; public boolean needRefinements; public ShardFacetCount[] countSorted; - + DistribFieldFacet(ResponseBuilder rb, String facetStr) { super(rb, facetStr); // sf = rb.req.getSchema().getField(field); missingMax = new long[rb.shards.length]; counted = new 
FixedBitSet[rb.shards.length]; } - + + protected void fillParams(ResponseBuilder rb, SolrParams params, String field) { + super.fillParams(rb, params, field); + this.overrequestRatio + = params.getFieldDouble(field, FacetParams.FACET_OVERREQUEST_RATIO, 1.5); + this.overrequestCount + = params.getFieldInt(field, FacetParams.FACET_OVERREQUEST_COUNT, 10); + + } + void add(int shardNum, NamedList shardCounts, int numRequested) { // shardCounts could be null if there was an exception int sz = shardCounts == null ? 0 : shardCounts.size(); int numReceived = sz; - - FixedBitSet terms = new FixedBitSet(termNum+sz); - + + FixedBitSet terms = new FixedBitSet(termNum + sz); + long last = 0; - for (int i=0; i() { @Override public int compare(ShardFacetCount o1, ShardFacetCount o2) { @@ -894,9 +1210,10 @@ public class FacetComponent extends SearchComponent countSorted = arr; return arr; } - + public ShardFacetCount[] getCountSorted() { - ShardFacetCount[] arr = counts.values().toArray(new ShardFacetCount[counts.size()]); + ShardFacetCount[] arr + = counts.values().toArray(new ShardFacetCount[counts.size()]); Arrays.sort(arr, new Comparator() { @Override public int compare(ShardFacetCount o1, ShardFacetCount o2) { @@ -908,28 +1225,29 @@ public class FacetComponent extends SearchComponent countSorted = arr; return arr; } - + // returns the max possible value this ShardFacetCount could have for this shard // (assumes the shard did not report a count for this value) long maxPossible(ShardFacetCount sfc, int shardNum) { return missingMax[shardNum]; // TODO: could store the last term in the shard to tell if this term - // comes before or after it. If it comes before, we could subtract 1 + // comes before or after it. If it comes before, we could subtract 1 } } - + /** * This API is experimental and subject to change */ public static class ShardFacetCount { public String name; - public String indexed; // the indexed form of the name... used for comparisons. 
+ // the indexed form of the name... used for comparisons + public String indexed; public long count; - public int termNum; // term number starting at 0 (used in bit arrays) - + public int termNum; // term number starting at 0 (used in bit arrays) + @Override public String toString() { - return "{term="+name+",termNum="+termNum+",count="+count+"}"; + return "{term=" + name + ",termNum=" + termNum + ",count=" + count + "}"; } } } diff --git a/solr/core/src/java/org/apache/solr/handler/component/PivotFacet.java b/solr/core/src/java/org/apache/solr/handler/component/PivotFacet.java new file mode 100644 index 00000000000..a823a8f12a4 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/handler/component/PivotFacet.java @@ -0,0 +1,164 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.handler.component; + +import java.util.ArrayList; +import java.util.BitSet; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.solr.common.params.FacetParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.handler.component.FacetComponent.FacetBase; + +/** + * Models a single instance of a "pivot" specified by a {@link FacetParams#FACET_PIVOT} + * param, which may contain multiple nested fields. + * + * This class is also used to coordinate the refinement requests needed from various + * shards when doing processing a distributed request + */ +public class PivotFacet extends FacetBase { + + /** + * Local param used to indicate that refinements are requried on a pivot. Should + * also be used as the prefix for contatenanting with the value to determine the + * name of the multi-valued param that will contain all of the values needed for + * refinement. + */ + public static final String REFINE_PARAM = "fpt"; + + // TODO: is this really needed? can't we just loop over 0<=i> queuedRefinements = new HashMap<>(); + + // if null, then either we haven't collected any responses from shards + // or all the shards that have responded so far haven't had any values for the top + // field of this pivot. 
May be null forever if no doc in any shard has a value + // for the top field of the pivot + private PivotFacetField pivotFacetField; + + public PivotFacet(ResponseBuilder rb, String facetStr) { + super(rb, FacetParams.FACET_PIVOT, facetStr); + } + + /** + * Tracks that the specified shard needs to be asked to refine the specified + * {@link PivotFacetValue} + * + * @see #getQueuedRefinements + */ + public void addRefinement(int shardNumber, PivotFacetValue value) { + + if (!queuedRefinements.containsKey(shardNumber)) { + queuedRefinements.put(shardNumber, new ArrayList()); + } + + queuedRefinements.get(shardNumber).add(value); + } + + /** + * An immutable List of the {@link PivotFacetValue}s that need to be + * refined for this pivot. Once these refinements have been processed, + * the caller should clear them using {@link #removeAllRefinementsForShard} + * + * @see #addRefinement + * @see #removeAllRefinementsForShard + * @return a list of the values to refine, or an empty list. + */ + public List getQueuedRefinements(int shardNumber) { + List raw = queuedRefinements.get(shardNumber); + if (null == raw) { + raw = Collections.emptyList(); + } + return Collections.unmodifiableList(raw); + } + + /** + * Clears the list of queued refinements for the specified shard + * + * @see #addRefinement + * @see #getQueuedRefinements + */ + public void removeAllRefinementsForShard(int shardNumber) { + queuedRefinements.remove(shardNumber); + } + + /** + * If true, then additional refinement requests are needed to flesh out the correct + * counts for this Pivot + * + * @see #getQueuedRefinements + */ + public boolean isRefinementsRequired() { + return ! queuedRefinements.isEmpty(); + } + + /** + * A recursive method for generating NamedLists for this pivot + * suitable for including in a pivot facet response to the original distributed request. 
+ * + * @see PivotFacetField#trim + * @see PivotFacetField#convertToListOfNamedLists + */ + public List> getTrimmedPivotsAsListOfNamedLists(ResponseBuilder rb) { + if (null == pivotFacetField) { + // no values in any shard for the top field of this pivot + return Collections.>emptyList(); + } + + pivotFacetField.trim(); + return pivotFacetField.convertToListOfNamedLists(); + } + + /** + * A recursive method for determining which {@link PivotFacetValue}s need to be + * refined for this pivot. + * + * @see PivotFacetField#queuePivotRefinementRequests + */ + public void queuePivotRefinementRequests() { + if (null == pivotFacetField) return; // NOOP + + pivotFacetField.sort(); + pivotFacetField.queuePivotRefinementRequests(this); + } + + /** + * Recursively merges the response from the specified shard, tracking the known shards. + * + * @see PivotFacetField#contributeFromShard + * @see PivotFacetField#createFromListOfNamedLists + */ + public void mergeResponseFromShard(int shardNumber, ResponseBuilder rb, List> response) { + + knownShards.set(shardNumber); + if (pivotFacetField == null) { + pivotFacetField = PivotFacetField.createFromListOfNamedLists(shardNumber, rb, null, response); + } else { + pivotFacetField.contributeFromShard(shardNumber, rb, response); + } + } + + public String toString() { + return "[" + facetStr + "] | " + this.getKey(); + } +} diff --git a/solr/core/src/java/org/apache/solr/handler/component/PivotFacetField.java b/solr/core/src/java/org/apache/solr/handler/component/PivotFacetField.java new file mode 100644 index 00000000000..6ddd9884a46 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/handler/component/PivotFacetField.java @@ -0,0 +1,386 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.handler.component; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Locale; +import java.util.Map; + +import org.apache.solr.common.params.FacetParams; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.util.NamedList; + + +/** + * Models a single field somewhere in a hierarchy of fields as part of a pivot facet. + * This pivot field contains {@link PivotFacetValue}s which may each contain a nested + * {@link PivotFacetField} child. This PivotFacetField may itself + * be a child of a {@link PivotFacetValue} parent. 
+ * + * @see PivotFacetValue + * @see PivotFacetFieldValueCollection + */ +@SuppressWarnings("rawtypes") +public class PivotFacetField { + + public final String field; + + // null if this is a top level pivot, + // otherwise the value of the parent pivot we are nested under + public final PivotFacetValue parentValue; + + public final PivotFacetFieldValueCollection valueCollection; + + // Facet parameters relating to this field + private final int facetFieldLimit; + private final int facetFieldMinimumCount; + private final int facetFieldOffset; + private final String facetFieldSort; + + private final Map numberOfValuesContributedByShard = new HashMap<>(); + private final Map shardLowestCount = new HashMap<>(); + + private boolean needRefinementAtThisLevel = true; + + private PivotFacetField(ResponseBuilder rb, PivotFacetValue parent, String fieldName) { + + field = fieldName; + parentValue = parent; + + // facet params + SolrParams parameters = rb.req.getParams(); + facetFieldMinimumCount = parameters.getFieldInt(field, FacetParams.FACET_PIVOT_MINCOUNT, 1); + facetFieldOffset = parameters.getFieldInt(field, FacetParams.FACET_OFFSET, 0); + facetFieldLimit = parameters.getFieldInt(field, FacetParams.FACET_LIMIT, 100); + String defaultSort = (facetFieldLimit > 0) ? 
FacetParams.FACET_SORT_COUNT : FacetParams.FACET_SORT_INDEX; + facetFieldSort = parameters.getFieldParam(field, FacetParams.FACET_SORT, defaultSort); + + valueCollection = new PivotFacetFieldValueCollection(facetFieldMinimumCount, facetFieldOffset, facetFieldLimit, facetFieldSort); + + if ( (facetFieldLimit < 0) || + // TODO: possible refinement issue if limit=0 & mincount=0 & missing=true + // (ie: we only want the missing count for this field) + (facetFieldLimit <= 0 && facetFieldMinimumCount == 0) || + (facetFieldSort.equals(FacetParams.FACET_SORT_INDEX) && facetFieldMinimumCount <= 0) + ) { + // in any of these cases, there's no need to refine this level of the pivot + needRefinementAtThisLevel = false; + } + } + + /** + * A recursive method that walks up the tree of pivot fields/values to build + * a list of String representations of the values that lead down to this + * PivotFacetField. + * + * @return A mutable List of the pivot values leading down to this pivot field, + * will never be null but may contain nulls and may be empty if this is a top + * level pivot field + * @see PivotFacetValue#getValuePath + */ + public List getValuePath() { + if (null != parentValue) { + return parentValue.getValuePath(); + } + return new ArrayList(3); + } + + /** + * A recursive method to construct a new PivotFacetField object from + * the contents of the {@link NamedList}s provided by the specified shard, relative + * to a parent value (if this is not the top field in the pivot hierarchy) + * + * The associated child {@link PivotFacetValue}s will be recursively built as well. 
+ * + * @see PivotFacetValue#createFromNamedList + * @param shardNumber the id of the shard that provided this data + * @param rb The response builder of the current request + * @param owner the parent value in the current pivot (may be null) + * @param pivotValues the data from the specified shard for this pivot field, may be null or empty + * @return the new PivotFacetField, null if pivotValues is null or empty. + */ + public static PivotFacetField createFromListOfNamedLists(int shardNumber, ResponseBuilder rb, PivotFacetValue owner, List> pivotValues) { + + if (null == pivotValues || pivotValues.size() <= 0) return null; + + NamedList firstValue = pivotValues.get(0); + PivotFacetField createdPivotFacetField + = new PivotFacetField(rb, owner, PivotFacetHelper.getField(firstValue)); + + int lowestCount = Integer.MAX_VALUE; + + for (NamedList pivotValue : pivotValues) { + + lowestCount = Math.min(lowestCount, PivotFacetHelper.getCount(pivotValue)); + + PivotFacetValue newValue = PivotFacetValue.createFromNamedList + (shardNumber, rb, createdPivotFacetField, pivotValue); + createdPivotFacetField.valueCollection.add(newValue); + } + + createdPivotFacetField.shardLowestCount.put(shardNumber, lowestCount); + createdPivotFacetField.numberOfValuesContributedByShard.put(shardNumber, pivotValues.size()); + + return createdPivotFacetField; + } + + /** + * Destructive method that recursively prunes values from the data structure + * based on the counts for those values and the effective sort, mincount, limit, + * and offset being used for each field. + *

+ * This method should only be called after all refinement is completed just prior + * calling {@link #convertToListOfNamedLists} + *

+ * + * @see PivotFacet#getTrimmedPivotsAsListOfNamedLists + * @see PivotFacetFieldValueCollection#trim + */ + public void trim() { + // SOLR-6331... + // + // we can probably optimize the memory usage by trimming each level of the pivot once + // we know we've fully refined the values at that level + // (ie: fold this logic into refineNextLevelOfFacets) + this.valueCollection.trim(); + } + + /** + * Recursively sorts the collection of values associated with this field, and + * any sub-pivots those values have. + * + * @see FacetParams#FACET_SORT + * @see PivotFacetFieldValueCollection#sort + */ + public void sort() { + this.valueCollection.sort(); + } + + /** + * A recursive method for generating NamedLists from this field + * suitable for including in a pivot facet response to the original distributed request. + */ + public List> convertToListOfNamedLists() { + + List> convertedPivotList = null; + + if (valueCollection.size() > 0) { + convertedPivotList = new LinkedList<>(); + for (PivotFacetValue pivot : valueCollection) + convertedPivotList.add(pivot.convertToNamedList()); + } + + return convertedPivotList; + } + + /** + * A recursive method for determining which {@link PivotFacetValue}s need to be + * refined for this pivot. + * + * @see PivotFacet#queuePivotRefinementRequests + */ + public void queuePivotRefinementRequests(PivotFacet pf) { + + if (needRefinementAtThisLevel && ! 
valueCollection.getExplicitValuesList().isEmpty()) { + + if (FacetParams.FACET_SORT_COUNT.equals(facetFieldSort)) { + // we only need to things that are currently in our limit, + // or might be in our limit if we get increased counts from shards that + // didn't include this value the first time + final int indexOfCountThreshold + = Math.min(valueCollection.getExplicitValuesListSize(), + facetFieldOffset + facetFieldLimit) - 1; + final int countThreshold = valueCollection.getAt(indexOfCountThreshold).getCount(); + + int positionInResults = 0; + + for (PivotFacetValue value : valueCollection.getExplicitValuesList()) { + if (positionInResults <= indexOfCountThreshold) { + // This element is within the top results, so we need to get information + // from all of the shards. + processDefiniteCandidateElement(pf, value); + } else { + // This element is not within the top results, but may still need to be refined. + processPossibleCandidateElement(pf, value, countThreshold); + } + + positionInResults++; + } + } else { // FACET_SORT_INDEX + // everything needs refined to see what the per-shard mincount excluded + for (PivotFacetValue value : valueCollection.getExplicitValuesList()) { + processDefiniteCandidateElement(pf, value); + } + } + + needRefinementAtThisLevel = false; + } + + if ( pf.isRefinementsRequired() ) { + // if any refinements are needed, then we need to stop and wait to + // see how the picture may change before drilling down to child pivot fields + return; + } else { + // Since outstanding requests have been filled, then we can drill down + // to the next deeper level and check it. + refineNextLevelOfFacets(pf); + } + } + + /** + * Adds refinement requests for the value for each shard that has not already contributed + * a count for this value. + */ + private void processDefiniteCandidateElement(PivotFacet pf, PivotFacetValue value) { + + for (int shard = pf.knownShards.nextSetBit(0); + 0 <= shard; + shard = pf.knownShards.nextSetBit(shard+1)) { + if ( ! 
value.shardHasContributed(shard) ) { + if ( // if we're doing index order, we need to refine anything + // (mincount may have excluded from a shard) + FacetParams.FACET_SORT_INDEX.equals(facetFieldSort) + // if we are doing count order, we need to refine if the limit was hit + // (if it not, the shard doesn't have the value or it would have returned already) + || numberOfValuesContributedByShardWasLimitedByFacetFieldLimit(shard) ) { + + pf.addRefinement(shard, value); + } + } + } + } + + private boolean numberOfValuesContributedByShardWasLimitedByFacetFieldLimit(int shardNumber) { + return facetFieldLimit <= numberOfValuesContributedByShard(shardNumber); + } + + private int numberOfValuesContributedByShard(final int shardNumber) { + return numberOfValuesContributedByShard.containsKey(shardNumber) + ? numberOfValuesContributedByShard.get(shardNumber) + : 0; + } + + /** + * Checks the {@link #lowestCountContributedbyShard} for each shard, combined with the + * counts we already know, to see if this value is a viable candidate -- + * Does not make sense when using {@link FacetParams#FACET_SORT_INDEX} + * + * @see #processDefiniteCandidateElement + */ + private void processPossibleCandidateElement(PivotFacet pf, PivotFacetValue value, + final int refinementThreshold) { + + assert FacetParams.FACET_SORT_COUNT.equals(facetFieldSort) + : "Method only makes sense when sorting by count"; + + int maxPossibleCountAfterRefinement = value.getCount(); + + for (int shard = pf.knownShards.nextSetBit(0); + 0 <= shard; + shard = pf.knownShards.nextSetBit(shard+1)) { + if ( ! value.shardHasContributed(shard) ) { + maxPossibleCountAfterRefinement += lowestCountContributedbyShard(shard); + } + } + + if (refinementThreshold <= maxPossibleCountAfterRefinement) { + processDefiniteCandidateElement(pf, value); + } + } + + private int lowestCountContributedbyShard(int shardNumber) { + return (shardLowestCount.containsKey(shardNumber)) + ? 
shardLowestCount.get(shardNumber) + : 0; + } + + private void refineNextLevelOfFacets(PivotFacet pf) { + + List explicitValsToRefine + = valueCollection.getNextLevelValuesToRefine(); + + for (PivotFacetValue value : explicitValsToRefine) { + if (null != value.getChildPivot()) { + value.getChildPivot().queuePivotRefinementRequests(pf); + } + } + + PivotFacetValue missing = this.valueCollection.getMissingValue(); + if(null != missing && null != missing.getChildPivot()) { + missing.getChildPivot().queuePivotRefinementRequests(pf); + } + } + + private void incrementShardValueCount(int shardNumber) { + if (!numberOfValuesContributedByShard.containsKey(shardNumber)) { + numberOfValuesContributedByShard.put(shardNumber, 1); + } else { + numberOfValuesContributedByShard.put(shardNumber, numberOfValuesContributedByShard.get(shardNumber)+1); + } + } + + private void contributeValueFromShard(int shardNumber, ResponseBuilder rb, NamedList shardValue) { + + incrementShardValueCount(shardNumber); + + Comparable value = PivotFacetHelper.getValue(shardValue); + int count = PivotFacetHelper.getCount(shardValue); + + // We're changing values so we most mark the collection as dirty + valueCollection.markDirty(); + + if ( ( !shardLowestCount.containsKey(shardNumber) ) + || shardLowestCount.get(shardNumber) > count) { + shardLowestCount.put(shardNumber, count); + } + + PivotFacetValue facetValue = valueCollection.get(value); + if (null == facetValue) { + // never seen before, we need to create it from scratch + facetValue = PivotFacetValue.createFromNamedList(shardNumber, rb, this, shardValue); + this.valueCollection.add(facetValue); + } else { + facetValue.mergeContributionFromShard(shardNumber, rb, shardValue); + } + } + + /** + * Recursively merges the contributions from the specified shard for each + * {@link PivotFacetValue} represended in the response. 
+ * + * @see PivotFacetValue#mergeContributionFromShard + * @param shardNumber the id of the shard that provided this data + * @param rb The response builder of the current request + * @param response the data from the specified shard for this pivot field, may be null + */ + public void contributeFromShard(int shardNumber, ResponseBuilder rb, List> response) { + if (null == response) return; + + for (NamedList responseValue : response) { + contributeValueFromShard(shardNumber, rb, responseValue); + } + } + + public String toString(){ + return String.format(Locale.ROOT, "P:%s F:%s V:%s", + parentValue, field, valueCollection); + } +} diff --git a/solr/core/src/java/org/apache/solr/handler/component/PivotFacetFieldValueCollection.java b/solr/core/src/java/org/apache/solr/handler/component/PivotFacetFieldValueCollection.java new file mode 100644 index 00000000000..33a0ef9c9c5 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/handler/component/PivotFacetFieldValueCollection.java @@ -0,0 +1,342 @@ +package org.apache.solr.handler.component; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Locale; +import java.util.Map; + +import org.apache.solr.common.params.FacetParams; + +/** + * Emcapsulates a collection of {@link PivotFacetValue}s associated with a + * {@link PivotFacetField} withs pecial tracking of a {@link PivotFacetValue} + * corrisponding to the null value when {@link FacetParams#FACET_MISSING} + * is used. + * + * @see #markDirty + * @see PivotFacetField + * @see PivotFacetValue + */ +@SuppressWarnings("rawtypes") +public class PivotFacetFieldValueCollection implements Iterable { + private List explicitValues; + private PivotFacetValue missingValue; + private Map valuesMap; + private boolean dirty = true; + + //Facet parameters relating to this field + private final int facetFieldMinimumCount; + private final int facetFieldOffset; + private final int facetFieldLimit; + private final String facetFieldSort; + + + public PivotFacetFieldValueCollection(int minCount, int offset, int limit, String fieldSort){ + this.explicitValues = new ArrayList<>(); + this.valuesMap = new HashMap<>(); + this.facetFieldMinimumCount = minCount; + this.facetFieldOffset = offset; + this.facetFieldLimit = limit; + this.facetFieldSort = fieldSort; + } + + /** + * Indicates that the values in this collection have been modified by the caller. + * + * Any caller that manipulates the {@link PivotFacetValue}s contained in this collection + * must call this method after doing so. + */ + public void markDirty() { + dirty = true; + } + + /** + * The {@link PivotFacetValue} with corisponding to a a value of + * null when {@link FacetParams#FACET_MISSING} is used. + * + * @return the appropriate PivotFacetValue object, may be null + * if we "missing" is not in use, or if it does not meat the mincount. 
+ */ + public PivotFacetValue getMissingValue(){ + return missingValue; + } + + /** + * Read-Only access to the Collection of {@link PivotFacetValue}s corrisponding to + * non-missing values. + * + * @see #getMissingValue + */ + public List getExplicitValuesList() { + return Collections.unmodifiableList(explicitValues); + } + + /** + * Size of {@link #getExplicitValuesList} + */ + public int getExplicitValuesListSize() { + return this.explicitValues.size(); + } + + /** + * Total number of {@link PivotFacetValue}s, including the "missing" value if used. + * + * @see #getMissingValue + * @see #getExplicitValuesList + */ + public int size() { + return this.getExplicitValuesListSize() + (this.missingValue == null ? 0 : 1); + } + + /** + * Returns the appropriate sub-list of the explicit values that need to be refined, + * based on the {@link FacetParams#FACET_OFFSET} & {@link FacetParams#FACET_LIMIT} + * for this field. + * + * @see #getExplicitValuesList + * @see List#subList + */ + public List getNextLevelValuesToRefine() { + final int numRefinableValues = getExplicitValuesListSize(); + if (facetFieldOffset < numRefinableValues) { + final int offsetPlusCount = (facetFieldLimit >= 0) + ? Math.min(facetFieldLimit + facetFieldOffset, numRefinableValues) + : numRefinableValues; + return getExplicitValuesList().subList(facetFieldOffset, offsetPlusCount); + } else { + return Collections.emptyList(); + } + } + + /** + * Fast lookup to retrieve a {@link PivotFacetValue} from this collection if it + * exists + * + * @param value of the PivotFacetValue to lookup, if + * null this returns the same as {@link #getMissingValue} + * @return the corrisponding PivotFacetValue or null if there is + * no PivotFacetValue in this collection corrisponding to + * the specified value. 
+ */ + public PivotFacetValue get(Comparable value){ + return valuesMap.get(value); + } + + /** + * Fetchs a {@link PivotFacetValue} from this collection via the index, may not + * be used to fetch the PivotFacetValue corrisponding to the missing-value. + * + * @see #getExplicitValuesList + * @see List#get(int) + * @see #getMissingValue + */ + public PivotFacetValue getAt(int index){ + return explicitValues.get(index); + } + + /** + * Adds a {@link PivotFacetValue} to this collection -- callers must not use this + * method if a {@link PivotFacetValue} with the same value already exists in this collection + */ + public void add(PivotFacetValue pfValue) { + Comparable val = pfValue.getValue(); + assert ! this.valuesMap.containsKey(val) + : "Must not add duplicate PivotFacetValue with redundent inner value"; + + dirty = true; + if(null == val) { + this.missingValue = pfValue; + } else { + this.explicitValues.add(pfValue); + } + this.valuesMap.put(val, pfValue); + } + + + /** + * Destructive method that recursively prunes values from the data structure + * based on the counts for those values and the effective sort, mincount, limit, + * and offset being used for each field. + *

+ * This method should only be called after all refinement is completed. + *

+ * + * @see PivotFacetField#trim + * @see PivotFacet#getTrimmedPivotsAsListOfNamedLists + */ + public void trim() { // NOTE: destructive + // TODO: see comment in PivotFacetField about potential optimization + // (ie: trim as we refine) + trimNonNullValues(); + trimNullValue(); + } + + private void trimNullValue(){ + if (missingValue == null) { + return; + } + + if (missingValue.getCount() >= facetFieldMinimumCount){ + if (null != missingValue.getChildPivot()) { + missingValue.getChildPivot().trim(); + } + } else { // missing count less than mincount + missingValue = null; + } + } + + private void trimNonNullValues(){ + if (explicitValues != null && explicitValues.size() > 0) { + + sort(); + + ArrayList trimmedValues = new ArrayList<>(); + + int facetsSkipped = 0; + + for (PivotFacetValue pivotValue : explicitValues) { + + if (pivotValue.getCount() >= facetFieldMinimumCount) { + if (facetsSkipped >= facetFieldOffset) { + trimmedValues.add(pivotValue); + if (pivotValue.getChildPivot() != null) { + pivotValue.getChildPivot().trim(); + } + if (facetFieldLimit > 0 && trimmedValues.size() >= facetFieldLimit) { + break; + } + } else { + facetsSkipped++; + } + } + } + + explicitValues = trimmedValues; + valuesMap.clear(); + } + } + + /** + * Sorts the collection and recursively sorts the collections assocaited with + * any sub-pivots. 
+ * + * @see FacetParams#FACET_SORT + * @see PivotFacetField#sort + */ + public void sort() { + + if (dirty) { + if (facetFieldSort.equals(FacetParams.FACET_SORT_COUNT)) { + Collections.sort(this.explicitValues, new PivotFacetCountComparator()); + } else if (facetFieldSort.equals(FacetParams.FACET_SORT_INDEX)) { + Collections.sort(this.explicitValues, new PivotFacetValueComparator()); + } + dirty = false; + } + + for (PivotFacetValue value : this.explicitValues) + if (value.getChildPivot() != null) { + value.getChildPivot().sort(); + } + + if (missingValue != null && missingValue.getChildPivot() != null) { + missingValue.getChildPivot().sort(); + } + } + + /** + * Iterator over all elements in this Collection, including the result of + * {@link #getMissingValue} as the last element (if it exists) + */ + @Override + public Iterator iterator() { + Iterator it = new Iterator() { + private final Iterator valuesIterator = explicitValues.iterator(); + private boolean shouldGiveMissingValue = (missingValue != null); + + @Override + public boolean hasNext() { + return valuesIterator.hasNext() || shouldGiveMissingValue; + } + + @Override + public PivotFacetValue next() { + while(valuesIterator.hasNext()){ + return (PivotFacetValue) valuesIterator.next(); + } + //else + if(shouldGiveMissingValue){ + shouldGiveMissingValue = false; + return missingValue; + } + return null; + } + + @Override + public void remove() { + throw new UnsupportedOperationException("Can't remove from this iterator"); + } + }; + return it; + } + + /** Sorts {@link PivotFacetValue} instances by their count */ + public class PivotFacetCountComparator implements Comparator { + public int compare(PivotFacetValue left, PivotFacetValue right) { + int countCmp = right.getCount() - left.getCount(); + return (0 != countCmp) ? 
countCmp : + compareWithNullLast(left.getValue(), right.getValue()); + } + } + + /** Sorts {@link PivotFacetValue} instances by their value */ + public class PivotFacetValueComparator implements Comparator { + public int compare(PivotFacetValue left, PivotFacetValue right) { + return compareWithNullLast(left.getValue(), right.getValue()); + } + } + + /** + * A helper method for use in Comparator classes where object properties + * are Comparable but may be null. + */ + static int compareWithNullLast(final Comparable o1, final Comparable o2) { + if (null == o1) { + if (null == o2) { + return 0; + } + return 1; // o1 is null, o2 is not + } + if (null == o2) { + return -1; // o2 is null, o1 is not + } + return o1.compareTo(o2); + } + + public String toString(){ + return String.format(Locale.ROOT, "Values:%s | Missing:%s ", explicitValues, missingValue); + } +} + + diff --git a/solr/core/src/java/org/apache/solr/handler/component/PivotFacetHelper.java b/solr/core/src/java/org/apache/solr/handler/component/PivotFacetHelper.java index 262e46db850..94ade7a0b19 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/PivotFacetHelper.java +++ b/solr/core/src/java/org/apache/solr/handler/component/PivotFacetHelper.java @@ -17,254 +17,105 @@ package org.apache.solr.handler.component; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Deque; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; - -import org.apache.lucene.index.Term; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.TermRangeQuery; -import org.apache.lucene.util.BytesRefBuilder; -import org.apache.solr.common.SolrException; +import org.apache.solr.util.PivotListEntry; import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.params.FacetParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; -import 
org.apache.solr.common.util.SimpleOrderedMap; -import org.apache.solr.request.SimpleFacets; -import org.apache.solr.request.SolrQueryRequest; -import org.apache.solr.schema.FieldType; -import org.apache.solr.schema.SchemaField; -import org.apache.solr.search.DocSet; -import org.apache.solr.search.SolrIndexSearcher; -import org.apache.solr.search.SyntaxError; +import org.apache.solr.common.util.StrUtils; -/** - * @since solr 4.0 - */ -public class PivotFacetHelper extends SimpleFacets -{ +import java.util.ArrayList; +import java.util.List; +import java.util.Collections; - protected int minMatch; +public class PivotFacetHelper { - public PivotFacetHelper(SolrQueryRequest req, DocSet docs, SolrParams params, ResponseBuilder rb) { - super(req, docs, params, rb); - minMatch = params.getInt( FacetParams.FACET_PIVOT_MINCOUNT, 1 ); - } + /** + * Encodes a value path as a string for the purposes of a refinement request + * + * @see PivotFacetValue#getValuePath + * @see #decodeRefinementValuePath + */ + public static String encodeRefinementValuePath(List values) { + // HACK: prefix flag every value to account for empty string vs null + // NOTE: even if we didn't have to worry about null's smartSplit is stupid about + // pruning empty strings from list + // "^" prefix = null + // "~" prefix = not null, may be empty string - public SimpleOrderedMap>> process(String[] pivots) throws IOException { - if (!rb.doFacets || pivots == null) - return null; + assert null != values; - SimpleOrderedMap>> pivotResponse = new SimpleOrderedMap<>(); - for (String pivot : pivots) { - //ex: pivot == "features,cat" or even "{!ex=mytag}features,cat" - try { - this.parseParams(FacetParams.FACET_PIVOT, pivot); - } catch (SyntaxError e) { - throw new SolrException(ErrorCode.BAD_REQUEST, e); + // special case: empty list => empty string + if (values.isEmpty()) { return ""; } + + + StringBuilder out = new StringBuilder(); + for (String val : values) { + if (null == val) { + out.append('^'); + } else { 
+ out.append('~'); + StrUtils.appendEscapedTextToBuilder(out, val, ','); } - pivot = facetValue;//facetValue potentially modified from parseParams() - - String[] fields = pivot.split(","); - - if( fields.length < 2 ) { - throw new SolrException( ErrorCode.BAD_REQUEST, - "Pivot Facet needs at least two fields: "+pivot ); - } - - String field = fields[0]; - String subField = fields[1]; - Deque fnames = new LinkedList<>(); - for( int i=fields.length-1; i>1; i-- ) { - fnames.push( fields[i] ); - } - - NamedList superFacets = this.getTermCounts(field); - - //super.key usually == pivot unless local-param 'key' used - pivotResponse.add(key, doPivots(superFacets, field, subField, fnames, docs)); + out.append(','); } - return pivotResponse; + out.deleteCharAt(out.length()-1); // prune the last seperator + return out.toString(); + // return StrUtils.join(values, ','); } /** - * Recursive function to do all the pivots + * Decodes a value path string specified for refinement. + * + * @see #encodeRefinementValuePath */ - protected List> doPivots(NamedList superFacets, - String field, String subField, Deque fnames, - DocSet docs) throws IOException - { - SolrIndexSearcher searcher = rb.req.getSearcher(); - // TODO: optimize to avoid converting to an external string and then having to convert back to internal below - SchemaField sfield = searcher.getSchema().getField(field); - FieldType ftype = sfield.getType(); + public static List decodeRefinementValuePath(String valuePath) { + List rawvals = StrUtils.splitSmart(valuePath, ",", true); + // special case: empty list => empty string + if (rawvals.isEmpty()) return rawvals; - String nextField = fnames.poll(); - - List> values = new ArrayList<>( superFacets.size() ); - for (Map.Entry kv : superFacets) { - // Only sub-facet if parent facet has positive count - still may not be any values for the sub-field though - if (kv.getValue() >= minMatch) { - - // may be null when using facet.missing - final String fieldValue = kv.getKey(); - - 
// don't reuse the same BytesRef each time since we will be - // constructing Term objects used in TermQueries that may be cached. - BytesRefBuilder termval = null; - - SimpleOrderedMap pivot = new SimpleOrderedMap<>(); - pivot.add( "field", field ); - if (null == fieldValue) { - pivot.add( "value", null ); - } else { - termval = new BytesRefBuilder(); - ftype.readableToIndexed(fieldValue, termval); - pivot.add( "value", ftype.toObject(sfield, termval.get()) ); - } - pivot.add( "count", kv.getValue() ); - - if( subField == null ) { - values.add( pivot ); - } - else { - DocSet subset = null; - if ( null == termval ) { - DocSet hasVal = searcher.getDocSet - (new TermRangeQuery(field, null, null, false, false)); - subset = docs.andNot(hasVal); - } else { - Query query = new TermQuery(new Term(field, termval.get())); - subset = searcher.getDocSet(query, docs); - } - super.docs = subset;//used by getTermCounts() - - NamedList nl = this.getTermCounts(subField); - if (nl.size() >= minMatch) { - pivot.add( "pivot", doPivots( nl, subField, nextField, fnames, subset) ); - values.add( pivot ); // only add response if there are some counts - } - } + List out = new ArrayList(rawvals.size()); + for (String raw : rawvals) { + assert 0 < raw.length(); + if ('^' == raw.charAt(0)) { + assert 1 == raw.length(); + out.add(null); + } else { + assert '~' == raw.charAt(0); + out.add(raw.substring(1)); } } - - // put the field back on the list - fnames.push( nextField ); - return values; + + return out; + } + + /** @see PivotListEntry#VALUE */ + public static Comparable getValue(NamedList pivotList) { + return (Comparable) PivotFacetHelper.retrieve(PivotListEntry.VALUE, + pivotList); + } + + /** @see PivotListEntry#FIELD */ + public static String getField(NamedList pivotList) { + return (String) PivotFacetHelper.retrieve(PivotListEntry.FIELD, pivotList); + } + + /** @see PivotListEntry#COUNT */ + public static Integer getCount(NamedList pivotList) { + return (Integer) 
PivotFacetHelper.retrieve(PivotListEntry.COUNT, pivotList); + } + + /** @see PivotListEntry#PIVOT */ + public static List> getPivots(NamedList pivotList) { + int pivotIdx = pivotList.indexOf(PivotListEntry.PIVOT.getName(), 0); + if (pivotIdx > -1) { + return (List>) pivotList.getVal(pivotIdx); + } + return null; + } + + private static Object retrieve(PivotListEntry entryToGet, NamedList pivotList) { + return pivotList.get(entryToGet.getName(), entryToGet.getIndex()); } -// TODO: This is code from various patches to support distributed search. -// Some parts may be helpful for whoever implements distributed search. -// -// @Override -// public int distributedProcess(ResponseBuilder rb) throws IOException { -// if (!rb.doFacets) { -// return ResponseBuilder.STAGE_DONE; -// } -// -// if (rb.stage == ResponseBuilder.STAGE_GET_FIELDS) { -// SolrParams params = rb.req.getParams(); -// String[] pivots = params.getParams(FacetParams.FACET_PIVOT); -// for ( ShardRequest sreq : rb.outgoing ) { -// if (( sreq.purpose & ShardRequest.PURPOSE_GET_FIELDS ) != 0 -// && sreq.shards != null && sreq.shards.length == 1 ) { -// sreq.params.set( FacetParams.FACET, "true" ); -// sreq.params.set( FacetParams.FACET_PIVOT, pivots ); -// sreq.params.set( FacetParams.FACET_PIVOT_MINCOUNT, 1 ); // keep this at 1 regardless so that it accumulates everything -// } -// } -// } -// return ResponseBuilder.STAGE_DONE; -// } -// -// @Override -// public void handleResponses(ResponseBuilder rb, ShardRequest sreq) { -// if (!rb.doFacets) return; -// -// -// if ((sreq.purpose & ShardRequest.PURPOSE_GET_FACETS)!=0) { -// SimpleOrderedMap>> tf = rb._pivots; -// if ( null == tf ) { -// tf = new SimpleOrderedMap>>(); -// rb._pivots = tf; -// } -// for (ShardResponse srsp: sreq.responses) { -// int shardNum = rb.getShardNum(srsp.getShard()); -// -// NamedList facet_counts = (NamedList)srsp.getSolrResponse().getResponse().get("facet_counts"); -// -// // handle facet trees from shards -// SimpleOrderedMap>> 
shard_pivots = -// (SimpleOrderedMap>>)facet_counts.get( PIVOT_KEY ); -// -// if ( shard_pivots != null ) { -// for (int j=0; j< shard_pivots.size(); j++) { -// // TODO -- accumulate the results from each shard -// // The following code worked to accumulate facets for an previous -// // two level patch... it is here for reference till someone can upgrade -// /** -// String shard_tree_name = (String) shard_pivots.getName( j ); -// SimpleOrderedMap shard_tree = (SimpleOrderedMap)shard_pivots.getVal( j ); -// SimpleOrderedMap facet_tree = tf.get( shard_tree_name ); -// if ( null == facet_tree) { -// facet_tree = new SimpleOrderedMap(); -// tf.add( shard_tree_name, facet_tree ); -// } -// -// for( int o = 0; o < shard_tree.size() ; o++ ) { -// String shard_outer = (String) shard_tree.getName( o ); -// NamedList shard_innerList = (NamedList) shard_tree.getVal( o ); -// NamedList tree_innerList = (NamedList) facet_tree.get( shard_outer ); -// if ( null == tree_innerList ) { -// tree_innerList = new NamedList(); -// facet_tree.add( shard_outer, tree_innerList ); -// } -// -// for ( int i = 0 ; i < shard_innerList.size() ; i++ ) { -// String shard_term = (String) shard_innerList.getName( i ); -// long shard_count = ((Number) shard_innerList.getVal(i)).longValue(); -// int tree_idx = tree_innerList.indexOf( shard_term, 0 ); -// -// if ( -1 == tree_idx ) { -// tree_innerList.add( shard_term, shard_count ); -// } else { -// long tree_count = ((Number) tree_innerList.getVal( tree_idx )).longValue(); -// tree_innerList.setVal( tree_idx, shard_count + tree_count ); -// } -// } // innerList loop -// } // outer loop -// **/ -// } // each tree loop -// } -// } -// } -// return ; -// } -// -// @Override -// public void finishStage(ResponseBuilder rb) { -// if (!rb.doFacets || rb.stage != ResponseBuilder.STAGE_GET_FIELDS) return; -// // wait until STAGE_GET_FIELDS -// // so that "result" is already stored in the response (for aesthetics) -// -// SimpleOrderedMap>> tf = rb._pivots; 
-// -// // get 'facet_counts' from the response -// NamedList facetCounts = (NamedList) rb.rsp.getValues().get("facet_counts"); -// if (facetCounts == null) { -// facetCounts = new NamedList(); -// rb.rsp.add("facet_counts", facetCounts); -// } -// facetCounts.add( PIVOT_KEY, tf ); -// rb._pivots = null; -// } -// -// public String getDescription() { -// return "Handle Pivot (multi-level) Faceting"; -// } } diff --git a/solr/core/src/java/org/apache/solr/handler/component/PivotFacetProcessor.java b/solr/core/src/java/org/apache/solr/handler/component/PivotFacetProcessor.java new file mode 100644 index 00000000000..8ccbc199595 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/handler/component/PivotFacetProcessor.java @@ -0,0 +1,252 @@ +package org.apache.solr.handler.component; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.util.BytesRefBuilder; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.schema.FieldType; +import org.apache.solr.search.SolrIndexSearcher; +import org.apache.solr.search.DocSet; +import org.apache.solr.search.SyntaxError; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.common.util.StrUtils; +import org.apache.solr.common.SolrException.ErrorCode; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.params.FacetParams; +import org.apache.solr.request.SimpleFacets; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.lucene.search.Query; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Deque; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; + +/** + * Processes all Pivot facet logic for a single node -- both non-distrib, and per-shard + */ +public class PivotFacetProcessor extends SimpleFacets +{ + protected SolrParams params; + + public PivotFacetProcessor(SolrQueryRequest req, DocSet docs, SolrParams params, ResponseBuilder rb) { + super(req, docs, params, rb); + this.params = params; + } + + /** + * Processes all of the specified {@link FacetParams#FACET_PIVOT} strings, generating + * a completel response tree for each pivot. 
The values in this response will either + * be the complete tree of fields and values for the specified pivot in the local index, + * or the requested refinements if the pivot params include the {@link PivotFacet#REFINE_PARAM} + */ + public SimpleOrderedMap>> process(String[] pivots) throws IOException { + if (!rb.doFacets || pivots == null) + return null; + + SimpleOrderedMap>> pivotResponse = new SimpleOrderedMap<>(); + for (String pivotList : pivots) { + try { + this.parseParams(FacetParams.FACET_PIVOT, pivotList); + } catch (SyntaxError e) { + throw new SolrException(ErrorCode.BAD_REQUEST, e); + } + List pivotFields = StrUtils.splitSmart(facetValue, ",", true); + if( pivotFields.size() < 1 ) { + throw new SolrException( ErrorCode.BAD_REQUEST, + "Pivot Facet needs at least one field name: " + pivotList); + } else { + SolrIndexSearcher searcher = rb.req.getSearcher(); + for (String fieldName : pivotFields) { + SchemaField sfield = searcher.getSchema().getField(fieldName); + if (sfield == null) { + throw new SolrException(ErrorCode.BAD_REQUEST, "\"" + fieldName + "\" is not a valid field name in pivot: " + pivotList); + } + } + } + + //REFINEMENT + String fieldValueKey = localParams == null ? 
null : localParams.get(PivotFacet.REFINE_PARAM); + if(fieldValueKey != null ){ + String[] refinementValuesByField = params.getParams(PivotFacet.REFINE_PARAM+fieldValueKey); + for(String refinements : refinementValuesByField){ + pivotResponse.addAll(processSingle(pivotFields, refinements)); + } + } else{ + pivotResponse.addAll(processSingle(pivotFields, null)); + } + } + return pivotResponse; + } + + /** + * Process a single branch of refinement values for a specific pivot + * @param pivotFields the ordered list of fields in this pivot + * @param refinements the comma seperate list of refinement values corrisponding to each field in the pivot, or null if there are no refinements + */ + private SimpleOrderedMap>> processSingle(List pivotFields, + String refinements) throws IOException { + SolrIndexSearcher searcher = rb.req.getSearcher(); + SimpleOrderedMap>> pivotResponse = new SimpleOrderedMap<>(); + + String field = pivotFields.get(0); + SchemaField sfield = searcher.getSchema().getField(field); + + Deque fnames = new LinkedList<>(); + for( int i = pivotFields.size()-1; i>1; i-- ) { + fnames.push( pivotFields.get(i) ); + } + + NamedList facetCounts; + Deque vnames = new LinkedList<>(); + + if (null != refinements) { + // All values, split by the field they should go to + List refinementValuesByField + = PivotFacetHelper.decodeRefinementValuePath(refinements); + + for( int i=refinementValuesByField.size()-1; i>0; i-- ) { + vnames.push(refinementValuesByField.get(i));//Only for [1] and on + } + + String firstFieldsValues = refinementValuesByField.get(0); + + facetCounts = new NamedList(); + facetCounts.add(firstFieldsValues, + getSubsetSize(this.docs, sfield, firstFieldsValues)); + } else { + // no refinements needed + facetCounts = this.getTermCountsForPivots(field, this.docs); + } + + if(pivotFields.size() > 1) { + String subField = pivotFields.get(1); + pivotResponse.add(key, + doPivots(facetCounts, field, subField, fnames, vnames, this.docs)); + } else { + 
pivotResponse.add(key, doPivots(facetCounts, field, null, fnames, vnames, this.docs)); + } + return pivotResponse; + } + + /** + * Recursive function to compute all the pivot counts for the values under teh specified field + */ + protected List> doPivots(NamedList superFacets, + String field, String subField, Deque fnames,Deque vnames,DocSet docs) throws IOException { + + SolrIndexSearcher searcher = rb.req.getSearcher(); + // TODO: optimize to avoid converting to an external string and then having to convert back to internal below + SchemaField sfield = searcher.getSchema().getField(field); + FieldType ftype = sfield.getType(); + + String nextField = fnames.poll(); + + // re-useable BytesRefBuilder for conversion of term values to Objects + BytesRefBuilder termval = new BytesRefBuilder(); + + List> values = new ArrayList<>( superFacets.size() ); + for (Map.Entry kv : superFacets) { + // Only sub-facet if parent facet has positive count - still may not be any values for the sub-field though + if (kv.getValue() >= getMinCountForField(field)) { + final String fieldValue = kv.getKey(); + + SimpleOrderedMap pivot = new SimpleOrderedMap<>(); + pivot.add( "field", field ); + if (null == fieldValue) { + pivot.add( "value", null ); + } else { + ftype.readableToIndexed(fieldValue, termval); + pivot.add( "value", ftype.toObject(sfield, termval.get()) ); + } + pivot.add( "count", kv.getValue() ); + + DocSet subset = getSubset(docs, sfield, fieldValue); + + if( subField != null ) { + NamedList facetCounts; + if(!vnames.isEmpty()){ + String val = vnames.pop(); + facetCounts = new NamedList(); + facetCounts.add(val, getSubsetSize(subset, + searcher.getSchema().getField(subField), + val)); + } else { + facetCounts = this.getTermCountsForPivots(subField, subset); + } + + if (facetCounts.size() >= 1) { + pivot.add( "pivot", doPivots( facetCounts, subField, nextField, fnames, vnames, subset) ); + } + } + values.add( pivot ); + } + + } + // put the field back on the list + 
fnames.push( nextField ); + return values; + } + + /** + * Given a base docset, computes the size of the subset of documents corrisponding to the specified pivotValue + * + * @param base the set of documents to evalute relative to + * @param field the field type used by the pivotValue + * @param pivotValue String representation of the value, may be null (ie: "missing") + */ + private int getSubsetSize(DocSet base, SchemaField field, String pivotValue) throws IOException { + FieldType ft = field.getType(); + if ( null == pivotValue ) { + Query query = ft.getRangeQuery(null, field, null, null, false, false); + DocSet hasVal = searcher.getDocSet(query); + return base.andNotSize(hasVal); + } else { + Query query = ft.getFieldQuery(null, field, pivotValue); + return searcher.numDocs(query, base); + } + } + + /** + * Given a base docset, computes the subset of documents corrisponding to the specified pivotValue + * + * @param base the set of documents to evalute relative to + * @param field the field type used by the pivotValue + * @param pivotValue String representation of the value, may be null (ie: "missing") + */ + private DocSet getSubset(DocSet base, SchemaField field, String pivotValue) throws IOException { + FieldType ft = field.getType(); + if ( null == pivotValue ) { + Query query = ft.getRangeQuery(null, field, null, null, false, false); + DocSet hasVal = searcher.getDocSet(query); + return base.andNot(hasVal); + } else { + Query query = ft.getFieldQuery(null, field, pivotValue); + return searcher.getDocSet(query, base); + } + } + + private int getMinCountForField(String fieldname){ + return params.getFieldInt(fieldname, FacetParams.FACET_PIVOT_MINCOUNT, 1); + } + +} diff --git a/solr/core/src/java/org/apache/solr/handler/component/PivotFacetValue.java b/solr/core/src/java/org/apache/solr/handler/component/PivotFacetValue.java new file mode 100644 index 00000000000..69e5de6375a --- /dev/null +++ 
b/solr/core/src/java/org/apache/solr/handler/component/PivotFacetValue.java @@ -0,0 +1,206 @@ +package org.apache.solr.handler.component; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.BitSet; +import java.util.Date; +import java.util.List; +import java.util.Locale; + +import org.apache.solr.common.params.FacetParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.schema.TrieDateField; +import org.apache.solr.util.PivotListEntry; + +/** + * Models a single (value, count) pair that will exist in the collection of values for a + * {@link PivotFacetField} parent. 
This PivotFacetValue may itself have a + * nested {@link PivotFacetField} child + * + * @see PivotFacetField + * @see PivotFacetFieldValueCollection + */ +@SuppressWarnings("rawtypes") +public class PivotFacetValue { + + private final BitSet sourceShards = new BitSet(); + private final PivotFacetField parentPivot; + private final Comparable value; + // child can't be final, circular ref on construction + private PivotFacetField childPivot = null; + private int count; // mutable + + private PivotFacetValue(PivotFacetField parent, Comparable val) { + this.parentPivot = parent; + this.value = val; + } + + /** + * The value of the asssocated field modeled by this PivotFacetValue. + * May be null if this PivotFacetValue models the count for docs + * "missing" the field value. + * + * @see FacetParams#FACET_MISSING + */ + public Comparable getValue() { return value; } + + /** The count corrisponding to the value modeled by this PivotFacetValue */ + public int getCount() { return count; } + + /** + * The {@link PivotFacetField} corrisponding to the nested child pivot for this + * PivotFacetValue. May be null if this object is the leaf of a pivot. + */ + public PivotFacetField getChildPivot() { return childPivot; } + + + /** + * A recursive method that walks up the tree of pivot fields/values to build + * a list of the String representations of the values that lead down to this + * PivotFacetValue. 
+ * + * @return a mutable List of the pivot value Strings leading down to and including + * this pivot value, will never be null but may contain nulls + * @see PivotFacetField#getValuePath + */ + public List getValuePath() { + List out = parentPivot.getValuePath(); + + // Note: this code doesn't play nice with custom FieldTypes -- see SOLR-6330 + + if (null == value) { + out.add(null); + } else if (value instanceof Date) { + out.add(TrieDateField.formatExternal((Date) value)); + } else { + out.add(value.toString()); + } + return out; + } + + /** + * A recursive method to construct a new PivotFacetValue object from + * the contents of the {@link NamedList} provided by the specified shard, relative + * to the specified field. + * + * If the NamedList contains data for a child {@link PivotFacetField} + * that will be recursively built as well. + * + * @see PivotFacetField#createFromListOfNamedLists + * @param shardNumber the id of the shard that provided this data + * @param rb The response builder of the current request + * @param parentField the parent field in the current pivot associated with this value + * @param pivotData the data from the specified shard for this pivot value + */ + @SuppressWarnings("unchecked") + public static PivotFacetValue createFromNamedList(int shardNumber, ResponseBuilder rb, PivotFacetField parentField, NamedList pivotData) { + + Comparable pivotVal = null; + int pivotCount = 0; + List> childPivotData = null; + + for (int i = 0; i < pivotData.size(); i++) { + String key = pivotData.getName(i); + Object value = pivotData.getVal(i); + PivotListEntry entry = PivotListEntry.get(key); + + switch (entry) { + + case VALUE: + pivotVal = (Comparable)value; + break; + case FIELD: + assert parentField.field.equals(value) + : "Parent Field mismatch: " + parentField.field + "!=" + value; + break; + case COUNT: + pivotCount = (Integer)value; + break; + case PIVOT: + childPivotData = (List>)value; + break; + default: + throw new 
RuntimeException("PivotListEntry contains unaccounted for item: " + entry); + } + } + + PivotFacetValue newPivotFacet = new PivotFacetValue(parentField, pivotVal); + newPivotFacet.count = pivotCount; + newPivotFacet.sourceShards.set(shardNumber); + + newPivotFacet.childPivot = PivotFacetField.createFromListOfNamedLists(shardNumber, rb, newPivotFacet, childPivotData); + + return newPivotFacet; + } + + /** + * A NON-Recursive method indicating if the specified shard has already + * contributed to the count for this value. + */ + public boolean shardHasContributed(int shardNum) { + return sourceShards.get(shardNum); + } + + /** + * A recursive method for generating a NamedList from this value suitable for + * including in a pivot facet response to the original distributed request. + * + * @see PivotFacetField#convertToListOfNamedLists + */ + public NamedList convertToNamedList() { + NamedList newList = new SimpleOrderedMap<>(); + newList.add(PivotListEntry.FIELD.getName(), parentPivot.field); + newList.add(PivotListEntry.VALUE.getName(), value); + newList.add(PivotListEntry.COUNT.getName(), count); + if (childPivot != null && childPivot.convertToListOfNamedLists() != null) { + newList.add(PivotListEntry.PIVOT.getName(), childPivot.convertToListOfNamedLists()); + } + return newList; + } + + /** + * Merges in the count contributions from the specified shard for each. 
+ * This method is recursive if the shard data includes sub-pivots + * + * @see PivotFacetField#contributeFromShard + * @see PivotFacetField#createFromListOfNamedLists + */ + public void mergeContributionFromShard(int shardNumber, ResponseBuilder rb, NamedList value) { + assert null != value : "can't merge in null data"; + + if (!shardHasContributed(shardNumber)) { + sourceShards.set(shardNumber); + count += PivotFacetHelper.getCount(value); + } + + List> shardChildPivots = PivotFacetHelper.getPivots(value); + // sub pivot -- we may not have seen this yet depending on refinement + if (null == childPivot) { + childPivot = PivotFacetField.createFromListOfNamedLists(shardNumber, rb, this, shardChildPivots); + } else { + childPivot.contributeFromShard(shardNumber, rb, shardChildPivots); + } + } + + public String toString(){ + return String.format(Locale.ROOT, "F:%s V:%s Co:%d Ch?:%s", + parentPivot.field, value, count, (this.childPivot !=null)); + } + +} diff --git a/solr/core/src/java/org/apache/solr/handler/component/ShardRequest.java b/solr/core/src/java/org/apache/solr/handler/component/ShardRequest.java index 53e319aa7b6..87cf4e96cd7 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/ShardRequest.java +++ b/solr/core/src/java/org/apache/solr/handler/component/ShardRequest.java @@ -39,6 +39,7 @@ public class ShardRequest { public final static int PURPOSE_GET_TERMS =0x400; public final static int PURPOSE_GET_TOP_GROUPS =0x800; public final static int PURPOSE_GET_MLT_RESULTS =0x1000; + public final static int PURPOSE_REFINE_PIVOT_FACETS =0x2000; public int purpose; // the purpose of this request diff --git a/solr/core/src/java/org/apache/solr/request/SimpleFacets.java b/solr/core/src/java/org/apache/solr/request/SimpleFacets.java index 72932242c8b..4c19f21bfc5 100644 --- a/solr/core/src/java/org/apache/solr/request/SimpleFacets.java +++ b/solr/core/src/java/org/apache/solr/request/SimpleFacets.java @@ -332,15 +332,45 @@ public class SimpleFacets { 
ENUM, FC, FCS; } + /** + * Term counts for use in pivot faceting that respects the appropriate mincount + * @see FacetParams#FACET_PIVOT_MINCOUNT + */ + public NamedList getTermCountsForPivots(String field, DocSet docs) throws IOException { + Integer mincount = params.getFieldInt(field, FacetParams.FACET_PIVOT_MINCOUNT, 1); + return getTermCounts(field, mincount, docs); + } + + /** + * Term counts for use in field faceting that respects the appropriate mincount + * + * @see FacetParams#FACET_MINCOUNT + */ public NamedList getTermCounts(String field) throws IOException { return getTermCounts(field, this.docs); } + /** + * Term counts for use in field faceting that respects the appropriate mincount + * + * @see FacetParams#FACET_MINCOUNT + */ public NamedList getTermCounts(String field, DocSet base) throws IOException { + Integer mincount = params.getFieldInt(field, FacetParams.FACET_MINCOUNT); + return getTermCounts(field, mincount, base); + } + + /** + * Term counts for use in field faceting that respects the specified mincount - + * if mincount is null, the "zeros" param is consulted for the appropriate backcompat + * default + * + * @see FacetParams#FACET_ZEROS + */ + private NamedList getTermCounts(String field, Integer mincount, DocSet base) throws IOException { int offset = params.getFieldInt(field, FacetParams.FACET_OFFSET, 0); int limit = params.getFieldInt(field, FacetParams.FACET_LIMIT, 100); if (limit == 0) return new NamedList<>(); - Integer mincount = params.getFieldInt(field, FacetParams.FACET_MINCOUNT); if (mincount==null) { Boolean zeros = params.getFieldBool(field, FacetParams.FACET_ZEROS); // mincount = (zeros!=null && zeros) ?
0 : 1; @@ -554,7 +584,8 @@ public class SimpleFacets { try { NamedList result = new SimpleOrderedMap<>(); if(termList != null) { - result.add(workerKey, getListedTermCounts(workerFacetValue, termList, workerBase)); + List terms = StrUtils.splitSmart(termList, ",", true); + result.add(workerKey, getListedTermCounts(workerFacetValue, workerBase, terms)); } else { result.add(workerKey, getTermCounts(workerFacetValue, workerBase)); } @@ -597,13 +628,25 @@ public class SimpleFacets { } + /** + * Computes the term->count counts for the specified termList relative to the default docset (this.docs) + * @param field the name of the field to compute term counts against + * @param termList a comma separated (and backslash escaped) list of term values (in the specified field) to compute the counts for + * @see StrUtils#splitSmart + */ private NamedList getListedTermCounts(String field, String termList) throws IOException { - return getListedTermCounts(field, termList, this.docs); + List terms = StrUtils.splitSmart(termList, ",", true); + return getListedTermCounts(field, this.docs, terms); } - private NamedList getListedTermCounts(String field, String termList, DocSet base) throws IOException { + /** + * Computes the term->count counts for the specified term values relative to the specified base docset + * @param field the name of the field to compute term counts against + * @param base the docset to compute term counts relative to + * @param terms a list of term values (in the specified field) to compute the counts for + */ + protected NamedList getListedTermCounts(String field, DocSet base, List terms) throws IOException { FieldType ft = searcher.getSchema().getFieldType(field); - List terms = StrUtils.splitSmart(termList, ",", true); NamedList res = new NamedList<>(); for (String term : terms) { String internal = ft.toInternal(term); diff --git a/solr/core/src/java/org/apache/solr/util/PivotListEntry.java b/solr/core/src/java/org/apache/solr/util/PivotListEntry.java new file mode 100644 index 00000000000..4fd2b1768d5 ---
/dev/null +++ b/solr/core/src/java/org/apache/solr/util/PivotListEntry.java @@ -0,0 +1,62 @@ +package org.apache.solr.util; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.Locale; + +/** + * Enum for modeling the elements of a (nested) pivot entry as expressed in a NamedList + */ +public enum PivotListEntry { + + FIELD(0), + VALUE(1), + COUNT(2), + PIVOT(3); + + // we could just use the ordinal(), but safer to be very explicit + private final int index; + + private PivotListEntry(int index) { + this.index = index; + } + + /** + * Case-insensitive lookup of PivotListEntry by name + * @see #getName + */ + public static PivotListEntry get(String name) { + return PivotListEntry.valueOf(name.toUpperCase(Locale.ROOT)); + } + + /** + * Name of this entry when used in response + * @see #get + */ + public String getName() { + return name().toLowerCase(Locale.ROOT); + } + + /** + * Index of this entry when used in response + */ + public int getIndex() { + return index; + } + +} diff --git a/solr/core/src/test/org/apache/solr/CursorPagingTest.java b/solr/core/src/test/org/apache/solr/CursorPagingTest.java index a9563f8b71b..aa711e4e967 100644 --- a/solr/core/src/test/org/apache/solr/CursorPagingTest.java
+++ b/solr/core/src/test/org/apache/solr/CursorPagingTest.java @@ -31,7 +31,6 @@ import static org.apache.solr.common.params.CursorMarkParams.CURSOR_MARK_START; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.request.SolrQueryRequest; -import org.apache.solr.schema.TrieDateField; import org.apache.solr.search.CursorMark; //jdoc import org.noggit.ObjectBuilder; @@ -39,12 +38,10 @@ import org.noggit.ObjectBuilder; import java.nio.ByteBuffer; import java.util.Arrays; import java.util.ArrayList; -import java.util.Date; import java.util.HashMap; import java.util.List; import java.util.Collection; import java.util.Collections; -import java.util.Locale; import java.util.Map; import java.util.UUID; @@ -619,11 +616,6 @@ public class CursorPagingTest extends SolrTestCaseJ4 { return 0 != TestUtil.nextInt(random(), 0, 30); } - /** returns likely most (1/10) of the time, otherwise unlikely */ - private static Object skewed(Object likely, Object unlikely) { - return (0 == TestUtil.nextInt(random(), 0, 9)) ? unlikely : likely; - } - /** * An immutable list of the fields in the schema that can be used for sorting, * deterministically random order. 
@@ -898,7 +890,7 @@ public class CursorPagingTest extends SolrTestCaseJ4 { 1.0D / random().nextInt(37))); } if (useField()) { - doc.addField("str", skewed(randomUsableUnicodeString(), + doc.addField("str", skewed(randomXmlUsableUnicodeString(), TestUtil.randomSimpleString(random(), 1, 1))); } if (useField()) { @@ -908,8 +900,7 @@ public class CursorPagingTest extends SolrTestCaseJ4 { doc.addField("bin", ByteBuffer.wrap(randBytes)); } if (useField()) { - doc.addField("date", skewed(randomDate(), - dateWithRandomSecondOn2010_10_31_at_10_31())); + doc.addField("date", skewed(randomDate(), randomSkewedDate())); } if (useField()) { doc.addField("uuid", UUID.randomUUID().toString()); @@ -949,28 +940,6 @@ public class CursorPagingTest extends SolrTestCaseJ4 { } } - /** - * We want "realistic" unicode strings beyond simple ascii, but because our - * updates use XML we need to ensure we don't get "special" code block. - */ - private static String randomUsableUnicodeString() { - String result = TestUtil.randomRealisticUnicodeString(random()); - if (result.matches(".*\\p{InSpecials}.*")) { - // oh well - result = TestUtil.randomSimpleString(random()); - } - return result; - } - - private static String randomDate() { - return TrieDateField.formatExternal(new Date(random().nextLong())); - } - - private static String dateWithRandomSecondOn2010_10_31_at_10_31() { - return String.format(Locale.ROOT, "2010-10-31T10:31:%02d.000Z", - TestUtil.nextInt(random(), 0, 59)); - } - private static final String[] currencies = { "USD", "EUR", "NOK" }; public static String randomCurrency() { diff --git a/solr/core/src/test/org/apache/solr/TestDistributedSearch.java b/solr/core/src/test/org/apache/solr/TestDistributedSearch.java index d67d0412620..750a9b544c0 100644 --- a/solr/core/src/test/org/apache/solr/TestDistributedSearch.java +++ b/solr/core/src/test/org/apache/solr/TestDistributedSearch.java @@ -61,6 +61,7 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase { 
@Override public void doTest() throws Exception { + QueryResponse rsp = null; int backupStress = stress; // make a copy so we can restore @@ -174,6 +175,13 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase { // a facet query to test out chars out of the ascii range query("q","*:*", "rows",0, "facet","true", "facet.query","{!term f=foo_s}international\u00ff\u01ff\u2222\u3333"); + + // simple field facet on date fields + rsp = query("q","*:*", "rows",0, "facet","true", "facet.field", tdate_a); + assertEquals(1, rsp.getFacetFields().size()); + rsp = query("q","*:*", "rows",0, "facet","true", + "facet.field", tdate_b, "facet.field", tdate_a); + assertEquals(2, rsp.getFacetFields().size()); // simple date facet on one field query("q","*:*", "rows",100, "facet","true", @@ -337,7 +345,7 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase { q.set("q", "*:*"); q.set(ShardParams.SHARDS_INFO, true); setDistributedParams(q); - QueryResponse rsp = queryServer(q); + rsp = queryServer(q); NamedList sinfo = (NamedList) rsp.getResponse().get(ShardParams.SHARDS_INFO); String shards = getShardsString(); int cnt = StringUtils.countMatches(shards, ",")+1; diff --git a/solr/core/src/test/org/apache/solr/cloud/TestCloudPivotFacet.java b/solr/core/src/test/org/apache/solr/cloud/TestCloudPivotFacet.java new file mode 100644 index 00000000000..0eab116e441 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/cloud/TestCloudPivotFacet.java @@ -0,0 +1,530 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.cloud; + +import org.apache.lucene.util.TestUtil; +import org.apache.solr.SolrTestCaseJ4.SuppressSSL; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.client.solrj.response.PivotField; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.schema.TrieDateField; + +import static org.apache.solr.common.params.FacetParams.*; + +import org.apache.commons.lang.StringUtils; + +import org.junit.BeforeClass; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Arrays; +import java.util.Set; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Date; + +/** + *

+ * Randomized testing of Pivot Faceting using SolrCloud. + *

+ *

+ * After indexing a bunch of random docs, picks some random fields to pivot facet on, + * and then confirms that the resulting counts match the results of filtering on those + * values. This gives us strong assertions on the correctness of the total counts for + * each pivot value, but no assertions that the correct "top" counts were chosen. + *

+ *

+ * NOTE: this test ignores the control collection and only deals with the + * CloudSolrServer - this is because the randomized field values make it very easy for + * the term stats to miss values even with the overrequest. + * (because so many values will tie for "1"). What we care about here is + * that the counts we get back are correct and match what we get when filtering on those + * constraints. + *

+ * + * + * + */ +@SuppressSSL // Too Slow +public class TestCloudPivotFacet extends AbstractFullDistribZkTestBase { + + public static Logger log = LoggerFactory.getLogger(TestCloudPivotFacet.class); + + // param used by test purely for tracing & validation + private static String TRACE_MIN = "_test_min"; + // param used by test purely for tracing & validation + private static String TRACE_MISS = "_test_miss"; + // param used by test purely for tracing & validation + private static String TRACE_SORT = "_test_sort"; + + /** + * Controls the odds of any given doc having a value in any given field -- as this gets lower, + * the counts for "facet.missing" pivots should increase. + * @see #useField() + */ + private static int useFieldRandomizedFactor = -1; + + @BeforeClass + public static void initUseFieldRandomizedFactor() { + useFieldRandomizedFactor = TestUtil.nextInt(random(), 2, 30); + log.info("init'ing useFieldRandomizedFactor = {}", useFieldRandomizedFactor); + } + + @Override + public void doTest() throws Exception { + handle.clear(); + handle.put("QTime", SKIPVAL); + handle.put("timestamp", SKIPVAL); + + final Set fieldNameSet = new HashSet<>(); + + // build up a randomized index + final int numDocs = atLeast(500); + log.info("numDocs: {}", numDocs); + + for (int i = 1; i <= numDocs; i++) { + SolrInputDocument doc = buildRandomDocument(i); + + // not efficient, but it garuntees that even if people change buildRandomDocument + // we'll always have the full list of fields w/o needing to keep code in sync + fieldNameSet.addAll(doc.getFieldNames()); + + cloudClient.add(doc); + } + cloudClient.commit(); + + fieldNameSet.remove("id"); + assertTrue("WTF, bogus field exists?", fieldNameSet.add("bogus_not_in_any_doc_s")); + + final String[] fieldNames = fieldNameSet.toArray(new String[fieldNameSet.size()]); + Arrays.sort(fieldNames); // need determinism for buildRandomPivot calls + + + for (int i = 0; i < 5; i++) { + + String q = "*:*"; + if (random().nextBoolean()) { 
+ q = "id:[* TO " + TestUtil.nextInt(random(),300,numDocs) + "]"; + } + ModifiableSolrParams baseP = params("rows", "0", "q", q); + + if (random().nextBoolean()) { + baseP.add("fq", "id:[* TO " + TestUtil.nextInt(random(),200,numDocs) + "]"); + } + + ModifiableSolrParams pivotP = params(FACET,"true", + FACET_PIVOT, buildRandomPivot(fieldNames)); + if (random().nextBoolean()) { + pivotP.add(FACET_PIVOT, buildRandomPivot(fieldNames)); + } + + // keep limit low - lots of unique values, and lots of depth in pivots + pivotP.add(FACET_LIMIT, ""+TestUtil.nextInt(random(),1,17)); + + // sometimes use an offset + if (random().nextBoolean()) { + pivotP.add(FACET_OFFSET, ""+TestUtil.nextInt(random(),0,7)); + } + + if (random().nextBoolean()) { + String min = ""+TestUtil.nextInt(random(),0,numDocs+10); + pivotP.add(FACET_PIVOT_MINCOUNT, min); + // trace param for validation + baseP.add(TRACE_MIN, min); + } + + if (random().nextBoolean()) { + String missing = ""+random().nextBoolean(); + pivotP.add(FACET_MISSING, missing); + // trace param for validation + baseP.add(TRACE_MISS, missing); + } + + if (random().nextBoolean()) { + String sort = random().nextBoolean() ? 
"index" : "count"; + pivotP.add(FACET_SORT, sort); + // trace param for validation + baseP.add(TRACE_SORT, sort); + } + + // overrequest + // + // NOTE: since this test focuses on accuracy of refinement, and doesn't do + // control collection comparisons, there isn't a lot of need for excessive + // overrequesting -- we focus here on trying to exercise the various edge cases + // involved as different values are used with overrequest + if (0 == TestUtil.nextInt(random(),0,4)) { + // we want a decent chance of no overrequest at all + pivotP.add(FACET_OVERREQUEST_COUNT, "0"); + pivotP.add(FACET_OVERREQUEST_RATIO, "0"); + } else { + if (random().nextBoolean()) { + pivotP.add(FACET_OVERREQUEST_COUNT, ""+TestUtil.nextInt(random(),0,5)); + } + if (random().nextBoolean()) { + // sometimes give a ratio less then 1, code should be smart enough to deal + float ratio = 0.5F + random().nextFloat(); + // sometimes go negative + if (random().nextBoolean()) { + ratio *= -1; + } + pivotP.add(FACET_OVERREQUEST_RATIO, ""+ratio); + } + } + + assertPivotCountsAreCorrect(baseP, pivotP); + } + } + + /** + * Given some query params, executes the request against the cloudClient and + * then walks the pivot facet values in the response, treating each one as a + * filter query to assert the pivot counts are correct. 
+ */ + private void assertPivotCountsAreCorrect(SolrParams baseParams, + SolrParams pivotParams) + throws SolrServerException { + + SolrParams initParams = SolrParams.wrapAppended(pivotParams, baseParams); + + log.info("Doing full run: {}", initParams); + countNumFoundChecks = 0; + + NamedList> pivots = null; + try { + QueryResponse initResponse = cloudClient.query(initParams); + pivots = initResponse.getFacetPivot(); + assertNotNull(initParams + " has null pivots?", pivots); + assertEquals(initParams + " num pivots", + initParams.getParams("facet.pivot").length, pivots.size()); + } catch (Exception e) { + throw new RuntimeException("init query failed: " + initParams + ": " + + e.getMessage(), e); + } + try { + for (Map.Entry> pivot : pivots) { + final String pivotKey = pivot.getKey(); + // :HACK: for counting the max possible pivot depth + final int maxDepth = 1 + pivotKey.length() - pivotKey.replace(",","").length(); + + assertTraceOk(pivotKey, baseParams, pivot.getValue()); + + // NOTE: we can't make any assumptions/assertions about the number of + // constraints here because of the random data - which means if pivoting is + // completely broken and there are no constraints this loop could be a No-Op + // but in that case we just have to trust that DistributedFacetPivotTest + // will catch it.
+ for (PivotField constraint : pivot.getValue()) { + int depth = assertPivotCountsAreCorrect(pivotKey, baseParams, constraint); + + // we can't assert that the depth reached is the same as the depth requested + // because the fq and/or mincount may have pruned the tree too much + assertTrue("went too deep: "+depth+": " + pivotKey + " ==> " + pivot, + depth <= maxDepth); + + } + } + } catch (AssertionError e) { + throw new AssertionError(initParams + " ==> " + e.getMessage(), e); + } finally { + log.info("Ending full run (countNumFoundChecks={}): {}", + countNumFoundChecks, initParams); + } + } + + /** + * Recursive Helper method for asserting that pivot constraint counts match + * results when filtering on those constraints. Returns the recursive depth reached + * (for sanity checking) + */ + private int assertPivotCountsAreCorrect(String pivotName, + SolrParams baseParams, + PivotField constraint) + throws SolrServerException { + + SolrParams p = SolrParams.wrapAppended(baseParams, + params("fq", buildFilter(constraint))); + List subPivots = null; + try { + assertNumFound(pivotName, constraint.getCount(), p); + subPivots = constraint.getPivot(); + } catch (Exception e) { + throw new RuntimeException(pivotName + ": count query failed: " + p + ": " + + e.getMessage(), e); + } + int depth = 0; + if (null != subPivots) { + assertTraceOk(pivotName, baseParams, subPivots); + + for (PivotField subPivot : subPivots) { + depth = assertPivotCountsAreCorrect(pivotName, p, subPivot); + } + } + return depth + 1; + } + + /** + * Verify that the PivotFields we're looking at don't violate any of the expected + * behaviors based on the TRACE_* params found in the base params + */ + private void assertTraceOk(String pivotName, SolrParams baseParams, List constraints) { + if (null == constraints || 0 == constraints.size()) { + return; + } + final int maxIdx = constraints.size() - 1; + + final int min = baseParams.getInt(TRACE_MIN, -1); + final boolean expectMissing =
baseParams.getBool(TRACE_MISS, false); + final boolean checkCount = "count".equals(baseParams.get(TRACE_SORT, "count")); + + int prevCount = Integer.MAX_VALUE; + + for (int i = 0; i <= maxIdx; i++) { + final PivotField constraint = constraints.get(i); + final int count = constraint.getCount(); + + if (0 < min) { + assertTrue(pivotName + ": val #"+i +" of " + maxIdx + + ": count("+count+") < facet.mincount("+min+"): " + constraint, + min <= count); + } + // missing value must always come last, but only if facet.missing was used + // and may not exist at all (mincount, none missing for this sub-facet, etc...) + if ((i < maxIdx) || (!expectMissing)) { + assertNotNull(pivotName + ": val #"+i +" of " + maxIdx + + " has null value: " + constraint, + constraint.getValue()); + } + // if we are expecting count based sort, then the count of each constraint + // must be lt-or-eq the count that came before -- or it must be the last value and + // be "missing" + if (checkCount) { + assertTrue(pivotName + ": val #"+i +" of" + maxIdx + + ": count("+count+") > prevCount("+prevCount+"): " + constraint, + ((count <= prevCount) + || (expectMissing && i == maxIdx && null == constraint.getValue()))); + prevCount = count; + } + } + } + + /** + * Given a PivotField constraint, generate a query for the field+value + * for use in an fq to verify the constraint count + */ + private static String buildFilter(PivotField constraint) { + Object value = constraint.getValue(); + if (null == value) { + // facet.missing, exclude any indexed term + return "-" + constraint.getField() + ":[* TO *]"; + } + // otherwise, build up a term filter... 
+ String prefix = "{!term f=" + constraint.getField() + "}"; + if (value instanceof Date) { + return prefix + TrieDateField.formatExternal((Date)value); + } else { + return prefix + value; + } + } + + + /** + * Creates a random facet.pivot param string using some of the specified fieldNames + */ + private static String buildRandomPivot(String[] fieldNames) { + final int depth = TestUtil.nextInt(random(), 1, 3); + String [] fields = new String[depth]; + for (int i = 0; i < depth; i++) { + // yes this means we might use the same field twice + // makes it a robust test (especially for multi-valued fields) + fields[i] = fieldNames[TestUtil.nextInt(random(),0,fieldNames.length-1)]; + } + return StringUtils.join(fields, ","); + } + + + /** + * Creates a document with randomized field values, some of which be missing values, + * some of which will be multi-valued (per the schema) and some of which will be + * skewed so that small subsets of the ranges will be more common (resulting in an + * increased likelihood of duplicate values) + * + * @see #buildRandomPivot + */ + private static SolrInputDocument buildRandomDocument(int id) { + SolrInputDocument doc = sdoc("id", id); + // most fields are in most docs + // if field is in a doc, then "skewed" chance val is from a dense range + // (hopefully with lots of duplication) + for (String prefix : new String[] { "pivot_i", "pivot_ti" }) { + if (useField()) { + doc.addField(prefix+"1", skewed(TestUtil.nextInt(random(), 20, 50), + random().nextInt())); + + } + if (useField()) { + int numMulti = atLeast(1); + while (0 < numMulti--) { + doc.addField(prefix, skewed(TestUtil.nextInt(random(), 20, 50), + random().nextInt())); + } + } + } + for (String prefix : new String[] { "pivot_l", "pivot_tl" }) { + if (useField()) { + doc.addField(prefix+"1", skewed(TestUtil.nextInt(random(), 5000, 5100), + random().nextLong())); + } + if (useField()) { + int numMulti = atLeast(1); + while (0 < numMulti--) { + doc.addField(prefix, 
skewed(TestUtil.nextInt(random(), 5000, 5100), + random().nextLong())); + } + } + } + for (String prefix : new String[] { "pivot_f", "pivot_tf" }) { + if (useField()) { + doc.addField(prefix+"1", skewed(1.0F / random().nextInt(13), + random().nextFloat() * random().nextInt())); + } + if (useField()) { + int numMulti = atLeast(1); + while (0 < numMulti--) { + doc.addField(prefix, skewed(1.0F / random().nextInt(13), + random().nextFloat() * random().nextInt())); + } + } + } + for (String prefix : new String[] { "pivot_d", "pivot_td" }) { + if (useField()) { + doc.addField(prefix+"1", skewed(1.0D / random().nextInt(19), + random().nextDouble() * random().nextInt())); + } + if (useField()) { + int numMulti = atLeast(1); + while (0 < numMulti--) { + doc.addField(prefix, skewed(1.0D / random().nextInt(19), + random().nextDouble() * random().nextInt())); + } + } + } + for (String prefix : new String[] { "pivot_dt", "pivot_tdt" }) { + if (useField()) { + doc.addField(prefix+"1", skewed(randomSkewedDate(), randomDate())); + + } + if (useField()) { + int numMulti = atLeast(1); + while (0 < numMulti--) { + doc.addField(prefix, skewed(randomSkewedDate(), randomDate())); + + } + } + } + { + String prefix = "pivot_b"; + if (useField()) { + doc.addField(prefix+"1", random().nextBoolean() ? "t" : "f"); + } + if (useField()) { + int numMulti = atLeast(1); + while (0 < numMulti--) { + doc.addField(prefix, random().nextBoolean() ? 
"t" : "f"); + } + } + } + for (String prefix : new String[] { "pivot_x_s", "pivot_y_s", "pivot_z_s"}) { + if (useField()) { + doc.addField(prefix+"1", skewed(TestUtil.randomSimpleString(random(), 1, 1), + randomXmlUsableUnicodeString())); + } + if (useField()) { + int numMulti = atLeast(1); + while (0 < numMulti--) { + doc.addField(prefix, skewed(TestUtil.randomSimpleString(random(), 1, 1), + randomXmlUsableUnicodeString())); + } + } + } + + // + // for the remaining fields, make every doc have a value in a dense range + // + + for (String prefix : new String[] { "dense_pivot_x_s", "dense_pivot_y_s" }) { + if (useField()) { + doc.addField(prefix+"1", TestUtil.randomSimpleString(random(), 1, 1)); + } + if (useField()) { + int numMulti = atLeast(1); + while (0 < numMulti--) { + doc.addField(prefix, TestUtil.randomSimpleString(random(), 1, 1)); + } + } + } + for (String prefix : new String[] { "dense_pivot_i", "dense_pivot_ti" }) { + if (useField()) { + doc.addField(prefix+"1", TestUtil.nextInt(random(), 20, 50)); + } + if (useField()) { + int numMulti = atLeast(1); + while (0 < numMulti--) { + doc.addField(prefix, TestUtil.nextInt(random(), 20, 50)); + } + } + } + + return doc; + } + + /** + * Similar to usually() but we want it to happen just as often regardless + * of test multiplier and nightly status + * + * @see #useFieldRandomizedFactor + */ + private static boolean useField() { + assert 0 < useFieldRandomizedFactor; + return 0 != TestUtil.nextInt(random(), 0, useFieldRandomizedFactor); + } + + /** + * Asserts the number of docs matching the SolrParams aganst the cloudClient + */ + private void assertNumFound(String msg, int expected, SolrParams p) + throws SolrServerException { + + countNumFoundChecks++; + + SolrParams params = SolrParams.wrapDefaults(params("rows","0"), p); + assertEquals(msg + ": " + params, + expected, cloudClient.query(params).getResults().getNumFound()); + } + + /** + * @see #assertNumFound + * @see 
#assertPivotCountsAreCorrect(SolrParams,SolrParams) + */ + private int countNumFoundChecks = 0; +} diff --git a/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotLargeTest.java b/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotLargeTest.java new file mode 100644 index 00000000000..31e23339fa0 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotLargeTest.java @@ -0,0 +1,762 @@ +package org.apache.solr.handler.component; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.util.Date; +import java.util.List; +import java.io.IOException; + +import org.apache.solr.BaseDistributedSearchTestCase; +import org.apache.solr.client.solrj.SolrServer; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.response.PivotField; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.params.FacetParams; +import org.apache.solr.common.params.SolrParams; + +import junit.framework.AssertionFailedError; + +public class DistributedFacetPivotLargeTest extends BaseDistributedSearchTestCase { + + public static final String SPECIAL = ""; + + public DistributedFacetPivotLargeTest() { + this.fixShardCount = true; + this.shardCount = 4; // we leave one empty as an edge case + } + + @Override + public void doTest() throws Exception { + this.stress = 0 ; + handle.clear(); + handle.put("QTime", SKIPVAL); + handle.put("timestamp", SKIPVAL); + handle.put("maxScore", SKIPVAL); + + setupDistributedPivotFacetDocuments(); + + QueryResponse rsp = null; + + List pivots = null; + PivotField firstInt = null; + PivotField firstBool = null; + PivotField firstDate = null; + PivotField firstPlace = null; + PivotField firstCompany = null; + + // basic check w/ limit & default sort (count) + rsp = query( "q", "*:*", + "rows", "0", + "facet","true", + "facet.pivot","place_s,company_t", + FacetParams.FACET_LIMIT, "12"); + pivots = rsp.getFacetPivot().get("place_s,company_t"); + assertEquals(12, pivots.size()); + firstPlace = pivots.get(0); + assertPivot("place_s", "cardiff", 257, firstPlace); + assertPivot("company_t", "bbc", 101, firstPlace.getPivot().get(0)); + // Microsoft will come back wrong if refinement was not done correctly + assertPivot("company_t", "microsoft", 56, firstPlace.getPivot().get(1)); + + // trivial mincount=0 check + rsp = query( "q", "does_not_exist_s:foo", + "rows", "0", + "facet","true", + 
"facet.pivot","company_t", + FacetParams.FACET_LIMIT, "10", + FacetParams.FACET_PIVOT_MINCOUNT,"0"); + pivots = rsp.getFacetPivot().get("company_t"); + assertEquals(10, pivots.size()); + for (PivotField p : pivots) { + assertEquals(0, p.getCount()); + } + + // sanity check limit=0 w/ mincount=0 & missing=true + // + // SOLR-6328: doesn't work for single node, so can't work for distrib either (yet) + // + // PivotFacetField's init of needRefinementAtThisLevel as needing potential change + // + // rsp = query( "q", "*:*", + // "rows", "0", + // "facet","true", + // "f.company_t.facet.limit", "10", + // "facet.pivot","special_s,bogus_s,company_t", + // "facet.missing", "true", + // FacetParams.FACET_LIMIT, "0", + // FacetParams.FACET_PIVOT_MINCOUNT,"0"); + // pivots = rsp.getFacetPivot().get("special_s,bogus_s,company_t"); + // assertEquals(1, pivots.size()); // only the missing + // assertPivot("special_s", null, docNumber - 5, pivots.get(0)); // 5 docs w/special_s + // assertEquals(pivots.toString(), 1, pivots.get(0).getPivot()); + // assertPivot("bogus_s", null, docNumber, pivots.get(0).getPivot().get(0)); + // // TODO: some asserts on company results + + // basic check w/ default sort, limit, & mincount==0 + rsp = query( "q", "*:*", + "rows", "0", + "facet","true", + "facet.pivot","place_s,company_t", + FacetParams.FACET_LIMIT, "50", + FacetParams.FACET_PIVOT_MINCOUNT,"0"); + pivots = rsp.getFacetPivot().get("place_s,company_t"); + assertEquals(50, pivots.size()); + firstPlace = pivots.get(0); + assertPivot("place_s", "cardiff", 257, firstPlace); + assertPivot("company_t", "bbc", 101, firstPlace.getPivot().get(0)); + // Microsoft will come back wrong if refinement was not done correctly + assertPivot("company_t", "microsoft", 56, firstPlace.getPivot().get(1)); + + // sort=index + offset + limit w/ some variables + for (SolrParams variableParams : + new SolrParams[] { // bother variations should kwrk just as well + // defauts + params(), + // force refinement + 
params(FacetParams.FACET_OVERREQUEST_RATIO, "1", + FacetParams.FACET_OVERREQUEST_COUNT, "0") }) { + + SolrParams p = SolrParams.wrapDefaults( params( "q", "*:*", + "rows", "0", + "facet","true", + "facet.sort","index", + "f.place_s.facet.limit", "20", + "f.place_s.facet.offset", "40", + "facet.pivot", "place_s,company_t"), + variableParams ); + + try { + rsp = query( p ); + pivots = rsp.getFacetPivot().get("place_s,company_t"); + assertEquals(20, pivots.size()); // limit + for (int i = 0; i < 10; i++) { + PivotField place = pivots.get(i); + assertTrue(place.toString(), place.getValue().toString().endsWith("placeholder")); + assertEquals(3, place.getPivot().size()); + assertPivot("company_t", "bbc", 6, place.getPivot().get(0)); + assertPivot("company_t", "microsoft", 6, place.getPivot().get(1)); + assertPivot("company_t", "polecat", 6, place.getPivot().get(2)); + } + assertPivot("place_s", "cardiff", 257, pivots.get(10)); + assertPivot("place_s", "krakaw", 1, pivots.get(11)); + assertPivot("place_s", "medical staffing network holdings, inc.", 51, pivots.get(12)); + for (int i = 13; i < 20; i++) { + PivotField place = pivots.get(i); + assertTrue(place.toString(), place.getValue().toString().startsWith("placeholder")); + assertEquals(1, place.getPivot().size()); + PivotField company = place.getPivot().get(0); + assertTrue(company.toString(), company.getValue().toString().startsWith("compholder")); + assertEquals(company.toString(), 1, company.getCount()); + } + } catch (AssertionFailedError ae) { + throw new AssertionError(ae.getMessage() + " <== " + p.toString(), ae); + } + } + + // sort=index + mincount=0 + // + // SOLR-6329: facet.pivot.mincount=0 doesn't work well with distrib + // + // broken honda + // + // This is tricky, here's what i think is happening.... 
+ // - "company:honda" only exists on twoShard, and only w/ "place:cardiff" + // - twoShard has no other places in it's docs + // - twoShard can't return any other places to w/ honda as a count=0 sub-value + // - if we refined all other companies places, would twoShard return honda==0 ? + // ... but there's no refinement since mincount==0 + // - would it even matter + // + // should we remove the refinement short circut? + // + // rsp = query( params( "q", "*:*", + // "rows", "0", + // "facet","true", + // "facet.sort","index", + // "f.place_s.facet.limit", "20", + // "f.place_s.facet.offset", "40", + // FacetParams.FACET_PIVOT_MINCOUNT,"0", + // "facet.pivot", "place_s,company_t") ); + // // TODO: more asserts + // + // + // really trivial demonstration of the above problem + // + // rsp = query( params( "q", "*:*", + // "rows", "0", + // "facet","true", + // FacetParams.FACET_PIVOT_MINCOUNT,"0", + // "facet.pivot", "top_s,sub_s") ); + + // basic check w/ limit & index sort + for (SolrParams facetParams : + // results should be the same regardless of wether local params are used + new SolrParams[] { + // Broken: SOLR-6193 + // params("facet.pivot","{!facet.limit=4 facet.sort=index}place_s,company_t"), + // params("facet.pivot","{!facet.sort=index}place_s,company_t", + // FacetParams.FACET_LIMIT, "4"), + params("facet.pivot","place_s,company_t", + FacetParams.FACET_LIMIT, "4", + "facet.sort", "index") }) { + SolrParams p = SolrParams.wrapDefaults( params( "q", "*:*", + "rows", "0", + "facet","true"), + facetParams ); + try { + rsp = query( p ); + pivots = rsp.getFacetPivot().get("place_s,company_t"); + assertEquals(4, pivots.size()); + firstPlace = pivots.get(0); + assertPivot("place_s", "0placeholder", 6, firstPlace); + firstCompany = firstPlace.getPivot().get(0); + assertPivot("company_t", "bbc", 6, firstCompany); + } catch (AssertionFailedError ae) { + throw new AssertionError(ae.getMessage() + " <== " + p.toString(), ae); + } + } + + // Pivot Faceting (combined 
wtih Field Faceting) + for (SolrParams facetParams : + // with and w/o an excluded fq + // (either way, facet results should be the same) + new SolrParams[] { + params("facet.pivot","place_s,company_t", + "facet.field","place_s"), + params("facet.pivot","{!ex=ok}place_s,company_t", + "facet.field","{!ex=ok}place_s", + "fq","{!tag=ok}place_s:cardiff"), + params("facet.pivot","{!ex=pl,co}place_s,company_t", + "fq","{!tag=pl}place_s:cardiff", + "fq","{!tag=co}company_t:bbc") }) { + + // default order (count) + rsp = query( SolrParams.wrapDefaults(params("q", "*:*", + "rows", "0", + "facet","true", + FacetParams.FACET_LIMIT, "4"), + facetParams) ); + pivots = rsp.getFacetPivot().get("place_s,company_t"); + assertEquals(4, pivots.size()); + firstPlace = pivots.get(0); + assertPivot("place_s", "cardiff", 257, firstPlace); + assertEquals(4, firstPlace.getPivot().size()); + firstCompany = firstPlace.getPivot().get(0); + assertPivot("company_t", "bbc", 101, firstCompany); + + // Index Order + rsp = query( SolrParams.wrapDefaults(params("q", "*:*", + "rows", "0", + "facet","true", + FacetParams.FACET_LIMIT, "4", + "facet.sort", "index"), + facetParams) ); + pivots = rsp.getFacetPivot().get("place_s,company_t"); + assertEquals(4, pivots.size()); + firstPlace = pivots.get(0); + assertPivot("place_s", "0placeholder", 6, firstPlace); + assertEquals(3, firstPlace.getPivot().size()); // num vals in data < limit==3 + firstCompany = firstPlace.getPivot().get(0); + assertPivot("company_t", "bbc", 6, firstCompany); + + // Field level limits + rsp = query( SolrParams.wrapDefaults(params("q", "*:*", + "rows", "0", + "facet","true", + "f.place_s.facet.limit","2", + "f.company_t.facet.limit","4"), + facetParams) ); + pivots = rsp.getFacetPivot().get("place_s,company_t"); + assertEquals(2, pivots.size()); + firstPlace = pivots.get(0); + assertPivot("place_s", "cardiff", 257, firstPlace); + assertEquals(4, firstPlace.getPivot().size()); + firstCompany = firstPlace.getPivot().get(0); + 
assertPivot("company_t", "bbc", 101, firstCompany); + } + + // Pivot Faceting Count w/fq (not excluded) + rsp = query( "q", "*:*", + "rows", "0", + "fq","place_s:cardiff", + "facet","true", + "facet.pivot","place_s,company_t", + FacetParams.FACET_LIMIT, "4"); + pivots = rsp.getFacetPivot().get("place_s,company_t"); + assertEquals(1, pivots.size()); + firstPlace = pivots.get(0); + assertPivot("place_s", "cardiff", 257, firstPlace); + assertEquals(4, firstPlace.getPivot().size()); + firstCompany = firstPlace.getPivot().get(0); + assertPivot("company_t", "bbc", 101, firstCompany); + + + // Same Pivot - one with exclusion and one w/o + rsp = query( "q", "*:*", + "rows", "0", + "fq","{!tag=ff}pay_i:[2000 TO *]", + "facet","true", + "facet.pivot","{!key=filt}place_s,company_t", + "facet.pivot","{!key=nofilt ex=ff}place_s,company_t", + FacetParams.FACET_LIMIT, "4"); + pivots = rsp.getFacetPivot().get("filt"); + assertEquals(4, pivots.size()); + firstPlace = pivots.get(0); + assertPivot("place_s", "cardiff", 105, firstPlace); + assertEquals(4, firstPlace.getPivot().size()); + assertPivot("company_t", "bbc", 101, firstPlace.getPivot().get(0)); + assertPivot("company_t", "microsoft", 54, firstPlace.getPivot().get(1)); + // + pivots = rsp.getFacetPivot().get("nofilt"); + assertEquals(4, pivots.size()); + firstPlace = pivots.get(0); + assertPivot("place_s", "cardiff", 257, firstPlace); + assertEquals(4, firstPlace.getPivot().size()); + assertPivot("company_t", "bbc", 101, firstPlace.getPivot().get(0)); + assertPivot("company_t", "microsoft", 56, firstPlace.getPivot().get(1)); + + // Same Pivot - one in default (count) order and one in index order + // + // Broken: SOLR-6193 - the facet.sort localparam isn't being picked up correctly + // + // rsp = query( "q", "*:*", + // "rows", "0", + // "facet","true", + // "fq","pay_i:[2000 TO *]", + // "facet.pivot","{!key=sc}place_s,company_t", + // "facet.pivot","{!key=si facet.sort=index}place_s,company_t", + // 
FacetParams.FACET_LIMIT, "4"); + // pivots = rsp.getFacetPivot().get("sc"); + // assertEquals(4, pivots.size()); + // firstPlace = pivots.get(0); + // assertPivot("place_s", "cardiff", 105, firstPlace); + // assertEquals(4, firstPlace.getPivot().size()); + // assertPivot("company_t", "bbc", 101, firstPlace.getPivot().get(0)); + // assertPivot("company_t", "microsoft", 54, firstPlace.getPivot().get(1)); + // // + // pivots = rsp.getFacetPivot().get("si"); + // assertEquals(4, pivots.size()); + // firstPlace = pivots.get(0); + // assertPivot("place_s", "0placeholder", 6, firstPlace); + // assertEquals(3, firstPlace.getPivot().size()); // only 3 in the data < facet.limit + // assertPivot("company_t", "bbc", 6, firstPlace.getPivot().get(0)); + // assertPivot("company_t", "microsoft", 6, firstPlace.getPivot().get(1)); + + + // Field level limits and small offset + rsp = query( "q", "*:*", + "rows", "0", + "facet","true", + "facet.pivot","place_s,company_t", + "f.place_s.facet.limit","2", + "f.company_t.facet.limit","4", + "facet.offset","1"); + pivots = rsp.getFacetPivot().get("place_s,company_t"); + assertEquals(2, pivots.size()); + firstPlace = pivots.get(0); + assertPivot("place_s", "medical staffing network holdings, inc.", 51, firstPlace); + assertEquals(2, firstPlace.getPivot().size()); // num vals in data < limit==4 + firstCompany = firstPlace.getPivot().get(0); + assertPivot("company_t", "bbc", 50, firstCompany); + + + // Field level offsets and limit + rsp = query( "q", "*:*", + "rows", "0", + "fq","{!tag=pl}place_s:cardiff", + "facet","true", + "facet.pivot","{!ex=pl}place_s,company_t", + "f.place_s.facet.offset","1", + "f.company_t.facet.offset","2", + FacetParams.FACET_LIMIT, "4"); + pivots = rsp.getFacetPivot().get("place_s,company_t"); + assertEquals(4, pivots.size()); + firstPlace = pivots.get(0); + assertPivot("place_s", "medical staffing network holdings, inc.", 51, firstPlace); + assertEquals(1, firstPlace.getPivot().size()); // num vals in data < 
limit==4 + firstCompany = firstPlace.getPivot().get(0); + assertPivot("company_t", "polecat", 50, firstCompany); + + + // datetime + rsp = query( "q", "*:*", + "rows", "0", + "facet","true", + "facet.pivot","hiredate_dt,place_s,company_t", + "f.hiredate_dt.facet.limit","2", + "f.hiredate_dt.facet.offset","1", + FacetParams.FACET_LIMIT, "4"); + pivots = rsp.getFacetPivot().get("hiredate_dt,place_s,company_t"); + assertEquals(2, pivots.size()); + firstDate = pivots.get(0); // 2012-09-01T12:30:00Z + assertPivot("hiredate_dt", new Date(1346502600000L), 200, firstDate); + assertEquals(1, firstDate.getPivot().size()); // num vals in data < limit==4 + firstPlace = firstDate.getPivot().get(0); + assertPivot("place_s", "cardiff", 200, firstPlace); + assertEquals(4, firstPlace.getPivot().size()); + firstCompany = firstPlace.getPivot().get(0); + assertPivot("company_t", "bbc", 50, firstCompany); + + // int + rsp = query( "q", "*:*", + "rows", "0", + "facet","true", + "facet.pivot","pay_i,place_s,company_t", + "f.pay_i.facet.limit","2", + "f.pay_i.facet.offset","1", + FacetParams.FACET_LIMIT, "4"); + pivots = rsp.getFacetPivot().get("pay_i,place_s,company_t"); + assertEquals(2, pivots.size()); + firstInt = pivots.get(0); + assertPivot("pay_i", 2000, 50, firstInt); + assertEquals(4, firstInt.getPivot().size()); + firstPlace = firstInt.getPivot().get(0); + assertPivot("place_s", "0placeholder", 1, firstPlace); + assertEquals(3, firstPlace.getPivot().size()); + firstCompany = firstPlace.getPivot().get(0); + assertPivot("company_t", "bbc", 1, firstCompany); + + // boolean + rsp = query( "q", "*:*", + "rows", "0", + "facet","true", + "facet.pivot","real_b,place_s,company_t", + "f.real_b.facet.missing","true", + "f.real_b.facet.limit","2", + FacetParams.FACET_LIMIT, "4"); + pivots = rsp.getFacetPivot().get("real_b,place_s,company_t"); + assertEquals(3, pivots.size()); + firstBool = pivots.get(0); + assertPivot("real_b", false, 300, firstBool); + assertEquals(4, 
firstBool.getPivot().size()); + firstPlace = firstBool.getPivot().get(0); + assertPivot("place_s", "0placeholder", 6, firstPlace); + assertEquals(3, firstPlace.getPivot().size()); + firstCompany = firstPlace.getPivot().get(0); + assertPivot("company_t", "bbc", 6, firstCompany); + + // bogus fields + rsp = query( "q", "*:*", + "rows", "0", + "facet","true", + "facet.pivot","doesntexist_t,neitherdoi_i", + FacetParams.FACET_LIMIT, "4"); + pivots = rsp.getFacetPivot().get("doesntexist_t,neitherdoi_i"); + assertEquals(0, pivots.size()); + + // bogus fields with facet.missing + rsp = query( "q", "*:*", + "rows", "0", + "facet","true", + "facet.pivot","doesntexist_t,neitherdoi_i", + "facet.missing", "true", + FacetParams.FACET_LIMIT, "4"); + pivots = rsp.getFacetPivot().get("doesntexist_t,neitherdoi_i"); + assertEquals(1, pivots.size()); + assertPivot("doesntexist_t", null, docNumber, pivots.get(0)); + assertEquals(1, pivots.get(0).getPivot().size()); + assertPivot("neitherdoi_i", null, docNumber, pivots.get(0).getPivot().get(0)); + + // Negative facet limit + for (SolrParams facetParams : + // results should be the same regardless of wether facet.limit is global, + // a local param, or specified as a per-field override for both fields + new SolrParams[] { + params(FacetParams.FACET_LIMIT, "-1", + "facet.pivot","place_s,company_t"), + // Broken: SOLR-6193 + // params("facet.pivot","{!facet.limit=-1}place_s,company_t"), + params("f.place_s.facet.limit", "-1", + "f.company_t.facet.limit", "-1", + "facet.pivot","place_s,company_t") }) { + + SolrParams p = SolrParams.wrapDefaults( params( "q", "*:*", + "rows", "0", + "facet","true", + "facet.sort", "count" ), + facetParams); + try { + rsp = query( p ); + pivots = rsp.getFacetPivot().get("place_s,company_t"); + assertEquals(103, pivots.size()); + firstPlace = pivots.get(0); + assertPivot("place_s", "cardiff", 257, firstPlace); + assertEquals(54, firstPlace.getPivot().size()); + firstCompany = firstPlace.getPivot().get(0); + 
assertPivot("company_t","bbc", 101, firstCompany); + } catch (AssertionFailedError ae) { + throw new AssertionError(ae.getMessage() + " <== " + p.toString(), ae); + } + } + + // Negative per-field facet limit (outer) + for (SolrParams facetParams : + // results should be the same regardless of wether per-field facet.limit is + // a global or a local param + new SolrParams[] { + // Broken: SOLR-6193 + // params( "facet.pivot","{!f.id.facet.limit=-1}place_s,id" ), + params( "facet.pivot","place_s,id", + "f.id.facet.limit", "-1") }) { + + SolrParams p = SolrParams.wrapDefaults( params( "q", "*:*", + "rows", "0", + "facet","true", + "facet.sort", "count" ), + facetParams); + try { + rsp = query( p ); + pivots = rsp.getFacetPivot().get("place_s,id"); + assertEquals(100, pivots.size()); // default + firstPlace = pivots.get(0); + assertPivot("place_s", "cardiff", 257, firstPlace); + assertEquals(257, firstPlace.getPivot().size()); + } catch (AssertionFailedError ae) { + throw new AssertionError(ae.getMessage() + " <== " + p.toString(), ae); + } + } + + // Negative per-field facet limit (inner) + for (SolrParams facetParams : + // results should be the same regardless of wether per-field facet.limit is + // a global or a local param + new SolrParams[] { + // Broken: SOLR-6193 + // params( "facet.pivot","{!f.place_s.facet.limit=-1}place_s,id" ), + params( "facet.pivot","place_s,id", + "f.place_s.facet.limit", "-1") }) { + + SolrParams p = SolrParams.wrapDefaults( params( "q", "*:*", + "rows", "0", + "facet","true", + "facet.sort", "count" ), + facetParams); + try { + rsp = query( p ); + pivots = rsp.getFacetPivot().get("place_s,id"); + assertEquals(103, pivots.size()); + firstPlace = pivots.get(0); + assertPivot("place_s", "cardiff", 257, firstPlace); + assertEquals(100, firstPlace.getPivot().size()); // default + } catch (AssertionFailedError ae) { + throw new AssertionError(ae.getMessage() + " <== " + p.toString(), ae); + } + } + + // Mincount + facet.pivot 2 different 
ways (swap field order) + rsp = query( "q", "*:*", + "rows", "0", + "facet","true", + "facet.pivot","place_s,company_t", + "facet.pivot","company_t,place_s", + FacetParams.FACET_PIVOT_MINCOUNT,"6"); + pivots = rsp.getFacetPivot().get("place_s,company_t"); + assertEquals(52, pivots.size()); + firstPlace = pivots.get(0); + assertPivot("place_s", "cardiff", 257, firstPlace); + assertEquals(4, firstPlace.getPivot().size()); + firstCompany = firstPlace.getPivot().get(0); + assertPivot("company_t", "bbc", 101, firstCompany); + // + pivots = rsp.getFacetPivot().get("company_t,place_s"); + assertEquals(4, pivots.size()); + firstCompany = pivots.get(0); + assertPivot("company_t", "bbc", 451, firstCompany); + assertEquals(52, firstCompany.getPivot().size()); + firstPlace = firstCompany.getPivot().get(0); + assertPivot("place_s", "cardiff", 101, firstPlace); + + // refine on SPECIAL empty string + rsp = query( "q", "*:*", + "fq", "-place_s:0placeholder", + "rows", "0", + "facet","true", + "facet.limit","1", + FacetParams.FACET_OVERREQUEST_RATIO, "0", // force refinement + FacetParams.FACET_OVERREQUEST_COUNT, "1", // force refinement + "facet.pivot","special_s,company_t"); + assertEquals(docNumber - 6, rsp.getResults().getNumFound()); // all docs but 0place + pivots = rsp.getFacetPivot().get("special_s,company_t"); + assertEquals(1, pivots.size()); + firstPlace = pivots.get(0); + assertPivot("special_s", SPECIAL, 3, firstPlace); + assertEquals(1, firstPlace.getPivot().size()); + firstCompany = firstPlace.getPivot().get(0); + assertPivot("company_t", "microsoft", 2, firstCompany); + + // TODO test "company_t,special_s" as well + + + // refine on SPECIAL empty string & facet.missing + // Also proves refinement on non-top elements occurs and allows them to get into the top + rsp = query( "q", "*:*", + "fq", "-place_s:0placeholder", + "rows", "0", + "facet","true", + "facet.limit","1", + "facet.missing","true", + FacetParams.FACET_OVERREQUEST_RATIO, "0", // force refinement + 
FacetParams.FACET_OVERREQUEST_COUNT, "2", // force refinement + "facet.pivot","special_s,company_t"); + assertEquals(docNumber - 6, rsp.getResults().getNumFound()); // all docs but 0place + pivots = rsp.getFacetPivot().get("special_s,company_t"); + assertEquals(2, pivots.size()); + firstPlace = pivots.get(0); + assertPivot("special_s", SPECIAL, 3, firstPlace); + assertEquals(1, firstPlace.getPivot().size()); + firstCompany = firstPlace.getPivot().get(0); + assertPivot("company_t", "microsoft", 2, firstCompany); + // last is "missing" val + assertPivot("special_s", null, docNumber -6 -3 -2, pivots.get(1)); // -0place -SPECIAL -xxx + + // forced refinement on facet.missing + rsp = query( "q", "*:*", + "rows", "0", + "facet","true", + "f.bogus_x_s.facet.missing","true", + "f.bogus_y_s.facet.missing","true", + "facet.pivot","bogus_x_s,place_s,bogus_y_s,company_t", + FacetParams.FACET_LIMIT, "12"); + pivots = rsp.getFacetPivot().get("bogus_x_s,place_s,bogus_y_s,company_t"); + assertEquals(1, pivots.size()); // just the missing value for bogus_x_s + assertPivot("bogus_x_s", null, docNumber, pivots.get(0)); + pivots = pivots.get(0).getPivot(); + assertEquals(12, pivots.size()); // places + firstPlace = pivots.get(0); + assertPivot("place_s", "cardiff", 257, firstPlace); + assertEquals(1, firstPlace.getPivot().size()); // just the missing value for bogus_y_s + assertPivot("bogus_y_s", null, 257, firstPlace.getPivot().get(0)); + assertPivot("company_t", "bbc", 101, firstPlace.getPivot().get(0).getPivot().get(0)); + // Microsoft will come back wrong if refinement was not done correctly + assertPivot("company_t", "microsoft", 56, firstPlace.getPivot().get(0).getPivot().get(1)); + + + + + + // Overrequesting a lot + this.query( "q", "*:*", + "rows", "0", + "facet", "true", + "facet.pivot","place_s,company_t", + FacetParams.FACET_OVERREQUEST_RATIO, "10", + FacetParams.FACET_OVERREQUEST_COUNT, "100"); + + // Overrequesting off + this.query( "q", "*:*", + "rows", "0", + "facet", 
"true", + "facet.pivot","place_s,company_t", + FacetParams.FACET_OVERREQUEST_RATIO, "0", + FacetParams.FACET_OVERREQUEST_COUNT, "0"); + + } + + /** + * asserts that the actual PivotField matches the expected criteria + */ + private void assertPivot(String field, Object value, int count, // int numKids, + PivotField actual) { + assertEquals("FIELD: " + actual.toString(), field, actual.getField()); + assertEquals("VALUE: " + actual.toString(), value, actual.getValue()); + assertEquals("COUNT: " + actual.toString(), count, actual.getCount()); + // TODO: add arg && assert on number of kids + //assertEquals("#KIDS: " + actual.toString(), numKids, actual.getPivot().size()); + } + + + + private void setupDistributedPivotFacetDocuments() throws Exception{ + + //Clear docs + del("*:*"); + commit(); + + final int maxDocs = 50; + final SolrServer zeroShard = clients.get(0); + final SolrServer oneShard = clients.get(1); + final SolrServer twoShard = clients.get(2); + final SolrServer threeShard = clients.get(3); // edge case: never gets any matching docs + + for(Integer i=0;i pivots = null; + List[] shardPivots = new List[3]; + shardPivots[0] = shard0.query( req ).getFacetPivot().get("foo_s,bar_s"); + shardPivots[1] = shard1.query( req ).getFacetPivot().get("foo_s,bar_s"); + shardPivots[2] = shard2.query( req ).getFacetPivot().get("foo_s,bar_s"); + + // top 5 same on all shards + for (int i = 0; i < 3; i++) { + assertEquals(10, shardPivots[i].size()); + for (int j = 0; j < 5; j++) { + pivot = shardPivots[i].get(j); + assertEquals(pivot.toString(), "aaa"+j, pivot.getValue()); + assertEquals(pivot.toString(), 100, pivot.getCount()); + } + } + // top 6-10 same on shard0 & shard11 + for (int i = 0; i < 2; i++) { + for (int j = 5; j < 10; j++) { + pivot = shardPivots[i].get(j); + assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("bbb")); + assertEquals(pivot.toString(), 50, pivot.getCount()); + } + } + // 6-10 on shard2 + assertEquals("junkA", 
shardPivots[2].get(5).getValue()); + assertEquals(50, shardPivots[2].get(5).getCount()); + assertEquals("tail", shardPivots[2].get(6).getValue()); + assertEquals(45, shardPivots[2].get(6).getCount()); + assertEquals("bbb0", shardPivots[2].get(7).getValue()); + assertEquals(1, shardPivots[2].get(7).getCount()); + for (int j = 8; j < 10; j++) { + pivot = shardPivots[2].get(j); + assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("zzz")); + assertEquals(pivot.toString(), 1, pivot.getCount()); + } + // check sub-shardPivots on "tail" from shard2 + pivots = shardPivots[2].get(6).getPivot(); + assertEquals(6, pivots.size()); + for (int j = 0; j < 5; j++) { + pivot = pivots.get(j); + assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("junkB")); + assertEquals(pivot.toString(), 8, pivot.getCount()); + } + pivot = pivots.get(5); + assertEquals("tailB", pivot.getValue()); + assertEquals(5, pivot.getCount()); + + // if we disable overrequesting, we don't find the long tail + + pivots = queryServer( params( "q", "*:*", + "shards", getShardsString(), + FacetParams.FACET_OVERREQUEST_COUNT, "0", + FacetParams.FACET_OVERREQUEST_RATIO, "0", + "facet", "true", + "facet.limit", "6", + "facet.pivot", "foo_s,bar_s" ) + ).getFacetPivot().get("foo_s,bar_s"); + assertEquals(6, pivots.size()); + for (int i = 0; i < 5; i++) { + pivot = pivots.get(i); + assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("aaa")); + assertEquals(pivot.toString(), 300, pivot.getCount()); + } + // even w/o the long tail, we should have still asked shard2 to refine bbb0 + assertTrue(pivots.get(5).toString(), pivots.get(5).getValue().equals("bbb0")); + assertEquals(pivots.get(5).toString(), 101, pivots.get(5).getCount()); + + // with default overrequesting, we should find the correct top 6 including + // long tail and top sub-pivots + // (even if we disable overrequesting on the sub-pivot) + for (ModifiableSolrParams q : new ModifiableSolrParams[] { + params(), + 
params("f.bar_s.facet.overrequest.ratio","0", + "f.bar_s.facet.overrequest.count","0") }) { + + q.add( params( "q", "*:*", + "shards", getShardsString(), + "facet", "true", + "facet.limit", "6", + "facet.pivot", "foo_s,bar_s" )); + pivots = queryServer( q ).getFacetPivot().get("foo_s,bar_s"); + + assertEquals(6, pivots.size()); + for (int i = 0; i < 5; i++) { + pivot = pivots.get(i); + assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("aaa")); + assertEquals(pivot.toString(), 300, pivot.getCount()); + } + pivot = pivots.get(5); + assertEquals(pivot.toString(), "tail", pivot.getValue()); + assertEquals(pivot.toString(), 135, pivot.getCount()); + // check the sub pivots + pivots = pivot.getPivot(); + assertEquals(6, pivots.size()); + pivot = pivots.get(0); + assertEquals(pivot.toString(), "tailB", pivot.getValue()); + assertEquals(pivot.toString(), 17, pivot.getCount()); + for (int i = 1; i < 6; i++) { // ccc(0-4) + pivot = pivots.get(i); + assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("ccc")); + assertEquals(pivot.toString(), 14, pivot.getCount()); + } + } + + // if we lower the facet.limit on the sub-pivot, overrequesting should still ensure + // that we get the correct top5 including "tailB" + + pivots = queryServer( params( "q", "*:*", + "shards", getShardsString(), + "facet", "true", + "facet.limit", "6", + "f.bar_s.facet.limit", "5", + "facet.pivot", "foo_s,bar_s" ) + ).getFacetPivot().get("foo_s,bar_s"); + assertEquals(6, pivots.size()); + for (int i = 0; i < 5; i++) { + pivot = pivots.get(i); + assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("aaa")); + assertEquals(pivot.toString(), 300, pivot.getCount()); + } + pivot = pivots.get(5); + assertEquals(pivot.toString(), "tail", pivot.getValue()); + assertEquals(pivot.toString(), 135, pivot.getCount()); + // check the sub pivots + pivots = pivot.getPivot(); + assertEquals(5, pivots.size()); + pivot = pivots.get(0); + assertEquals(pivot.toString(), 
"tailB", pivot.getValue()); + assertEquals(pivot.toString(), 17, pivot.getCount()); + for (int i = 1; i < 5; i++) { // ccc(0-3) + pivot = pivots.get(i); + assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("ccc")); + assertEquals(pivot.toString(), 14, pivot.getCount()); + } + + // however with a lower limit and overrequesting disabled, + // we're going to miss out on tailB + + pivots = queryServer( params( "q", "*:*", + "shards", getShardsString(), + "facet", "true", + "facet.limit", "6", + "f.bar_s.facet.overrequest.ratio", "0", + "f.bar_s.facet.overrequest.count", "0", + "f.bar_s.facet.limit", "5", + "facet.pivot", "foo_s,bar_s" ) + ).getFacetPivot().get("foo_s,bar_s"); + assertEquals(6, pivots.size()); + for (int i = 0; i < 5; i++) { + pivot = pivots.get(i); + assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("aaa")); + assertEquals(pivot.toString(), 300, pivot.getCount()); + } + pivot = pivots.get(5); + assertEquals(pivot.toString(), "tail", pivot.getValue()); + assertEquals(pivot.toString(), 135, pivot.getCount()); + // check the sub pivots + pivots = pivot.getPivot(); + assertEquals(5, pivots.size()); + for (int i = 0; i < 5; i++) { // ccc(0-4) + pivot = pivots.get(i); + assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("ccc")); + assertEquals(pivot.toString(), 14, pivot.getCount()); + } + } + +} diff --git a/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotSmallTest.java b/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotSmallTest.java new file mode 100644 index 00000000000..15a6f3d3217 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotSmallTest.java @@ -0,0 +1,439 @@ +package org.apache.solr.handler.component; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; + +import org.apache.solr.BaseDistributedSearchTestCase; +import org.apache.solr.client.solrj.response.PivotField; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.common.params.FacetParams; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.params.ModifiableSolrParams; + +import junit.framework.AssertionFailedError; + +public class DistributedFacetPivotSmallTest extends BaseDistributedSearchTestCase { + + public DistributedFacetPivotSmallTest() { + this.fixShardCount = true; + this.shardCount = 4; + } + + @Override + public void doTest() throws Exception { + + del("*:*"); + + // NOTE: we use the literal (4 character) string "null" as a company name + // to help ensure there isn't any bugs where the literal string is treated as if it + // were a true NULL value. 
+ index(id, 19, "place_t", "cardiff dublin", "company_t", "microsoft polecat"); + index(id, 20, "place_t", "dublin", "company_t", "polecat microsoft null"); + index(id, 21, "place_t", "london la dublin", "company_t", + "microsoft fujitsu null polecat"); + index(id, 22, "place_t", "krakow london cardiff", "company_t", + "polecat null bbc"); + index(id, 23, "place_t", "london", "company_t", ""); + index(id, 24, "place_t", "la", "company_t", ""); + index(id, 25, "company_t", "microsoft polecat null fujitsu null bbc"); + index(id, 26, "place_t", "krakow", "company_t", "null"); + index(id, 27, "place_t", "krakow cardiff dublin london la", "company_t", + "null microsoft polecat bbc fujitsu"); + index(id, 28, "place_t", "cork", "company_t", + "fujitsu rte"); + commit(); + + handle.clear(); + handle.put("QTime", SKIPVAL); + handle.put("timestamp", SKIPVAL); + handle.put("maxScore", SKIPVAL); + + + final ModifiableSolrParams params = new ModifiableSolrParams(); + setDistributedParams(params); + params.add("q", "*:*"); + params.add("facet", "true"); + params.add("facet.pivot", "place_t,company_t"); + + + QueryResponse rsp = queryServer(params); + + List expectedPlacePivots = new UnorderedEqualityArrayList(); + List expectedCardiffPivots = new UnorderedEqualityArrayList(); + expectedCardiffPivots.add(new ComparablePivotField("company_t", "microsoft", 2, null)); + expectedCardiffPivots.add(new ComparablePivotField("company_t", "null", 2, null)); + expectedCardiffPivots.add(new ComparablePivotField("company_t", "bbc", 2, null)); + expectedCardiffPivots.add(new ComparablePivotField("company_t", "polecat", 3, null)); + expectedCardiffPivots.add(new ComparablePivotField("company_t", "fujitsu", 1, null)); + List expectedDublinPivots = new UnorderedEqualityArrayList(); + expectedDublinPivots.add(new ComparablePivotField("company_t", "polecat", 4, null)); + expectedDublinPivots.add(new ComparablePivotField("company_t", "microsoft", 4, null)); + expectedDublinPivots.add(new 
ComparablePivotField("company_t", "null", 3, null)); + expectedDublinPivots.add(new ComparablePivotField("company_t", "fujitsu", 2, null)); + expectedDublinPivots.add(new ComparablePivotField("company_t", "bbc", 1, null)); + List expectedLondonPivots = new UnorderedEqualityArrayList(); + expectedLondonPivots.add(new ComparablePivotField("company_t", "polecat", 3, null)); + expectedLondonPivots.add(new ComparablePivotField("company_t", "microsoft", 2, null)); + expectedLondonPivots.add(new ComparablePivotField("company_t", "fujitsu", 2, null)); + expectedLondonPivots.add(new ComparablePivotField("company_t", "null", 3,null)); + expectedLondonPivots.add(new ComparablePivotField("company_t", "bbc", 2, null)); + List expectedLAPivots = new UnorderedEqualityArrayList(); + expectedLAPivots.add(new ComparablePivotField("company_t", "microsoft", 2,null)); + expectedLAPivots.add(new ComparablePivotField("company_t", "fujitsu", 2,null)); + expectedLAPivots.add(new ComparablePivotField("company_t", "null", 2, null)); + expectedLAPivots.add(new ComparablePivotField("company_t", "bbc", 1, null)); + expectedLAPivots.add(new ComparablePivotField("company_t", "polecat", 2,null)); + List expectedKrakowPivots = new UnorderedEqualityArrayList(); + expectedKrakowPivots.add(new ComparablePivotField("company_t", "polecat",2, null)); + expectedKrakowPivots.add(new ComparablePivotField("company_t", "bbc", 2, null)); + expectedKrakowPivots.add(new ComparablePivotField("company_t", "null", 3,null)); + expectedKrakowPivots.add(new ComparablePivotField("company_t", "fujitsu", 1, null)); + expectedKrakowPivots.add(new ComparablePivotField("company_t", "microsoft", 1, null)); + List expectedCorkPivots = new UnorderedEqualityArrayList(); + expectedCorkPivots.add(new ComparablePivotField("company_t", "fujitsu", 1, null)); + expectedCorkPivots.add(new ComparablePivotField("company_t", "rte", 1, null)); + expectedPlacePivots.add(new ComparablePivotField("place_t", "dublin", 4, 
expectedDublinPivots)); + expectedPlacePivots.add(new ComparablePivotField("place_t", "cardiff", 3, expectedCardiffPivots)); + expectedPlacePivots.add(new ComparablePivotField("place_t", "london", 4, expectedLondonPivots)); + expectedPlacePivots.add(new ComparablePivotField("place_t", "la", 3, expectedLAPivots)); + expectedPlacePivots.add(new ComparablePivotField("place_t", "krakow", 3, expectedKrakowPivots)); + expectedPlacePivots.add(new ComparablePivotField("place_t", "cork", 1, expectedCorkPivots)); + + + List placePivots = rsp.getFacetPivot().get("place_t,company_t"); + + // Useful to check for errors, orders lists and does toString() equality + // check + testOrderedPivotsStringEquality(expectedPlacePivots, placePivots); + + assertEquals(expectedPlacePivots, placePivots); + + // Test sorting by count + + params.set(FacetParams.FACET_SORT, FacetParams.FACET_SORT_COUNT); + + rsp = queryServer(params); + + placePivots = rsp.getFacetPivot().get("place_t,company_t"); + + testCountSorting(placePivots); + + // Test limit + + params.set(FacetParams.FACET_LIMIT, 2); + + rsp = queryServer(params); + + expectedPlacePivots = new UnorderedEqualityArrayList(); + expectedDublinPivots = new UnorderedEqualityArrayList(); + expectedDublinPivots.add(new ComparablePivotField("company_t", "polecat", + 4, null)); + expectedDublinPivots.add(new ComparablePivotField("company_t", "microsoft", + 4, null)); + expectedLondonPivots = new UnorderedEqualityArrayList(); + expectedLondonPivots.add(new ComparablePivotField("company_t", "null", 3, + null)); + expectedLondonPivots.add(new ComparablePivotField("company_t", "polecat", 3, + null)); + expectedPlacePivots.add(new ComparablePivotField("place_t", "dublin", 4, + expectedDublinPivots)); + expectedPlacePivots.add(new ComparablePivotField("place_t", "london", 4, + expectedLondonPivots)); + + placePivots = rsp.getFacetPivot().get("place_t,company_t"); + + assertEquals(expectedPlacePivots, placePivots); + + // Test individual facet.limit 
values + params.remove(FacetParams.FACET_LIMIT); + + params.set("f.place_t." + FacetParams.FACET_LIMIT, 1); + params.set("f.company_t." + FacetParams.FACET_LIMIT, 4); + + rsp = queryServer(params); + + expectedPlacePivots = new UnorderedEqualityArrayList(); + + expectedDublinPivots = new UnorderedEqualityArrayList(); + expectedDublinPivots.add(new ComparablePivotField("company_t", "microsoft",4, null)); + expectedDublinPivots.add(new ComparablePivotField("company_t", "polecat",4, null)); + expectedDublinPivots.add(new ComparablePivotField("company_t", "null",3, null)); + expectedDublinPivots.add(new ComparablePivotField("company_t", "fujitsu",2, null)); + + expectedLondonPivots = new UnorderedEqualityArrayList(); + expectedLondonPivots.add(new ComparablePivotField("company_t", "null", 3, null)); + expectedLondonPivots.add(new ComparablePivotField("company_t", "polecat", 3, null)); + expectedLondonPivots.add(new ComparablePivotField("company_t", "bbc", 2, null)); + expectedLondonPivots.add(new ComparablePivotField("company_t", "fujitsu", 2, null)); + + expectedCardiffPivots = new UnorderedEqualityArrayList(); + expectedCardiffPivots.add(new ComparablePivotField("company_t", "polecat", 3, null)); + + expectedKrakowPivots = new UnorderedEqualityArrayList(); + expectedKrakowPivots.add(new ComparablePivotField("company_t", "null", 3, null)); + + expectedLAPivots = new UnorderedEqualityArrayList(); + expectedLAPivots.add(new ComparablePivotField("company_t", "fujitsu", 2, null)); + + expectedCorkPivots = new UnorderedEqualityArrayList(); + expectedCorkPivots.add(new ComparablePivotField("company_t", "fujitsu", 1, null)); + + expectedPlacePivots.add(new ComparablePivotField("place_t", "dublin", 4, expectedDublinPivots)); + + placePivots = rsp.getFacetPivot().get("place_t,company_t"); + assertEquals(expectedPlacePivots, placePivots); + + params.remove("f.company_t." + FacetParams.FACET_LIMIT); + params.remove("f.place_t." 
+ FacetParams.FACET_LIMIT); + params.set(FacetParams.FACET_LIMIT, 2); + + // Test facet.missing=true with diff sorts + + index("id",777); // NOTE: id=25 has no place as well + commit(); + + SolrParams missingA = params( "q", "*:*", + "rows", "0", + "facet","true", + "facet.pivot","place_t,company_t", + // default facet.sort + FacetParams.FACET_MISSING, "true" ); + SolrParams missingB = SolrParams.wrapDefaults(missingA, + params(FacetParams.FACET_LIMIT, "4", + "facet.sort", "index")); + for (SolrParams p : new SolrParams[] { missingA, missingB }) { + // in either case, the last pivot option should be the same + rsp = query( p ); + placePivots = rsp.getFacetPivot().get("place_t,company_t"); + assertTrue("not enough values for pivot: " + p + " => " + placePivots, + 1 < placePivots.size()); + PivotField missing = placePivots.get(placePivots.size()-1); + assertNull("not the missing place value: " + p, missing.getValue()); + assertEquals("wrong missing place count: " + p, 2, missing.getCount()); + assertTrue("not enough sub-pivots for missing place: "+ p +" => " + missing.getPivot(), + 1 < missing.getPivot().size()); + missing = missing.getPivot().get(missing.getPivot().size()-1); + assertNull("not the missing company value: " + p, missing.getValue()); + assertEquals("wrong missing company count: " + p, 1, missing.getCount()); + assertNull("company shouldn't have sub-pivots: " + p, missing.getPivot()); + } + + // sort=index + mincount + limit + for (SolrParams variableParams : new SolrParams[] { + // we should get the same results regardless of overrequest + params("facet.overrequest.count","0", + "facet.overrequest.ratio","0"), + params() }) { + + + SolrParams p = SolrParams.wrapDefaults( params( "q", "*:*", + "rows", "0", + "facet","true", + "facet.pivot","company_t", + "facet.sort", "index", + "facet.pivot.mincount", "4", + "facet.limit", "4"), + variableParams ); + + try { + List pivots = query( p ).getFacetPivot().get("company_t"); + assertEquals(4, pivots.size()); 
+ assertEquals("fujitsu", pivots.get(0).getValue()); + assertEquals(4, pivots.get(0).getCount()); + assertEquals("microsoft", pivots.get(1).getValue()); + assertEquals(5, pivots.get(1).getCount()); + assertEquals("null", pivots.get(2).getValue()); + assertEquals(6, pivots.get(2).getCount()); + assertEquals("polecat", pivots.get(3).getValue()); + assertEquals(6, pivots.get(3).getCount()); + + } catch (AssertionFailedError ae) { + throw new AssertionError(ae.getMessage() + " <== " + p.toString(), ae); + } + } + + // sort=index + mincount + limit + offset + for (SolrParams variableParams : new SolrParams[] { + // we should get the same results regardless of overrequest + params("facet.overrequest.count","0", + "facet.overrequest.ratio","0"), + params() }) { + + SolrParams p = SolrParams.wrapDefaults( params( "q", "*:*", + "rows", "0", + "facet","true", + "facet.pivot","company_t", + "facet.sort", "index", + "facet.pivot.mincount", "4", + "facet.offset", "1", + "facet.limit", "4"), + variableParams ); + try { + List pivots = query( p ).getFacetPivot().get("company_t"); + assertEquals(3, pivots.size()); // asked for 4, but not enough meet the mincount + assertEquals("microsoft", pivots.get(0).getValue()); + assertEquals(5, pivots.get(0).getCount()); + assertEquals("null", pivots.get(1).getValue()); + assertEquals(6, pivots.get(1).getCount()); + assertEquals("polecat", pivots.get(2).getValue()); + assertEquals(6, pivots.get(2).getCount()); + + } catch (AssertionFailedError ae) { + throw new AssertionError(ae.getMessage() + " <== " + p.toString(), ae); + } + + } + + // sort=index + mincount + limit + offset (more permutations) + for (SolrParams variableParams : new SolrParams[] { + // all of these combinations should result in the same first value + params("facet.pivot.mincount", "4", + "facet.offset", "2"), + params("facet.pivot.mincount", "5", + "facet.offset", "1"), + params("facet.pivot.mincount", "6", + "facet.offset", "0" ) }) { + + SolrParams p = 
SolrParams.wrapDefaults( params( "q", "*:*", + "rows", "0", + "facet","true", + "facet.limit","1", + "facet.sort","index", + "facet.overrequest.ratio","0", + "facet.pivot", "company_t"), + variableParams ); + + try { + List pivots = query( p ).getFacetPivot().get("company_t"); + assertEquals(1, pivots.size()); + assertEquals(pivots.toString(), "null", pivots.get(0).getValue()); + assertEquals(pivots.toString(), 6, pivots.get(0).getCount()); + + } catch (AssertionFailedError ae) { + throw new AssertionError(ae.getMessage() + " <== " + p.toString(), ae); + } + } + } + + // Useful to check for errors, orders lists and does toString() equality check + private void testOrderedPivotsStringEquality( + List expectedPlacePivots, List placePivots) { + Collections.sort(expectedPlacePivots, new PivotFieldComparator()); + for (PivotField expectedPivot : expectedPlacePivots) { + if (expectedPivot.getPivot() != null) { + Collections.sort(expectedPivot.getPivot(), new PivotFieldComparator()); + } + } + Collections.sort(placePivots, new PivotFieldComparator()); + for (PivotField pivot : placePivots) { + if (pivot.getPivot() != null) { + Collections.sort(pivot.getPivot(), new PivotFieldComparator()); + } + } + assertEquals(expectedPlacePivots.toString(), placePivots.toString()); + } + + private void testCountSorting(List pivots) { + Integer lastCount = null; + for (PivotField pivot : pivots) { + if (lastCount != null) { + assertTrue(pivot.getCount() <= lastCount); + } + lastCount = pivot.getCount(); + if (pivot.getPivot() != null) { + testCountSorting(pivot.getPivot()); + } + } + } + + public static class ComparablePivotField extends PivotField { + + + public ComparablePivotField(String f, Object v, int count, + List pivot) { + super(f,v,count,pivot); + } + + @Override + public boolean equals(Object obj) { + if (this == obj) return true; + if (obj == null) return false; + if (!obj.getClass().isAssignableFrom(PivotField.class)) return false; + PivotField other = (PivotField) obj; + 
if (getCount() != other.getCount()) return false; + if (getField() == null) { + if (other.getField() != null) return false; + } else if (!getField().equals(other.getField())) return false; + if (getPivot() == null) { + if (other.getPivot() != null) return false; + } else if (!getPivot().equals(other.getPivot())) return false; + if (getValue() == null) { + if (other.getValue() != null) return false; + } else if (!getValue().equals(other.getValue())) return false; + return true; + } + } + + public static class UnorderedEqualityArrayList extends ArrayList { + + @Override + public boolean equals(Object o) { + boolean equal = false; + if (o instanceof ArrayList) { + List otherList = (List) o; + if (size() == otherList.size()) { + equal = true; + for (Object objectInOtherList : otherList) { + if (!contains(objectInOtherList)) { + equal = false; + } + } + } + } + return equal; + } + + public int indexOf(Object o) { + for (int i = 0; i < size(); i++) { + if (get(i).equals(o)) { + return i; + } + } + return -1; + } + } + + public class PivotFieldComparator implements Comparator { + + @Override + public int compare(PivotField o1, PivotField o2) { + Integer compare = (Integer.valueOf(o2.getCount())).compareTo(Integer + .valueOf(o1.getCount())); + if (compare == 0) { + compare = ((String) o2.getValue()).compareTo((String) o1.getValue()); + } + return compare; + } + + } + +} diff --git a/solr/core/src/test/org/apache/solr/handler/component/TestPivotHelperCode.java b/solr/core/src/test/org/apache/solr/handler/component/TestPivotHelperCode.java new file mode 100644 index 00000000000..e772ba19e29 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/handler/component/TestPivotHelperCode.java @@ -0,0 +1,118 @@ +package org.apache.solr.handler.component; + + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.handler.component.PivotFacetField; + +import org.apache.lucene.util.TestUtil; + +import java.util.List; +import java.util.Arrays; +import java.util.ArrayList; +import java.util.Collections; + +/** + * A light weight test of various helper methods used in pivot faceting + * + **/ +public class TestPivotHelperCode extends SolrTestCaseJ4{ + + /** + * test refinement encoding/decoding matches specific expected encoded values + * @see PivotFacetHelper#encodeRefinementValuePath + * @see PivotFacetHelper#decodeRefinementValuePath + */ + public void testRefinementStringEncodingWhiteBox() { + // trivial example with some basci escaping of an embedded comma + assertBiDirectionalEncoding(strs("foo,bar","yak","zat"), "~foo\\,bar,~yak,~zat"); + + // simple single valued case + assertBiDirectionalEncoding( strs("foo"), "~foo"); + + // special case: empty list + assertBiDirectionalEncoding(strs(), ""); + + // special case: single element list containing empty string + assertBiDirectionalEncoding(strs(""), "~"); + + // special case: single element list containing null + assertBiDirectionalEncoding(strs((String)null), "^"); + + // mix of empty strings & null with other values + assertBiDirectionalEncoding(strs("", "foo", "", "", null, "bar"), + "~,~foo,~,~,^,~bar"); + } + + /** + * test refinement encoding/decoding of random sets of 
values can be round tripped, + * w/o worrying about what the actual encoding looks like + * + * @see PivotFacetHelper#encodeRefinementValuePath + * @see PivotFacetHelper#decodeRefinementValuePath + */ + public void testRefinementStringEncodingBlockBoxRoundTrip() { + // random data: we should be able to round trip any set of random strings + final int numIters = atLeast(100); + for (int i = 0; i < numIters; i++) { + final int numStrs = atLeast(1); + List data = new ArrayList(numStrs); + for (int j = 0; j < numStrs; j++) { + // :TODO: mix in nulls + data.add(TestUtil.randomUnicodeString(random())); + } + String encoded = PivotFacetHelper.encodeRefinementValuePath(data); + List decoded = PivotFacetHelper.decodeRefinementValuePath(encoded); + assertEquals(data, decoded); + } + + } + + private void assertBiDirectionalEncoding(List data, String encoded) { + assertEquals(data, PivotFacetHelper.decodeRefinementValuePath(encoded)); + assertEquals(encoded, PivotFacetHelper.encodeRefinementValuePath(data)); + } + + + public void testCompareWithNullLast() throws Exception { + Long a = random().nextLong(); + Long b = random().nextLong(); + + assertEquals(a.compareTo(b), PivotFacetFieldValueCollection.compareWithNullLast(a, b)); + assertEquals(b.compareTo(a), PivotFacetFieldValueCollection.compareWithNullLast(b, a)); + + Long bb = new Long(b.longValue()); + assertEquals(0, PivotFacetFieldValueCollection.compareWithNullLast(b, bb)); + + assertEquals(0, PivotFacetFieldValueCollection.compareWithNullLast(null, null)); + + assertTrue( PivotFacetFieldValueCollection.compareWithNullLast(a, null) < 0 ); + assertTrue( PivotFacetFieldValueCollection.compareWithNullLast(b, null) < 0 ); + + assertTrue( 0 < PivotFacetFieldValueCollection.compareWithNullLast(null, a) ); + assertTrue( 0 < PivotFacetFieldValueCollection.compareWithNullLast(null, b) ); + + } + + + private List strs(String... 
strs) { + return Arrays.asList(strs); + } + +} diff --git a/solr/core/src/test/org/apache/solr/util/TestUtils.java b/solr/core/src/test/org/apache/solr/util/TestUtils.java index d057fc9deee..46a564ee34a 100644 --- a/solr/core/src/test/org/apache/solr/util/TestUtils.java +++ b/solr/core/src/test/org/apache/solr/util/TestUtils.java @@ -17,6 +17,7 @@ package org.apache.solr.util; +import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; import java.util.List; @@ -31,6 +32,29 @@ import org.junit.Assert; * */ public class TestUtils extends LuceneTestCase { + + public void testJoin() { + assertEquals("a|b|c", StrUtils.join(Arrays.asList("a","b","c"), '|')); + assertEquals("a,b,c", StrUtils.join(Arrays.asList("a","b","c"), ',')); + assertEquals("a\\,b,c", StrUtils.join(Arrays.asList("a,b","c"), ',')); + assertEquals("a,b|c", StrUtils.join(Arrays.asList("a,b","c"), '|')); + + assertEquals("a\\\\b|c", StrUtils.join(Arrays.asList("a\\b","c"), '|')); + } + + public void testEscapeTextWithSeparator() { + assertEquals("a", StrUtils.escapeTextWithSeparator("a", '|')); + assertEquals("a", StrUtils.escapeTextWithSeparator("a", ',')); + + assertEquals("a\\|b", StrUtils.escapeTextWithSeparator("a|b", '|')); + assertEquals("a|b", StrUtils.escapeTextWithSeparator("a|b", ',')); + assertEquals("a,b", StrUtils.escapeTextWithSeparator("a,b", '|')); + assertEquals("a\\,b", StrUtils.escapeTextWithSeparator("a,b", ',')); + assertEquals("a\\\\b", StrUtils.escapeTextWithSeparator("a\\b", ',')); + + assertEquals("a\\\\\\,b", StrUtils.escapeTextWithSeparator("a\\,b", ',')); + } + public void testSplitEscaping() { List arr = StrUtils.splitSmart("\\r\\n:\\t\\f\\b", ":", true); assertEquals(2,arr.size()); diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/response/QueryResponse.java b/solr/solrj/src/java/org/apache/solr/client/solrj/response/QueryResponse.java index d942d964597..20d3e7b52fa 100644 --- 
a/solr/solrj/src/java/org/apache/solr/client/solrj/response/QueryResponse.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/response/QueryResponse.java @@ -390,10 +390,19 @@ public class QueryResponse extends SolrResponseBase ArrayList values = new ArrayList<>( list.size() ); for( NamedList nl : list ) { // NOTE, this is cheating, but we know the order they are written in, so no need to check + assert "field".equals(nl.getName(0)); String f = (String)nl.getVal( 0 ); + assert "value".equals(nl.getName(1)); Object v = nl.getVal( 1 ); + assert "count".equals(nl.getName(2)); int cnt = ((Integer)nl.getVal( 2 )).intValue(); - List p = (nl.size()<4)?null:readPivots((List)nl.getVal(3) ); + List p = null; + if (4 <= nl.size()) { + assert "pivot".equals(nl.getName(3)); + Object subPiv = nl.getVal(3); + assert null != subPiv : "Server sent back 'null' for sub pivots?"; + p = readPivots( (List) subPiv ); + } values.add( new PivotField( f, v, cnt, p ) ); } return values; diff --git a/solr/solrj/src/java/org/apache/solr/common/params/FacetParams.java b/solr/solrj/src/java/org/apache/solr/common/params/FacetParams.java index 33b1a36fb20..0509ea568ef 100644 --- a/solr/solrj/src/java/org/apache/solr/common/params/FacetParams.java +++ b/solr/solrj/src/java/org/apache/solr/common/params/FacetParams.java @@ -99,6 +99,24 @@ public interface FacetParams { */ public static final String FACET_MISSING = FACET + ".missing"; + + static final String FACET_OVERREQUEST = FACET + ".overrequest"; + + /** + * The percentage to over-request by when performing initial distributed requests. + * + * default value is 1.5 + */ + public static final String FACET_OVERREQUEST_RATIO = FACET_OVERREQUEST + ".ratio"; + + /** + * An additional amount to over-request by when performing initial distributed requests. This + * value will be added after accounting for the over-request ratio. 
+ * + * default value is 10 + */ + public static final String FACET_OVERREQUEST_COUNT = FACET_OVERREQUEST + ".count"; + /** * Comma separated list of fields to pivot diff --git a/solr/solrj/src/java/org/apache/solr/common/util/StrUtils.java b/solr/solrj/src/java/org/apache/solr/common/util/StrUtils.java index 3c4ddfe2a73..e2b0cb24449 100644 --- a/solr/solrj/src/java/org/apache/solr/common/util/StrUtils.java +++ b/solr/solrj/src/java/org/apache/solr/common/util/StrUtils.java @@ -143,7 +143,10 @@ public class StrUtils { return result; } - /** Creates a backslash escaped string, joining all the items. */ + /** + * Creates a backslash escaped string, joining all the items. + * @see #escapeTextWithSeparator + */ public static String join(List items, char separator) { StringBuilder sb = new StringBuilder(items.size() << 3); boolean first=true; @@ -154,13 +157,7 @@ public class StrUtils { } else { sb.append(separator); } - for (int i=0; ilikely most (1/10) of the time, otherwise unlikely + */ + public static Object skewed(Object likely, Object unlikely) { + return (0 == TestUtil.nextInt(random(), 0, 9)) ? unlikely : likely; + } + + /** + * Returns a randomly generated Date in the appropriate Solr external (input) format + * @see #randomSkewedDate + */ + public static String randomDate() { + return TrieDateField.formatExternal(new Date(random().nextLong())); + } + + /** + * Returns a Date such that all results from this method always have the same values for + * year+month+day+hour+minute but the seconds are randomized. 
This can be helpful for
 * indexing documents with random date values that are biased for a narrow window
 * (one day) to test collisions/overlaps
 *
 * @see #randomDate
 */
public static String randomSkewedDate() {
  // fixed date down to the minute; only the seconds (00-59) vary
  return String.format(Locale.ROOT, "2010-10-31T10:31:%02d.000Z",
                       TestUtil.nextInt(random(), 0, 59));
}

/**
 * We want "realistic" unicode strings beyond simple ascii, but because our
 * updates use XML we need to ensure we don't get "special" code block.
 */
public static String randomXmlUsableUnicodeString() {
  String result = TestUtil.randomRealisticUnicodeString(random());
  // NOTE(review): \p{InSpecials} matches the Unicode "Specials" block
  // (presumably excluded because those code points are problematic in XML) --
  // confirm this is the only block that needs filtering.
  if (result.matches(".*\\p{InSpecials}.*")) {
    // fall back to plain ascii when the realistic string contains a "Specials" char
    result = TestUtil.randomSimpleString(random());
  }
  return result;
}
}