mirror of https://github.com/apache/lucene.git
SOLR-2894: Distributed query support for facet.pivot
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1617789 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
7e4603a988
commit
910d467a93
|
@ -188,6 +188,8 @@ New Features
|
|||
|
||||
* SOLR-6304 : JsonLoader should be able to flatten an input JSON to multiple docs (Noble Paul)
|
||||
|
||||
* SOLR-2894: Distributed query support for facet.pivot (Dan Cooper, Erik Hatcher, Chris Russell,
|
||||
Andrew Muldowney, Brett Lucey, Mark Miller, hossman)
|
||||
|
||||
Bug Fixes
|
||||
----------------------
|
||||
|
|
|
@ -21,12 +21,15 @@ import java.io.IOException;
|
|||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.solr.common.SolrException;
|
||||
|
@ -52,17 +55,24 @@ import org.slf4j.LoggerFactory;
|
|||
*
|
||||
* @since solr 1.3
|
||||
*/
|
||||
public class FacetComponent extends SearchComponent
|
||||
{
|
||||
@SuppressWarnings("rawtypes")
|
||||
public class FacetComponent extends SearchComponent {
|
||||
public static Logger log = LoggerFactory.getLogger(FacetComponent.class);
|
||||
|
||||
public static final String COMPONENT_NAME = "facet";
|
||||
|
||||
static final String PIVOT_KEY = "facet_pivot";
|
||||
private static final String PIVOT_KEY = "facet_pivot";
|
||||
private static final String PIVOT_REFINE_PREFIX = "{!"+PivotFacet.REFINE_PARAM+"=";
|
||||
|
||||
/**
|
||||
* incremented counter used to track the values being refined in a given request.
|
||||
* This counter is used in conjunction with {@link PivotFacet#REFINE_PARAM} to identify
|
||||
* which refinement values are associated with which pivots
|
||||
*/
|
||||
int pivotRefinementCounter = 0;
|
||||
|
||||
@Override
|
||||
public void prepare(ResponseBuilder rb) throws IOException
|
||||
{
|
||||
public void prepare(ResponseBuilder rb) throws IOException {
|
||||
if (rb.req.getParams().getBool(FacetParams.FACET, false)) {
|
||||
rb.setNeedDocSet(true);
|
||||
rb.doFacets = true;
|
||||
|
@ -73,29 +83,24 @@ public class FacetComponent extends SearchComponent
|
|||
* Actually run the query
|
||||
*/
|
||||
@Override
|
||||
public void process(ResponseBuilder rb) throws IOException
|
||||
{
|
||||
public void process(ResponseBuilder rb) throws IOException {
|
||||
if (rb.doFacets) {
|
||||
SolrParams params = rb.req.getParams();
|
||||
SimpleFacets f = new SimpleFacets(rb.req,
|
||||
rb.getResults().docSet,
|
||||
params,
|
||||
rb );
|
||||
|
||||
SimpleFacets f = new SimpleFacets(rb.req, rb.getResults().docSet, params, rb);
|
||||
|
||||
NamedList<Object> counts = f.getFacetCounts();
|
||||
String[] pivots = params.getParams(FacetParams.FACET_PIVOT);
|
||||
if (pivots != null && pivots.length > 0) {
|
||||
PivotFacetHelper pivotHelper = new PivotFacetHelper(rb.req,
|
||||
rb.getResults().docSet,
|
||||
params,
|
||||
rb );
|
||||
NamedList v = pivotHelper.process(pivots);
|
||||
PivotFacetProcessor pivotProcessor
|
||||
= new PivotFacetProcessor(rb.req, rb.getResults().docSet, params, rb);
|
||||
SimpleOrderedMap<List<NamedList<Object>>> v
|
||||
= pivotProcessor.process(pivots);
|
||||
if (v != null) {
|
||||
counts.add(PIVOT_KEY, v);
|
||||
}
|
||||
}
|
||||
|
||||
// TODO ???? add this directly to the response, or to the builder?
|
||||
rb.rsp.add("facet_counts", counts);
|
||||
}
|
||||
}
|
||||
|
@ -109,24 +114,23 @@ public class FacetComponent extends SearchComponent
|
|||
}
|
||||
|
||||
if (rb.stage == ResponseBuilder.STAGE_GET_FIELDS) {
|
||||
// overlap facet refinement requests (those shards that we need a count for
|
||||
// particular facet values from), where possible, with
|
||||
// overlap facet refinement requests (those shards that we need a count
|
||||
// for particular facet values from), where possible, with
|
||||
// the requests to get fields (because we know that is the
|
||||
// only other required phase).
|
||||
// We do this in distributedProcess so we can look at all of the
|
||||
// requests in the outgoing queue at once.
|
||||
|
||||
|
||||
|
||||
for (int shardNum = 0; shardNum < rb.shards.length; shardNum++) {
|
||||
List<String> refinements = null;
|
||||
List<String> distribFieldFacetRefinements = null;
|
||||
|
||||
for (DistribFieldFacet dff : rb._facetInfo.facets.values()) {
|
||||
if (!dff.needRefinements) continue;
|
||||
List<String> refList = dff._toRefine[shardNum];
|
||||
if (refList == null || refList.size() == 0) continue;
|
||||
|
||||
String key = dff.getKey(); // reuse the same key that was used for the main facet
|
||||
String key = dff.getKey(); // reuse the same key that was used for the
|
||||
// main facet
|
||||
String termsKey = key + "__terms";
|
||||
String termsVal = StrUtils.join(refList, ',');
|
||||
|
||||
|
@ -136,69 +140,89 @@ public class FacetComponent extends SearchComponent
|
|||
|
||||
String termsKeyEncoded = QueryParsing.encodeLocalParamVal(termsKey);
|
||||
if (dff.localParams != null) {
|
||||
facetCommand = commandPrefix+termsKeyEncoded + " " + dff.facetStr.substring(2);
|
||||
facetCommand = commandPrefix + termsKeyEncoded + " "
|
||||
+ dff.facetStr.substring(2);
|
||||
} else {
|
||||
facetCommand = commandPrefix + termsKeyEncoded + '}' + dff.field;
|
||||
}
|
||||
|
||||
if (refinements == null) {
|
||||
refinements = new ArrayList<>();
|
||||
if (distribFieldFacetRefinements == null) {
|
||||
distribFieldFacetRefinements = new ArrayList<>();
|
||||
}
|
||||
|
||||
refinements.add(facetCommand);
|
||||
refinements.add(termsKey);
|
||||
refinements.add(termsVal);
|
||||
distribFieldFacetRefinements.add(facetCommand);
|
||||
distribFieldFacetRefinements.add(termsKey);
|
||||
distribFieldFacetRefinements.add(termsVal);
|
||||
}
|
||||
|
||||
if (refinements == null) continue;
|
||||
boolean pivotFacetRefinementRequestsExistForShard =
|
||||
doAnyPivotFacetRefinementRequestsExistForShard(rb._facetInfo, shardNum);
|
||||
|
||||
if (distribFieldFacetRefinements == null
|
||||
&& !pivotFacetRefinementRequestsExistForShard) {
|
||||
// nothing to refine, short circuit out
|
||||
continue;
|
||||
}
|
||||
|
||||
String shard = rb.shards[shardNum];
|
||||
ShardRequest refine = null;
|
||||
ShardRequest shardsRefineRequest = null;
|
||||
boolean newRequest = false;
|
||||
|
||||
// try to find a request that is already going out to that shard.
|
||||
// If nshards becomes to great, we way want to move to hashing for better
|
||||
// scalability.
|
||||
// If nshards becomes too great, we may want to move to hashing for
|
||||
// better scalability.
|
||||
for (ShardRequest sreq : rb.outgoing) {
|
||||
if ((sreq.purpose & ShardRequest.PURPOSE_GET_FIELDS) != 0
|
||||
&& sreq.shards != null
|
||||
&& sreq.shards.length == 1
|
||||
&& sreq.shards[0].equals(shard))
|
||||
{
|
||||
refine = sreq;
|
||||
&& sreq.shards[0].equals(shard)) {
|
||||
shardsRefineRequest = sreq;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (refine == null) {
|
||||
// we didn't find any other suitable requests going out to that shard, so
|
||||
// create one ourselves.
|
||||
if (shardsRefineRequest == null) {
|
||||
// we didn't find any other suitable requests going out to that shard,
|
||||
// so create one ourselves.
|
||||
newRequest = true;
|
||||
refine = new ShardRequest();
|
||||
refine.shards = new String[]{rb.shards[shardNum]};
|
||||
refine.params = new ModifiableSolrParams(rb.req.getParams());
|
||||
shardsRefineRequest = new ShardRequest();
|
||||
shardsRefineRequest.shards = new String[] { rb.shards[shardNum] };
|
||||
shardsRefineRequest.params = new ModifiableSolrParams(rb.req.getParams());
|
||||
// don't request any documents
|
||||
refine.params.remove(CommonParams.START);
|
||||
refine.params.set(CommonParams.ROWS,"0");
|
||||
shardsRefineRequest.params.remove(CommonParams.START);
|
||||
shardsRefineRequest.params.set(CommonParams.ROWS, "0");
|
||||
}
|
||||
|
||||
refine.purpose |= ShardRequest.PURPOSE_REFINE_FACETS;
|
||||
refine.params.set(FacetParams.FACET, "true");
|
||||
refine.params.remove(FacetParams.FACET_FIELD);
|
||||
refine.params.remove(FacetParams.FACET_QUERY);
|
||||
// FieldFacetAdditions
|
||||
if (distribFieldFacetRefinements != null) {
|
||||
shardsRefineRequest.purpose |= ShardRequest.PURPOSE_REFINE_FACETS;
|
||||
shardsRefineRequest.params.set(FacetParams.FACET, "true");
|
||||
shardsRefineRequest.params.remove(FacetParams.FACET_FIELD);
|
||||
shardsRefineRequest.params.remove(FacetParams.FACET_QUERY);
|
||||
|
||||
for (int i=0; i<refinements.size();) {
|
||||
String facetCommand=refinements.get(i++);
|
||||
String termsKey=refinements.get(i++);
|
||||
String termsVal=refinements.get(i++);
|
||||
for (int i = 0; i < distribFieldFacetRefinements.size();) {
|
||||
String facetCommand = distribFieldFacetRefinements.get(i++);
|
||||
String termsKey = distribFieldFacetRefinements.get(i++);
|
||||
String termsVal = distribFieldFacetRefinements.get(i++);
|
||||
|
||||
refine.params.add(FacetParams.FACET_FIELD, facetCommand);
|
||||
refine.params.set(termsKey, termsVal);
|
||||
shardsRefineRequest.params.add(FacetParams.FACET_FIELD,
|
||||
facetCommand);
|
||||
shardsRefineRequest.params.set(termsKey, termsVal);
|
||||
}
|
||||
}
|
||||
|
||||
if (newRequest) {
|
||||
rb.addRequest(this, refine);
|
||||
rb.addRequest(this, shardsRefineRequest);
|
||||
}
|
||||
|
||||
// PivotFacetAdditions
|
||||
if (pivotFacetRefinementRequestsExistForShard) {
|
||||
if (newRequest) {
|
||||
shardsRefineRequest.params.remove(FacetParams.FACET_PIVOT);
|
||||
shardsRefineRequest.params.remove(FacetParams.FACET_PIVOT_MINCOUNT);
|
||||
}
|
||||
|
||||
enqueuePivotFacetShardRequests(null, rb, shardNum);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -206,8 +230,63 @@ public class FacetComponent extends SearchComponent
|
|||
return ResponseBuilder.STAGE_DONE;
|
||||
}
|
||||
|
||||
@Override
|
||||
private void enqueuePivotFacetShardRequests
|
||||
(HashMap<String,List<String>> pivotFacetRefinements,
|
||||
ResponseBuilder rb, int shardNum) {
|
||||
|
||||
FacetInfo fi = rb._facetInfo;
|
||||
|
||||
ShardRequest shardsRefineRequestPivot = new ShardRequest();
|
||||
shardsRefineRequestPivot.shards = new String[] {rb.shards[shardNum]};
|
||||
shardsRefineRequestPivot.params = new ModifiableSolrParams(rb.req.getParams());
|
||||
|
||||
// don't request any documents
|
||||
shardsRefineRequestPivot.params.remove(CommonParams.START);
|
||||
shardsRefineRequestPivot.params.set(CommonParams.ROWS, "0");
|
||||
|
||||
shardsRefineRequestPivot.purpose |= ShardRequest.PURPOSE_REFINE_PIVOT_FACETS;
|
||||
shardsRefineRequestPivot.params.set(FacetParams.FACET, "true");
|
||||
shardsRefineRequestPivot.params.remove(FacetParams.FACET_PIVOT_MINCOUNT);
|
||||
shardsRefineRequestPivot.params.set(FacetParams.FACET_PIVOT_MINCOUNT, -1);
|
||||
shardsRefineRequestPivot.params.remove(FacetParams.FACET_PIVOT);
|
||||
shardsRefineRequestPivot.params.remove(FacetParams.FACET_OFFSET);
|
||||
|
||||
for (int pivotIndex = 0; pivotIndex < fi.pivotFacets.size(); pivotIndex++) {
|
||||
String pivotFacetKey = fi.pivotFacets.getName(pivotIndex);
|
||||
PivotFacet pivotFacet = fi.pivotFacets.getVal(pivotIndex);
|
||||
|
||||
List<PivotFacetValue> queuedRefinementsForShard =
|
||||
pivotFacet.getQueuedRefinements(shardNum);
|
||||
|
||||
if ( ! queuedRefinementsForShard.isEmpty() ) {
|
||||
|
||||
String fieldsKey = PivotFacet.REFINE_PARAM + pivotRefinementCounter;
|
||||
String command;
|
||||
|
||||
if (pivotFacet.localParams != null) {
|
||||
command = PIVOT_REFINE_PREFIX + pivotRefinementCounter + " "
|
||||
+ pivotFacet.facetStr.substring(2);
|
||||
} else {
|
||||
command = PIVOT_REFINE_PREFIX + pivotRefinementCounter + "}"
|
||||
+ pivotFacet.getKey();
|
||||
}
|
||||
|
||||
shardsRefineRequestPivot.params.add(FacetParams.FACET_PIVOT, command);
|
||||
for (PivotFacetValue refinementValue : queuedRefinementsForShard) {
|
||||
String refinementStr = PivotFacetHelper
|
||||
.encodeRefinementValuePath(refinementValue.getValuePath());
|
||||
shardsRefineRequestPivot.params.add(fieldsKey, refinementStr);
|
||||
|
||||
}
|
||||
}
|
||||
pivotRefinementCounter++;
|
||||
}
|
||||
|
||||
rb.addRequest(this, shardsRefineRequestPivot);
|
||||
}
|
||||
|
||||
public void modifyRequest(ResponseBuilder rb, SearchComponent who,ShardRequest sreq) {
|
||||
|
||||
if (!rb.doFacets) return;
|
||||
|
||||
if ((sreq.purpose & ShardRequest.PURPOSE_GET_TOP_IDS) != 0) {
|
||||
|
@ -217,15 +296,25 @@ public class FacetComponent extends SearchComponent
|
|||
if (fi == null) {
|
||||
rb._facetInfo = fi = new FacetInfo();
|
||||
fi.parse(rb.req.getParams(), rb);
|
||||
// should already be true...
|
||||
// sreq.params.set(FacetParams.FACET, "true");
|
||||
}
|
||||
|
||||
modifyRequestForFieldFacets(rb, sreq, fi);
|
||||
|
||||
modifyRequestForPivotFacets(rb, sreq, fi.pivotFacets);
|
||||
|
||||
sreq.params.remove(FacetParams.FACET_MINCOUNT);
|
||||
sreq.params.remove(FacetParams.FACET_OFFSET);
|
||||
sreq.params.remove(FacetParams.FACET_LIMIT);
|
||||
|
||||
} else {
|
||||
// turn off faceting on other requests
|
||||
sreq.params.set(FacetParams.FACET, "false");
|
||||
// we could optionally remove faceting params
|
||||
}
|
||||
}
|
||||
|
||||
private void modifyRequestForFieldFacets(ResponseBuilder rb, ShardRequest sreq, FacetInfo fi) {
|
||||
for (DistribFieldFacet dff : fi.facets.values()) {
|
||||
|
||||
String paramStart = "f." + dff.field + '.';
|
||||
sreq.params.remove(paramStart + FacetParams.FACET_MINCOUNT);
|
||||
sreq.params.remove(paramStart + FacetParams.FACET_OFFSET);
|
||||
|
@ -235,46 +324,129 @@ public class FacetComponent extends SearchComponent
|
|||
if (dff.sort.equals(FacetParams.FACET_SORT_COUNT)) {
|
||||
if (dff.limit > 0) {
|
||||
// set the initial limit higher to increase accuracy
|
||||
dff.initialLimit = (int)(dff.initialLimit * 1.5) + 10;
|
||||
dff.initialMincount = 0; // TODO: we could change this to 1, but would then need more refinement for small facet result sets?
|
||||
dff.initialLimit = doOverRequestMath(dff.initialLimit, dff.overrequestRatio,
|
||||
dff.overrequestCount);
|
||||
dff.initialMincount = 0; // TODO: we could change this to 1, but would
|
||||
// then need more refinement for small facet
|
||||
// result sets?
|
||||
} else {
|
||||
// if limit==-1, then no need to artificially lower mincount to 0 if it's 1
|
||||
// if limit==-1, then no need to artificially lower mincount to 0 if
|
||||
// it's 1
|
||||
dff.initialMincount = Math.min(dff.minCount, 1);
|
||||
}
|
||||
} else {
|
||||
// we're sorting by index order.
|
||||
// if minCount==0, we should always be able to get accurate results w/o over-requesting or refining
|
||||
// if minCount==1, we should be able to get accurate results w/o over-requesting, but we'll need to refine
|
||||
// if minCount==n (>1), we can set the initialMincount to minCount/nShards, rounded up.
|
||||
// For example, we know that if minCount=10 and we have 3 shards, then at least one shard must have a count of 4 for the term
|
||||
// For the minCount>1 case, we can generate too short of a list (miss terms at the end of the list) unless limit==-1
|
||||
// For example: each shard could produce a list of top 10, but some of those could fail to make it into the combined list (i.e.
|
||||
// we needed to go beyond the top 10 to generate the top 10 combined). Overrequesting can help a little here, but not as
|
||||
// if minCount==0, we should always be able to get accurate results w/o
|
||||
// over-requesting or refining
|
||||
// if minCount==1, we should be able to get accurate results w/o
|
||||
// over-requesting, but we'll need to refine
|
||||
// if minCount==n (>1), we can set the initialMincount to
|
||||
// minCount/nShards, rounded up.
|
||||
// For example, we know that if minCount=10 and we have 3 shards, then
|
||||
// at least one shard must have a count of 4 for the term
|
||||
// For the minCount>1 case, we can generate too short of a list (miss
|
||||
// terms at the end of the list) unless limit==-1
|
||||
// For example: each shard could produce a list of top 10, but some of
|
||||
// those could fail to make it into the combined list (i.e.
|
||||
// we needed to go beyond the top 10 to generate the top 10 combined).
|
||||
// Overrequesting can help a little here, but not as
|
||||
// much as when sorting by count.
|
||||
if (dff.minCount <= 1) {
|
||||
dff.initialMincount = dff.minCount;
|
||||
} else {
|
||||
dff.initialMincount = (int) Math.ceil((double) dff.minCount / rb.slices.length);
|
||||
// dff.initialMincount = 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (dff.initialMincount != 0) {
|
||||
sreq.params.set(paramStart + FacetParams.FACET_MINCOUNT, dff.initialMincount);
|
||||
}
|
||||
|
||||
// Currently this is for testing only and allows overriding of the
|
||||
// facet.limit set to the shards
|
||||
dff.initialLimit = rb.req.getParams().getInt("facet.shard.limit", dff.initialLimit);
|
||||
|
||||
sreq.params.set(paramStart + FacetParams.FACET_LIMIT, dff.initialLimit);
|
||||
sreq.params.set(paramStart + FacetParams.FACET_MINCOUNT, dff.initialMincount);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
private void modifyRequestForPivotFacets(ResponseBuilder rb,
|
||||
ShardRequest sreq,
|
||||
SimpleOrderedMap<PivotFacet> pivotFacets) {
|
||||
for (Entry<String,PivotFacet> pfwEntry : pivotFacets) {
|
||||
PivotFacet pivot = pfwEntry.getValue();
|
||||
for (String pivotField : StrUtils.splitSmart(pivot.getKey(), ',')) {
|
||||
modifyRequestForIndividualPivotFacets(rb, sreq, pivotField);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void modifyRequestForIndividualPivotFacets(ResponseBuilder rb, ShardRequest sreq,
|
||||
String fieldToOverRequest) {
|
||||
|
||||
final SolrParams originalParams = rb.req.getParams();
|
||||
final String paramStart = "f." + fieldToOverRequest + ".";
|
||||
|
||||
final int requestedLimit = originalParams.getFieldInt(fieldToOverRequest,
|
||||
FacetParams.FACET_LIMIT, 100);
|
||||
sreq.params.remove(paramStart + FacetParams.FACET_LIMIT);
|
||||
|
||||
final int offset = originalParams.getFieldInt(fieldToOverRequest,
|
||||
FacetParams.FACET_OFFSET, 0);
|
||||
sreq.params.remove(paramStart + FacetParams.FACET_OFFSET);
|
||||
|
||||
final double overRequestRatio = originalParams.getFieldDouble
|
||||
(fieldToOverRequest, FacetParams.FACET_OVERREQUEST_RATIO, 1.5);
|
||||
sreq.params.remove(paramStart + FacetParams.FACET_OVERREQUEST_RATIO);
|
||||
|
||||
final int overRequestCount = originalParams.getFieldInt
|
||||
(fieldToOverRequest, FacetParams.FACET_OVERREQUEST_COUNT, 10);
|
||||
sreq.params.remove(paramStart + FacetParams.FACET_OVERREQUEST_COUNT);
|
||||
|
||||
final int requestedMinCount = originalParams.getFieldInt
|
||||
(fieldToOverRequest, FacetParams.FACET_PIVOT_MINCOUNT, 1);
|
||||
sreq.params.remove(paramStart + FacetParams.FACET_PIVOT_MINCOUNT);
|
||||
|
||||
final String defaultSort = (requestedLimit > 0)
|
||||
? FacetParams.FACET_SORT_COUNT : FacetParams.FACET_SORT_INDEX;
|
||||
final String sort = originalParams.getFieldParam
|
||||
(fieldToOverRequest, FacetParams.FACET_SORT, defaultSort);
|
||||
|
||||
int shardLimit = requestedLimit + offset;
|
||||
int shardMinCount = requestedMinCount;
|
||||
|
||||
// per-shard mincount & overrequest
|
||||
if ( FacetParams.FACET_SORT_INDEX.equals(sort) &&
|
||||
1 < requestedMinCount &&
|
||||
0 < requestedLimit) {
|
||||
|
||||
// We can divide the mincount by num shards rounded up, because unless
|
||||
// a single shard has at least that many it can't compete...
|
||||
shardMinCount = (int) Math.ceil((double) requestedMinCount / rb.slices.length);
|
||||
|
||||
// ...but we still need to overrequest to reduce chances of missing something
|
||||
shardLimit = doOverRequestMath(shardLimit, overRequestRatio, overRequestCount);
|
||||
|
||||
// (for mincount <= 1, no overrequest needed)
|
||||
|
||||
} else if ( FacetParams.FACET_SORT_COUNT.equals(sort) ) {
|
||||
if ( 0 < requestedLimit ) {
|
||||
shardLimit = doOverRequestMath(shardLimit, overRequestRatio, overRequestCount);
|
||||
shardMinCount = 0;
|
||||
} else {
|
||||
// turn off faceting on other requests
|
||||
sreq.params.set(FacetParams.FACET, "false");
|
||||
// we could optionally remove faceting params
|
||||
shardMinCount = Math.min(requestedMinCount, 1);
|
||||
}
|
||||
}
|
||||
sreq.params.set(paramStart + FacetParams.FACET_LIMIT, shardLimit);
|
||||
sreq.params.set(paramStart + FacetParams.FACET_PIVOT_MINCOUNT, shardMinCount);
|
||||
}
|
||||
|
||||
private int doOverRequestMath(int limit, double ratio, int count) {
|
||||
// NOTE: normally, "1.0F < ratio"
|
||||
//
|
||||
// if the user chooses a ratio < 1, we allow it and don't "bottom out" at
|
||||
// the original limit until *after* we've also added the count.
|
||||
int adjustedLimit = (int) (limit * ratio) + count;
|
||||
return Math.max(limit, adjustedLimit);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void handleResponses(ResponseBuilder rb, ShardRequest sreq) {
|
||||
|
@ -282,13 +454,18 @@ public class FacetComponent extends SearchComponent
|
|||
|
||||
if ((sreq.purpose & ShardRequest.PURPOSE_GET_FACETS) != 0) {
|
||||
countFacets(rb, sreq);
|
||||
} else if ((sreq.purpose & ShardRequest.PURPOSE_REFINE_FACETS)!=0) {
|
||||
} else {
|
||||
// at present PURPOSE_REFINE_FACETS and PURPOSE_REFINE_PIVOT_FACETS
|
||||
// don't co-exist in individual requests, but don't assume that
|
||||
// will always be the case
|
||||
if ((sreq.purpose & ShardRequest.PURPOSE_REFINE_FACETS) != 0) {
|
||||
refineFacets(rb, sreq);
|
||||
}
|
||||
if ((sreq.purpose & ShardRequest.PURPOSE_REFINE_PIVOT_FACETS) != 0) {
|
||||
refinePivotFacets(rb, sreq);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
private void countFacets(ResponseBuilder rb, ShardRequest sreq) {
|
||||
FacetInfo fi = rb._facetInfo;
|
||||
|
@ -298,12 +475,12 @@ public class FacetComponent extends SearchComponent
|
|||
NamedList facet_counts = null;
|
||||
try {
|
||||
facet_counts = (NamedList) srsp.getSolrResponse().getResponse().get("facet_counts");
|
||||
}
|
||||
catch(Exception ex) {
|
||||
} catch (Exception ex) {
|
||||
if (rb.req.getParams().getBool(ShardParams.SHARDS_TOLERANT, false)) {
|
||||
continue; // looks like a shard did not return anything
|
||||
}
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR, "Unable to read facet info for shard: "+srsp.getShard(), ex);
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR,
|
||||
"Unable to read facet info for shard: " + srsp.getShard(), ex);
|
||||
}
|
||||
|
||||
// handle facet queries
|
||||
|
@ -329,14 +506,21 @@ public class FacetComponent extends SearchComponent
|
|||
// Distributed facet_dates
|
||||
doDistribDates(fi, facet_counts);
|
||||
|
||||
|
||||
// Distributed facet_ranges
|
||||
doDistribRanges(fi, facet_counts);
|
||||
|
||||
|
||||
// Distributed facet_intervals
|
||||
doDistribIntervals(fi, facet_counts);
|
||||
|
||||
// Distributed facet_pivots - this is just the per shard collection,
|
||||
// refinement reqs still needed (below) once we've considered every shard
|
||||
doDistribPivots(rb, shardNum, facet_counts);
|
||||
|
||||
} // end for-each-response-in-shard-request...
|
||||
|
||||
// refine each pivot based on the new shard data
|
||||
for (Entry<String,PivotFacet> pivotFacet : fi.pivotFacets) {
|
||||
pivotFacet.getValue().queuePivotRefinementRequests();
|
||||
}
|
||||
|
||||
//
|
||||
|
@ -344,7 +528,6 @@ public class FacetComponent extends SearchComponent
|
|||
// request ((with responses from all shards) sent out to get facets...
|
||||
// otherwise we would need to wait until all facet responses were received.
|
||||
//
|
||||
|
||||
for (DistribFieldFacet dff : fi.facets.values()) {
|
||||
// no need to check these facets for refinement
|
||||
if (dff.initialLimit <= 0 && dff.initialMincount <= 1) continue;
|
||||
|
@ -357,7 +540,8 @@ public class FacetComponent extends SearchComponent
|
|||
dff._toRefine = tmp;
|
||||
|
||||
ShardFacetCount[] counts = dff.getCountSorted();
|
||||
int ntop = Math.min(counts.length, dff.limit >= 0 ? dff.offset + dff.limit : Integer.MAX_VALUE);
|
||||
int ntop = Math.min(counts.length,
|
||||
dff.limit >= 0 ? dff.offset + dff.limit : Integer.MAX_VALUE);
|
||||
long smallestCount = counts.length == 0 ? 0 : counts[ntop - 1].count;
|
||||
|
||||
for (int i = 0; i < counts.length; i++) {
|
||||
|
@ -376,7 +560,8 @@ public class FacetComponent extends SearchComponent
|
|||
long maxCount = sfc.count;
|
||||
for (int shardNum = 0; shardNum < rb.shards.length; shardNum++) {
|
||||
FixedBitSet fbs = dff.counted[shardNum];
|
||||
if (fbs!=null && (sfc.termNum >= fbs.length() || !fbs.get(sfc.termNum))) { // fbs can be null if a shard request failed
|
||||
// fbs can be null if a shard request failed
|
||||
if (fbs != null && (sfc.termNum >= fbs.length() || !fbs.get(sfc.termNum))) {
|
||||
// if missing from this shard, add the max it could be
|
||||
maxCount += dff.maxPossible(sfc, shardNum);
|
||||
}
|
||||
|
@ -391,7 +576,11 @@ public class FacetComponent extends SearchComponent
|
|||
// add a query for each shard missing the term that needs refinement
|
||||
for (int shardNum = 0; shardNum < rb.shards.length; shardNum++) {
|
||||
FixedBitSet fbs = dff.counted[shardNum];
|
||||
if(fbs!=null && (sfc.termNum >= fbs.length() || !fbs.get(sfc.termNum)) && dff.maxPossible(sfc,shardNum)>0) {
|
||||
// fbs can be null if a shard request failed
|
||||
if (fbs != null &&
|
||||
(sfc.termNum >= fbs.length() || !fbs.get(sfc.termNum)) &&
|
||||
dff.maxPossible(sfc, shardNum) > 0) {
|
||||
|
||||
dff.needRefinements = true;
|
||||
List<String> lst = dff._toRefine[shardNum];
|
||||
if (lst == null) {
|
||||
|
@ -405,7 +594,6 @@ public class FacetComponent extends SearchComponent
|
|||
}
|
||||
}
|
||||
|
||||
//
|
||||
// The implementation below uses the first encountered shard's
|
||||
// facet_intervals as the basis for subsequent shards' data to be merged.
|
||||
private void doDistribIntervals(FacetInfo fi, NamedList facet_counts) {
|
||||
|
@ -456,7 +644,6 @@ public class FacetComponent extends SearchComponent
|
|||
//
|
||||
// The implementation below uses the first encountered shard's
|
||||
// facet_ranges as the basis for subsequent shards' data to be merged.
|
||||
|
||||
private void doDistribRanges(FacetInfo fi, NamedList facet_counts) {
|
||||
@SuppressWarnings("unchecked")
|
||||
SimpleOrderedMap<SimpleOrderedMap<Object>> facet_ranges =
|
||||
|
@ -544,6 +731,20 @@ public class FacetComponent extends SearchComponent
|
|||
}
|
||||
}
|
||||
|
||||
private void doDistribPivots(ResponseBuilder rb, int shardNum, NamedList facet_counts) {
|
||||
@SuppressWarnings("unchecked")
|
||||
SimpleOrderedMap<List<NamedList<Object>>> facet_pivot
|
||||
= (SimpleOrderedMap<List<NamedList<Object>>>) facet_counts.get(PIVOT_KEY);
|
||||
|
||||
if (facet_pivot != null) {
|
||||
for (Map.Entry<String,List<NamedList<Object>>> pivot : facet_pivot) {
|
||||
final String pivotName = pivot.getKey();
|
||||
PivotFacet facet = rb._facetInfo.pivotFacets.get(pivotName);
|
||||
facet.mergeResponseFromShard(shardNum, rb, pivot.getValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private void refineFacets(ResponseBuilder rb, ShardRequest sreq) {
|
||||
FacetInfo fi = rb._facetInfo;
|
||||
|
@ -568,11 +769,10 @@ public class FacetComponent extends SearchComponent
|
|||
ShardFacetCount sfc = dff.counts.get(name);
|
||||
if (sfc == null) {
|
||||
// we got back a term we didn't ask for?
|
||||
log.error("Unexpected term returned for facet refining. key=" + key + " term='" + name + "'"
|
||||
+ "\n\trequest params=" + sreq.params
|
||||
+ "\n\ttoRefine=" + dff._toRefine
|
||||
+ "\n\tresponse=" + shardCounts
|
||||
);
|
||||
log.error("Unexpected term returned for facet refining. key=" + key
|
||||
+ " term='" + name + "'" + "\n\trequest params=" + sreq.params
|
||||
+ "\n\ttoRefine=" + dff._toRefine + "\n\tresponse="
|
||||
+ shardCounts);
|
||||
continue;
|
||||
}
|
||||
sfc.count += count;
|
||||
|
@ -581,13 +781,75 @@ public class FacetComponent extends SearchComponent
|
|||
}
|
||||
}
|
||||
|
||||
private void refinePivotFacets(ResponseBuilder rb, ShardRequest sreq) {
|
||||
// This is after the shard has returned the refinement request
|
||||
FacetInfo fi = rb._facetInfo;
|
||||
for (ShardResponse srsp : sreq.responses) {
|
||||
|
||||
int shardNumber = rb.getShardNum(srsp.getShard());
|
||||
|
||||
NamedList facetCounts = (NamedList) srsp.getSolrResponse().getResponse().get("facet_counts");
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
NamedList<List<NamedList<Object>>> pivotFacetResponsesFromShard
|
||||
= (NamedList<List<NamedList<Object>>>) facetCounts.get(PIVOT_KEY);
|
||||
|
||||
if (null == pivotFacetResponsesFromShard) {
|
||||
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
|
||||
"No pivot refinement response from shard: " + srsp.getShard());
|
||||
}
|
||||
|
||||
for (Entry<String,List<NamedList<Object>>> pivotFacetResponseFromShard : pivotFacetResponsesFromShard) {
|
||||
PivotFacet masterPivotFacet = fi.pivotFacets.get(pivotFacetResponseFromShard.getKey());
|
||||
masterPivotFacet.mergeResponseFromShard(shardNumber, rb, pivotFacetResponseFromShard.getValue());
|
||||
masterPivotFacet.removeAllRefinementsForShard(shardNumber);
|
||||
}
|
||||
}
|
||||
|
||||
if (allPivotFacetsAreFullyRefined(fi)) {
|
||||
for (Entry<String,PivotFacet> pf : fi.pivotFacets) {
|
||||
pf.getValue().queuePivotRefinementRequests();
|
||||
}
|
||||
reQueuePivotFacetShardRequests(rb);
|
||||
}
|
||||
}
|
||||
|
||||
private boolean allPivotFacetsAreFullyRefined(FacetInfo fi) {
|
||||
|
||||
for (Entry<String,PivotFacet> pf : fi.pivotFacets) {
|
||||
if (pf.getValue().isRefinementsRequired()) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean doAnyPivotFacetRefinementRequestsExistForShard(FacetInfo fi,
|
||||
int shardNum) {
|
||||
for (int i = 0; i < fi.pivotFacets.size(); i++) {
|
||||
PivotFacet pf = fi.pivotFacets.getVal(i);
|
||||
if ( ! pf.getQueuedRefinements(shardNum).isEmpty() ) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private void reQueuePivotFacetShardRequests(ResponseBuilder rb) {
|
||||
for (int shardNum = 0; shardNum < rb.shards.length; shardNum++) {
|
||||
if (doAnyPivotFacetRefinementRequestsExistForShard(rb._facetInfo, shardNum)) {
|
||||
enqueuePivotFacetShardRequests(null, rb, shardNum);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void finishStage(ResponseBuilder rb) {
|
||||
pivotRefinementCounter = 0;
|
||||
if (!rb.doFacets || rb.stage != ResponseBuilder.STAGE_GET_FIELDS) return;
|
||||
// wait until STAGE_GET_FIELDS
|
||||
// so that "result" is already stored in the response (for aesthetics)
|
||||
|
||||
|
||||
FacetInfo fi = rb._facetInfo;
|
||||
|
||||
NamedList<Object> facet_counts = new SimpleOrderedMap<>();
|
||||
|
@ -602,7 +864,8 @@ public class FacetComponent extends SearchComponent
|
|||
facet_counts.add("facet_fields", facet_fields);
|
||||
|
||||
for (DistribFieldFacet dff : fi.facets.values()) {
|
||||
NamedList<Object> fieldCounts = new NamedList<>(); // order is more important for facets
|
||||
// order is important for facet values, so use NamedList
|
||||
NamedList<Object> fieldCounts = new NamedList<>();
|
||||
facet_fields.add(dff.getKey(), fieldCounts);
|
||||
|
||||
ShardFacetCount[] counts;
|
||||
|
@ -619,7 +882,8 @@ public class FacetComponent extends SearchComponent
|
|||
}
|
||||
|
||||
if (countSorted) {
|
||||
int end = dff.limit < 0 ? counts.length : Math.min(dff.offset + dff.limit, counts.length);
|
||||
int end = dff.limit < 0
|
||||
? counts.length : Math.min(dff.offset + dff.limit, counts.length);
|
||||
for (int i = dff.offset; i < end; i++) {
|
||||
if (counts[i].count < dff.minCount) {
|
||||
break;
|
||||
|
@ -655,11 +919,30 @@ public class FacetComponent extends SearchComponent
|
|||
facet_counts.add("facet_ranges", fi.rangeFacets);
|
||||
facet_counts.add("facet_intervals", fi.intervalFacets);
|
||||
|
||||
if (fi.pivotFacets != null && fi.pivotFacets.size() > 0) {
|
||||
facet_counts.add(PIVOT_KEY, createPivotFacetOutput(rb));
|
||||
}
|
||||
|
||||
rb.rsp.add("facet_counts", facet_counts);
|
||||
|
||||
rb._facetInfo = null; // could be big, so release asap
|
||||
}
|
||||
|
||||
private SimpleOrderedMap<List<NamedList<Object>>> createPivotFacetOutput(ResponseBuilder rb) {
|
||||
|
||||
SimpleOrderedMap<List<NamedList<Object>>> combinedPivotFacets = new SimpleOrderedMap<>();
|
||||
for (Entry<String,PivotFacet> entry : rb._facetInfo.pivotFacets) {
|
||||
String key = entry.getKey();
|
||||
PivotFacet pivot = entry.getValue();
|
||||
List<NamedList<Object>> trimmedPivots = pivot.getTrimmedPivotsAsListOfNamedLists(rb);
|
||||
if (null == trimmedPivots) {
|
||||
trimmedPivots = Collections.<NamedList<Object>>emptyList();
|
||||
}
|
||||
|
||||
combinedPivotFacets.add(key, trimmedPivots);
|
||||
}
|
||||
return combinedPivotFacets;
|
||||
}
|
||||
|
||||
// use <int> tags for smaller facet counts (better back compatibility)
|
||||
private Number num(long val) {
|
||||
|
@ -699,6 +982,8 @@ public class FacetComponent extends SearchComponent
|
|||
= new SimpleOrderedMap<>();
|
||||
public SimpleOrderedMap<SimpleOrderedMap<Integer>> intervalFacets
|
||||
= new SimpleOrderedMap<>();
|
||||
public SimpleOrderedMap<PivotFacet> pivotFacets
|
||||
= new SimpleOrderedMap<>();
|
||||
|
||||
void parse(SolrParams params, ResponseBuilder rb) {
|
||||
queryFacets = new LinkedHashMap<>();
|
||||
|
@ -720,6 +1005,15 @@ public class FacetComponent extends SearchComponent
|
|||
facets.put(ff.getKey(), ff);
|
||||
}
|
||||
}
|
||||
|
||||
// Develop Pivot Facet Information
|
||||
String[] facetPFs = params.getParams(FacetParams.FACET_PIVOT);
|
||||
if (facetPFs != null) {
|
||||
for (String fieldGroup : facetPFs) {
|
||||
PivotFacet pf = new PivotFacet(rb, fieldGroup);
|
||||
pivotFacets.add(pf.getKey(), pf);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -730,14 +1024,16 @@ public class FacetComponent extends SearchComponent
|
|||
String facetType; // facet.field, facet.query, etc (make enum?)
|
||||
String facetStr; // original parameter value of facetStr
|
||||
String facetOn; // the field or query, absent localParams if appropriate
|
||||
private String key; // label in the response for the result... "foo" for {!key=foo}myfield
|
||||
private String key; // label in the response for the result...
|
||||
// "foo" for {!key=foo}myfield
|
||||
SolrParams localParams; // any local params for the facet
|
||||
|
||||
public FacetBase(ResponseBuilder rb, String facetType, String facetStr) {
|
||||
this.facetType = facetType;
|
||||
this.facetStr = facetStr;
|
||||
try {
|
||||
this.localParams = QueryParsing.getLocalParams(facetStr, rb.req.getParams());
|
||||
this.localParams = QueryParsing.getLocalParams(facetStr,
|
||||
rb.req.getParams());
|
||||
} catch (SyntaxError e) {
|
||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
|
||||
}
|
||||
|
@ -775,7 +1071,8 @@ public class FacetComponent extends SearchComponent
|
|||
* <b>This API is experimental and subject to change</b>
|
||||
*/
|
||||
public static class FieldFacet extends FacetBase {
|
||||
public String field; // the field to facet on... "myfield" for {!key=foo}myfield
|
||||
public String field; // the field to facet on... "myfield" for
|
||||
// {!key=foo}myfield
|
||||
public FieldType ftype;
|
||||
public int offset;
|
||||
public int limit;
|
||||
|
@ -790,7 +1087,7 @@ public class FacetComponent extends SearchComponent
|
|||
fillParams(rb, rb.req.getParams(), facetOn);
|
||||
}
|
||||
|
||||
private void fillParams(ResponseBuilder rb, SolrParams params, String field) {
|
||||
protected void fillParams(ResponseBuilder rb, SolrParams params, String field) {
|
||||
this.field = field;
|
||||
this.ftype = rb.req.getSchema().getFieldTypeNoEx(this.field);
|
||||
this.offset = params.getFieldInt(field, FacetParams.FACET_OFFSET, 0);
|
||||
|
@ -805,7 +1102,10 @@ public class FacetComponent extends SearchComponent
|
|||
this.minCount = mincount;
|
||||
this.missing = params.getFieldBool(field, FacetParams.FACET_MISSING, false);
|
||||
// default to sorting by count if there is a limit.
|
||||
this.sort = params.getFieldParam(field, FacetParams.FACET_SORT, limit>0 ? FacetParams.FACET_SORT_COUNT : FacetParams.FACET_SORT_INDEX);
|
||||
this.sort = params.getFieldParam(field, FacetParams.FACET_SORT,
|
||||
(limit > 0 ?
|
||||
FacetParams.FACET_SORT_COUNT
|
||||
: FacetParams.FACET_SORT_INDEX));
|
||||
if (this.sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
|
||||
this.sort = FacetParams.FACET_SORT_COUNT;
|
||||
} else if (this.sort.equals(FacetParams.FACET_SORT_INDEX_LEGACY)) {
|
||||
|
@ -818,21 +1118,27 @@ public class FacetComponent extends SearchComponent
|
|||
/**
|
||||
* <b>This API is experimental and subject to change</b>
|
||||
*/
|
||||
@SuppressWarnings("rawtypes")
|
||||
public static class DistribFieldFacet extends FieldFacet {
|
||||
public List<String>[] _toRefine; // a List<String> of refinements needed, one for each shard.
|
||||
public List<String>[] _toRefine; // a List<String> of refinements needed,
|
||||
// one for each shard.
|
||||
|
||||
// SchemaField sf; // currently unneeded
|
||||
|
||||
// the max possible count for a term appearing on no list
|
||||
public long missingMaxPossible;
|
||||
// the max possible count for a missing term for each shard (indexed by shardNum)
|
||||
// the max possible count for a missing term for each shard (indexed by
|
||||
// shardNum)
|
||||
public long[] missingMax;
|
||||
public FixedBitSet[] counted; // a bitset for each shard, keeping track of which terms seen
|
||||
// a bitset for each shard, keeping track of which terms seen
|
||||
public FixedBitSet[] counted;
|
||||
public HashMap<String,ShardFacetCount> counts = new HashMap<>(128);
|
||||
public int termNum;
|
||||
|
||||
public int initialLimit; // how many terms requested in first phase
|
||||
public int initialMincount; // mincount param sent to each shard
|
||||
public double overrequestRatio;
|
||||
public int overrequestCount;
|
||||
public boolean needRefinements;
|
||||
public ShardFacetCount[] countSorted;
|
||||
|
||||
|
@ -843,6 +1149,15 @@ public class FacetComponent extends SearchComponent
|
|||
counted = new FixedBitSet[rb.shards.length];
|
||||
}
|
||||
|
||||
protected void fillParams(ResponseBuilder rb, SolrParams params, String field) {
|
||||
super.fillParams(rb, params, field);
|
||||
this.overrequestRatio
|
||||
= params.getFieldDouble(field, FacetParams.FACET_OVERREQUEST_RATIO, 1.5);
|
||||
this.overrequestCount
|
||||
= params.getFieldInt(field, FacetParams.FACET_OVERREQUEST_COUNT, 10);
|
||||
|
||||
}
|
||||
|
||||
void add(int shardNum, NamedList shardCounts, int numRequested) {
|
||||
// shardCounts could be null if there was an exception
|
||||
int sz = shardCounts == null ? 0 : shardCounts.size();
|
||||
|
@ -872,8 +1187,8 @@ public class FacetComponent extends SearchComponent
|
|||
}
|
||||
}
|
||||
|
||||
// the largest possible missing term is initialMincount if we received less
|
||||
// than the number requested.
|
||||
// the largest possible missing term is initialMincount if we received
|
||||
// less than the number requested.
|
||||
if (numRequested < 0 || numRequested != 0 && numReceived < numRequested) {
|
||||
last = initialMincount;
|
||||
}
|
||||
|
@ -884,7 +1199,8 @@ public class FacetComponent extends SearchComponent
|
|||
}
|
||||
|
||||
public ShardFacetCount[] getLexSorted() {
|
||||
ShardFacetCount[] arr = counts.values().toArray(new ShardFacetCount[counts.size()]);
|
||||
ShardFacetCount[] arr
|
||||
= counts.values().toArray(new ShardFacetCount[counts.size()]);
|
||||
Arrays.sort(arr, new Comparator<ShardFacetCount>() {
|
||||
@Override
|
||||
public int compare(ShardFacetCount o1, ShardFacetCount o2) {
|
||||
|
@ -896,7 +1212,8 @@ public class FacetComponent extends SearchComponent
|
|||
}
|
||||
|
||||
public ShardFacetCount[] getCountSorted() {
|
||||
ShardFacetCount[] arr = counts.values().toArray(new ShardFacetCount[counts.size()]);
|
||||
ShardFacetCount[] arr
|
||||
= counts.values().toArray(new ShardFacetCount[counts.size()]);
|
||||
Arrays.sort(arr, new Comparator<ShardFacetCount>() {
|
||||
@Override
|
||||
public int compare(ShardFacetCount o1, ShardFacetCount o2) {
|
||||
|
@ -923,7 +1240,8 @@ public class FacetComponent extends SearchComponent
|
|||
*/
|
||||
public static class ShardFacetCount {
|
||||
public String name;
|
||||
public String indexed; // the indexed form of the name... used for comparisons.
|
||||
// the indexed form of the name... used for comparisons
|
||||
public String indexed;
|
||||
public long count;
|
||||
public int termNum; // term number starting at 0 (used in bit arrays)
|
||||
|
||||
|
|
|
@ -0,0 +1,164 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.handler.component;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.BitSet;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.solr.common.params.FacetParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.handler.component.FacetComponent.FacetBase;
|
||||
|
||||
/**
|
||||
* Models a single instance of a "pivot" specified by a {@link FacetParams#FACET_PIVOT}
|
||||
* param, which may contain multiple nested fields.
|
||||
*
|
||||
* This class is also used to coordinate the refinement requests needed from various
|
||||
* shards when processing a distributed request
|
||||
*/
|
||||
public class PivotFacet extends FacetBase {
|
||||
|
||||
/**
|
||||
* Local param used to indicate that refinements are requried on a pivot. Should
|
||||
* also be used as the prefix for contatenanting with the value to determine the
|
||||
* name of the multi-valued param that will contain all of the values needed for
|
||||
* refinement.
|
||||
*/
|
||||
public static final String REFINE_PARAM = "fpt";
|
||||
|
||||
// TODO: is this really needed? can't we just loop over 0<=i<rb.shards.length ?
|
||||
public final BitSet knownShards = new BitSet();
|
||||
|
||||
private final Map<Integer, List<PivotFacetValue>> queuedRefinements = new HashMap<>();
|
||||
|
||||
// if null, then either we haven't collected any responses from shards
|
||||
// or all the shards that have responded so far haven't had any values for the top
|
||||
// field of this pivot. May be null forever if no doc in any shard has a value
|
||||
// for the top field of the pivot
|
||||
private PivotFacetField pivotFacetField;
|
||||
|
||||
public PivotFacet(ResponseBuilder rb, String facetStr) {
|
||||
super(rb, FacetParams.FACET_PIVOT, facetStr);
|
||||
}
|
||||
|
||||
/**
|
||||
* Tracks that the specified shard needs to be asked to refine the specified
|
||||
* {@link PivotFacetValue}
|
||||
*
|
||||
* @see #getQueuedRefinements
|
||||
*/
|
||||
public void addRefinement(int shardNumber, PivotFacetValue value) {
|
||||
|
||||
if (!queuedRefinements.containsKey(shardNumber)) {
|
||||
queuedRefinements.put(shardNumber, new ArrayList<PivotFacetValue>());
|
||||
}
|
||||
|
||||
queuedRefinements.get(shardNumber).add(value);
|
||||
}
|
||||
|
||||
/**
|
||||
* An immutable List of the {@link PivotFacetValue}s that need to be
|
||||
* refined for this pivot. Once these refinements have been processed,
|
||||
* the caller should clear them using {@link #removeAllRefinementsForShard}
|
||||
*
|
||||
* @see #addRefinement
|
||||
* @see #removeAllRefinementsForShard
|
||||
* @return a list of the values to refine, or an empty list.
|
||||
*/
|
||||
public List<PivotFacetValue> getQueuedRefinements(int shardNumber) {
|
||||
List<PivotFacetValue> raw = queuedRefinements.get(shardNumber);
|
||||
if (null == raw) {
|
||||
raw = Collections.<PivotFacetValue>emptyList();
|
||||
}
|
||||
return Collections.<PivotFacetValue>unmodifiableList(raw);
|
||||
}
|
||||
|
||||
/**
|
||||
* Clears the list of queued refinements for the specified shard
|
||||
*
|
||||
* @see #addRefinement
|
||||
* @see #getQueuedRefinements
|
||||
*/
|
||||
public void removeAllRefinementsForShard(int shardNumber) {
|
||||
queuedRefinements.remove(shardNumber);
|
||||
}
|
||||
|
||||
/**
|
||||
* If true, then additional refinement requests are needed to flesh out the correct
|
||||
* counts for this Pivot
|
||||
*
|
||||
* @see #getQueuedRefinements
|
||||
*/
|
||||
public boolean isRefinementsRequired() {
|
||||
return ! queuedRefinements.isEmpty();
|
||||
}
|
||||
|
||||
/**
|
||||
* A recursive method for generating <code>NamedLists</code> for this pivot
|
||||
* suitable for including in a pivot facet response to the original distributed request.
|
||||
*
|
||||
* @see PivotFacetField#trim
|
||||
* @see PivotFacetField#convertToListOfNamedLists
|
||||
*/
|
||||
public List<NamedList<Object>> getTrimmedPivotsAsListOfNamedLists(ResponseBuilder rb) {
|
||||
if (null == pivotFacetField) {
|
||||
// no values in any shard for the top field of this pivot
|
||||
return Collections.<NamedList<Object>>emptyList();
|
||||
}
|
||||
|
||||
pivotFacetField.trim();
|
||||
return pivotFacetField.convertToListOfNamedLists();
|
||||
}
|
||||
|
||||
/**
|
||||
* A recursive method for determining which {@link PivotFacetValue}s need to be
|
||||
* refined for this pivot.
|
||||
*
|
||||
* @see PivotFacetField#queuePivotRefinementRequests
|
||||
*/
|
||||
public void queuePivotRefinementRequests() {
|
||||
if (null == pivotFacetField) return; // NOOP
|
||||
|
||||
pivotFacetField.sort();
|
||||
pivotFacetField.queuePivotRefinementRequests(this);
|
||||
}
|
||||
|
||||
/**
|
||||
* Recursively merges the response from the specified shard, tracking the known shards.
|
||||
*
|
||||
* @see PivotFacetField#contributeFromShard
|
||||
* @see PivotFacetField#createFromListOfNamedLists
|
||||
*/
|
||||
public void mergeResponseFromShard(int shardNumber, ResponseBuilder rb, List<NamedList<Object>> response) {
|
||||
|
||||
knownShards.set(shardNumber);
|
||||
if (pivotFacetField == null) {
|
||||
pivotFacetField = PivotFacetField.createFromListOfNamedLists(shardNumber, rb, null, response);
|
||||
} else {
|
||||
pivotFacetField.contributeFromShard(shardNumber, rb, response);
|
||||
}
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return "[" + facetStr + "] | " + this.getKey();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,386 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.handler.component;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.solr.common.params.FacetParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
|
||||
|
||||
/**
|
||||
* Models a single field somewhere in a hierarchy of fields as part of a pivot facet.
|
||||
* This pivot field contains {@link PivotFacetValue}s which may each contain a nested
|
||||
* {@link PivotFacetField} child. This <code>PivotFacetField</code> may itself
|
||||
* be a child of a {@link PivotFacetValue} parent.
|
||||
*
|
||||
* @see PivotFacetValue
|
||||
* @see PivotFacetFieldValueCollection
|
||||
*/
|
||||
@SuppressWarnings("rawtypes")
|
||||
public class PivotFacetField {
|
||||
|
||||
public final String field;
|
||||
|
||||
// null if this is a top level pivot,
|
||||
// otherwise the value of the parent pivot we are nested under
|
||||
public final PivotFacetValue parentValue;
|
||||
|
||||
public final PivotFacetFieldValueCollection valueCollection;
|
||||
|
||||
// Facet parameters relating to this field
|
||||
private final int facetFieldLimit;
|
||||
private final int facetFieldMinimumCount;
|
||||
private final int facetFieldOffset;
|
||||
private final String facetFieldSort;
|
||||
|
||||
private final Map<Integer, Integer> numberOfValuesContributedByShard = new HashMap<>();
|
||||
private final Map<Integer, Integer> shardLowestCount = new HashMap<>();
|
||||
|
||||
private boolean needRefinementAtThisLevel = true;
|
||||
|
||||
private PivotFacetField(ResponseBuilder rb, PivotFacetValue parent, String fieldName) {
|
||||
|
||||
field = fieldName;
|
||||
parentValue = parent;
|
||||
|
||||
// facet params
|
||||
SolrParams parameters = rb.req.getParams();
|
||||
facetFieldMinimumCount = parameters.getFieldInt(field, FacetParams.FACET_PIVOT_MINCOUNT, 1);
|
||||
facetFieldOffset = parameters.getFieldInt(field, FacetParams.FACET_OFFSET, 0);
|
||||
facetFieldLimit = parameters.getFieldInt(field, FacetParams.FACET_LIMIT, 100);
|
||||
String defaultSort = (facetFieldLimit > 0) ? FacetParams.FACET_SORT_COUNT : FacetParams.FACET_SORT_INDEX;
|
||||
facetFieldSort = parameters.getFieldParam(field, FacetParams.FACET_SORT, defaultSort);
|
||||
|
||||
valueCollection = new PivotFacetFieldValueCollection(facetFieldMinimumCount, facetFieldOffset, facetFieldLimit, facetFieldSort);
|
||||
|
||||
if ( (facetFieldLimit < 0) ||
|
||||
// TODO: possible refinement issue if limit=0 & mincount=0 & missing=true
|
||||
// (ie: we only want the missing count for this field)
|
||||
(facetFieldLimit <= 0 && facetFieldMinimumCount == 0) ||
|
||||
(facetFieldSort.equals(FacetParams.FACET_SORT_INDEX) && facetFieldMinimumCount <= 0)
|
||||
) {
|
||||
// in any of these cases, there's no need to refine this level of the pivot
|
||||
needRefinementAtThisLevel = false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A recursive method that walks up the tree of pivot fields/values to build
|
||||
* a list of String representations of the values that lead down to this
|
||||
* PivotFacetField.
|
||||
*
|
||||
* @return A mutable List of the pivot values leading down to this pivot field,
|
||||
* will never be null but may contain nulls and may be empty if this is a top
|
||||
* level pivot field
|
||||
* @see PivotFacetValue#getValuePath
|
||||
*/
|
||||
public List<String> getValuePath() {
|
||||
if (null != parentValue) {
|
||||
return parentValue.getValuePath();
|
||||
}
|
||||
return new ArrayList<String>(3);
|
||||
}
|
||||
|
||||
/**
|
||||
* A recursive method to construct a new <code>PivotFacetField</code> object from
|
||||
* the contents of the {@link NamedList}s provided by the specified shard, relative
|
||||
* to a parent value (if this is not the top field in the pivot hierarchy)
|
||||
*
|
||||
* The associated child {@link PivotFacetValue}s will be recursively built as well.
|
||||
*
|
||||
* @see PivotFacetValue#createFromNamedList
|
||||
* @param shardNumber the id of the shard that provided this data
|
||||
* @param rb The response builder of the current request
|
||||
* @param owner the parent value in the current pivot (may be null)
|
||||
* @param pivotValues the data from the specified shard for this pivot field, may be null or empty
|
||||
* @return the new PivotFacetField, null if pivotValues is null or empty.
|
||||
*/
|
||||
public static PivotFacetField createFromListOfNamedLists(int shardNumber, ResponseBuilder rb, PivotFacetValue owner, List<NamedList<Object>> pivotValues) {
|
||||
|
||||
if (null == pivotValues || pivotValues.size() <= 0) return null;
|
||||
|
||||
NamedList<Object> firstValue = pivotValues.get(0);
|
||||
PivotFacetField createdPivotFacetField
|
||||
= new PivotFacetField(rb, owner, PivotFacetHelper.getField(firstValue));
|
||||
|
||||
int lowestCount = Integer.MAX_VALUE;
|
||||
|
||||
for (NamedList<Object> pivotValue : pivotValues) {
|
||||
|
||||
lowestCount = Math.min(lowestCount, PivotFacetHelper.getCount(pivotValue));
|
||||
|
||||
PivotFacetValue newValue = PivotFacetValue.createFromNamedList
|
||||
(shardNumber, rb, createdPivotFacetField, pivotValue);
|
||||
createdPivotFacetField.valueCollection.add(newValue);
|
||||
}
|
||||
|
||||
createdPivotFacetField.shardLowestCount.put(shardNumber, lowestCount);
|
||||
createdPivotFacetField.numberOfValuesContributedByShard.put(shardNumber, pivotValues.size());
|
||||
|
||||
return createdPivotFacetField;
|
||||
}
|
||||
|
||||
/**
|
||||
* Destructive method that recursively prunes values from the data structure
|
||||
* based on the counts for those values and the effective sort, mincount, limit,
|
||||
* and offset being used for each field.
|
||||
* <p>
|
||||
* This method should only be called after all refinement is completed just prior
|
||||
* to calling {@link #convertToListOfNamedLists}
|
||||
* </p>
|
||||
*
|
||||
* @see PivotFacet#getTrimmedPivotsAsListOfNamedLists
|
||||
* @see PivotFacetFieldValueCollection#trim
|
||||
*/
|
||||
public void trim() {
|
||||
// SOLR-6331...
|
||||
//
|
||||
// we can probably optimize the memory usage by trimming each level of the pivot once
|
||||
// we know we've fully refined the values at that level
|
||||
// (ie: fold this logic into refineNextLevelOfFacets)
|
||||
this.valueCollection.trim();
|
||||
}
|
||||
|
||||
/**
|
||||
* Recursively sorts the collection of values associated with this field, and
|
||||
* any sub-pivots those values have.
|
||||
*
|
||||
* @see FacetParams#FACET_SORT
|
||||
* @see PivotFacetFieldValueCollection#sort
|
||||
*/
|
||||
public void sort() {
|
||||
this.valueCollection.sort();
|
||||
}
|
||||
|
||||
/**
|
||||
* A recursive method for generating <code>NamedLists</code> from this field
|
||||
* suitable for including in a pivot facet response to the original distributed request.
|
||||
*/
|
||||
public List<NamedList<Object>> convertToListOfNamedLists() {
|
||||
|
||||
List<NamedList<Object>> convertedPivotList = null;
|
||||
|
||||
if (valueCollection.size() > 0) {
|
||||
convertedPivotList = new LinkedList<>();
|
||||
for (PivotFacetValue pivot : valueCollection)
|
||||
convertedPivotList.add(pivot.convertToNamedList());
|
||||
}
|
||||
|
||||
return convertedPivotList;
|
||||
}
|
||||
|
||||
/**
|
||||
* A recursive method for determining which {@link PivotFacetValue}s need to be
|
||||
* refined for this pivot.
|
||||
*
|
||||
* @see PivotFacet#queuePivotRefinementRequests
|
||||
*/
|
||||
public void queuePivotRefinementRequests(PivotFacet pf) {

  // Phase 1: queue refinements for this level, at most once per field
  // (needRefinementAtThisLevel is cleared at the end of this branch)
  if (needRefinementAtThisLevel && ! valueCollection.getExplicitValuesList().isEmpty()) {

    if (FacetParams.FACET_SORT_COUNT.equals(facetFieldSort)) {
      // we only need to refine things that are currently in our limit,
      // or might be in our limit if we get increased counts from shards that
      // didn't include this value the first time
      //
      // NOTE(review): if facetFieldOffset + facetFieldLimit is 0 this index is -1;
      // whether getAt() tolerates that is not visible here -- confirm
      final int indexOfCountThreshold
        = Math.min(valueCollection.getExplicitValuesListSize(),
                   facetFieldOffset + facetFieldLimit) - 1;
      // count of the last value currently inside the offset+limit window
      final int countThreshold = valueCollection.getAt(indexOfCountThreshold).getCount();

      int positionInResults = 0;

      for (PivotFacetValue value : valueCollection.getExplicitValuesList()) {
        if (positionInResults <= indexOfCountThreshold) {
          // This element is within the top results, so we need to get information
          // from all of the shards.
          processDefiniteCandidateElement(pf, value);
        } else {
          // This element is not within the top results, but may still need to be refined.
          processPossibleCandidateElement(pf, value, countThreshold);
        }

        positionInResults++;
      }
    } else { // FACET_SORT_INDEX
      // everything needs to be refined to see what the per-shard mincount excluded
      for (PivotFacetValue value : valueCollection.getExplicitValuesList()) {
        processDefiniteCandidateElement(pf, value);
      }
    }

    needRefinementAtThisLevel = false;
  }

  // Phase 2: only descend to child pivots once nothing is queued on the pivot
  if ( pf.isRefinementsRequired() ) {
    // if any refinements are needed, then we need to stop and wait to
    // see how the picture may change before drilling down to child pivot fields
    return;
  } else {
    // Since outstanding requests have been filled, then we can drill down
    // to the next deeper level and check it.
    refineNextLevelOfFacets(pf);
  }
}
|
||||
|
||||
/**
|
||||
* Adds refinement requests for the value for each shard that has not already contributed
|
||||
* a count for this value.
|
||||
*/
|
||||
private void processDefiniteCandidateElement(PivotFacet pf, PivotFacetValue value) {
|
||||
|
||||
for (int shard = pf.knownShards.nextSetBit(0);
|
||||
0 <= shard;
|
||||
shard = pf.knownShards.nextSetBit(shard+1)) {
|
||||
if ( ! value.shardHasContributed(shard) ) {
|
||||
if ( // if we're doing index order, we need to refine anything
|
||||
// (mincount may have excluded from a shard)
|
||||
FacetParams.FACET_SORT_INDEX.equals(facetFieldSort)
|
||||
// if we are doing count order, we need to refine if the limit was hit
|
||||
// (if it not, the shard doesn't have the value or it would have returned already)
|
||||
|| numberOfValuesContributedByShardWasLimitedByFacetFieldLimit(shard) ) {
|
||||
|
||||
pf.addRefinement(shard, value);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private boolean numberOfValuesContributedByShardWasLimitedByFacetFieldLimit(int shardNumber) {
|
||||
return facetFieldLimit <= numberOfValuesContributedByShard(shardNumber);
|
||||
}
|
||||
|
||||
private int numberOfValuesContributedByShard(final int shardNumber) {
|
||||
return numberOfValuesContributedByShard.containsKey(shardNumber)
|
||||
? numberOfValuesContributedByShard.get(shardNumber)
|
||||
: 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks the {@link #lowestCountContributedbyShard} for each shard, combined with the
|
||||
* counts we already know, to see if this value is a viable candidate --
|
||||
* <b>Does not make sense when using {@link FacetParams#FACET_SORT_INDEX}</b>
|
||||
*
|
||||
* @see #processDefiniteCandidateElement
|
||||
*/
|
||||
private void processPossibleCandidateElement(PivotFacet pf, PivotFacetValue value,
|
||||
final int refinementThreshold) {
|
||||
|
||||
assert FacetParams.FACET_SORT_COUNT.equals(facetFieldSort)
|
||||
: "Method only makes sense when sorting by count";
|
||||
|
||||
int maxPossibleCountAfterRefinement = value.getCount();
|
||||
|
||||
for (int shard = pf.knownShards.nextSetBit(0);
|
||||
0 <= shard;
|
||||
shard = pf.knownShards.nextSetBit(shard+1)) {
|
||||
if ( ! value.shardHasContributed(shard) ) {
|
||||
maxPossibleCountAfterRefinement += lowestCountContributedbyShard(shard);
|
||||
}
|
||||
}
|
||||
|
||||
if (refinementThreshold <= maxPossibleCountAfterRefinement) {
|
||||
processDefiniteCandidateElement(pf, value);
|
||||
}
|
||||
}
|
||||
|
||||
private int lowestCountContributedbyShard(int shardNumber) {
|
||||
return (shardLowestCount.containsKey(shardNumber))
|
||||
? shardLowestCount.get(shardNumber)
|
||||
: 0;
|
||||
}
|
||||
|
||||
private void refineNextLevelOfFacets(PivotFacet pf) {
|
||||
|
||||
List<PivotFacetValue> explicitValsToRefine
|
||||
= valueCollection.getNextLevelValuesToRefine();
|
||||
|
||||
for (PivotFacetValue value : explicitValsToRefine) {
|
||||
if (null != value.getChildPivot()) {
|
||||
value.getChildPivot().queuePivotRefinementRequests(pf);
|
||||
}
|
||||
}
|
||||
|
||||
PivotFacetValue missing = this.valueCollection.getMissingValue();
|
||||
if(null != missing && null != missing.getChildPivot()) {
|
||||
missing.getChildPivot().queuePivotRefinementRequests(pf);
|
||||
}
|
||||
}
|
||||
|
||||
private void incrementShardValueCount(int shardNumber) {
|
||||
if (!numberOfValuesContributedByShard.containsKey(shardNumber)) {
|
||||
numberOfValuesContributedByShard.put(shardNumber, 1);
|
||||
} else {
|
||||
numberOfValuesContributedByShard.put(shardNumber, numberOfValuesContributedByShard.get(shardNumber)+1);
|
||||
}
|
||||
}
|
||||
|
||||
private void contributeValueFromShard(int shardNumber, ResponseBuilder rb, NamedList<Object> shardValue) {
|
||||
|
||||
incrementShardValueCount(shardNumber);
|
||||
|
||||
Comparable value = PivotFacetHelper.getValue(shardValue);
|
||||
int count = PivotFacetHelper.getCount(shardValue);
|
||||
|
||||
// We're changing values so we most mark the collection as dirty
|
||||
valueCollection.markDirty();
|
||||
|
||||
if ( ( !shardLowestCount.containsKey(shardNumber) )
|
||||
|| shardLowestCount.get(shardNumber) > count) {
|
||||
shardLowestCount.put(shardNumber, count);
|
||||
}
|
||||
|
||||
PivotFacetValue facetValue = valueCollection.get(value);
|
||||
if (null == facetValue) {
|
||||
// never seen before, we need to create it from scratch
|
||||
facetValue = PivotFacetValue.createFromNamedList(shardNumber, rb, this, shardValue);
|
||||
this.valueCollection.add(facetValue);
|
||||
} else {
|
||||
facetValue.mergeContributionFromShard(shardNumber, rb, shardValue);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Recursively merges the contributions from the specified shard for each
|
||||
* {@link PivotFacetValue} represented in the <code>response</code>.
|
||||
*
|
||||
* @see PivotFacetValue#mergeContributionFromShard
|
||||
* @param shardNumber the id of the shard that provided this data
|
||||
* @param rb The response builder of the current request
|
||||
* @param response the data from the specified shard for this pivot field, may be null
|
||||
*/
|
||||
public void contributeFromShard(int shardNumber, ResponseBuilder rb, List<NamedList<Object>> response) {
|
||||
if (null == response) return;
|
||||
|
||||
for (NamedList<Object> responseValue : response) {
|
||||
contributeValueFromShard(shardNumber, rb, responseValue);
|
||||
}
|
||||
}
|
||||
|
||||
public String toString(){
|
||||
return String.format(Locale.ROOT, "P:%s F:%s V:%s",
|
||||
parentValue, field, valueCollection);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,342 @@
|
|||
package org.apache.solr.handler.component;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.solr.common.params.FacetParams;
|
||||
|
||||
/**
|
||||
* Emcapsulates a collection of {@link PivotFacetValue}s associated with a
|
||||
* {@link PivotFacetField} withs pecial tracking of a {@link PivotFacetValue}
|
||||
* corrisponding to the <code>null</code> value when {@link FacetParams#FACET_MISSING}
|
||||
* is used.
|
||||
*
|
||||
* @see #markDirty
|
||||
* @see PivotFacetField
|
||||
* @see PivotFacetValue
|
||||
*/
|
||||
@SuppressWarnings("rawtypes")
|
||||
public class PivotFacetFieldValueCollection implements Iterable<PivotFacetValue> {
|
||||
private List<PivotFacetValue> explicitValues;
|
||||
private PivotFacetValue missingValue;
|
||||
private Map<Comparable, PivotFacetValue> valuesMap;
|
||||
private boolean dirty = true;
|
||||
|
||||
//Facet parameters relating to this field
|
||||
private final int facetFieldMinimumCount;
|
||||
private final int facetFieldOffset;
|
||||
private final int facetFieldLimit;
|
||||
private final String facetFieldSort;
|
||||
|
||||
|
||||
public PivotFacetFieldValueCollection(int minCount, int offset, int limit, String fieldSort){
|
||||
this.explicitValues = new ArrayList<>();
|
||||
this.valuesMap = new HashMap<>();
|
||||
this.facetFieldMinimumCount = minCount;
|
||||
this.facetFieldOffset = offset;
|
||||
this.facetFieldLimit = limit;
|
||||
this.facetFieldSort = fieldSort;
|
||||
}
|
||||
|
||||
/**
|
||||
* Indicates that the values in this collection have been modified by the caller.
|
||||
*
|
||||
* Any caller that manipulates the {@link PivotFacetValue}s contained in this collection
|
||||
* must call this method after doing so.
|
||||
*/
|
||||
public void markDirty() {
|
||||
dirty = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* The {@link PivotFacetValue} with corisponding to a a value of
|
||||
* <code>null</code> when {@link FacetParams#FACET_MISSING} is used.
|
||||
*
|
||||
* @return the appropriate <code>PivotFacetValue</code> object, may be null
|
||||
* if we "missing" is not in use, or if it does not meat the mincount.
|
||||
*/
|
||||
public PivotFacetValue getMissingValue(){
|
||||
return missingValue;
|
||||
}
|
||||
|
||||
/**
|
||||
* Read-Only access to the Collection of {@link PivotFacetValue}s corrisponding to
|
||||
* non-missing values.
|
||||
*
|
||||
* @see #getMissingValue
|
||||
*/
|
||||
public List<PivotFacetValue> getExplicitValuesList() {
|
||||
return Collections.unmodifiableList(explicitValues);
|
||||
}
|
||||
|
||||
/**
|
||||
* Size of {@link #getExplicitValuesList}
|
||||
*/
|
||||
public int getExplicitValuesListSize() {
|
||||
return this.explicitValues.size();
|
||||
}
|
||||
|
||||
/**
|
||||
* Total number of {@link PivotFacetValue}s, including the "missing" value if used.
|
||||
*
|
||||
* @see #getMissingValue
|
||||
* @see #getExplicitValuesList
|
||||
*/
|
||||
public int size() {
|
||||
return this.getExplicitValuesListSize() + (this.missingValue == null ? 0 : 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the appropriate sub-list of the explicit values that need to be refined,
|
||||
* based on the {@link FacetParams#FACET_OFFSET} & {@link FacetParams#FACET_LIMIT}
|
||||
* for this field.
|
||||
*
|
||||
* @see #getExplicitValuesList
|
||||
* @see List#subList
|
||||
*/
|
||||
public List<PivotFacetValue> getNextLevelValuesToRefine() {
|
||||
final int numRefinableValues = getExplicitValuesListSize();
|
||||
if (facetFieldOffset < numRefinableValues) {
|
||||
final int offsetPlusCount = (facetFieldLimit >= 0)
|
||||
? Math.min(facetFieldLimit + facetFieldOffset, numRefinableValues)
|
||||
: numRefinableValues;
|
||||
return getExplicitValuesList().subList(facetFieldOffset, offsetPlusCount);
|
||||
} else {
|
||||
return Collections.<PivotFacetValue>emptyList();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Fast lookup to retrieve a {@link PivotFacetValue} from this collection if it
|
||||
* exists
|
||||
*
|
||||
* @param value of the <code>PivotFacetValue</code> to lookup, if
|
||||
* <code>null</code> this returns the same as {@link #getMissingValue}
|
||||
* @return the corrisponding <code>PivotFacetValue</code> or null if there is
|
||||
* no <code>PivotFacetValue</code> in this collection corrisponding to
|
||||
* the specified value.
|
||||
*/
|
||||
public PivotFacetValue get(Comparable value){
|
||||
return valuesMap.get(value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetchs a {@link PivotFacetValue} from this collection via the index, may not
|
||||
* be used to fetch the <code>PivotFacetValue</code> corrisponding to the missing-value.
|
||||
*
|
||||
* @see #getExplicitValuesList
|
||||
* @see List#get(int)
|
||||
* @see #getMissingValue
|
||||
*/
|
||||
public PivotFacetValue getAt(int index){
|
||||
return explicitValues.get(index);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a {@link PivotFacetValue} to this collection -- callers must not use this
|
||||
* method if a {@link PivotFacetValue} with the same value already exists in this collection
|
||||
*/
|
||||
public void add(PivotFacetValue pfValue) {
|
||||
Comparable val = pfValue.getValue();
|
||||
assert ! this.valuesMap.containsKey(val)
|
||||
: "Must not add duplicate PivotFacetValue with redundent inner value";
|
||||
|
||||
dirty = true;
|
||||
if(null == val) {
|
||||
this.missingValue = pfValue;
|
||||
} else {
|
||||
this.explicitValues.add(pfValue);
|
||||
}
|
||||
this.valuesMap.put(val, pfValue);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Destructive method that recursively prunes values from the data structure
|
||||
* based on the counts for those values and the effective sort, mincount, limit,
|
||||
* and offset being used for each field.
|
||||
* <p>
|
||||
* This method should only be called after all refinement is completed.
|
||||
* </p>
|
||||
*
|
||||
* @see PivotFacetField#trim
|
||||
* @see PivotFacet#getTrimmedPivotsAsListOfNamedLists
|
||||
*/
|
||||
public void trim() { // NOTE: destructive
|
||||
// TODO: see comment in PivotFacetField about potential optimization
|
||||
// (ie: trim as we refine)
|
||||
trimNonNullValues();
|
||||
trimNullValue();
|
||||
}
|
||||
|
||||
private void trimNullValue(){
|
||||
if (missingValue == null) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (missingValue.getCount() >= facetFieldMinimumCount){
|
||||
if (null != missingValue.getChildPivot()) {
|
||||
missingValue.getChildPivot().trim();
|
||||
}
|
||||
} else { // missing count less than mincount
|
||||
missingValue = null;
|
||||
}
|
||||
}
|
||||
|
||||
private void trimNonNullValues(){
|
||||
if (explicitValues != null && explicitValues.size() > 0) {
|
||||
|
||||
sort();
|
||||
|
||||
ArrayList<PivotFacetValue> trimmedValues = new ArrayList<>();
|
||||
|
||||
int facetsSkipped = 0;
|
||||
|
||||
for (PivotFacetValue pivotValue : explicitValues) {
|
||||
|
||||
if (pivotValue.getCount() >= facetFieldMinimumCount) {
|
||||
if (facetsSkipped >= facetFieldOffset) {
|
||||
trimmedValues.add(pivotValue);
|
||||
if (pivotValue.getChildPivot() != null) {
|
||||
pivotValue.getChildPivot().trim();
|
||||
}
|
||||
if (facetFieldLimit > 0 && trimmedValues.size() >= facetFieldLimit) {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
facetsSkipped++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
explicitValues = trimmedValues;
|
||||
valuesMap.clear();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Sorts the collection and recursively sorts the collections assocaited with
|
||||
* any sub-pivots.
|
||||
*
|
||||
* @see FacetParams#FACET_SORT
|
||||
* @see PivotFacetField#sort
|
||||
*/
|
||||
public void sort() {
|
||||
|
||||
if (dirty) {
|
||||
if (facetFieldSort.equals(FacetParams.FACET_SORT_COUNT)) {
|
||||
Collections.sort(this.explicitValues, new PivotFacetCountComparator());
|
||||
} else if (facetFieldSort.equals(FacetParams.FACET_SORT_INDEX)) {
|
||||
Collections.sort(this.explicitValues, new PivotFacetValueComparator());
|
||||
}
|
||||
dirty = false;
|
||||
}
|
||||
|
||||
for (PivotFacetValue value : this.explicitValues)
|
||||
if (value.getChildPivot() != null) {
|
||||
value.getChildPivot().sort();
|
||||
}
|
||||
|
||||
if (missingValue != null && missingValue.getChildPivot() != null) {
|
||||
missingValue.getChildPivot().sort();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Iterator over all elements in this Collection, including the result of
|
||||
* {@link #getMissingValue} as the last element (if it exists)
|
||||
*/
|
||||
@Override
|
||||
public Iterator<PivotFacetValue> iterator() {
|
||||
Iterator<PivotFacetValue> it = new Iterator<PivotFacetValue>() {
|
||||
private final Iterator valuesIterator = explicitValues.iterator();
|
||||
private boolean shouldGiveMissingValue = (missingValue != null);
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return valuesIterator.hasNext() || shouldGiveMissingValue;
|
||||
}
|
||||
|
||||
@Override
|
||||
public PivotFacetValue next() {
|
||||
while(valuesIterator.hasNext()){
|
||||
return (PivotFacetValue) valuesIterator.next();
|
||||
}
|
||||
//else
|
||||
if(shouldGiveMissingValue){
|
||||
shouldGiveMissingValue = false;
|
||||
return missingValue;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("Can't remove from this iterator");
|
||||
}
|
||||
};
|
||||
return it;
|
||||
}
|
||||
|
||||
/** Sorts {@link PivotFacetValue} instances by their count */
|
||||
public class PivotFacetCountComparator implements Comparator<PivotFacetValue> {
|
||||
public int compare(PivotFacetValue left, PivotFacetValue right) {
|
||||
int countCmp = right.getCount() - left.getCount();
|
||||
return (0 != countCmp) ? countCmp :
|
||||
compareWithNullLast(left.getValue(), right.getValue());
|
||||
}
|
||||
}
|
||||
|
||||
/** Sorts {@link PivotFacetValue} instances by their value */
|
||||
public class PivotFacetValueComparator implements Comparator<PivotFacetValue> {
|
||||
public int compare(PivotFacetValue left, PivotFacetValue right) {
|
||||
return compareWithNullLast(left.getValue(), right.getValue());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A helper method for use in <code>Comparator</code> classes where object properties
|
||||
* are <code>Comparable</code> but may be null.
|
||||
*/
|
||||
static int compareWithNullLast(final Comparable o1, final Comparable o2) {
|
||||
if (null == o1) {
|
||||
if (null == o2) {
|
||||
return 0;
|
||||
}
|
||||
return 1; // o1 is null, o2 is not
|
||||
}
|
||||
if (null == o2) {
|
||||
return -1; // o2 is null, o1 is not
|
||||
}
|
||||
return o1.compareTo(o2);
|
||||
}
|
||||
|
||||
public String toString(){
|
||||
return String.format(Locale.ROOT, "Values:%s | Missing:%s ", explicitValues, missingValue);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -17,254 +17,105 @@
|
|||
|
||||
package org.apache.solr.handler.component;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Deque;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.TermRangeQuery;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.util.PivotListEntry;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.common.params.FacetParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.request.SimpleFacets;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.schema.FieldType;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.search.DocSet;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.search.SyntaxError;
|
||||
import org.apache.solr.common.util.StrUtils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Collections;
|
||||
|
||||
public class PivotFacetHelper {
|
||||
|
||||
/**
|
||||
* @since solr 4.0
|
||||
* Encodes a value path as a string for the purposes of a refinement request
|
||||
*
|
||||
* @see PivotFacetValue#getValuePath
|
||||
* @see #decodeRefinementValuePath
|
||||
*/
|
||||
public class PivotFacetHelper extends SimpleFacets
|
||||
{
|
||||
public static String encodeRefinementValuePath(List<String> values) {
|
||||
// HACK: prefix flag every value to account for empty string vs null
|
||||
// NOTE: even if we didn't have to worry about nulls, splitSmart is stupid about
|
||||
// pruning empty strings from list
|
||||
// "^" prefix = null
|
||||
// "~" prefix = not null, may be empty string
|
||||
|
||||
protected int minMatch;
|
||||
assert null != values;
|
||||
|
||||
public PivotFacetHelper(SolrQueryRequest req, DocSet docs, SolrParams params, ResponseBuilder rb) {
|
||||
super(req, docs, params, rb);
|
||||
minMatch = params.getInt( FacetParams.FACET_PIVOT_MINCOUNT, 1 );
|
||||
// special case: empty list => empty string
|
||||
if (values.isEmpty()) { return ""; }
|
||||
|
||||
|
||||
StringBuilder out = new StringBuilder();
|
||||
for (String val : values) {
|
||||
if (null == val) {
|
||||
out.append('^');
|
||||
} else {
|
||||
out.append('~');
|
||||
StrUtils.appendEscapedTextToBuilder(out, val, ',');
|
||||
}
|
||||
out.append(',');
|
||||
}
|
||||
out.deleteCharAt(out.length()-1); // prune the last seperator
|
||||
return out.toString();
|
||||
// return StrUtils.join(values, ',');
|
||||
}
|
||||
|
||||
public SimpleOrderedMap<List<NamedList<Object>>> process(String[] pivots) throws IOException {
|
||||
if (!rb.doFacets || pivots == null)
|
||||
/**
|
||||
* Decodes a value path string specified for refinement.
|
||||
*
|
||||
* @see #encodeRefinementValuePath
|
||||
*/
|
||||
public static List<String> decodeRefinementValuePath(String valuePath) {
|
||||
List <String> rawvals = StrUtils.splitSmart(valuePath, ",", true);
|
||||
// special case: empty list => empty string
|
||||
if (rawvals.isEmpty()) return rawvals;
|
||||
|
||||
List<String> out = new ArrayList<String>(rawvals.size());
|
||||
for (String raw : rawvals) {
|
||||
assert 0 < raw.length();
|
||||
if ('^' == raw.charAt(0)) {
|
||||
assert 1 == raw.length();
|
||||
out.add(null);
|
||||
} else {
|
||||
assert '~' == raw.charAt(0);
|
||||
out.add(raw.substring(1));
|
||||
}
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
/** @see PivotListEntry#VALUE */
|
||||
public static Comparable getValue(NamedList<Object> pivotList) {
|
||||
return (Comparable) PivotFacetHelper.retrieve(PivotListEntry.VALUE,
|
||||
pivotList);
|
||||
}
|
||||
|
||||
/** @see PivotListEntry#FIELD */
|
||||
public static String getField(NamedList<Object> pivotList) {
|
||||
return (String) PivotFacetHelper.retrieve(PivotListEntry.FIELD, pivotList);
|
||||
}
|
||||
|
||||
/** @see PivotListEntry#COUNT */
|
||||
public static Integer getCount(NamedList<Object> pivotList) {
|
||||
return (Integer) PivotFacetHelper.retrieve(PivotListEntry.COUNT, pivotList);
|
||||
}
|
||||
|
||||
/** @see PivotListEntry#PIVOT */
|
||||
public static List<NamedList<Object>> getPivots(NamedList<Object> pivotList) {
|
||||
int pivotIdx = pivotList.indexOf(PivotListEntry.PIVOT.getName(), 0);
|
||||
if (pivotIdx > -1) {
|
||||
return (List<NamedList<Object>>) pivotList.getVal(pivotIdx);
|
||||
}
|
||||
return null;
|
||||
|
||||
SimpleOrderedMap<List<NamedList<Object>>> pivotResponse = new SimpleOrderedMap<>();
|
||||
for (String pivot : pivots) {
|
||||
//ex: pivot == "features,cat" or even "{!ex=mytag}features,cat"
|
||||
try {
|
||||
this.parseParams(FacetParams.FACET_PIVOT, pivot);
|
||||
} catch (SyntaxError e) {
|
||||
throw new SolrException(ErrorCode.BAD_REQUEST, e);
|
||||
}
|
||||
pivot = facetValue;//facetValue potentially modified from parseParams()
|
||||
|
||||
String[] fields = pivot.split(",");
|
||||
|
||||
if( fields.length < 2 ) {
|
||||
throw new SolrException( ErrorCode.BAD_REQUEST,
|
||||
"Pivot Facet needs at least two fields: "+pivot );
|
||||
}
|
||||
|
||||
String field = fields[0];
|
||||
String subField = fields[1];
|
||||
Deque<String> fnames = new LinkedList<>();
|
||||
for( int i=fields.length-1; i>1; i-- ) {
|
||||
fnames.push( fields[i] );
|
||||
private static Object retrieve(PivotListEntry entryToGet, NamedList<Object> pivotList) {
|
||||
return pivotList.get(entryToGet.getName(), entryToGet.getIndex());
|
||||
}
|
||||
|
||||
NamedList<Integer> superFacets = this.getTermCounts(field);
|
||||
|
||||
//super.key usually == pivot unless local-param 'key' used
|
||||
pivotResponse.add(key, doPivots(superFacets, field, subField, fnames, docs));
|
||||
}
|
||||
return pivotResponse;
|
||||
}
|
||||
|
||||
/**
|
||||
* Recursive function to do all the pivots
|
||||
*/
|
||||
protected List<NamedList<Object>> doPivots(NamedList<Integer> superFacets,
|
||||
String field, String subField, Deque<String> fnames,
|
||||
DocSet docs) throws IOException
|
||||
{
|
||||
SolrIndexSearcher searcher = rb.req.getSearcher();
|
||||
// TODO: optimize to avoid converting to an external string and then having to convert back to internal below
|
||||
SchemaField sfield = searcher.getSchema().getField(field);
|
||||
FieldType ftype = sfield.getType();
|
||||
|
||||
String nextField = fnames.poll();
|
||||
|
||||
List<NamedList<Object>> values = new ArrayList<>( superFacets.size() );
|
||||
for (Map.Entry<String, Integer> kv : superFacets) {
|
||||
// Only sub-facet if parent facet has positive count - still may not be any values for the sub-field though
|
||||
if (kv.getValue() >= minMatch) {
|
||||
|
||||
// may be null when using facet.missing
|
||||
final String fieldValue = kv.getKey();
|
||||
|
||||
// don't reuse the same BytesRef each time since we will be
|
||||
// constructing Term objects used in TermQueries that may be cached.
|
||||
BytesRefBuilder termval = null;
|
||||
|
||||
SimpleOrderedMap<Object> pivot = new SimpleOrderedMap<>();
|
||||
pivot.add( "field", field );
|
||||
if (null == fieldValue) {
|
||||
pivot.add( "value", null );
|
||||
} else {
|
||||
termval = new BytesRefBuilder();
|
||||
ftype.readableToIndexed(fieldValue, termval);
|
||||
pivot.add( "value", ftype.toObject(sfield, termval.get()) );
|
||||
}
|
||||
pivot.add( "count", kv.getValue() );
|
||||
|
||||
if( subField == null ) {
|
||||
values.add( pivot );
|
||||
}
|
||||
else {
|
||||
DocSet subset = null;
|
||||
if ( null == termval ) {
|
||||
DocSet hasVal = searcher.getDocSet
|
||||
(new TermRangeQuery(field, null, null, false, false));
|
||||
subset = docs.andNot(hasVal);
|
||||
} else {
|
||||
Query query = new TermQuery(new Term(field, termval.get()));
|
||||
subset = searcher.getDocSet(query, docs);
|
||||
}
|
||||
super.docs = subset;//used by getTermCounts()
|
||||
|
||||
NamedList<Integer> nl = this.getTermCounts(subField);
|
||||
if (nl.size() >= minMatch) {
|
||||
pivot.add( "pivot", doPivots( nl, subField, nextField, fnames, subset) );
|
||||
values.add( pivot ); // only add response if there are some counts
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// put the field back on the list
|
||||
fnames.push( nextField );
|
||||
return values;
|
||||
}
|
||||
|
||||
// TODO: This is code from various patches to support distributed search.
|
||||
// Some parts may be helpful for whoever implements distributed search.
|
||||
//
|
||||
// @Override
|
||||
// public int distributedProcess(ResponseBuilder rb) throws IOException {
|
||||
// if (!rb.doFacets) {
|
||||
// return ResponseBuilder.STAGE_DONE;
|
||||
// }
|
||||
//
|
||||
// if (rb.stage == ResponseBuilder.STAGE_GET_FIELDS) {
|
||||
// SolrParams params = rb.req.getParams();
|
||||
// String[] pivots = params.getParams(FacetParams.FACET_PIVOT);
|
||||
// for ( ShardRequest sreq : rb.outgoing ) {
|
||||
// if (( sreq.purpose & ShardRequest.PURPOSE_GET_FIELDS ) != 0
|
||||
// && sreq.shards != null && sreq.shards.length == 1 ) {
|
||||
// sreq.params.set( FacetParams.FACET, "true" );
|
||||
// sreq.params.set( FacetParams.FACET_PIVOT, pivots );
|
||||
// sreq.params.set( FacetParams.FACET_PIVOT_MINCOUNT, 1 ); // keep this at 1 regardless so that it accumulates everything
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// return ResponseBuilder.STAGE_DONE;
|
||||
// }
|
||||
//
|
||||
// @Override
|
||||
// public void handleResponses(ResponseBuilder rb, ShardRequest sreq) {
|
||||
// if (!rb.doFacets) return;
|
||||
//
|
||||
//
|
||||
// if ((sreq.purpose & ShardRequest.PURPOSE_GET_FACETS)!=0) {
|
||||
// SimpleOrderedMap<List<NamedList<Object>>> tf = rb._pivots;
|
||||
// if ( null == tf ) {
|
||||
// tf = new SimpleOrderedMap<List<NamedList<Object>>>();
|
||||
// rb._pivots = tf;
|
||||
// }
|
||||
// for (ShardResponse srsp: sreq.responses) {
|
||||
// int shardNum = rb.getShardNum(srsp.getShard());
|
||||
//
|
||||
// NamedList facet_counts = (NamedList)srsp.getSolrResponse().getResponse().get("facet_counts");
|
||||
//
|
||||
// // handle facet trees from shards
|
||||
// SimpleOrderedMap<List<NamedList<Object>>> shard_pivots =
|
||||
// (SimpleOrderedMap<List<NamedList<Object>>>)facet_counts.get( PIVOT_KEY );
|
||||
//
|
||||
// if ( shard_pivots != null ) {
|
||||
// for (int j=0; j< shard_pivots.size(); j++) {
|
||||
// // TODO -- accumulate the results from each shard
|
||||
// // The following code worked to accumulate facets for an previous
|
||||
// // two level patch... it is here for reference till someone can upgrade
|
||||
// /**
|
||||
// String shard_tree_name = (String) shard_pivots.getName( j );
|
||||
// SimpleOrderedMap<NamedList> shard_tree = (SimpleOrderedMap<NamedList>)shard_pivots.getVal( j );
|
||||
// SimpleOrderedMap<NamedList> facet_tree = tf.get( shard_tree_name );
|
||||
// if ( null == facet_tree) {
|
||||
// facet_tree = new SimpleOrderedMap<NamedList>();
|
||||
// tf.add( shard_tree_name, facet_tree );
|
||||
// }
|
||||
//
|
||||
// for( int o = 0; o < shard_tree.size() ; o++ ) {
|
||||
// String shard_outer = (String) shard_tree.getName( o );
|
||||
// NamedList shard_innerList = (NamedList) shard_tree.getVal( o );
|
||||
// NamedList tree_innerList = (NamedList) facet_tree.get( shard_outer );
|
||||
// if ( null == tree_innerList ) {
|
||||
// tree_innerList = new NamedList();
|
||||
// facet_tree.add( shard_outer, tree_innerList );
|
||||
// }
|
||||
//
|
||||
// for ( int i = 0 ; i < shard_innerList.size() ; i++ ) {
|
||||
// String shard_term = (String) shard_innerList.getName( i );
|
||||
// long shard_count = ((Number) shard_innerList.getVal(i)).longValue();
|
||||
// int tree_idx = tree_innerList.indexOf( shard_term, 0 );
|
||||
//
|
||||
// if ( -1 == tree_idx ) {
|
||||
// tree_innerList.add( shard_term, shard_count );
|
||||
// } else {
|
||||
// long tree_count = ((Number) tree_innerList.getVal( tree_idx )).longValue();
|
||||
// tree_innerList.setVal( tree_idx, shard_count + tree_count );
|
||||
// }
|
||||
// } // innerList loop
|
||||
// } // outer loop
|
||||
// **/
|
||||
// } // each tree loop
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// return ;
|
||||
// }
|
||||
//
|
||||
// @Override
|
||||
// public void finishStage(ResponseBuilder rb) {
|
||||
// if (!rb.doFacets || rb.stage != ResponseBuilder.STAGE_GET_FIELDS) return;
|
||||
// // wait until STAGE_GET_FIELDS
|
||||
// // so that "result" is already stored in the response (for aesthetics)
|
||||
//
|
||||
// SimpleOrderedMap<List<NamedList<Object>>> tf = rb._pivots;
|
||||
//
|
||||
// // get 'facet_counts' from the response
|
||||
// NamedList facetCounts = (NamedList) rb.rsp.getValues().get("facet_counts");
|
||||
// if (facetCounts == null) {
|
||||
// facetCounts = new NamedList();
|
||||
// rb.rsp.add("facet_counts", facetCounts);
|
||||
// }
|
||||
// facetCounts.add( PIVOT_KEY, tf );
|
||||
// rb._pivots = null;
|
||||
// }
|
||||
//
|
||||
// public String getDescription() {
|
||||
// return "Handle Pivot (multi-level) Faceting";
|
||||
// }
|
||||
}
|
||||
|
|
|
@ -0,0 +1,252 @@
|
|||
package org.apache.solr.handler.component;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.schema.FieldType;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.search.DocSet;
|
||||
import org.apache.solr.search.SyntaxError;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.common.util.StrUtils;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.params.FacetParams;
|
||||
import org.apache.solr.request.SimpleFacets;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.lucene.search.Query;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Deque;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Processes all Pivot facet logic for a single node -- both non-distrib, and per-shard
|
||||
*/
|
||||
public class PivotFacetProcessor extends SimpleFacets
|
||||
{
|
||||
protected SolrParams params;
|
||||
|
||||
/**
 * Creates a processor for the given request.  All arguments are handed to the
 * {@link SimpleFacets} constructor; <code>params</code> is additionally kept
 * on this instance.
 */
public PivotFacetProcessor(SolrQueryRequest req, DocSet docs, SolrParams params, ResponseBuilder rb) {
  super(req, docs, params, rb);
  this.params = params;
}
|
||||
|
||||
/**
|
||||
* Processes all of the specified {@link FacetParams#FACET_PIVOT} strings, generating
|
||||
* a completel response tree for each pivot. The values in this response will either
|
||||
* be the complete tree of fields and values for the specified pivot in the local index,
|
||||
* or the requested refinements if the pivot params include the {@link PivotFacet#REFINE_PARAM}
|
||||
*/
|
||||
public SimpleOrderedMap<List<NamedList<Object>>> process(String[] pivots) throws IOException {
|
||||
if (!rb.doFacets || pivots == null)
|
||||
return null;
|
||||
|
||||
SimpleOrderedMap<List<NamedList<Object>>> pivotResponse = new SimpleOrderedMap<>();
|
||||
for (String pivotList : pivots) {
|
||||
try {
|
||||
this.parseParams(FacetParams.FACET_PIVOT, pivotList);
|
||||
} catch (SyntaxError e) {
|
||||
throw new SolrException(ErrorCode.BAD_REQUEST, e);
|
||||
}
|
||||
List<String> pivotFields = StrUtils.splitSmart(facetValue, ",", true);
|
||||
if( pivotFields.size() < 1 ) {
|
||||
throw new SolrException( ErrorCode.BAD_REQUEST,
|
||||
"Pivot Facet needs at least one field name: " + pivotList);
|
||||
} else {
|
||||
SolrIndexSearcher searcher = rb.req.getSearcher();
|
||||
for (String fieldName : pivotFields) {
|
||||
SchemaField sfield = searcher.getSchema().getField(fieldName);
|
||||
if (sfield == null) {
|
||||
throw new SolrException(ErrorCode.BAD_REQUEST, "\"" + fieldName + "\" is not a valid field name in pivot: " + pivotList);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//REFINEMENT
|
||||
String fieldValueKey = localParams == null ? null : localParams.get(PivotFacet.REFINE_PARAM);
|
||||
if(fieldValueKey != null ){
|
||||
String[] refinementValuesByField = params.getParams(PivotFacet.REFINE_PARAM+fieldValueKey);
|
||||
for(String refinements : refinementValuesByField){
|
||||
pivotResponse.addAll(processSingle(pivotFields, refinements));
|
||||
}
|
||||
} else{
|
||||
pivotResponse.addAll(processSingle(pivotFields, null));
|
||||
}
|
||||
}
|
||||
return pivotResponse;
|
||||
}
|
||||
|
||||
/**
|
||||
* Process a single branch of refinement values for a specific pivot
|
||||
* @param pivotFields the ordered list of fields in this pivot
|
||||
* @param refinements the comma seperate list of refinement values corrisponding to each field in the pivot, or null if there are no refinements
|
||||
*/
|
||||
private SimpleOrderedMap<List<NamedList<Object>>> processSingle(List<String> pivotFields,
|
||||
String refinements) throws IOException {
|
||||
SolrIndexSearcher searcher = rb.req.getSearcher();
|
||||
SimpleOrderedMap<List<NamedList<Object>>> pivotResponse = new SimpleOrderedMap<>();
|
||||
|
||||
String field = pivotFields.get(0);
|
||||
SchemaField sfield = searcher.getSchema().getField(field);
|
||||
|
||||
Deque<String> fnames = new LinkedList<>();
|
||||
for( int i = pivotFields.size()-1; i>1; i-- ) {
|
||||
fnames.push( pivotFields.get(i) );
|
||||
}
|
||||
|
||||
NamedList<Integer> facetCounts;
|
||||
Deque<String> vnames = new LinkedList<>();
|
||||
|
||||
if (null != refinements) {
|
||||
// All values, split by the field they should go to
|
||||
List<String> refinementValuesByField
|
||||
= PivotFacetHelper.decodeRefinementValuePath(refinements);
|
||||
|
||||
for( int i=refinementValuesByField.size()-1; i>0; i-- ) {
|
||||
vnames.push(refinementValuesByField.get(i));//Only for [1] and on
|
||||
}
|
||||
|
||||
String firstFieldsValues = refinementValuesByField.get(0);
|
||||
|
||||
facetCounts = new NamedList<Integer>();
|
||||
facetCounts.add(firstFieldsValues,
|
||||
getSubsetSize(this.docs, sfield, firstFieldsValues));
|
||||
} else {
|
||||
// no refinements needed
|
||||
facetCounts = this.getTermCountsForPivots(field, this.docs);
|
||||
}
|
||||
|
||||
if(pivotFields.size() > 1) {
|
||||
String subField = pivotFields.get(1);
|
||||
pivotResponse.add(key,
|
||||
doPivots(facetCounts, field, subField, fnames, vnames, this.docs));
|
||||
} else {
|
||||
pivotResponse.add(key, doPivots(facetCounts, field, null, fnames, vnames, this.docs));
|
||||
}
|
||||
return pivotResponse;
|
||||
}
|
||||
|
||||
/**
|
||||
* Recursive function to compute all the pivot counts for the values under the specified field
|
||||
*/
|
||||
protected List<NamedList<Object>> doPivots(NamedList<Integer> superFacets,
|
||||
String field, String subField, Deque<String> fnames,Deque<String> vnames,DocSet docs) throws IOException {
|
||||
|
||||
SolrIndexSearcher searcher = rb.req.getSearcher();
|
||||
// TODO: optimize to avoid converting to an external string and then having to convert back to internal below
|
||||
SchemaField sfield = searcher.getSchema().getField(field);
|
||||
FieldType ftype = sfield.getType();
|
||||
|
||||
String nextField = fnames.poll();
|
||||
|
||||
// re-useable BytesRefBuilder for conversion of term values to Objects
|
||||
BytesRefBuilder termval = new BytesRefBuilder();
|
||||
|
||||
List<NamedList<Object>> values = new ArrayList<>( superFacets.size() );
|
||||
for (Map.Entry<String, Integer> kv : superFacets) {
|
||||
// Only sub-facet if parent facet has positive count - still may not be any values for the sub-field though
|
||||
if (kv.getValue() >= getMinCountForField(field)) {
|
||||
final String fieldValue = kv.getKey();
|
||||
|
||||
SimpleOrderedMap<Object> pivot = new SimpleOrderedMap<>();
|
||||
pivot.add( "field", field );
|
||||
if (null == fieldValue) {
|
||||
pivot.add( "value", null );
|
||||
} else {
|
||||
ftype.readableToIndexed(fieldValue, termval);
|
||||
pivot.add( "value", ftype.toObject(sfield, termval.get()) );
|
||||
}
|
||||
pivot.add( "count", kv.getValue() );
|
||||
|
||||
DocSet subset = getSubset(docs, sfield, fieldValue);
|
||||
|
||||
if( subField != null ) {
|
||||
NamedList<Integer> facetCounts;
|
||||
if(!vnames.isEmpty()){
|
||||
String val = vnames.pop();
|
||||
facetCounts = new NamedList<Integer>();
|
||||
facetCounts.add(val, getSubsetSize(subset,
|
||||
searcher.getSchema().getField(subField),
|
||||
val));
|
||||
} else {
|
||||
facetCounts = this.getTermCountsForPivots(subField, subset);
|
||||
}
|
||||
|
||||
if (facetCounts.size() >= 1) {
|
||||
pivot.add( "pivot", doPivots( facetCounts, subField, nextField, fnames, vnames, subset) );
|
||||
}
|
||||
}
|
||||
values.add( pivot );
|
||||
}
|
||||
|
||||
}
|
||||
// put the field back on the list
|
||||
fnames.push( nextField );
|
||||
return values;
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a base docset, computes the size of the subset of documents corrisponding to the specified pivotValue
|
||||
*
|
||||
* @param base the set of documents to evalute relative to
|
||||
* @param field the field type used by the pivotValue
|
||||
* @param pivotValue String representation of the value, may be null (ie: "missing")
|
||||
*/
|
||||
private int getSubsetSize(DocSet base, SchemaField field, String pivotValue) throws IOException {
|
||||
FieldType ft = field.getType();
|
||||
if ( null == pivotValue ) {
|
||||
Query query = ft.getRangeQuery(null, field, null, null, false, false);
|
||||
DocSet hasVal = searcher.getDocSet(query);
|
||||
return base.andNotSize(hasVal);
|
||||
} else {
|
||||
Query query = ft.getFieldQuery(null, field, pivotValue);
|
||||
return searcher.numDocs(query, base);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a base docset, computes the subset of documents corrisponding to the specified pivotValue
|
||||
*
|
||||
* @param base the set of documents to evalute relative to
|
||||
* @param field the field type used by the pivotValue
|
||||
* @param pivotValue String representation of the value, may be null (ie: "missing")
|
||||
*/
|
||||
private DocSet getSubset(DocSet base, SchemaField field, String pivotValue) throws IOException {
|
||||
FieldType ft = field.getType();
|
||||
if ( null == pivotValue ) {
|
||||
Query query = ft.getRangeQuery(null, field, null, null, false, false);
|
||||
DocSet hasVal = searcher.getDocSet(query);
|
||||
return base.andNot(hasVal);
|
||||
} else {
|
||||
Query query = ft.getFieldQuery(null, field, pivotValue);
|
||||
return searcher.getDocSet(query, base);
|
||||
}
|
||||
}
|
||||
|
||||
private int getMinCountForField(String fieldname){
|
||||
return params.getFieldInt(fieldname, FacetParams.FACET_PIVOT_MINCOUNT, 1);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,206 @@
|
|||
package org.apache.solr.handler.component;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.BitSet;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
|
||||
import org.apache.solr.common.params.FacetParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.schema.TrieDateField;
|
||||
import org.apache.solr.util.PivotListEntry;
|
||||
|
||||
/**
|
||||
* Models a single (value, count) pair that will exist in the collection of values for a
|
||||
* {@link PivotFacetField} parent. This <code>PivotFacetValue</code> may itself have a
|
||||
* nested {@link PivotFacetField} child
|
||||
*
|
||||
* @see PivotFacetField
|
||||
* @see PivotFacetFieldValueCollection
|
||||
*/
|
||||
@SuppressWarnings("rawtypes")
|
||||
public class PivotFacetValue {
|
||||
|
||||
private final BitSet sourceShards = new BitSet();
|
||||
private final PivotFacetField parentPivot;
|
||||
private final Comparable value;
|
||||
// child can't be final, circular ref on construction
|
||||
private PivotFacetField childPivot = null;
|
||||
private int count; // mutable
|
||||
|
||||
private PivotFacetValue(PivotFacetField parent, Comparable val) {
|
||||
this.parentPivot = parent;
|
||||
this.value = val;
|
||||
}
|
||||
|
||||
/**
|
||||
* The value of the asssocated field modeled by this <code>PivotFacetValue</code>.
|
||||
* May be null if this <code>PivotFacetValue</code> models the count for docs
|
||||
* "missing" the field value.
|
||||
*
|
||||
* @see FacetParams#FACET_MISSING
|
||||
*/
|
||||
public Comparable getValue() { return value; }
|
||||
|
||||
/** The count corrisponding to the value modeled by this <code>PivotFacetValue</code> */
|
||||
public int getCount() { return count; }
|
||||
|
||||
/**
|
||||
* The {@link PivotFacetField} corrisponding to the nested child pivot for this
|
||||
* <code>PivotFacetValue</code>. May be null if this object is the leaf of a pivot.
|
||||
*/
|
||||
public PivotFacetField getChildPivot() { return childPivot; }
|
||||
|
||||
|
||||
/**
|
||||
* A recursive method that walks up the tree of pivot fields/values to build
|
||||
* a list of the String representations of the values that lead down to this
|
||||
* PivotFacetValue.
|
||||
*
|
||||
* @return a mutable List of the pivot value Strings leading down to and including
|
||||
* this pivot value, will never be null but may contain nulls
|
||||
* @see PivotFacetField#getValuePath
|
||||
*/
|
||||
public List<String> getValuePath() {
|
||||
List<String> out = parentPivot.getValuePath();
|
||||
|
||||
// Note: this code doesn't play nice with custom FieldTypes -- see SOLR-6330
|
||||
|
||||
if (null == value) {
|
||||
out.add(null);
|
||||
} else if (value instanceof Date) {
|
||||
out.add(TrieDateField.formatExternal((Date) value));
|
||||
} else {
|
||||
out.add(value.toString());
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
/**
|
||||
* A recursive method to construct a new <code>PivotFacetValue</code> object from
|
||||
* the contents of the {@link NamedList} provided by the specified shard, relative
|
||||
* to the specified field.
|
||||
*
|
||||
* If the <code>NamedList</code> contains data for a child {@link PivotFacetField}
|
||||
* that will be recursively built as well.
|
||||
*
|
||||
* @see PivotFacetField#createFromListOfNamedLists
|
||||
* @param shardNumber the id of the shard that provided this data
|
||||
* @param rb The response builder of the current request
|
||||
* @param parentField the parent field in the current pivot associated with this value
|
||||
* @param pivotData the data from the specified shard for this pivot value
|
||||
*/
|
||||
@SuppressWarnings("unchecked")
|
||||
public static PivotFacetValue createFromNamedList(int shardNumber, ResponseBuilder rb, PivotFacetField parentField, NamedList<Object> pivotData) {
|
||||
|
||||
Comparable pivotVal = null;
|
||||
int pivotCount = 0;
|
||||
List<NamedList<Object>> childPivotData = null;
|
||||
|
||||
for (int i = 0; i < pivotData.size(); i++) {
|
||||
String key = pivotData.getName(i);
|
||||
Object value = pivotData.getVal(i);
|
||||
PivotListEntry entry = PivotListEntry.get(key);
|
||||
|
||||
switch (entry) {
|
||||
|
||||
case VALUE:
|
||||
pivotVal = (Comparable)value;
|
||||
break;
|
||||
case FIELD:
|
||||
assert parentField.field.equals(value)
|
||||
: "Parent Field mismatch: " + parentField.field + "!=" + value;
|
||||
break;
|
||||
case COUNT:
|
||||
pivotCount = (Integer)value;
|
||||
break;
|
||||
case PIVOT:
|
||||
childPivotData = (List<NamedList<Object>>)value;
|
||||
break;
|
||||
default:
|
||||
throw new RuntimeException("PivotListEntry contains unaccounted for item: " + entry);
|
||||
}
|
||||
}
|
||||
|
||||
PivotFacetValue newPivotFacet = new PivotFacetValue(parentField, pivotVal);
|
||||
newPivotFacet.count = pivotCount;
|
||||
newPivotFacet.sourceShards.set(shardNumber);
|
||||
|
||||
newPivotFacet.childPivot = PivotFacetField.createFromListOfNamedLists(shardNumber, rb, newPivotFacet, childPivotData);
|
||||
|
||||
return newPivotFacet;
|
||||
}
|
||||
|
||||
/**
|
||||
* A <b>NON-Recursive</b> method indicating if the specified shard has already
|
||||
* contributed to the count for this value.
|
||||
*/
|
||||
public boolean shardHasContributed(int shardNum) {
|
||||
return sourceShards.get(shardNum);
|
||||
}
|
||||
|
||||
/**
|
||||
* A recursive method for generating a NamedList from this value suitable for
|
||||
* including in a pivot facet response to the original distributed request.
|
||||
*
|
||||
* @see PivotFacetField#convertToListOfNamedLists
|
||||
*/
|
||||
public NamedList<Object> convertToNamedList() {
|
||||
NamedList<Object> newList = new SimpleOrderedMap<>();
|
||||
newList.add(PivotListEntry.FIELD.getName(), parentPivot.field);
|
||||
newList.add(PivotListEntry.VALUE.getName(), value);
|
||||
newList.add(PivotListEntry.COUNT.getName(), count);
|
||||
if (childPivot != null && childPivot.convertToListOfNamedLists() != null) {
|
||||
newList.add(PivotListEntry.PIVOT.getName(), childPivot.convertToListOfNamedLists());
|
||||
}
|
||||
return newList;
|
||||
}
|
||||
|
||||
/**
|
||||
* Merges in the count contributions from the specified shard for each.
|
||||
* This method is recursive if the shard data includes sub-pivots
|
||||
*
|
||||
* @see PivotFacetField#contributeFromShard
|
||||
* @see PivotFacetField#createFromListOfNamedLists
|
||||
*/
|
||||
public void mergeContributionFromShard(int shardNumber, ResponseBuilder rb, NamedList<Object> value) {
|
||||
assert null != value : "can't merge in null data";
|
||||
|
||||
if (!shardHasContributed(shardNumber)) {
|
||||
sourceShards.set(shardNumber);
|
||||
count += PivotFacetHelper.getCount(value);
|
||||
}
|
||||
|
||||
List<NamedList<Object>> shardChildPivots = PivotFacetHelper.getPivots(value);
|
||||
// sub pivot -- we may not have seen this yet depending on refinement
|
||||
if (null == childPivot) {
|
||||
childPivot = PivotFacetField.createFromListOfNamedLists(shardNumber, rb, this, shardChildPivots);
|
||||
} else {
|
||||
childPivot.contributeFromShard(shardNumber, rb, shardChildPivots);
|
||||
}
|
||||
}
|
||||
|
||||
public String toString(){
|
||||
return String.format(Locale.ROOT, "F:%s V:%s Co:%d Ch?:%s",
|
||||
parentPivot.field, value, count, (this.childPivot !=null));
|
||||
}
|
||||
|
||||
}
|
|
@ -39,6 +39,7 @@ public class ShardRequest {
|
|||
public final static int PURPOSE_GET_TERMS =0x400;
|
||||
public final static int PURPOSE_GET_TOP_GROUPS =0x800;
|
||||
public final static int PURPOSE_GET_MLT_RESULTS =0x1000;
|
||||
public final static int PURPOSE_REFINE_PIVOT_FACETS =0x2000;
|
||||
|
||||
public int purpose; // the purpose of this request
|
||||
|
||||
|
|
|
@ -332,15 +332,45 @@ public class SimpleFacets {
|
|||
ENUM, FC, FCS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Term counts for use in pivot faceting that resepcts the appropriate mincount
|
||||
* @see FacetParams#FACET_PIVOT_MINCOUNT
|
||||
*/
|
||||
public NamedList<Integer> getTermCountsForPivots(String field, DocSet docs) throws IOException {
|
||||
Integer mincount = params.getFieldInt(field, FacetParams.FACET_PIVOT_MINCOUNT, 1);
|
||||
return getTermCounts(field, mincount, docs);
|
||||
}
|
||||
|
||||
/**
|
||||
* Term counts for use in field faceting that resepects the appropriate mincount
|
||||
*
|
||||
* @see FacetParams#FACET_MINCOUNT
|
||||
*/
|
||||
public NamedList<Integer> getTermCounts(String field) throws IOException {
|
||||
return getTermCounts(field, this.docs);
|
||||
}
|
||||
|
||||
/**
|
||||
* Term counts for use in field faceting that resepects the appropriate mincount
|
||||
*
|
||||
* @see FacetParams#FACET_MINCOUNT
|
||||
*/
|
||||
public NamedList<Integer> getTermCounts(String field, DocSet base) throws IOException {
|
||||
Integer mincount = params.getFieldInt(field, FacetParams.FACET_MINCOUNT);
|
||||
return getTermCounts(field, mincount, base);
|
||||
}
|
||||
|
||||
/**
|
||||
* Term counts for use in field faceting that resepcts the specified mincount -
|
||||
* if mincount is null, the "zeros" param is consulted for the appropriate backcompat
|
||||
* default
|
||||
*
|
||||
* @see FacetParams#FACET_ZEROS
|
||||
*/
|
||||
private NamedList<Integer> getTermCounts(String field, Integer mincount, DocSet base) throws IOException {
|
||||
int offset = params.getFieldInt(field, FacetParams.FACET_OFFSET, 0);
|
||||
int limit = params.getFieldInt(field, FacetParams.FACET_LIMIT, 100);
|
||||
if (limit == 0) return new NamedList<>();
|
||||
Integer mincount = params.getFieldInt(field, FacetParams.FACET_MINCOUNT);
|
||||
if (mincount==null) {
|
||||
Boolean zeros = params.getFieldBool(field, FacetParams.FACET_ZEROS);
|
||||
// mincount = (zeros!=null && zeros) ? 0 : 1;
|
||||
|
@ -554,7 +584,8 @@ public class SimpleFacets {
|
|||
try {
|
||||
NamedList<Object> result = new SimpleOrderedMap<>();
|
||||
if(termList != null) {
|
||||
result.add(workerKey, getListedTermCounts(workerFacetValue, termList, workerBase));
|
||||
List<String> terms = StrUtils.splitSmart(termList, ",", true);
|
||||
result.add(workerKey, getListedTermCounts(workerFacetValue, workerBase, terms));
|
||||
} else {
|
||||
result.add(workerKey, getTermCounts(workerFacetValue, workerBase));
|
||||
}
|
||||
|
@ -597,13 +628,25 @@ public class SimpleFacets {
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* Computes the term->count counts for the specified termList relative to the
|
||||
* @param field the name of the field to compute term counts against
|
||||
* @param termList a comma seperated (and backslash escaped) list of term values (in the specified field) to compute the counts for
|
||||
* @see StrUtils#splitSmart
|
||||
*/
|
||||
private NamedList<Integer> getListedTermCounts(String field, String termList) throws IOException {
|
||||
return getListedTermCounts(field, termList, this.docs);
|
||||
List<String> terms = StrUtils.splitSmart(termList, ",", true);
|
||||
return getListedTermCounts(field, this.docs, terms);
|
||||
}
|
||||
|
||||
private NamedList getListedTermCounts(String field, String termList, DocSet base) throws IOException {
|
||||
/**
|
||||
* Computes the term->count counts for the specified term values relative to the
|
||||
* @param field the name of the field to compute term counts against
|
||||
* @param base the docset to compute term counts relative to
|
||||
* @param terms a list of term values (in the specified field) to compute the counts for
|
||||
*/
|
||||
protected NamedList<Integer> getListedTermCounts(String field, DocSet base, List<String> terms) throws IOException {
|
||||
FieldType ft = searcher.getSchema().getFieldType(field);
|
||||
List<String> terms = StrUtils.splitSmart(termList, ",", true);
|
||||
NamedList<Integer> res = new NamedList<>();
|
||||
for (String term : terms) {
|
||||
String internal = ft.toInternal(term);
|
||||
|
|
|
@ -0,0 +1,62 @@
|
|||
package org.apache.solr.util;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Locale;
|
||||
|
||||
/**
 * Enum for modeling the elements of a (nested) pivot entry as expressed in a NamedList
 */
public enum PivotListEntry {

  FIELD(0),
  VALUE(1),
  COUNT(2),
  PIVOT(3);

  // we could just use the ordinal(), but safer to be very explicit
  private final int index;

  PivotListEntry(int position) {
    index = position;
  }

  /**
   * Case-insensitive lookup of PivotListEntry by name
   * @see #getName
   */
  public static PivotListEntry get(String name) {
    final String constantName = name.toUpperCase(Locale.ROOT);
    return valueOf(constantName);
  }

  /**
   * Name of this entry when used in response
   * @see #get
   */
  public String getName() {
    final String constantName = name();
    return constantName.toLowerCase(Locale.ROOT);
  }

  /**
   * Index of this entry when used in response
   */
  public int getIndex() {
    return this.index;
  }

}
|
|
@ -31,7 +31,6 @@ import static org.apache.solr.common.params.CursorMarkParams.CURSOR_MARK_START;
|
|||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.schema.TrieDateField;
|
||||
import org.apache.solr.search.CursorMark; //jdoc
|
||||
|
||||
import org.noggit.ObjectBuilder;
|
||||
|
@ -39,12 +38,10 @@ import org.noggit.ObjectBuilder;
|
|||
import java.nio.ByteBuffer;
|
||||
import java.util.Arrays;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.UUID;
|
||||
|
||||
|
@ -619,11 +616,6 @@ public class CursorPagingTest extends SolrTestCaseJ4 {
|
|||
return 0 != TestUtil.nextInt(random(), 0, 30);
|
||||
}
|
||||
|
||||
/** returns likely most (1/10) of the time, otherwise unlikely */
|
||||
private static Object skewed(Object likely, Object unlikely) {
|
||||
return (0 == TestUtil.nextInt(random(), 0, 9)) ? unlikely : likely;
|
||||
}
|
||||
|
||||
/**
|
||||
* An immutable list of the fields in the schema that can be used for sorting,
|
||||
* deterministically random order.
|
||||
|
@ -898,7 +890,7 @@ public class CursorPagingTest extends SolrTestCaseJ4 {
|
|||
1.0D / random().nextInt(37)));
|
||||
}
|
||||
if (useField()) {
|
||||
doc.addField("str", skewed(randomUsableUnicodeString(),
|
||||
doc.addField("str", skewed(randomXmlUsableUnicodeString(),
|
||||
TestUtil.randomSimpleString(random(), 1, 1)));
|
||||
}
|
||||
if (useField()) {
|
||||
|
@ -908,8 +900,7 @@ public class CursorPagingTest extends SolrTestCaseJ4 {
|
|||
doc.addField("bin", ByteBuffer.wrap(randBytes));
|
||||
}
|
||||
if (useField()) {
|
||||
doc.addField("date", skewed(randomDate(),
|
||||
dateWithRandomSecondOn2010_10_31_at_10_31()));
|
||||
doc.addField("date", skewed(randomDate(), randomSkewedDate()));
|
||||
}
|
||||
if (useField()) {
|
||||
doc.addField("uuid", UUID.randomUUID().toString());
|
||||
|
@ -949,28 +940,6 @@ public class CursorPagingTest extends SolrTestCaseJ4 {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* We want "realistic" unicode strings beyond simple ascii, but because our
|
||||
* updates use XML we need to ensure we don't get "special" code block.
|
||||
*/
|
||||
private static String randomUsableUnicodeString() {
|
||||
String result = TestUtil.randomRealisticUnicodeString(random());
|
||||
if (result.matches(".*\\p{InSpecials}.*")) {
|
||||
// oh well
|
||||
result = TestUtil.randomSimpleString(random());
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private static String randomDate() {
|
||||
return TrieDateField.formatExternal(new Date(random().nextLong()));
|
||||
}
|
||||
|
||||
private static String dateWithRandomSecondOn2010_10_31_at_10_31() {
|
||||
return String.format(Locale.ROOT, "2010-10-31T10:31:%02d.000Z",
|
||||
TestUtil.nextInt(random(), 0, 59));
|
||||
}
|
||||
|
||||
private static final String[] currencies = { "USD", "EUR", "NOK" };
|
||||
|
||||
public static String randomCurrency() {
|
||||
|
|
|
@ -61,6 +61,7 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
|
|||
|
||||
@Override
|
||||
public void doTest() throws Exception {
|
||||
QueryResponse rsp = null;
|
||||
int backupStress = stress; // make a copy so we can restore
|
||||
|
||||
|
||||
|
@ -175,6 +176,13 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
|
|||
// a facet query to test out chars out of the ascii range
|
||||
query("q","*:*", "rows",0, "facet","true", "facet.query","{!term f=foo_s}international\u00ff\u01ff\u2222\u3333");
|
||||
|
||||
// simple field facet on date fields
|
||||
rsp = query("q","*:*", "rows",0, "facet","true", "facet.field", tdate_a);
|
||||
assertEquals(1, rsp.getFacetFields().size());
|
||||
rsp = query("q","*:*", "rows",0, "facet","true",
|
||||
"facet.field", tdate_b, "facet.field", tdate_a);
|
||||
assertEquals(2, rsp.getFacetFields().size());
|
||||
|
||||
// simple date facet on one field
|
||||
query("q","*:*", "rows",100, "facet","true",
|
||||
"facet.date",tdate_a,
|
||||
|
@ -337,7 +345,7 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
|
|||
q.set("q", "*:*");
|
||||
q.set(ShardParams.SHARDS_INFO, true);
|
||||
setDistributedParams(q);
|
||||
QueryResponse rsp = queryServer(q);
|
||||
rsp = queryServer(q);
|
||||
NamedList<?> sinfo = (NamedList<?>) rsp.getResponse().get(ShardParams.SHARDS_INFO);
|
||||
String shards = getShardsString();
|
||||
int cnt = StringUtils.countMatches(shards, ",")+1;
|
||||
|
|
|
@ -0,0 +1,530 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.cloud;
|
||||
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.solr.SolrTestCaseJ4.SuppressSSL;
|
||||
import org.apache.solr.client.solrj.SolrServerException;
|
||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||
import org.apache.solr.client.solrj.response.PivotField;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.schema.TrieDateField;
|
||||
|
||||
import static org.apache.solr.common.params.FacetParams.*;
|
||||
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Set;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Date;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Randomized testing of Pivot Faceting using SolrCloud.
|
||||
* </p>
|
||||
* <p>
|
||||
* After indexing a bunch of random docs, picks some random fields to pivot facet on,
|
||||
* and then confirms that the resulting counts match the results of filtering on those
|
||||
* values. This gives us strong assertions on the correctness of the total counts for
|
||||
* each pivot value, but no assertions that the correct "top" counts were chosen.
|
||||
* </p>
|
||||
* <p>
|
||||
* NOTE: this test ignores the control collection and only deals with the
|
||||
* CloudSolrServer - this is because the randomized field values make it very easy for
|
||||
* the term stats to miss values even with the overrequest.
|
||||
* (because so many values will tie for "1"). What we care about here is
|
||||
* that the counts we get back are correct and match what we get when filtering on those
|
||||
* constraints.
|
||||
* </p>
|
||||
*
|
||||
*
|
||||
*
|
||||
*/
|
||||
@SuppressSSL // Too Slow
|
||||
public class TestCloudPivotFacet extends AbstractFullDistribZkTestBase {
|
||||
|
||||
public static Logger log = LoggerFactory.getLogger(TestCloudPivotFacet.class);
|
||||
|
||||
// param used by test purely for tracing & validation
|
||||
private static String TRACE_MIN = "_test_min";
|
||||
// param used by test purely for tracing & validation
|
||||
private static String TRACE_MISS = "_test_miss";
|
||||
// param used by test purely for tracing & validation
|
||||
private static String TRACE_SORT = "_test_sort";
|
||||
|
||||
/**
|
||||
* Controls the odds of any given doc having a value in any given field -- as this gets lower,
|
||||
* the counts for "facet.missing" pivots should increase.
|
||||
* @see #useField()
|
||||
*/
|
||||
private static int useFieldRandomizedFactor = -1;
|
||||
|
||||
@BeforeClass
|
||||
public static void initUseFieldRandomizedFactor() {
|
||||
useFieldRandomizedFactor = TestUtil.nextInt(random(), 2, 30);
|
||||
log.info("init'ing useFieldRandomizedFactor = {}", useFieldRandomizedFactor);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void doTest() throws Exception {
|
||||
handle.clear();
|
||||
handle.put("QTime", SKIPVAL);
|
||||
handle.put("timestamp", SKIPVAL);
|
||||
|
||||
final Set<String> fieldNameSet = new HashSet<>();
|
||||
|
||||
// build up a randomized index
|
||||
final int numDocs = atLeast(500);
|
||||
log.info("numDocs: {}", numDocs);
|
||||
|
||||
for (int i = 1; i <= numDocs; i++) {
|
||||
SolrInputDocument doc = buildRandomDocument(i);
|
||||
|
||||
// not efficient, but it garuntees that even if people change buildRandomDocument
|
||||
// we'll always have the full list of fields w/o needing to keep code in sync
|
||||
fieldNameSet.addAll(doc.getFieldNames());
|
||||
|
||||
cloudClient.add(doc);
|
||||
}
|
||||
cloudClient.commit();
|
||||
|
||||
fieldNameSet.remove("id");
|
||||
assertTrue("WTF, bogus field exists?", fieldNameSet.add("bogus_not_in_any_doc_s"));
|
||||
|
||||
final String[] fieldNames = fieldNameSet.toArray(new String[fieldNameSet.size()]);
|
||||
Arrays.sort(fieldNames); // need determinism for buildRandomPivot calls
|
||||
|
||||
|
||||
for (int i = 0; i < 5; i++) {
|
||||
|
||||
String q = "*:*";
|
||||
if (random().nextBoolean()) {
|
||||
q = "id:[* TO " + TestUtil.nextInt(random(),300,numDocs) + "]";
|
||||
}
|
||||
ModifiableSolrParams baseP = params("rows", "0", "q", q);
|
||||
|
||||
if (random().nextBoolean()) {
|
||||
baseP.add("fq", "id:[* TO " + TestUtil.nextInt(random(),200,numDocs) + "]");
|
||||
}
|
||||
|
||||
ModifiableSolrParams pivotP = params(FACET,"true",
|
||||
FACET_PIVOT, buildRandomPivot(fieldNames));
|
||||
if (random().nextBoolean()) {
|
||||
pivotP.add(FACET_PIVOT, buildRandomPivot(fieldNames));
|
||||
}
|
||||
|
||||
// keep limit low - lots of unique values, and lots of depth in pivots
|
||||
pivotP.add(FACET_LIMIT, ""+TestUtil.nextInt(random(),1,17));
|
||||
|
||||
// sometimes use an offset
|
||||
if (random().nextBoolean()) {
|
||||
pivotP.add(FACET_OFFSET, ""+TestUtil.nextInt(random(),0,7));
|
||||
}
|
||||
|
||||
if (random().nextBoolean()) {
|
||||
String min = ""+TestUtil.nextInt(random(),0,numDocs+10);
|
||||
pivotP.add(FACET_PIVOT_MINCOUNT, min);
|
||||
// trace param for validation
|
||||
baseP.add(TRACE_MIN, min);
|
||||
}
|
||||
|
||||
if (random().nextBoolean()) {
|
||||
String missing = ""+random().nextBoolean();
|
||||
pivotP.add(FACET_MISSING, missing);
|
||||
// trace param for validation
|
||||
baseP.add(TRACE_MISS, missing);
|
||||
}
|
||||
|
||||
if (random().nextBoolean()) {
|
||||
String sort = random().nextBoolean() ? "index" : "count";
|
||||
pivotP.add(FACET_SORT, sort);
|
||||
// trace param for validation
|
||||
baseP.add(TRACE_SORT, sort);
|
||||
}
|
||||
|
||||
// overrequest
|
||||
//
|
||||
// NOTE: since this test focuses on accuracy of refinement, and doesn't do
|
||||
// control collection comparisons, there isn't a lot of need for excessive
|
||||
// overrequesting -- we focus here on trying to exercise the various edge cases
|
||||
// involved as different values are used with overrequest
|
||||
if (0 == TestUtil.nextInt(random(),0,4)) {
|
||||
// we want a decent chance of no overrequest at all
|
||||
pivotP.add(FACET_OVERREQUEST_COUNT, "0");
|
||||
pivotP.add(FACET_OVERREQUEST_RATIO, "0");
|
||||
} else {
|
||||
if (random().nextBoolean()) {
|
||||
pivotP.add(FACET_OVERREQUEST_COUNT, ""+TestUtil.nextInt(random(),0,5));
|
||||
}
|
||||
if (random().nextBoolean()) {
|
||||
// sometimes give a ratio less then 1, code should be smart enough to deal
|
||||
float ratio = 0.5F + random().nextFloat();
|
||||
// sometimes go negative
|
||||
if (random().nextBoolean()) {
|
||||
ratio *= -1;
|
||||
}
|
||||
pivotP.add(FACET_OVERREQUEST_RATIO, ""+ratio);
|
||||
}
|
||||
}
|
||||
|
||||
assertPivotCountsAreCorrect(baseP, pivotP);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Given some query params, executes the request against the cloudClient and
|
||||
* then walks the pivot facet values in the response, treating each one as a
|
||||
* filter query to assert the pivot counts are correct.
|
||||
*/
|
||||
private void assertPivotCountsAreCorrect(SolrParams baseParams,
|
||||
SolrParams pivotParams)
|
||||
throws SolrServerException {
|
||||
|
||||
SolrParams initParams = SolrParams.wrapAppended(pivotParams, baseParams);
|
||||
|
||||
log.info("Doing full run: {}", initParams);
|
||||
countNumFoundChecks = 0;
|
||||
|
||||
NamedList<List<PivotField>> pivots = null;
|
||||
try {
|
||||
QueryResponse initResponse = cloudClient.query(initParams);
|
||||
pivots = initResponse.getFacetPivot();
|
||||
assertNotNull(initParams + " has null pivots?", pivots);
|
||||
assertEquals(initParams + " num pivots",
|
||||
initParams.getParams("facet.pivot").length, pivots.size());
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException("init query failed: " + initParams + ": " +
|
||||
e.getMessage(), e);
|
||||
}
|
||||
try {
|
||||
for (Map.Entry<String,List<PivotField>> pivot : pivots) {
|
||||
final String pivotKey = pivot.getKey();
|
||||
// :HACK: for counting the max possible pivot depth
|
||||
final int maxDepth = 1 + pivotKey.length() - pivotKey.replace(",","").length();
|
||||
|
||||
assertTraceOk(pivotKey, baseParams, pivot.getValue());
|
||||
|
||||
// NOTE: we can't make any assumptions/assertions about the number of
|
||||
// constraints here because of the random data - which means if pivotting is
|
||||
// completely broken and there are no constrains this loop could be a No-Op
|
||||
// but in that case we just have to trust that DistributedFacetPivotTest
|
||||
// will catch it.
|
||||
for (PivotField constraint : pivot.getValue()) {
|
||||
int depth = assertPivotCountsAreCorrect(pivotKey, baseParams, constraint);
|
||||
|
||||
// we can't assert that the depth reached is the same as the depth requested
|
||||
// because the fq and/or mincount may have pruned the tree too much
|
||||
assertTrue("went too deep: "+depth+": " + pivotKey + " ==> " + pivot,
|
||||
depth <= maxDepth);
|
||||
|
||||
}
|
||||
}
|
||||
} catch (AssertionError e) {
|
||||
throw new AssertionError(initParams + " ==> " + e.getMessage(), e);
|
||||
} finally {
|
||||
log.info("Ending full run (countNumFoundChecks={}): {}",
|
||||
countNumFoundChecks, initParams);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Recursive Helper method for asserting that pivot constraint counds match
|
||||
* results when filtering on those constraints. Returns the recursive depth reached
|
||||
* (for sanity checking)
|
||||
*/
|
||||
private int assertPivotCountsAreCorrect(String pivotName,
|
||||
SolrParams baseParams,
|
||||
PivotField constraint)
|
||||
throws SolrServerException {
|
||||
|
||||
SolrParams p = SolrParams.wrapAppended(baseParams,
|
||||
params("fq", buildFilter(constraint)));
|
||||
List<PivotField> subPivots = null;
|
||||
try {
|
||||
assertNumFound(pivotName, constraint.getCount(), p);
|
||||
subPivots = constraint.getPivot();
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(pivotName + ": count query failed: " + p + ": " +
|
||||
e.getMessage(), e);
|
||||
}
|
||||
int depth = 0;
|
||||
if (null != subPivots) {
|
||||
assertTraceOk(pivotName, baseParams, subPivots);
|
||||
|
||||
for (PivotField subPivot : subPivots) {
|
||||
depth = assertPivotCountsAreCorrect(pivotName, p, subPivot);
|
||||
}
|
||||
}
|
||||
return depth + 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Verify that the PivotFields we're lookin at doesn't violate any of the expected
|
||||
* behaviors based on the <code>TRACE_*</code> params found in the base params
|
||||
*/
|
||||
private void assertTraceOk(String pivotName, SolrParams baseParams, List<PivotField> constraints) {
|
||||
if (null == constraints || 0 == constraints.size()) {
|
||||
return;
|
||||
}
|
||||
final int maxIdx = constraints.size() - 1;
|
||||
|
||||
final int min = baseParams.getInt(TRACE_MIN, -1);
|
||||
final boolean expectMissing = baseParams.getBool(TRACE_MISS, false);
|
||||
final boolean checkCount = "count".equals(baseParams.get(TRACE_SORT, "count"));
|
||||
|
||||
int prevCount = Integer.MAX_VALUE;
|
||||
|
||||
for (int i = 0; i <= maxIdx; i++) {
|
||||
final PivotField constraint = constraints.get(i);
|
||||
final int count = constraint.getCount();
|
||||
|
||||
if (0 < min) {
|
||||
assertTrue(pivotName + ": val #"+i +" of " + maxIdx +
|
||||
": count("+count+") < facet.mincount("+min+"): " + constraint,
|
||||
min <= count);
|
||||
}
|
||||
// missing value must always come last, but only if facet.missing was used
|
||||
// and may not exist at all (mincount, none missing for this sub-facet, etc...)
|
||||
if ((i < maxIdx) || (!expectMissing)) {
|
||||
assertNotNull(pivotName + ": val #"+i +" of " + maxIdx +
|
||||
" has null value: " + constraint,
|
||||
constraint.getValue());
|
||||
}
|
||||
// if we are expecting count based sort, then the count of each constraint
|
||||
// must be lt-or-eq the count that came before -- or it must be the last value and
|
||||
// be "missing"
|
||||
if (checkCount) {
|
||||
assertTrue(pivotName + ": val #"+i +" of" + maxIdx +
|
||||
": count("+count+") > prevCount("+prevCount+"): " + constraint,
|
||||
((count <= prevCount)
|
||||
|| (expectMissing && i == maxIdx && null == constraint.getValue())));
|
||||
prevCount = count;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a PivotField constraint, generate a query for the field+value
|
||||
* for use in an <code>fq</code> to verify the constraint count
|
||||
*/
|
||||
private static String buildFilter(PivotField constraint) {
|
||||
Object value = constraint.getValue();
|
||||
if (null == value) {
|
||||
// facet.missing, exclude any indexed term
|
||||
return "-" + constraint.getField() + ":[* TO *]";
|
||||
}
|
||||
// otherwise, build up a term filter...
|
||||
String prefix = "{!term f=" + constraint.getField() + "}";
|
||||
if (value instanceof Date) {
|
||||
return prefix + TrieDateField.formatExternal((Date)value);
|
||||
} else {
|
||||
return prefix + value;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Creates a random facet.pivot param string using some of the specified fieldNames
|
||||
*/
|
||||
private static String buildRandomPivot(String[] fieldNames) {
|
||||
final int depth = TestUtil.nextInt(random(), 1, 3);
|
||||
String [] fields = new String[depth];
|
||||
for (int i = 0; i < depth; i++) {
|
||||
// yes this means we might use the same field twice
|
||||
// makes it a robust test (especially for multi-valued fields)
|
||||
fields[i] = fieldNames[TestUtil.nextInt(random(),0,fieldNames.length-1)];
|
||||
}
|
||||
return StringUtils.join(fields, ",");
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Creates a document with randomized field values, some of which be missing values,
|
||||
* some of which will be multi-valued (per the schema) and some of which will be
|
||||
* skewed so that small subsets of the ranges will be more common (resulting in an
|
||||
* increased likelihood of duplicate values)
|
||||
*
|
||||
* @see #buildRandomPivot
|
||||
*/
|
||||
private static SolrInputDocument buildRandomDocument(int id) {
|
||||
SolrInputDocument doc = sdoc("id", id);
|
||||
// most fields are in most docs
|
||||
// if field is in a doc, then "skewed" chance val is from a dense range
|
||||
// (hopefully with lots of duplication)
|
||||
for (String prefix : new String[] { "pivot_i", "pivot_ti" }) {
|
||||
if (useField()) {
|
||||
doc.addField(prefix+"1", skewed(TestUtil.nextInt(random(), 20, 50),
|
||||
random().nextInt()));
|
||||
|
||||
}
|
||||
if (useField()) {
|
||||
int numMulti = atLeast(1);
|
||||
while (0 < numMulti--) {
|
||||
doc.addField(prefix, skewed(TestUtil.nextInt(random(), 20, 50),
|
||||
random().nextInt()));
|
||||
}
|
||||
}
|
||||
}
|
||||
for (String prefix : new String[] { "pivot_l", "pivot_tl" }) {
|
||||
if (useField()) {
|
||||
doc.addField(prefix+"1", skewed(TestUtil.nextInt(random(), 5000, 5100),
|
||||
random().nextLong()));
|
||||
}
|
||||
if (useField()) {
|
||||
int numMulti = atLeast(1);
|
||||
while (0 < numMulti--) {
|
||||
doc.addField(prefix, skewed(TestUtil.nextInt(random(), 5000, 5100),
|
||||
random().nextLong()));
|
||||
}
|
||||
}
|
||||
}
|
||||
for (String prefix : new String[] { "pivot_f", "pivot_tf" }) {
|
||||
if (useField()) {
|
||||
doc.addField(prefix+"1", skewed(1.0F / random().nextInt(13),
|
||||
random().nextFloat() * random().nextInt()));
|
||||
}
|
||||
if (useField()) {
|
||||
int numMulti = atLeast(1);
|
||||
while (0 < numMulti--) {
|
||||
doc.addField(prefix, skewed(1.0F / random().nextInt(13),
|
||||
random().nextFloat() * random().nextInt()));
|
||||
}
|
||||
}
|
||||
}
|
||||
for (String prefix : new String[] { "pivot_d", "pivot_td" }) {
|
||||
if (useField()) {
|
||||
doc.addField(prefix+"1", skewed(1.0D / random().nextInt(19),
|
||||
random().nextDouble() * random().nextInt()));
|
||||
}
|
||||
if (useField()) {
|
||||
int numMulti = atLeast(1);
|
||||
while (0 < numMulti--) {
|
||||
doc.addField(prefix, skewed(1.0D / random().nextInt(19),
|
||||
random().nextDouble() * random().nextInt()));
|
||||
}
|
||||
}
|
||||
}
|
||||
for (String prefix : new String[] { "pivot_dt", "pivot_tdt" }) {
|
||||
if (useField()) {
|
||||
doc.addField(prefix+"1", skewed(randomSkewedDate(), randomDate()));
|
||||
|
||||
}
|
||||
if (useField()) {
|
||||
int numMulti = atLeast(1);
|
||||
while (0 < numMulti--) {
|
||||
doc.addField(prefix, skewed(randomSkewedDate(), randomDate()));
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
String prefix = "pivot_b";
|
||||
if (useField()) {
|
||||
doc.addField(prefix+"1", random().nextBoolean() ? "t" : "f");
|
||||
}
|
||||
if (useField()) {
|
||||
int numMulti = atLeast(1);
|
||||
while (0 < numMulti--) {
|
||||
doc.addField(prefix, random().nextBoolean() ? "t" : "f");
|
||||
}
|
||||
}
|
||||
}
|
||||
for (String prefix : new String[] { "pivot_x_s", "pivot_y_s", "pivot_z_s"}) {
|
||||
if (useField()) {
|
||||
doc.addField(prefix+"1", skewed(TestUtil.randomSimpleString(random(), 1, 1),
|
||||
randomXmlUsableUnicodeString()));
|
||||
}
|
||||
if (useField()) {
|
||||
int numMulti = atLeast(1);
|
||||
while (0 < numMulti--) {
|
||||
doc.addField(prefix, skewed(TestUtil.randomSimpleString(random(), 1, 1),
|
||||
randomXmlUsableUnicodeString()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// for the remaining fields, make every doc have a value in a dense range
|
||||
//
|
||||
|
||||
for (String prefix : new String[] { "dense_pivot_x_s", "dense_pivot_y_s" }) {
|
||||
if (useField()) {
|
||||
doc.addField(prefix+"1", TestUtil.randomSimpleString(random(), 1, 1));
|
||||
}
|
||||
if (useField()) {
|
||||
int numMulti = atLeast(1);
|
||||
while (0 < numMulti--) {
|
||||
doc.addField(prefix, TestUtil.randomSimpleString(random(), 1, 1));
|
||||
}
|
||||
}
|
||||
}
|
||||
for (String prefix : new String[] { "dense_pivot_i", "dense_pivot_ti" }) {
|
||||
if (useField()) {
|
||||
doc.addField(prefix+"1", TestUtil.nextInt(random(), 20, 50));
|
||||
}
|
||||
if (useField()) {
|
||||
int numMulti = atLeast(1);
|
||||
while (0 < numMulti--) {
|
||||
doc.addField(prefix, TestUtil.nextInt(random(), 20, 50));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return doc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Similar to usually() but we want it to happen just as often regardless
|
||||
* of test multiplier and nightly status
|
||||
*
|
||||
* @see #useFieldRandomizedFactor
|
||||
*/
|
||||
private static boolean useField() {
|
||||
assert 0 < useFieldRandomizedFactor;
|
||||
return 0 != TestUtil.nextInt(random(), 0, useFieldRandomizedFactor);
|
||||
}
|
||||
|
||||
/**
|
||||
* Asserts the number of docs matching the SolrParams aganst the cloudClient
|
||||
*/
|
||||
private void assertNumFound(String msg, int expected, SolrParams p)
|
||||
throws SolrServerException {
|
||||
|
||||
countNumFoundChecks++;
|
||||
|
||||
SolrParams params = SolrParams.wrapDefaults(params("rows","0"), p);
|
||||
assertEquals(msg + ": " + params,
|
||||
expected, cloudClient.query(params).getResults().getNumFound());
|
||||
}
|
||||
|
||||
/**
 * Number of numFound verification queries executed so far: incremented by
 * every call to assertNumFound, and reset to 0 (then logged at the end) by
 * each full run of assertPivotCountsAreCorrect(SolrParams,SolrParams).
 *
 * @see #assertNumFound
 * @see #assertPivotCountsAreCorrect(SolrParams,SolrParams)
 */
|
||||
private int countNumFoundChecks = 0;
|
||||
}
|
|
@ -0,0 +1,762 @@
|
|||
package org.apache.solr.handler.component;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.solr.BaseDistributedSearchTestCase;
|
||||
import org.apache.solr.client.solrj.SolrServer;
|
||||
import org.apache.solr.client.solrj.SolrServerException;
|
||||
import org.apache.solr.client.solrj.response.PivotField;
|
||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.common.params.FacetParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
|
||||
import junit.framework.AssertionFailedError;
|
||||
|
||||
public class DistributedFacetPivotLargeTest extends BaseDistributedSearchTestCase {
|
||||
|
||||
// Empty-string constant. NOTE(review): no use of it is visible in this part of the file -- confirm where it is referenced.
public static final String SPECIAL = "";
|
||||
|
||||
/**
 * Configures the test to always run with a fixed count of 4 shards.
 */
public DistributedFacetPivotLargeTest() {
  // we leave one shard empty as an edge case
  shardCount = 4;
  fixShardCount = true;
}
|
||||
|
||||
@Override
|
||||
public void doTest() throws Exception {
|
||||
this.stress = 0 ;
|
||||
handle.clear();
|
||||
handle.put("QTime", SKIPVAL);
|
||||
handle.put("timestamp", SKIPVAL);
|
||||
handle.put("maxScore", SKIPVAL);
|
||||
|
||||
setupDistributedPivotFacetDocuments();
|
||||
|
||||
QueryResponse rsp = null;
|
||||
|
||||
List<PivotField> pivots = null;
|
||||
PivotField firstInt = null;
|
||||
PivotField firstBool = null;
|
||||
PivotField firstDate = null;
|
||||
PivotField firstPlace = null;
|
||||
PivotField firstCompany = null;
|
||||
|
||||
// basic check w/ limit & default sort (count)
|
||||
rsp = query( "q", "*:*",
|
||||
"rows", "0",
|
||||
"facet","true",
|
||||
"facet.pivot","place_s,company_t",
|
||||
FacetParams.FACET_LIMIT, "12");
|
||||
pivots = rsp.getFacetPivot().get("place_s,company_t");
|
||||
assertEquals(12, pivots.size());
|
||||
firstPlace = pivots.get(0);
|
||||
assertPivot("place_s", "cardiff", 257, firstPlace);
|
||||
assertPivot("company_t", "bbc", 101, firstPlace.getPivot().get(0));
|
||||
// Microsoft will come back wrong if refinement was not done correctly
|
||||
assertPivot("company_t", "microsoft", 56, firstPlace.getPivot().get(1));
|
||||
|
||||
// trivial mincount=0 check
|
||||
rsp = query( "q", "does_not_exist_s:foo",
|
||||
"rows", "0",
|
||||
"facet","true",
|
||||
"facet.pivot","company_t",
|
||||
FacetParams.FACET_LIMIT, "10",
|
||||
FacetParams.FACET_PIVOT_MINCOUNT,"0");
|
||||
pivots = rsp.getFacetPivot().get("company_t");
|
||||
assertEquals(10, pivots.size());
|
||||
for (PivotField p : pivots) {
|
||||
assertEquals(0, p.getCount());
|
||||
}
|
||||
|
||||
// sanity check limit=0 w/ mincount=0 & missing=true
|
||||
//
|
||||
// SOLR-6328: doesn't work for single node, so can't work for distrib either (yet)
|
||||
//
|
||||
// PivotFacetField's init of needRefinementAtThisLevel as needing potential change
|
||||
//
|
||||
// rsp = query( "q", "*:*",
|
||||
// "rows", "0",
|
||||
// "facet","true",
|
||||
// "f.company_t.facet.limit", "10",
|
||||
// "facet.pivot","special_s,bogus_s,company_t",
|
||||
// "facet.missing", "true",
|
||||
// FacetParams.FACET_LIMIT, "0",
|
||||
// FacetParams.FACET_PIVOT_MINCOUNT,"0");
|
||||
// pivots = rsp.getFacetPivot().get("special_s,bogus_s,company_t");
|
||||
// assertEquals(1, pivots.size()); // only the missing
|
||||
// assertPivot("special_s", null, docNumber - 5, pivots.get(0)); // 5 docs w/special_s
|
||||
// assertEquals(pivots.toString(), 1, pivots.get(0).getPivot());
|
||||
// assertPivot("bogus_s", null, docNumber, pivots.get(0).getPivot().get(0));
|
||||
// // TODO: some asserts on company results
|
||||
|
||||
// basic check w/ default sort, limit, & mincount==0
|
||||
rsp = query( "q", "*:*",
|
||||
"rows", "0",
|
||||
"facet","true",
|
||||
"facet.pivot","place_s,company_t",
|
||||
FacetParams.FACET_LIMIT, "50",
|
||||
FacetParams.FACET_PIVOT_MINCOUNT,"0");
|
||||
pivots = rsp.getFacetPivot().get("place_s,company_t");
|
||||
assertEquals(50, pivots.size());
|
||||
firstPlace = pivots.get(0);
|
||||
assertPivot("place_s", "cardiff", 257, firstPlace);
|
||||
assertPivot("company_t", "bbc", 101, firstPlace.getPivot().get(0));
|
||||
// Microsoft will come back wrong if refinement was not done correctly
|
||||
assertPivot("company_t", "microsoft", 56, firstPlace.getPivot().get(1));
|
||||
|
||||
// sort=index + offset + limit w/ some variables
|
||||
for (SolrParams variableParams :
|
||||
new SolrParams[] { // both variations should work just as well
|
||||
// defaults
|
||||
params(),
|
||||
// force refinement
|
||||
params(FacetParams.FACET_OVERREQUEST_RATIO, "1",
|
||||
FacetParams.FACET_OVERREQUEST_COUNT, "0") }) {
|
||||
|
||||
SolrParams p = SolrParams.wrapDefaults( params( "q", "*:*",
|
||||
"rows", "0",
|
||||
"facet","true",
|
||||
"facet.sort","index",
|
||||
"f.place_s.facet.limit", "20",
|
||||
"f.place_s.facet.offset", "40",
|
||||
"facet.pivot", "place_s,company_t"),
|
||||
variableParams );
|
||||
|
||||
try {
|
||||
rsp = query( p );
|
||||
pivots = rsp.getFacetPivot().get("place_s,company_t");
|
||||
assertEquals(20, pivots.size()); // limit
|
||||
for (int i = 0; i < 10; i++) {
|
||||
PivotField place = pivots.get(i);
|
||||
assertTrue(place.toString(), place.getValue().toString().endsWith("placeholder"));
|
||||
assertEquals(3, place.getPivot().size());
|
||||
assertPivot("company_t", "bbc", 6, place.getPivot().get(0));
|
||||
assertPivot("company_t", "microsoft", 6, place.getPivot().get(1));
|
||||
assertPivot("company_t", "polecat", 6, place.getPivot().get(2));
|
||||
}
|
||||
assertPivot("place_s", "cardiff", 257, pivots.get(10));
|
||||
assertPivot("place_s", "krakaw", 1, pivots.get(11));
|
||||
assertPivot("place_s", "medical staffing network holdings, inc.", 51, pivots.get(12));
|
||||
for (int i = 13; i < 20; i++) {
|
||||
PivotField place = pivots.get(i);
|
||||
assertTrue(place.toString(), place.getValue().toString().startsWith("placeholder"));
|
||||
assertEquals(1, place.getPivot().size());
|
||||
PivotField company = place.getPivot().get(0);
|
||||
assertTrue(company.toString(), company.getValue().toString().startsWith("compholder"));
|
||||
assertEquals(company.toString(), 1, company.getCount());
|
||||
}
|
||||
} catch (AssertionFailedError ae) {
|
||||
throw new AssertionError(ae.getMessage() + " <== " + p.toString(), ae);
|
||||
}
|
||||
}
|
||||
|
||||
// sort=index + mincount=0
|
||||
//
|
||||
// SOLR-6329: facet.pivot.mincount=0 doesn't work well with distrib
|
||||
//
|
||||
// broken honda
|
||||
//
|
||||
// This is tricky, here's what i think is happening....
|
||||
// - "company:honda" only exists on twoShard, and only w/ "place:cardiff"
|
||||
// - twoShard has no other places in its docs
|
||||
// - twoShard can't return any other places to w/ honda as a count=0 sub-value
|
||||
// - if we refined all other companies places, would twoShard return honda==0 ?
|
||||
// ... but there's no refinement since mincount==0
|
||||
// - would it even matter
|
||||
//
|
||||
// should we remove the refinement short circuit?
|
||||
//
|
||||
// rsp = query( params( "q", "*:*",
|
||||
// "rows", "0",
|
||||
// "facet","true",
|
||||
// "facet.sort","index",
|
||||
// "f.place_s.facet.limit", "20",
|
||||
// "f.place_s.facet.offset", "40",
|
||||
// FacetParams.FACET_PIVOT_MINCOUNT,"0",
|
||||
// "facet.pivot", "place_s,company_t") );
|
||||
// // TODO: more asserts
|
||||
//
|
||||
//
|
||||
// really trivial demonstration of the above problem
|
||||
//
|
||||
// rsp = query( params( "q", "*:*",
|
||||
// "rows", "0",
|
||||
// "facet","true",
|
||||
// FacetParams.FACET_PIVOT_MINCOUNT,"0",
|
||||
// "facet.pivot", "top_s,sub_s") );
|
||||
|
||||
// basic check w/ limit & index sort
|
||||
for (SolrParams facetParams :
|
||||
// results should be the same regardless of whether local params are used
|
||||
new SolrParams[] {
|
||||
// Broken: SOLR-6193
|
||||
// params("facet.pivot","{!facet.limit=4 facet.sort=index}place_s,company_t"),
|
||||
// params("facet.pivot","{!facet.sort=index}place_s,company_t",
|
||||
// FacetParams.FACET_LIMIT, "4"),
|
||||
params("facet.pivot","place_s,company_t",
|
||||
FacetParams.FACET_LIMIT, "4",
|
||||
"facet.sort", "index") }) {
|
||||
SolrParams p = SolrParams.wrapDefaults( params( "q", "*:*",
|
||||
"rows", "0",
|
||||
"facet","true"),
|
||||
facetParams );
|
||||
try {
|
||||
rsp = query( p );
|
||||
pivots = rsp.getFacetPivot().get("place_s,company_t");
|
||||
assertEquals(4, pivots.size());
|
||||
firstPlace = pivots.get(0);
|
||||
assertPivot("place_s", "0placeholder", 6, firstPlace);
|
||||
firstCompany = firstPlace.getPivot().get(0);
|
||||
assertPivot("company_t", "bbc", 6, firstCompany);
|
||||
} catch (AssertionFailedError ae) {
|
||||
throw new AssertionError(ae.getMessage() + " <== " + p.toString(), ae);
|
||||
}
|
||||
}
|
||||
|
||||
// Pivot Faceting (combined with Field Faceting)
|
||||
for (SolrParams facetParams :
|
||||
// with and w/o an excluded fq
|
||||
// (either way, facet results should be the same)
|
||||
new SolrParams[] {
|
||||
params("facet.pivot","place_s,company_t",
|
||||
"facet.field","place_s"),
|
||||
params("facet.pivot","{!ex=ok}place_s,company_t",
|
||||
"facet.field","{!ex=ok}place_s",
|
||||
"fq","{!tag=ok}place_s:cardiff"),
|
||||
params("facet.pivot","{!ex=pl,co}place_s,company_t",
|
||||
"fq","{!tag=pl}place_s:cardiff",
|
||||
"fq","{!tag=co}company_t:bbc") }) {
|
||||
|
||||
// default order (count)
|
||||
rsp = query( SolrParams.wrapDefaults(params("q", "*:*",
|
||||
"rows", "0",
|
||||
"facet","true",
|
||||
FacetParams.FACET_LIMIT, "4"),
|
||||
facetParams) );
|
||||
pivots = rsp.getFacetPivot().get("place_s,company_t");
|
||||
assertEquals(4, pivots.size());
|
||||
firstPlace = pivots.get(0);
|
||||
assertPivot("place_s", "cardiff", 257, firstPlace);
|
||||
assertEquals(4, firstPlace.getPivot().size());
|
||||
firstCompany = firstPlace.getPivot().get(0);
|
||||
assertPivot("company_t", "bbc", 101, firstCompany);
|
||||
|
||||
// Index Order
|
||||
rsp = query( SolrParams.wrapDefaults(params("q", "*:*",
|
||||
"rows", "0",
|
||||
"facet","true",
|
||||
FacetParams.FACET_LIMIT, "4",
|
||||
"facet.sort", "index"),
|
||||
facetParams) );
|
||||
pivots = rsp.getFacetPivot().get("place_s,company_t");
|
||||
assertEquals(4, pivots.size());
|
||||
firstPlace = pivots.get(0);
|
||||
assertPivot("place_s", "0placeholder", 6, firstPlace);
|
||||
assertEquals(3, firstPlace.getPivot().size()); // num vals in data < limit==3
|
||||
firstCompany = firstPlace.getPivot().get(0);
|
||||
assertPivot("company_t", "bbc", 6, firstCompany);
|
||||
|
||||
// Field level limits
|
||||
rsp = query( SolrParams.wrapDefaults(params("q", "*:*",
|
||||
"rows", "0",
|
||||
"facet","true",
|
||||
"f.place_s.facet.limit","2",
|
||||
"f.company_t.facet.limit","4"),
|
||||
facetParams) );
|
||||
pivots = rsp.getFacetPivot().get("place_s,company_t");
|
||||
assertEquals(2, pivots.size());
|
||||
firstPlace = pivots.get(0);
|
||||
assertPivot("place_s", "cardiff", 257, firstPlace);
|
||||
assertEquals(4, firstPlace.getPivot().size());
|
||||
firstCompany = firstPlace.getPivot().get(0);
|
||||
assertPivot("company_t", "bbc", 101, firstCompany);
|
||||
}
|
||||
|
||||
// Pivot Faceting Count w/fq (not excluded)
|
||||
rsp = query( "q", "*:*",
|
||||
"rows", "0",
|
||||
"fq","place_s:cardiff",
|
||||
"facet","true",
|
||||
"facet.pivot","place_s,company_t",
|
||||
FacetParams.FACET_LIMIT, "4");
|
||||
pivots = rsp.getFacetPivot().get("place_s,company_t");
|
||||
assertEquals(1, pivots.size());
|
||||
firstPlace = pivots.get(0);
|
||||
assertPivot("place_s", "cardiff", 257, firstPlace);
|
||||
assertEquals(4, firstPlace.getPivot().size());
|
||||
firstCompany = firstPlace.getPivot().get(0);
|
||||
assertPivot("company_t", "bbc", 101, firstCompany);
|
||||
|
||||
|
||||
// Same Pivot - one with exclusion and one w/o
|
||||
rsp = query( "q", "*:*",
|
||||
"rows", "0",
|
||||
"fq","{!tag=ff}pay_i:[2000 TO *]",
|
||||
"facet","true",
|
||||
"facet.pivot","{!key=filt}place_s,company_t",
|
||||
"facet.pivot","{!key=nofilt ex=ff}place_s,company_t",
|
||||
FacetParams.FACET_LIMIT, "4");
|
||||
pivots = rsp.getFacetPivot().get("filt");
|
||||
assertEquals(4, pivots.size());
|
||||
firstPlace = pivots.get(0);
|
||||
assertPivot("place_s", "cardiff", 105, firstPlace);
|
||||
assertEquals(4, firstPlace.getPivot().size());
|
||||
assertPivot("company_t", "bbc", 101, firstPlace.getPivot().get(0));
|
||||
assertPivot("company_t", "microsoft", 54, firstPlace.getPivot().get(1));
|
||||
//
|
||||
pivots = rsp.getFacetPivot().get("nofilt");
|
||||
assertEquals(4, pivots.size());
|
||||
firstPlace = pivots.get(0);
|
||||
assertPivot("place_s", "cardiff", 257, firstPlace);
|
||||
assertEquals(4, firstPlace.getPivot().size());
|
||||
assertPivot("company_t", "bbc", 101, firstPlace.getPivot().get(0));
|
||||
assertPivot("company_t", "microsoft", 56, firstPlace.getPivot().get(1));
|
||||
|
||||
// Same Pivot - one in default (count) order and one in index order
|
||||
//
|
||||
// Broken: SOLR-6193 - the facet.sort localparam isn't being picked up correctly
|
||||
//
|
||||
// rsp = query( "q", "*:*",
|
||||
// "rows", "0",
|
||||
// "facet","true",
|
||||
// "fq","pay_i:[2000 TO *]",
|
||||
// "facet.pivot","{!key=sc}place_s,company_t",
|
||||
// "facet.pivot","{!key=si facet.sort=index}place_s,company_t",
|
||||
// FacetParams.FACET_LIMIT, "4");
|
||||
// pivots = rsp.getFacetPivot().get("sc");
|
||||
// assertEquals(4, pivots.size());
|
||||
// firstPlace = pivots.get(0);
|
||||
// assertPivot("place_s", "cardiff", 105, firstPlace);
|
||||
// assertEquals(4, firstPlace.getPivot().size());
|
||||
// assertPivot("company_t", "bbc", 101, firstPlace.getPivot().get(0));
|
||||
// assertPivot("company_t", "microsoft", 54, firstPlace.getPivot().get(1));
|
||||
// //
|
||||
// pivots = rsp.getFacetPivot().get("si");
|
||||
// assertEquals(4, pivots.size());
|
||||
// firstPlace = pivots.get(0);
|
||||
// assertPivot("place_s", "0placeholder", 6, firstPlace);
|
||||
// assertEquals(3, firstPlace.getPivot().size()); // only 3 in the data < facet.limit
|
||||
// assertPivot("company_t", "bbc", 6, firstPlace.getPivot().get(0));
|
||||
// assertPivot("company_t", "microsoft", 6, firstPlace.getPivot().get(1));
|
||||
|
||||
|
||||
// Field level limits and small offset
|
||||
rsp = query( "q", "*:*",
|
||||
"rows", "0",
|
||||
"facet","true",
|
||||
"facet.pivot","place_s,company_t",
|
||||
"f.place_s.facet.limit","2",
|
||||
"f.company_t.facet.limit","4",
|
||||
"facet.offset","1");
|
||||
pivots = rsp.getFacetPivot().get("place_s,company_t");
|
||||
assertEquals(2, pivots.size());
|
||||
firstPlace = pivots.get(0);
|
||||
assertPivot("place_s", "medical staffing network holdings, inc.", 51, firstPlace);
|
||||
assertEquals(2, firstPlace.getPivot().size()); // num vals in data < limit==4
|
||||
firstCompany = firstPlace.getPivot().get(0);
|
||||
assertPivot("company_t", "bbc", 50, firstCompany);
|
||||
|
||||
|
||||
// Field level offsets and limit
|
||||
rsp = query( "q", "*:*",
|
||||
"rows", "0",
|
||||
"fq","{!tag=pl}place_s:cardiff",
|
||||
"facet","true",
|
||||
"facet.pivot","{!ex=pl}place_s,company_t",
|
||||
"f.place_s.facet.offset","1",
|
||||
"f.company_t.facet.offset","2",
|
||||
FacetParams.FACET_LIMIT, "4");
|
||||
pivots = rsp.getFacetPivot().get("place_s,company_t");
|
||||
assertEquals(4, pivots.size());
|
||||
firstPlace = pivots.get(0);
|
||||
assertPivot("place_s", "medical staffing network holdings, inc.", 51, firstPlace);
|
||||
assertEquals(1, firstPlace.getPivot().size()); // num vals in data < limit==4
|
||||
firstCompany = firstPlace.getPivot().get(0);
|
||||
assertPivot("company_t", "polecat", 50, firstCompany);
|
||||
|
||||
|
||||
// datetime
|
||||
rsp = query( "q", "*:*",
|
||||
"rows", "0",
|
||||
"facet","true",
|
||||
"facet.pivot","hiredate_dt,place_s,company_t",
|
||||
"f.hiredate_dt.facet.limit","2",
|
||||
"f.hiredate_dt.facet.offset","1",
|
||||
FacetParams.FACET_LIMIT, "4");
|
||||
pivots = rsp.getFacetPivot().get("hiredate_dt,place_s,company_t");
|
||||
assertEquals(2, pivots.size());
|
||||
firstDate = pivots.get(0); // 2012-09-01T12:30:00Z
|
||||
assertPivot("hiredate_dt", new Date(1346502600000L), 200, firstDate);
|
||||
assertEquals(1, firstDate.getPivot().size()); // num vals in data < limit==4
|
||||
firstPlace = firstDate.getPivot().get(0);
|
||||
assertPivot("place_s", "cardiff", 200, firstPlace);
|
||||
assertEquals(4, firstPlace.getPivot().size());
|
||||
firstCompany = firstPlace.getPivot().get(0);
|
||||
assertPivot("company_t", "bbc", 50, firstCompany);
|
||||
|
||||
// int
|
||||
rsp = query( "q", "*:*",
|
||||
"rows", "0",
|
||||
"facet","true",
|
||||
"facet.pivot","pay_i,place_s,company_t",
|
||||
"f.pay_i.facet.limit","2",
|
||||
"f.pay_i.facet.offset","1",
|
||||
FacetParams.FACET_LIMIT, "4");
|
||||
pivots = rsp.getFacetPivot().get("pay_i,place_s,company_t");
|
||||
assertEquals(2, pivots.size());
|
||||
firstInt = pivots.get(0);
|
||||
assertPivot("pay_i", 2000, 50, firstInt);
|
||||
assertEquals(4, firstInt.getPivot().size());
|
||||
firstPlace = firstInt.getPivot().get(0);
|
||||
assertPivot("place_s", "0placeholder", 1, firstPlace);
|
||||
assertEquals(3, firstPlace.getPivot().size());
|
||||
firstCompany = firstPlace.getPivot().get(0);
|
||||
assertPivot("company_t", "bbc", 1, firstCompany);
|
||||
|
||||
// boolean
|
||||
rsp = query( "q", "*:*",
|
||||
"rows", "0",
|
||||
"facet","true",
|
||||
"facet.pivot","real_b,place_s,company_t",
|
||||
"f.real_b.facet.missing","true",
|
||||
"f.real_b.facet.limit","2",
|
||||
FacetParams.FACET_LIMIT, "4");
|
||||
pivots = rsp.getFacetPivot().get("real_b,place_s,company_t");
|
||||
assertEquals(3, pivots.size());
|
||||
firstBool = pivots.get(0);
|
||||
assertPivot("real_b", false, 300, firstBool);
|
||||
assertEquals(4, firstBool.getPivot().size());
|
||||
firstPlace = firstBool.getPivot().get(0);
|
||||
assertPivot("place_s", "0placeholder", 6, firstPlace);
|
||||
assertEquals(3, firstPlace.getPivot().size());
|
||||
firstCompany = firstPlace.getPivot().get(0);
|
||||
assertPivot("company_t", "bbc", 6, firstCompany);
|
||||
|
||||
// bogus fields
|
||||
rsp = query( "q", "*:*",
|
||||
"rows", "0",
|
||||
"facet","true",
|
||||
"facet.pivot","doesntexist_t,neitherdoi_i",
|
||||
FacetParams.FACET_LIMIT, "4");
|
||||
pivots = rsp.getFacetPivot().get("doesntexist_t,neitherdoi_i");
|
||||
assertEquals(0, pivots.size());
|
||||
|
||||
// bogus fields with facet.missing
|
||||
rsp = query( "q", "*:*",
|
||||
"rows", "0",
|
||||
"facet","true",
|
||||
"facet.pivot","doesntexist_t,neitherdoi_i",
|
||||
"facet.missing", "true",
|
||||
FacetParams.FACET_LIMIT, "4");
|
||||
pivots = rsp.getFacetPivot().get("doesntexist_t,neitherdoi_i");
|
||||
assertEquals(1, pivots.size());
|
||||
assertPivot("doesntexist_t", null, docNumber, pivots.get(0));
|
||||
assertEquals(1, pivots.get(0).getPivot().size());
|
||||
assertPivot("neitherdoi_i", null, docNumber, pivots.get(0).getPivot().get(0));
|
||||
|
||||
// Negative facet limit
|
||||
for (SolrParams facetParams :
|
||||
// results should be the same regardless of whether facet.limit is global,
|
||||
// a local param, or specified as a per-field override for both fields
|
||||
new SolrParams[] {
|
||||
params(FacetParams.FACET_LIMIT, "-1",
|
||||
"facet.pivot","place_s,company_t"),
|
||||
// Broken: SOLR-6193
|
||||
// params("facet.pivot","{!facet.limit=-1}place_s,company_t"),
|
||||
params("f.place_s.facet.limit", "-1",
|
||||
"f.company_t.facet.limit", "-1",
|
||||
"facet.pivot","place_s,company_t") }) {
|
||||
|
||||
SolrParams p = SolrParams.wrapDefaults( params( "q", "*:*",
|
||||
"rows", "0",
|
||||
"facet","true",
|
||||
"facet.sort", "count" ),
|
||||
facetParams);
|
||||
try {
|
||||
rsp = query( p );
|
||||
pivots = rsp.getFacetPivot().get("place_s,company_t");
|
||||
assertEquals(103, pivots.size());
|
||||
firstPlace = pivots.get(0);
|
||||
assertPivot("place_s", "cardiff", 257, firstPlace);
|
||||
assertEquals(54, firstPlace.getPivot().size());
|
||||
firstCompany = firstPlace.getPivot().get(0);
|
||||
assertPivot("company_t","bbc", 101, firstCompany);
|
||||
} catch (AssertionFailedError ae) {
|
||||
throw new AssertionError(ae.getMessage() + " <== " + p.toString(), ae);
|
||||
}
|
||||
}
|
||||
|
||||
// Negative per-field facet limit (outer)
|
||||
for (SolrParams facetParams :
|
||||
// results should be the same regardless of whether per-field facet.limit is
|
||||
// a global or a local param
|
||||
new SolrParams[] {
|
||||
// Broken: SOLR-6193
|
||||
// params( "facet.pivot","{!f.id.facet.limit=-1}place_s,id" ),
|
||||
params( "facet.pivot","place_s,id",
|
||||
"f.id.facet.limit", "-1") }) {
|
||||
|
||||
SolrParams p = SolrParams.wrapDefaults( params( "q", "*:*",
|
||||
"rows", "0",
|
||||
"facet","true",
|
||||
"facet.sort", "count" ),
|
||||
facetParams);
|
||||
try {
|
||||
rsp = query( p );
|
||||
pivots = rsp.getFacetPivot().get("place_s,id");
|
||||
assertEquals(100, pivots.size()); // default
|
||||
firstPlace = pivots.get(0);
|
||||
assertPivot("place_s", "cardiff", 257, firstPlace);
|
||||
assertEquals(257, firstPlace.getPivot().size());
|
||||
} catch (AssertionFailedError ae) {
|
||||
throw new AssertionError(ae.getMessage() + " <== " + p.toString(), ae);
|
||||
}
|
||||
}
|
||||
|
||||
// Negative per-field facet limit (inner)
|
||||
for (SolrParams facetParams :
|
||||
// results should be the same regardless of whether per-field facet.limit is
|
||||
// a global or a local param
|
||||
new SolrParams[] {
|
||||
// Broken: SOLR-6193
|
||||
// params( "facet.pivot","{!f.place_s.facet.limit=-1}place_s,id" ),
|
||||
params( "facet.pivot","place_s,id",
|
||||
"f.place_s.facet.limit", "-1") }) {
|
||||
|
||||
SolrParams p = SolrParams.wrapDefaults( params( "q", "*:*",
|
||||
"rows", "0",
|
||||
"facet","true",
|
||||
"facet.sort", "count" ),
|
||||
facetParams);
|
||||
try {
|
||||
rsp = query( p );
|
||||
pivots = rsp.getFacetPivot().get("place_s,id");
|
||||
assertEquals(103, pivots.size());
|
||||
firstPlace = pivots.get(0);
|
||||
assertPivot("place_s", "cardiff", 257, firstPlace);
|
||||
assertEquals(100, firstPlace.getPivot().size()); // default
|
||||
} catch (AssertionFailedError ae) {
|
||||
throw new AssertionError(ae.getMessage() + " <== " + p.toString(), ae);
|
||||
}
|
||||
}
|
||||
|
||||
// Mincount + facet.pivot 2 different ways (swap field order)
|
||||
rsp = query( "q", "*:*",
|
||||
"rows", "0",
|
||||
"facet","true",
|
||||
"facet.pivot","place_s,company_t",
|
||||
"facet.pivot","company_t,place_s",
|
||||
FacetParams.FACET_PIVOT_MINCOUNT,"6");
|
||||
pivots = rsp.getFacetPivot().get("place_s,company_t");
|
||||
assertEquals(52, pivots.size());
|
||||
firstPlace = pivots.get(0);
|
||||
assertPivot("place_s", "cardiff", 257, firstPlace);
|
||||
assertEquals(4, firstPlace.getPivot().size());
|
||||
firstCompany = firstPlace.getPivot().get(0);
|
||||
assertPivot("company_t", "bbc", 101, firstCompany);
|
||||
//
|
||||
pivots = rsp.getFacetPivot().get("company_t,place_s");
|
||||
assertEquals(4, pivots.size());
|
||||
firstCompany = pivots.get(0);
|
||||
assertPivot("company_t", "bbc", 451, firstCompany);
|
||||
assertEquals(52, firstCompany.getPivot().size());
|
||||
firstPlace = firstCompany.getPivot().get(0);
|
||||
assertPivot("place_s", "cardiff", 101, firstPlace);
|
||||
|
||||
// refine on SPECIAL empty string
|
||||
rsp = query( "q", "*:*",
|
||||
"fq", "-place_s:0placeholder",
|
||||
"rows", "0",
|
||||
"facet","true",
|
||||
"facet.limit","1",
|
||||
FacetParams.FACET_OVERREQUEST_RATIO, "0", // force refinement
|
||||
FacetParams.FACET_OVERREQUEST_COUNT, "1", // force refinement
|
||||
"facet.pivot","special_s,company_t");
|
||||
assertEquals(docNumber - 6, rsp.getResults().getNumFound()); // all docs but 0place
|
||||
pivots = rsp.getFacetPivot().get("special_s,company_t");
|
||||
assertEquals(1, pivots.size());
|
||||
firstPlace = pivots.get(0);
|
||||
assertPivot("special_s", SPECIAL, 3, firstPlace);
|
||||
assertEquals(1, firstPlace.getPivot().size());
|
||||
firstCompany = firstPlace.getPivot().get(0);
|
||||
assertPivot("company_t", "microsoft", 2, firstCompany);
|
||||
|
||||
// TODO test "company_t,special_s" as well
|
||||
|
||||
|
||||
// refine on SPECIAL empty string & facet.missing
|
||||
// Also proves refinement on non-top elements occurs and allows them to get into the top
|
||||
rsp = query( "q", "*:*",
|
||||
"fq", "-place_s:0placeholder",
|
||||
"rows", "0",
|
||||
"facet","true",
|
||||
"facet.limit","1",
|
||||
"facet.missing","true",
|
||||
FacetParams.FACET_OVERREQUEST_RATIO, "0", // force refinement
|
||||
FacetParams.FACET_OVERREQUEST_COUNT, "2", // force refinement
|
||||
"facet.pivot","special_s,company_t");
|
||||
assertEquals(docNumber - 6, rsp.getResults().getNumFound()); // all docs but 0place
|
||||
pivots = rsp.getFacetPivot().get("special_s,company_t");
|
||||
assertEquals(2, pivots.size());
|
||||
firstPlace = pivots.get(0);
|
||||
assertPivot("special_s", SPECIAL, 3, firstPlace);
|
||||
assertEquals(1, firstPlace.getPivot().size());
|
||||
firstCompany = firstPlace.getPivot().get(0);
|
||||
assertPivot("company_t", "microsoft", 2, firstCompany);
|
||||
// last is "missing" val
|
||||
assertPivot("special_s", null, docNumber -6 -3 -2, pivots.get(1)); // -0place -SPECIAL -xxx
|
||||
|
||||
// forced refinement on facet.missing
|
||||
rsp = query( "q", "*:*",
|
||||
"rows", "0",
|
||||
"facet","true",
|
||||
"f.bogus_x_s.facet.missing","true",
|
||||
"f.bogus_y_s.facet.missing","true",
|
||||
"facet.pivot","bogus_x_s,place_s,bogus_y_s,company_t",
|
||||
FacetParams.FACET_LIMIT, "12");
|
||||
pivots = rsp.getFacetPivot().get("bogus_x_s,place_s,bogus_y_s,company_t");
|
||||
assertEquals(1, pivots.size()); // just the missing value for bogus_x_s
|
||||
assertPivot("bogus_x_s", null, docNumber, pivots.get(0));
|
||||
pivots = pivots.get(0).getPivot();
|
||||
assertEquals(12, pivots.size()); // places
|
||||
firstPlace = pivots.get(0);
|
||||
assertPivot("place_s", "cardiff", 257, firstPlace);
|
||||
assertEquals(1, firstPlace.getPivot().size()); // just the missing value for bogus_y_s
|
||||
assertPivot("bogus_y_s", null, 257, firstPlace.getPivot().get(0));
|
||||
assertPivot("company_t", "bbc", 101, firstPlace.getPivot().get(0).getPivot().get(0));
|
||||
// Microsoft will come back wrong if refinement was not done correctly
|
||||
assertPivot("company_t", "microsoft", 56, firstPlace.getPivot().get(0).getPivot().get(1));
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Overrequesting a lot
|
||||
this.query( "q", "*:*",
|
||||
"rows", "0",
|
||||
"facet", "true",
|
||||
"facet.pivot","place_s,company_t",
|
||||
FacetParams.FACET_OVERREQUEST_RATIO, "10",
|
||||
FacetParams.FACET_OVERREQUEST_COUNT, "100");
|
||||
|
||||
// Overrequesting off
|
||||
this.query( "q", "*:*",
|
||||
"rows", "0",
|
||||
"facet", "true",
|
||||
"facet.pivot","place_s,company_t",
|
||||
FacetParams.FACET_OVERREQUEST_RATIO, "0",
|
||||
FacetParams.FACET_OVERREQUEST_COUNT, "0");
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* asserts that the actual PivotField matches the expected criteria
|
||||
*/
|
||||
private void assertPivot(String field, Object value, int count, // int numKids,
|
||||
PivotField actual) {
|
||||
assertEquals("FIELD: " + actual.toString(), field, actual.getField());
|
||||
assertEquals("VALUE: " + actual.toString(), value, actual.getValue());
|
||||
assertEquals("COUNT: " + actual.toString(), count, actual.getCount());
|
||||
// TODO: add arg && assert on number of kids
|
||||
//assertEquals("#KIDS: " + actual.toString(), numKids, actual.getPivot().size());
|
||||
}
|
||||
|
||||
|
||||
|
||||
private void setupDistributedPivotFacetDocuments() throws Exception{
|
||||
|
||||
//Clear docs
|
||||
del("*:*");
|
||||
commit();
|
||||
|
||||
final int maxDocs = 50;
|
||||
final SolrServer zeroShard = clients.get(0);
|
||||
final SolrServer oneShard = clients.get(1);
|
||||
final SolrServer twoShard = clients.get(2);
|
||||
final SolrServer threeShard = clients.get(3); // edge case: never gets any matching docs
|
||||
|
||||
for(Integer i=0;i<maxDocs;i++){//50 entries
|
||||
addPivotDoc(zeroShard, "id", getDocNum(), "place_s", "cardiff", "company_t", "microsoft polecat bbc","pay_i",2400,"hiredate_dt", "2012-07-01T12:30:00Z","real_b","true");
|
||||
addPivotDoc(zeroShard, "id", getDocNum(), "place_s", "medical staffing network holdings, inc.", "company_t", "microsoft polecat bbc","pay_i",2400,"hiredate_dt", "2012-07-01T12:30:00Z");
|
||||
|
||||
addPivotDoc(oneShard, "id", getDocNum(), "place_s", "placeholder"+i, "company_t", "compHolder"+i,"pay_i",24*i,"hiredate_dt", "2012-08-01T12:30:00Z");
|
||||
|
||||
addPivotDoc(twoShard, "id", getDocNum(), "place_s", "cardiff", "company_t", "bbc honda","pay_i",2400,"hiredate_dt", "2012-09-01T12:30:00Z","real_b","true");
|
||||
addPivotDoc(twoShard, "id", getDocNum(), "place_s", "cardiff", "company_t", "compHolder"+i,"pay_i",22*i,"hiredate_dt", "2012-09-01T12:30:00Z","real_b","true");
|
||||
addPivotDoc(twoShard, "id", getDocNum(), "place_s", "cardiff", "company_t", "compHolder"+i,"pay_i",21*i,"hiredate_dt", "2012-09-01T12:30:00Z","real_b","true");
|
||||
addPivotDoc(twoShard, "id", getDocNum(), "place_s", "cardiff", "company_t", "compHolder"+i,"pay_i",20*i,"hiredate_dt", "2012-09-01T12:30:00Z","real_b","true");
|
||||
|
||||
//For the filler content
|
||||
//Fifty places with 6 results each
|
||||
addPivotDoc(oneShard, "id", getDocNum(), "place_s", i+"placeholder", "company_t", "microsoft polecat bbc","pay_i",2400,"hiredate_dt", "2012-10-01T12:30:00Z","real_b","false");
|
||||
addPivotDoc(oneShard, "id", getDocNum(), "place_s", i+"placeholder", "company_t", "microsoft polecat bbc","pay_i",3100,"hiredate_dt", "2012-10-01T12:30:00Z","real_b","false");
|
||||
addPivotDoc(oneShard, "id", getDocNum(), "place_s", i+"placeholder", "company_t", "microsoft polecat bbc","pay_i",3400,"hiredate_dt", "2012-10-01T12:30:00Z","real_b","false");
|
||||
addPivotDoc(oneShard, "id", getDocNum(), "place_s", i+"placeholder", "company_t", "microsoft polecat bbc","pay_i",5400,"hiredate_dt", "2012-10-01T12:30:00Z","real_b","false");
|
||||
addPivotDoc(oneShard, "id", getDocNum(), "place_s", i+"placeholder", "company_t", "microsoft polecat bbc","pay_i",6400,"hiredate_dt", "2012-10-01T12:30:00Z","real_b","false");
|
||||
addPivotDoc(oneShard, "id", getDocNum(), "place_s", i+"placeholder", "company_t", "microsoft polecat bbc","pay_i",2000,"hiredate_dt", "2012-10-01T12:30:00Z","real_b","false");
|
||||
|
||||
}
|
||||
|
||||
addPivotDoc(oneShard, "id", getDocNum(), "place_s", "cardiff", "company_t", "microsoft","pay_i",4367,"hiredate_dt", "2012-11-01T12:30:00Z");
|
||||
addPivotDoc(oneShard, "id", getDocNum(), "place_s", "cardiff", "company_t", "microsoft bbc","pay_i",8742,"hiredate_dt", "2012-11-01T12:30:00Z");
|
||||
addPivotDoc(oneShard, "id", getDocNum(), "place_s", "cardiff", "company_t", "microsoft polecat","pay_i",5824,"hiredate_dt", "2012-11-01T12:30:00Z");
|
||||
addPivotDoc(oneShard, "id", getDocNum(), "place_s", "cardiff", "company_t", "microsoft ","pay_i",6539,"hiredate_dt", "2012-11-01T12:30:00Z");
|
||||
addPivotDoc(oneShard, "id", getDocNum(), "place_s", "medical staffing network holdings, inc.", "company_t", "microsoft ","pay_i",6539,"hiredate_dt", "2012-11-01T12:30:00Z", "special_s", "xxx");
|
||||
addPivotDoc(oneShard, "id", getDocNum(), "place_s", "cardiff", "company_t", "polecat","pay_i",4352,"hiredate_dt", "2012-1-01T12:30:00Z", "special_s", "xxx");
|
||||
addPivotDoc(oneShard, "id", getDocNum(), "place_s", "krakaw", "company_t", "polecat","pay_i",4352,"hiredate_dt", "2012-11-01T12:30:00Z", "special_s", SPECIAL);
|
||||
|
||||
addPivotDoc(twoShard, "id", getDocNum(), "place_s", "cardiff", "company_t", "microsoft","pay_i",12,"hiredate_dt", "2012-11-01T12:30:00Z", "special_s", SPECIAL);
|
||||
addPivotDoc(twoShard, "id", getDocNum(), "place_s", "cardiff", "company_t", "microsoft","pay_i",543,"hiredate_dt", "2012-11-01T12:30:00Z", "special_s", SPECIAL);
|
||||
|
||||
|
||||
// two really trivial documents, unrelated to the rest of the tests,
|
||||
// for the purpose of demoing the porblem with mincount=0
|
||||
addPivotDoc(oneShard, "id", getDocNum(), "top_s", "aaa", "sub_s", "bbb" );
|
||||
addPivotDoc(twoShard, "id", getDocNum(), "top_s", "xxx", "sub_s", "yyy" );
|
||||
|
||||
|
||||
commit();
|
||||
|
||||
assertEquals("shard #3 should never have any docs",
|
||||
0, threeShard.query(params("q", "*:*")).getResults().getNumFound());
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds up a SolrInputDocument using the specified fields, then adds it to the
|
||||
* specified client as well as the control client
|
||||
* @see #indexDoc(SolrServer,SolrParams,SolrInputDocument...)
|
||||
* @see #sdoc
|
||||
*/
|
||||
private void addPivotDoc(SolrServer client, Object... fields)
|
||||
throws IOException, SolrServerException {
|
||||
|
||||
indexDoc(client, params(), sdoc(fields));
|
||||
}
|
||||
|
||||
private int docNumber = 0;
|
||||
|
||||
public int getDocNum(){
|
||||
docNumber++;
|
||||
return docNumber;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,289 @@
|
|||
package org.apache.solr.handler.component;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Date;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.solr.BaseDistributedSearchTestCase;
|
||||
import org.apache.solr.client.solrj.SolrServer;
|
||||
import org.apache.solr.client.solrj.SolrServerException;
|
||||
import org.apache.solr.client.solrj.response.PivotField;
|
||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.common.params.FacetParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
|
||||
/**
|
||||
* test demonstrating how overrequesting helps finds top-terms in the "long tail"
|
||||
* of shards that don't have even distributions of terms (something that can be common
|
||||
* in cases of custom sharding -- even if you don't know that there is a corrolation
|
||||
* between the property you are sharding on and the property you are faceting on).
|
||||
*
|
||||
* NOTE: This test ignores the control collection (in single node mode, there is no
|
||||
* need for the overrequesting, all the data is local -- so comparisons with it wouldn't
|
||||
* be valid in the cases we are testing here)
|
||||
*/
|
||||
public class DistributedFacetPivotLongTailTest extends BaseDistributedSearchTestCase {
|
||||
|
||||
public DistributedFacetPivotLongTailTest(){
|
||||
this.fixShardCount = true;
|
||||
this.shardCount = 3;
|
||||
}
|
||||
|
||||
private int docNumber = 0;
|
||||
|
||||
public int getDocNum() {
|
||||
docNumber++;
|
||||
return docNumber;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void doTest() throws Exception {
|
||||
|
||||
final SolrServer shard0 = clients.get(0);
|
||||
final SolrServer shard1 = clients.get(1);
|
||||
final SolrServer shard2 = clients.get(2);
|
||||
|
||||
// the 5 top foo_s terms have 100 docs each on every shard
|
||||
for (int i = 0; i < 100; i++) {
|
||||
for (int j = 0; j < 5; j++) {
|
||||
shard0.add(sdoc("id", getDocNum(), "foo_s", "aaa"+j));
|
||||
shard1.add(sdoc("id", getDocNum(), "foo_s", "aaa"+j));
|
||||
shard2.add(sdoc("id", getDocNum(), "foo_s", "aaa"+j));
|
||||
}
|
||||
}
|
||||
|
||||
// 20 foo_s terms that come in "second" with 50 docs each
|
||||
// on both shard0 & shard1 ("bbb_")
|
||||
for (int i = 0; i < 50; i++) {
|
||||
for (int j = 0; j < 20; j++) {
|
||||
shard0.add(sdoc("id", getDocNum(), "foo_s", "bbb"+j));
|
||||
shard1.add(sdoc("id", getDocNum(), "foo_s", "bbb"+j));
|
||||
}
|
||||
// distracting term appears on only on shard2 50 times
|
||||
shard2.add(sdoc("id", getDocNum(), "foo_s", "junkA"));
|
||||
}
|
||||
// put "bbb0" on shard2 exactly once to sanity check refinement
|
||||
shard2.add(sdoc("id", getDocNum(), "foo_s", "bbb0"));
|
||||
|
||||
// long 'tail' foo_s term appears in 45 docs on every shard
|
||||
// foo_s:tail is the only term with bar_s sub-pivot terms
|
||||
for (int i = 0; i < 45; i++) {
|
||||
|
||||
// for sub-pivot, shard0 & shard1 have 6 docs each for "tailB"
|
||||
// but the top 5 terms are ccc(0-4) -- 7 on each shard
|
||||
// (4 docs each have junk terms)
|
||||
String sub_term = (i < 35) ? "ccc"+(i % 5) : ((i < 41) ? "tailB" : "junkA");
|
||||
shard0.add(sdoc("id", getDocNum(), "foo_s", "tail", "bar_s", sub_term));
|
||||
shard1.add(sdoc("id", getDocNum(), "foo_s", "tail", "bar_s", sub_term));
|
||||
|
||||
// shard2's top 5 sub-pivot terms are junk only it has with 8 docs each
|
||||
// and 5 docs that use "tailB"
|
||||
sub_term = (i < 40) ? "junkB"+(i % 5) : "tailB";
|
||||
shard2.add(sdoc("id", getDocNum(), "foo_s", "tail", "bar_s", sub_term));
|
||||
}
|
||||
|
||||
// really long tail uncommon foo_s terms on shard2
|
||||
for (int i = 0; i < 30; i++) {
|
||||
shard2.add(sdoc("id", getDocNum(), "foo_s", "zzz"+i));
|
||||
}
|
||||
|
||||
commit();
|
||||
|
||||
SolrParams req = params( "q", "*:*",
|
||||
"distrib", "false",
|
||||
"facet", "true",
|
||||
"facet.limit", "10",
|
||||
"facet.pivot", "foo_s,bar_s");
|
||||
|
||||
// sanity check that our expectations about each shard (non-distrib) are correct
|
||||
|
||||
PivotField pivot = null;
|
||||
List<PivotField> pivots = null;
|
||||
List<PivotField>[] shardPivots = new List[3];
|
||||
shardPivots[0] = shard0.query( req ).getFacetPivot().get("foo_s,bar_s");
|
||||
shardPivots[1] = shard1.query( req ).getFacetPivot().get("foo_s,bar_s");
|
||||
shardPivots[2] = shard2.query( req ).getFacetPivot().get("foo_s,bar_s");
|
||||
|
||||
// top 5 same on all shards
|
||||
for (int i = 0; i < 3; i++) {
|
||||
assertEquals(10, shardPivots[i].size());
|
||||
for (int j = 0; j < 5; j++) {
|
||||
pivot = shardPivots[i].get(j);
|
||||
assertEquals(pivot.toString(), "aaa"+j, pivot.getValue());
|
||||
assertEquals(pivot.toString(), 100, pivot.getCount());
|
||||
}
|
||||
}
|
||||
// top 6-10 same on shard0 & shard11
|
||||
for (int i = 0; i < 2; i++) {
|
||||
for (int j = 5; j < 10; j++) {
|
||||
pivot = shardPivots[i].get(j);
|
||||
assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("bbb"));
|
||||
assertEquals(pivot.toString(), 50, pivot.getCount());
|
||||
}
|
||||
}
|
||||
// 6-10 on shard2
|
||||
assertEquals("junkA", shardPivots[2].get(5).getValue());
|
||||
assertEquals(50, shardPivots[2].get(5).getCount());
|
||||
assertEquals("tail", shardPivots[2].get(6).getValue());
|
||||
assertEquals(45, shardPivots[2].get(6).getCount());
|
||||
assertEquals("bbb0", shardPivots[2].get(7).getValue());
|
||||
assertEquals(1, shardPivots[2].get(7).getCount());
|
||||
for (int j = 8; j < 10; j++) {
|
||||
pivot = shardPivots[2].get(j);
|
||||
assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("zzz"));
|
||||
assertEquals(pivot.toString(), 1, pivot.getCount());
|
||||
}
|
||||
// check sub-shardPivots on "tail" from shard2
|
||||
pivots = shardPivots[2].get(6).getPivot();
|
||||
assertEquals(6, pivots.size());
|
||||
for (int j = 0; j < 5; j++) {
|
||||
pivot = pivots.get(j);
|
||||
assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("junkB"));
|
||||
assertEquals(pivot.toString(), 8, pivot.getCount());
|
||||
}
|
||||
pivot = pivots.get(5);
|
||||
assertEquals("tailB", pivot.getValue());
|
||||
assertEquals(5, pivot.getCount());
|
||||
|
||||
// if we disable overrequesting, we don't find the long tail
|
||||
|
||||
pivots = queryServer( params( "q", "*:*",
|
||||
"shards", getShardsString(),
|
||||
FacetParams.FACET_OVERREQUEST_COUNT, "0",
|
||||
FacetParams.FACET_OVERREQUEST_RATIO, "0",
|
||||
"facet", "true",
|
||||
"facet.limit", "6",
|
||||
"facet.pivot", "foo_s,bar_s" )
|
||||
).getFacetPivot().get("foo_s,bar_s");
|
||||
assertEquals(6, pivots.size());
|
||||
for (int i = 0; i < 5; i++) {
|
||||
pivot = pivots.get(i);
|
||||
assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("aaa"));
|
||||
assertEquals(pivot.toString(), 300, pivot.getCount());
|
||||
}
|
||||
// even w/o the long tail, we should have still asked shard2 to refine bbb0
|
||||
assertTrue(pivots.get(5).toString(), pivots.get(5).getValue().equals("bbb0"));
|
||||
assertEquals(pivots.get(5).toString(), 101, pivots.get(5).getCount());
|
||||
|
||||
// with default overrequesting, we should find the correct top 6 including
|
||||
// long tail and top sub-pivots
|
||||
// (even if we disable overrequesting on the sub-pivot)
|
||||
for (ModifiableSolrParams q : new ModifiableSolrParams[] {
|
||||
params(),
|
||||
params("f.bar_s.facet.overrequest.ratio","0",
|
||||
"f.bar_s.facet.overrequest.count","0") }) {
|
||||
|
||||
q.add( params( "q", "*:*",
|
||||
"shards", getShardsString(),
|
||||
"facet", "true",
|
||||
"facet.limit", "6",
|
||||
"facet.pivot", "foo_s,bar_s" ));
|
||||
pivots = queryServer( q ).getFacetPivot().get("foo_s,bar_s");
|
||||
|
||||
assertEquals(6, pivots.size());
|
||||
for (int i = 0; i < 5; i++) {
|
||||
pivot = pivots.get(i);
|
||||
assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("aaa"));
|
||||
assertEquals(pivot.toString(), 300, pivot.getCount());
|
||||
}
|
||||
pivot = pivots.get(5);
|
||||
assertEquals(pivot.toString(), "tail", pivot.getValue());
|
||||
assertEquals(pivot.toString(), 135, pivot.getCount());
|
||||
// check the sub pivots
|
||||
pivots = pivot.getPivot();
|
||||
assertEquals(6, pivots.size());
|
||||
pivot = pivots.get(0);
|
||||
assertEquals(pivot.toString(), "tailB", pivot.getValue());
|
||||
assertEquals(pivot.toString(), 17, pivot.getCount());
|
||||
for (int i = 1; i < 6; i++) { // ccc(0-4)
|
||||
pivot = pivots.get(i);
|
||||
assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("ccc"));
|
||||
assertEquals(pivot.toString(), 14, pivot.getCount());
|
||||
}
|
||||
}
|
||||
|
||||
// if we lower the facet.limit on the sub-pivot, overrequesting should still ensure
|
||||
// that we get the correct top5 including "tailB"
|
||||
|
||||
pivots = queryServer( params( "q", "*:*",
|
||||
"shards", getShardsString(),
|
||||
"facet", "true",
|
||||
"facet.limit", "6",
|
||||
"f.bar_s.facet.limit", "5",
|
||||
"facet.pivot", "foo_s,bar_s" )
|
||||
).getFacetPivot().get("foo_s,bar_s");
|
||||
assertEquals(6, pivots.size());
|
||||
for (int i = 0; i < 5; i++) {
|
||||
pivot = pivots.get(i);
|
||||
assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("aaa"));
|
||||
assertEquals(pivot.toString(), 300, pivot.getCount());
|
||||
}
|
||||
pivot = pivots.get(5);
|
||||
assertEquals(pivot.toString(), "tail", pivot.getValue());
|
||||
assertEquals(pivot.toString(), 135, pivot.getCount());
|
||||
// check the sub pivots
|
||||
pivots = pivot.getPivot();
|
||||
assertEquals(5, pivots.size());
|
||||
pivot = pivots.get(0);
|
||||
assertEquals(pivot.toString(), "tailB", pivot.getValue());
|
||||
assertEquals(pivot.toString(), 17, pivot.getCount());
|
||||
for (int i = 1; i < 5; i++) { // ccc(0-3)
|
||||
pivot = pivots.get(i);
|
||||
assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("ccc"));
|
||||
assertEquals(pivot.toString(), 14, pivot.getCount());
|
||||
}
|
||||
|
||||
// however with a lower limit and overrequesting disabled,
|
||||
// we're going to miss out on tailB
|
||||
|
||||
pivots = queryServer( params( "q", "*:*",
|
||||
"shards", getShardsString(),
|
||||
"facet", "true",
|
||||
"facet.limit", "6",
|
||||
"f.bar_s.facet.overrequest.ratio", "0",
|
||||
"f.bar_s.facet.overrequest.count", "0",
|
||||
"f.bar_s.facet.limit", "5",
|
||||
"facet.pivot", "foo_s,bar_s" )
|
||||
).getFacetPivot().get("foo_s,bar_s");
|
||||
assertEquals(6, pivots.size());
|
||||
for (int i = 0; i < 5; i++) {
|
||||
pivot = pivots.get(i);
|
||||
assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("aaa"));
|
||||
assertEquals(pivot.toString(), 300, pivot.getCount());
|
||||
}
|
||||
pivot = pivots.get(5);
|
||||
assertEquals(pivot.toString(), "tail", pivot.getValue());
|
||||
assertEquals(pivot.toString(), 135, pivot.getCount());
|
||||
// check the sub pivots
|
||||
pivots = pivot.getPivot();
|
||||
assertEquals(5, pivots.size());
|
||||
for (int i = 0; i < 5; i++) { // ccc(0-4)
|
||||
pivot = pivots.get(i);
|
||||
assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("ccc"));
|
||||
assertEquals(pivot.toString(), 14, pivot.getCount());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,439 @@
|
|||
package org.apache.solr.handler.component;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.solr.BaseDistributedSearchTestCase;
|
||||
import org.apache.solr.client.solrj.response.PivotField;
|
||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||
import org.apache.solr.common.params.FacetParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
|
||||
import junit.framework.AssertionFailedError;
|
||||
|
||||
public class DistributedFacetPivotSmallTest extends BaseDistributedSearchTestCase {
|
||||
|
||||
/**
 * Configures the inherited shard settings so this test asks for a fixed count of
 * 4 shards (presumably instead of a count chosen by the base class -- confirm
 * against BaseDistributedSearchTestCase).
 */
public DistributedFacetPivotSmallTest() {
  shardCount = 4;
  fixShardCount = true;
}
|
||||
|
||||
@Override
|
||||
public void doTest() throws Exception {
|
||||
|
||||
del("*:*");
|
||||
|
||||
// NOTE: we use the literal (4 character) string "null" as a company name
|
||||
// to help ensure there isn't any bugs where the literal string is treated as if it
|
||||
// were a true NULL value.
|
||||
index(id, 19, "place_t", "cardiff dublin", "company_t", "microsoft polecat");
|
||||
index(id, 20, "place_t", "dublin", "company_t", "polecat microsoft null");
|
||||
index(id, 21, "place_t", "london la dublin", "company_t",
|
||||
"microsoft fujitsu null polecat");
|
||||
index(id, 22, "place_t", "krakow london cardiff", "company_t",
|
||||
"polecat null bbc");
|
||||
index(id, 23, "place_t", "london", "company_t", "");
|
||||
index(id, 24, "place_t", "la", "company_t", "");
|
||||
index(id, 25, "company_t", "microsoft polecat null fujitsu null bbc");
|
||||
index(id, 26, "place_t", "krakow", "company_t", "null");
|
||||
index(id, 27, "place_t", "krakow cardiff dublin london la", "company_t",
|
||||
"null microsoft polecat bbc fujitsu");
|
||||
index(id, 28, "place_t", "cork", "company_t",
|
||||
"fujitsu rte");
|
||||
commit();
|
||||
|
||||
handle.clear();
|
||||
handle.put("QTime", SKIPVAL);
|
||||
handle.put("timestamp", SKIPVAL);
|
||||
handle.put("maxScore", SKIPVAL);
|
||||
|
||||
|
||||
final ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
setDistributedParams(params);
|
||||
params.add("q", "*:*");
|
||||
params.add("facet", "true");
|
||||
params.add("facet.pivot", "place_t,company_t");
|
||||
|
||||
|
||||
QueryResponse rsp = queryServer(params);
|
||||
|
||||
List<PivotField> expectedPlacePivots = new UnorderedEqualityArrayList<PivotField>();
|
||||
List<PivotField> expectedCardiffPivots = new UnorderedEqualityArrayList<PivotField>();
|
||||
expectedCardiffPivots.add(new ComparablePivotField("company_t", "microsoft", 2, null));
|
||||
expectedCardiffPivots.add(new ComparablePivotField("company_t", "null", 2, null));
|
||||
expectedCardiffPivots.add(new ComparablePivotField("company_t", "bbc", 2, null));
|
||||
expectedCardiffPivots.add(new ComparablePivotField("company_t", "polecat", 3, null));
|
||||
expectedCardiffPivots.add(new ComparablePivotField("company_t", "fujitsu", 1, null));
|
||||
List<PivotField> expectedDublinPivots = new UnorderedEqualityArrayList<PivotField>();
|
||||
expectedDublinPivots.add(new ComparablePivotField("company_t", "polecat", 4, null));
|
||||
expectedDublinPivots.add(new ComparablePivotField("company_t", "microsoft", 4, null));
|
||||
expectedDublinPivots.add(new ComparablePivotField("company_t", "null", 3, null));
|
||||
expectedDublinPivots.add(new ComparablePivotField("company_t", "fujitsu", 2, null));
|
||||
expectedDublinPivots.add(new ComparablePivotField("company_t", "bbc", 1, null));
|
||||
List<PivotField> expectedLondonPivots = new UnorderedEqualityArrayList<PivotField>();
|
||||
expectedLondonPivots.add(new ComparablePivotField("company_t", "polecat", 3, null));
|
||||
expectedLondonPivots.add(new ComparablePivotField("company_t", "microsoft", 2, null));
|
||||
expectedLondonPivots.add(new ComparablePivotField("company_t", "fujitsu", 2, null));
|
||||
expectedLondonPivots.add(new ComparablePivotField("company_t", "null", 3,null));
|
||||
expectedLondonPivots.add(new ComparablePivotField("company_t", "bbc", 2, null));
|
||||
List<PivotField> expectedLAPivots = new UnorderedEqualityArrayList<PivotField>();
|
||||
expectedLAPivots.add(new ComparablePivotField("company_t", "microsoft", 2,null));
|
||||
expectedLAPivots.add(new ComparablePivotField("company_t", "fujitsu", 2,null));
|
||||
expectedLAPivots.add(new ComparablePivotField("company_t", "null", 2, null));
|
||||
expectedLAPivots.add(new ComparablePivotField("company_t", "bbc", 1, null));
|
||||
expectedLAPivots.add(new ComparablePivotField("company_t", "polecat", 2,null));
|
||||
List<PivotField> expectedKrakowPivots = new UnorderedEqualityArrayList<PivotField>();
|
||||
expectedKrakowPivots.add(new ComparablePivotField("company_t", "polecat",2, null));
|
||||
expectedKrakowPivots.add(new ComparablePivotField("company_t", "bbc", 2, null));
|
||||
expectedKrakowPivots.add(new ComparablePivotField("company_t", "null", 3,null));
|
||||
expectedKrakowPivots.add(new ComparablePivotField("company_t", "fujitsu", 1, null));
|
||||
expectedKrakowPivots.add(new ComparablePivotField("company_t", "microsoft", 1, null));
|
||||
List<PivotField> expectedCorkPivots = new UnorderedEqualityArrayList<PivotField>();
|
||||
expectedCorkPivots.add(new ComparablePivotField("company_t", "fujitsu", 1, null));
|
||||
expectedCorkPivots.add(new ComparablePivotField("company_t", "rte", 1, null));
|
||||
expectedPlacePivots.add(new ComparablePivotField("place_t", "dublin", 4, expectedDublinPivots));
|
||||
expectedPlacePivots.add(new ComparablePivotField("place_t", "cardiff", 3, expectedCardiffPivots));
|
||||
expectedPlacePivots.add(new ComparablePivotField("place_t", "london", 4, expectedLondonPivots));
|
||||
expectedPlacePivots.add(new ComparablePivotField("place_t", "la", 3, expectedLAPivots));
|
||||
expectedPlacePivots.add(new ComparablePivotField("place_t", "krakow", 3, expectedKrakowPivots));
|
||||
expectedPlacePivots.add(new ComparablePivotField("place_t", "cork", 1, expectedCorkPivots));
|
||||
|
||||
|
||||
List<PivotField> placePivots = rsp.getFacetPivot().get("place_t,company_t");
|
||||
|
||||
// Useful to check for errors, orders lists and does toString() equality
|
||||
// check
|
||||
testOrderedPivotsStringEquality(expectedPlacePivots, placePivots);
|
||||
|
||||
assertEquals(expectedPlacePivots, placePivots);
|
||||
|
||||
// Test sorting by count
|
||||
|
||||
params.set(FacetParams.FACET_SORT, FacetParams.FACET_SORT_COUNT);
|
||||
|
||||
rsp = queryServer(params);
|
||||
|
||||
placePivots = rsp.getFacetPivot().get("place_t,company_t");
|
||||
|
||||
testCountSorting(placePivots);
|
||||
|
||||
// Test limit
|
||||
|
||||
params.set(FacetParams.FACET_LIMIT, 2);
|
||||
|
||||
rsp = queryServer(params);
|
||||
|
||||
expectedPlacePivots = new UnorderedEqualityArrayList<PivotField>();
|
||||
expectedDublinPivots = new UnorderedEqualityArrayList<PivotField>();
|
||||
expectedDublinPivots.add(new ComparablePivotField("company_t", "polecat",
|
||||
4, null));
|
||||
expectedDublinPivots.add(new ComparablePivotField("company_t", "microsoft",
|
||||
4, null));
|
||||
expectedLondonPivots = new UnorderedEqualityArrayList<PivotField>();
|
||||
expectedLondonPivots.add(new ComparablePivotField("company_t", "null", 3,
|
||||
null));
|
||||
expectedLondonPivots.add(new ComparablePivotField("company_t", "polecat", 3,
|
||||
null));
|
||||
expectedPlacePivots.add(new ComparablePivotField("place_t", "dublin", 4,
|
||||
expectedDublinPivots));
|
||||
expectedPlacePivots.add(new ComparablePivotField("place_t", "london", 4,
|
||||
expectedLondonPivots));
|
||||
|
||||
placePivots = rsp.getFacetPivot().get("place_t,company_t");
|
||||
|
||||
assertEquals(expectedPlacePivots, placePivots);
|
||||
|
||||
// Test individual facet.limit values
|
||||
params.remove(FacetParams.FACET_LIMIT);
|
||||
|
||||
params.set("f.place_t." + FacetParams.FACET_LIMIT, 1);
|
||||
params.set("f.company_t." + FacetParams.FACET_LIMIT, 4);
|
||||
|
||||
rsp = queryServer(params);
|
||||
|
||||
expectedPlacePivots = new UnorderedEqualityArrayList<PivotField>();
|
||||
|
||||
expectedDublinPivots = new UnorderedEqualityArrayList<PivotField>();
|
||||
expectedDublinPivots.add(new ComparablePivotField("company_t", "microsoft",4, null));
|
||||
expectedDublinPivots.add(new ComparablePivotField("company_t", "polecat",4, null));
|
||||
expectedDublinPivots.add(new ComparablePivotField("company_t", "null",3, null));
|
||||
expectedDublinPivots.add(new ComparablePivotField("company_t", "fujitsu",2, null));
|
||||
|
||||
expectedLondonPivots = new UnorderedEqualityArrayList<PivotField>();
|
||||
expectedLondonPivots.add(new ComparablePivotField("company_t", "null", 3, null));
|
||||
expectedLondonPivots.add(new ComparablePivotField("company_t", "polecat", 3, null));
|
||||
expectedLondonPivots.add(new ComparablePivotField("company_t", "bbc", 2, null));
|
||||
expectedLondonPivots.add(new ComparablePivotField("company_t", "fujitsu", 2, null));
|
||||
|
||||
expectedCardiffPivots = new UnorderedEqualityArrayList<PivotField>();
|
||||
expectedCardiffPivots.add(new ComparablePivotField("company_t", "polecat", 3, null));
|
||||
|
||||
expectedKrakowPivots = new UnorderedEqualityArrayList<PivotField>();
|
||||
expectedKrakowPivots.add(new ComparablePivotField("company_t", "null", 3, null));
|
||||
|
||||
expectedLAPivots = new UnorderedEqualityArrayList<PivotField>();
|
||||
expectedLAPivots.add(new ComparablePivotField("company_t", "fujitsu", 2, null));
|
||||
|
||||
expectedCorkPivots = new UnorderedEqualityArrayList<PivotField>();
|
||||
expectedCorkPivots.add(new ComparablePivotField("company_t", "fujitsu", 1, null));
|
||||
|
||||
expectedPlacePivots.add(new ComparablePivotField("place_t", "dublin", 4, expectedDublinPivots));
|
||||
|
||||
placePivots = rsp.getFacetPivot().get("place_t,company_t");
|
||||
assertEquals(expectedPlacePivots, placePivots);
|
||||
|
||||
params.remove("f.company_t." + FacetParams.FACET_LIMIT);
|
||||
params.remove("f.place_t." + FacetParams.FACET_LIMIT);
|
||||
params.set(FacetParams.FACET_LIMIT, 2);
|
||||
|
||||
// Test facet.missing=true with diff sorts
|
||||
|
||||
index("id",777); // NOTE: id=25 has no place as well
|
||||
commit();
|
||||
|
||||
SolrParams missingA = params( "q", "*:*",
|
||||
"rows", "0",
|
||||
"facet","true",
|
||||
"facet.pivot","place_t,company_t",
|
||||
// default facet.sort
|
||||
FacetParams.FACET_MISSING, "true" );
|
||||
SolrParams missingB = SolrParams.wrapDefaults(missingA,
|
||||
params(FacetParams.FACET_LIMIT, "4",
|
||||
"facet.sort", "index"));
|
||||
for (SolrParams p : new SolrParams[] { missingA, missingB }) {
|
||||
// in either case, the last pivot option should be the same
|
||||
rsp = query( p );
|
||||
placePivots = rsp.getFacetPivot().get("place_t,company_t");
|
||||
assertTrue("not enough values for pivot: " + p + " => " + placePivots,
|
||||
1 < placePivots.size());
|
||||
PivotField missing = placePivots.get(placePivots.size()-1);
|
||||
assertNull("not the missing place value: " + p, missing.getValue());
|
||||
assertEquals("wrong missing place count: " + p, 2, missing.getCount());
|
||||
assertTrue("not enough sub-pivots for missing place: "+ p +" => " + missing.getPivot(),
|
||||
1 < missing.getPivot().size());
|
||||
missing = missing.getPivot().get(missing.getPivot().size()-1);
|
||||
assertNull("not the missing company value: " + p, missing.getValue());
|
||||
assertEquals("wrong missing company count: " + p, 1, missing.getCount());
|
||||
assertNull("company shouldn't have sub-pivots: " + p, missing.getPivot());
|
||||
}
|
||||
|
||||
// sort=index + mincount + limit
|
||||
for (SolrParams variableParams : new SolrParams[] {
|
||||
// we should get the same results regardless of overrequest
|
||||
params("facet.overrequest.count","0",
|
||||
"facet.overrequest.ratio","0"),
|
||||
params() }) {
|
||||
|
||||
|
||||
SolrParams p = SolrParams.wrapDefaults( params( "q", "*:*",
|
||||
"rows", "0",
|
||||
"facet","true",
|
||||
"facet.pivot","company_t",
|
||||
"facet.sort", "index",
|
||||
"facet.pivot.mincount", "4",
|
||||
"facet.limit", "4"),
|
||||
variableParams );
|
||||
|
||||
try {
|
||||
List<PivotField> pivots = query( p ).getFacetPivot().get("company_t");
|
||||
assertEquals(4, pivots.size());
|
||||
assertEquals("fujitsu", pivots.get(0).getValue());
|
||||
assertEquals(4, pivots.get(0).getCount());
|
||||
assertEquals("microsoft", pivots.get(1).getValue());
|
||||
assertEquals(5, pivots.get(1).getCount());
|
||||
assertEquals("null", pivots.get(2).getValue());
|
||||
assertEquals(6, pivots.get(2).getCount());
|
||||
assertEquals("polecat", pivots.get(3).getValue());
|
||||
assertEquals(6, pivots.get(3).getCount());
|
||||
|
||||
} catch (AssertionFailedError ae) {
|
||||
throw new AssertionError(ae.getMessage() + " <== " + p.toString(), ae);
|
||||
}
|
||||
}
|
||||
|
||||
// sort=index + mincount + limit + offset
|
||||
for (SolrParams variableParams : new SolrParams[] {
|
||||
// we should get the same results regardless of overrequest
|
||||
params("facet.overrequest.count","0",
|
||||
"facet.overrequest.ratio","0"),
|
||||
params() }) {
|
||||
|
||||
SolrParams p = SolrParams.wrapDefaults( params( "q", "*:*",
|
||||
"rows", "0",
|
||||
"facet","true",
|
||||
"facet.pivot","company_t",
|
||||
"facet.sort", "index",
|
||||
"facet.pivot.mincount", "4",
|
||||
"facet.offset", "1",
|
||||
"facet.limit", "4"),
|
||||
variableParams );
|
||||
try {
|
||||
List<PivotField> pivots = query( p ).getFacetPivot().get("company_t");
|
||||
assertEquals(3, pivots.size()); // asked for 4, but not enough meet the mincount
|
||||
assertEquals("microsoft", pivots.get(0).getValue());
|
||||
assertEquals(5, pivots.get(0).getCount());
|
||||
assertEquals("null", pivots.get(1).getValue());
|
||||
assertEquals(6, pivots.get(1).getCount());
|
||||
assertEquals("polecat", pivots.get(2).getValue());
|
||||
assertEquals(6, pivots.get(2).getCount());
|
||||
|
||||
} catch (AssertionFailedError ae) {
|
||||
throw new AssertionError(ae.getMessage() + " <== " + p.toString(), ae);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// sort=index + mincount + limit + offset (more permutations)
|
||||
for (SolrParams variableParams : new SolrParams[] {
|
||||
// all of these combinations should result in the same first value
|
||||
params("facet.pivot.mincount", "4",
|
||||
"facet.offset", "2"),
|
||||
params("facet.pivot.mincount", "5",
|
||||
"facet.offset", "1"),
|
||||
params("facet.pivot.mincount", "6",
|
||||
"facet.offset", "0" ) }) {
|
||||
|
||||
SolrParams p = SolrParams.wrapDefaults( params( "q", "*:*",
|
||||
"rows", "0",
|
||||
"facet","true",
|
||||
"facet.limit","1",
|
||||
"facet.sort","index",
|
||||
"facet.overrequest.ratio","0",
|
||||
"facet.pivot", "company_t"),
|
||||
variableParams );
|
||||
|
||||
try {
|
||||
List<PivotField> pivots = query( p ).getFacetPivot().get("company_t");
|
||||
assertEquals(1, pivots.size());
|
||||
assertEquals(pivots.toString(), "null", pivots.get(0).getValue());
|
||||
assertEquals(pivots.toString(), 6, pivots.get(0).getCount());
|
||||
|
||||
} catch (AssertionFailedError ae) {
|
||||
throw new AssertionError(ae.getMessage() + " <== " + p.toString(), ae);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Useful to check for errors, orders lists and does toString() equality check
|
||||
private void testOrderedPivotsStringEquality(
|
||||
List<PivotField> expectedPlacePivots, List<PivotField> placePivots) {
|
||||
Collections.sort(expectedPlacePivots, new PivotFieldComparator());
|
||||
for (PivotField expectedPivot : expectedPlacePivots) {
|
||||
if (expectedPivot.getPivot() != null) {
|
||||
Collections.sort(expectedPivot.getPivot(), new PivotFieldComparator());
|
||||
}
|
||||
}
|
||||
Collections.sort(placePivots, new PivotFieldComparator());
|
||||
for (PivotField pivot : placePivots) {
|
||||
if (pivot.getPivot() != null) {
|
||||
Collections.sort(pivot.getPivot(), new PivotFieldComparator());
|
||||
}
|
||||
}
|
||||
assertEquals(expectedPlacePivots.toString(), placePivots.toString());
|
||||
}
|
||||
|
||||
private void testCountSorting(List<PivotField> pivots) {
|
||||
Integer lastCount = null;
|
||||
for (PivotField pivot : pivots) {
|
||||
if (lastCount != null) {
|
||||
assertTrue(pivot.getCount() <= lastCount);
|
||||
}
|
||||
lastCount = pivot.getCount();
|
||||
if (pivot.getPivot() != null) {
|
||||
testCountSorting(pivot.getPivot());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static class ComparablePivotField extends PivotField {
|
||||
|
||||
|
||||
public ComparablePivotField(String f, Object v, int count,
|
||||
List<PivotField> pivot) {
|
||||
super(f,v,count,pivot);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
if (this == obj) return true;
|
||||
if (obj == null) return false;
|
||||
if (!obj.getClass().isAssignableFrom(PivotField.class)) return false;
|
||||
PivotField other = (PivotField) obj;
|
||||
if (getCount() != other.getCount()) return false;
|
||||
if (getField() == null) {
|
||||
if (other.getField() != null) return false;
|
||||
} else if (!getField().equals(other.getField())) return false;
|
||||
if (getPivot() == null) {
|
||||
if (other.getPivot() != null) return false;
|
||||
} else if (!getPivot().equals(other.getPivot())) return false;
|
||||
if (getValue() == null) {
|
||||
if (other.getValue() != null) return false;
|
||||
} else if (!getValue().equals(other.getValue())) return false;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
public static class UnorderedEqualityArrayList<T> extends ArrayList<T> {
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
boolean equal = false;
|
||||
if (o instanceof ArrayList) {
|
||||
List<?> otherList = (List<?>) o;
|
||||
if (size() == otherList.size()) {
|
||||
equal = true;
|
||||
for (Object objectInOtherList : otherList) {
|
||||
if (!contains(objectInOtherList)) {
|
||||
equal = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return equal;
|
||||
}
|
||||
|
||||
public int indexOf(Object o) {
|
||||
for (int i = 0; i < size(); i++) {
|
||||
if (get(i).equals(o)) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
public class PivotFieldComparator implements Comparator<PivotField> {
|
||||
|
||||
@Override
|
||||
public int compare(PivotField o1, PivotField o2) {
|
||||
Integer compare = (Integer.valueOf(o2.getCount())).compareTo(Integer
|
||||
.valueOf(o1.getCount()));
|
||||
if (compare == 0) {
|
||||
compare = ((String) o2.getValue()).compareTo((String) o1.getValue());
|
||||
}
|
||||
return compare;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,118 @@
|
|||
package org.apache.solr.handler.component;
|
||||
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.handler.component.PivotFacetField;
|
||||
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Arrays;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
|
||||
/**
|
||||
* A light weight test of various helper methods used in pivot faceting
|
||||
*
|
||||
**/
|
||||
public class TestPivotHelperCode extends SolrTestCaseJ4{
|
||||
|
||||
/**
|
||||
* test refinement encoding/decoding matches specific expected encoded values
|
||||
* @see PivotFacetHelper#encodeRefinementValuePath
|
||||
* @see PivotFacetHelper#decodeRefinementValuePath
|
||||
*/
|
||||
public void testRefinementStringEncodingWhiteBox() {
|
||||
// trivial example with some basci escaping of an embedded comma
|
||||
assertBiDirectionalEncoding(strs("foo,bar","yak","zat"), "~foo\\,bar,~yak,~zat");
|
||||
|
||||
// simple single valued case
|
||||
assertBiDirectionalEncoding( strs("foo"), "~foo");
|
||||
|
||||
// special case: empty list
|
||||
assertBiDirectionalEncoding(strs(), "");
|
||||
|
||||
// special case: single element list containing empty string
|
||||
assertBiDirectionalEncoding(strs(""), "~");
|
||||
|
||||
// special case: single element list containing null
|
||||
assertBiDirectionalEncoding(strs((String)null), "^");
|
||||
|
||||
// mix of empty strings & null with other values
|
||||
assertBiDirectionalEncoding(strs("", "foo", "", "", null, "bar"),
|
||||
"~,~foo,~,~,^,~bar");
|
||||
}
|
||||
|
||||
/**
|
||||
* test refinement encoding/decoding of random sets of values can be round tripped,
|
||||
* w/o worrying about what the actual encoding looks like
|
||||
*
|
||||
* @see PivotFacetHelper#encodeRefinementValuePath
|
||||
* @see PivotFacetHelper#decodeRefinementValuePath
|
||||
*/
|
||||
public void testRefinementStringEncodingBlockBoxRoundTrip() {
|
||||
// random data: we should be able to round trip any set of random strings
|
||||
final int numIters = atLeast(100);
|
||||
for (int i = 0; i < numIters; i++) {
|
||||
final int numStrs = atLeast(1);
|
||||
List<String> data = new ArrayList<String>(numStrs);
|
||||
for (int j = 0; j < numStrs; j++) {
|
||||
// :TODO: mix in nulls
|
||||
data.add(TestUtil.randomUnicodeString(random()));
|
||||
}
|
||||
String encoded = PivotFacetHelper.encodeRefinementValuePath(data);
|
||||
List<String> decoded = PivotFacetHelper.decodeRefinementValuePath(encoded);
|
||||
assertEquals(data, decoded);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private void assertBiDirectionalEncoding(List<String> data, String encoded) {
|
||||
assertEquals(data, PivotFacetHelper.decodeRefinementValuePath(encoded));
|
||||
assertEquals(encoded, PivotFacetHelper.encodeRefinementValuePath(data));
|
||||
}
|
||||
|
||||
|
||||
public void testCompareWithNullLast() throws Exception {
|
||||
Long a = random().nextLong();
|
||||
Long b = random().nextLong();
|
||||
|
||||
assertEquals(a.compareTo(b), PivotFacetFieldValueCollection.compareWithNullLast(a, b));
|
||||
assertEquals(b.compareTo(a), PivotFacetFieldValueCollection.compareWithNullLast(b, a));
|
||||
|
||||
Long bb = new Long(b.longValue());
|
||||
assertEquals(0, PivotFacetFieldValueCollection.compareWithNullLast(b, bb));
|
||||
|
||||
assertEquals(0, PivotFacetFieldValueCollection.compareWithNullLast(null, null));
|
||||
|
||||
assertTrue( PivotFacetFieldValueCollection.compareWithNullLast(a, null) < 0 );
|
||||
assertTrue( PivotFacetFieldValueCollection.compareWithNullLast(b, null) < 0 );
|
||||
|
||||
assertTrue( 0 < PivotFacetFieldValueCollection.compareWithNullLast(null, a) );
|
||||
assertTrue( 0 < PivotFacetFieldValueCollection.compareWithNullLast(null, b) );
|
||||
|
||||
}
|
||||
|
||||
|
||||
private List<String> strs(String... strs) {
|
||||
return Arrays.<String>asList(strs);
|
||||
}
|
||||
|
||||
}
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
package org.apache.solr.util;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
@ -31,6 +32,29 @@ import org.junit.Assert;
|
|||
*
|
||||
*/
|
||||
public class TestUtils extends LuceneTestCase {
|
||||
|
||||
public void testJoin() {
|
||||
assertEquals("a|b|c", StrUtils.join(Arrays.asList("a","b","c"), '|'));
|
||||
assertEquals("a,b,c", StrUtils.join(Arrays.asList("a","b","c"), ','));
|
||||
assertEquals("a\\,b,c", StrUtils.join(Arrays.asList("a,b","c"), ','));
|
||||
assertEquals("a,b|c", StrUtils.join(Arrays.asList("a,b","c"), '|'));
|
||||
|
||||
assertEquals("a\\\\b|c", StrUtils.join(Arrays.asList("a\\b","c"), '|'));
|
||||
}
|
||||
|
||||
public void testEscapeTextWithSeparator() {
|
||||
assertEquals("a", StrUtils.escapeTextWithSeparator("a", '|'));
|
||||
assertEquals("a", StrUtils.escapeTextWithSeparator("a", ','));
|
||||
|
||||
assertEquals("a\\|b", StrUtils.escapeTextWithSeparator("a|b", '|'));
|
||||
assertEquals("a|b", StrUtils.escapeTextWithSeparator("a|b", ','));
|
||||
assertEquals("a,b", StrUtils.escapeTextWithSeparator("a,b", '|'));
|
||||
assertEquals("a\\,b", StrUtils.escapeTextWithSeparator("a,b", ','));
|
||||
assertEquals("a\\\\b", StrUtils.escapeTextWithSeparator("a\\b", ','));
|
||||
|
||||
assertEquals("a\\\\\\,b", StrUtils.escapeTextWithSeparator("a\\,b", ','));
|
||||
}
|
||||
|
||||
public void testSplitEscaping() {
|
||||
List<String> arr = StrUtils.splitSmart("\\r\\n:\\t\\f\\b", ":", true);
|
||||
assertEquals(2,arr.size());
|
||||
|
|
|
@ -390,10 +390,19 @@ public class QueryResponse extends SolrResponseBase
|
|||
ArrayList<PivotField> values = new ArrayList<>( list.size() );
|
||||
for( NamedList nl : list ) {
|
||||
// NOTE, this is cheating, but we know the order they are written in, so no need to check
|
||||
assert "field".equals(nl.getName(0));
|
||||
String f = (String)nl.getVal( 0 );
|
||||
assert "value".equals(nl.getName(1));
|
||||
Object v = nl.getVal( 1 );
|
||||
assert "count".equals(nl.getName(2));
|
||||
int cnt = ((Integer)nl.getVal( 2 )).intValue();
|
||||
List<PivotField> p = (nl.size()<4)?null:readPivots((List<NamedList>)nl.getVal(3) );
|
||||
List<PivotField> p = null;
|
||||
if (4 <= nl.size()) {
|
||||
assert "pivot".equals(nl.getName(3));
|
||||
Object subPiv = nl.getVal(3);
|
||||
assert null != subPiv : "Server sent back 'null' for sub pivots?";
|
||||
p = readPivots( (List<NamedList>) subPiv );
|
||||
}
|
||||
values.add( new PivotField( f, v, cnt, p ) );
|
||||
}
|
||||
return values;
|
||||
|
|
|
@ -100,6 +100,24 @@ public interface FacetParams {
|
|||
public static final String FACET_MISSING = FACET + ".missing";
|
||||
|
||||
|
||||
static final String FACET_OVERREQUEST = FACET + ".overrequest";
|
||||
|
||||
/**
|
||||
* The ratio to over-request by when performing initial distributed requests.
|
||||
*
|
||||
* default value is 1.5
|
||||
*/
|
||||
public static final String FACET_OVERREQUEST_RATIO = FACET_OVERREQUEST + ".ratio";
|
||||
|
||||
/**
|
||||
* An additional amount to over-request by when performing initial distributed requests. This
|
||||
* value will be added after accounting for the over-request ratio.
|
||||
*
|
||||
* default value is 10
|
||||
*/
|
||||
public static final String FACET_OVERREQUEST_COUNT = FACET_OVERREQUEST + ".count";
|
||||
|
||||
|
||||
/**
|
||||
* Comma separated list of fields to pivot
|
||||
*
|
||||
|
|
|
@ -143,7 +143,10 @@ public class StrUtils {
|
|||
return result;
|
||||
}
|
||||
|
||||
/** Creates a backslash escaped string, joining all the items. */
|
||||
/**
|
||||
* Creates a backslash escaped string, joining all the items.
|
||||
* @see #escapeTextWithSeparator
|
||||
*/
|
||||
public static String join(List<?> items, char separator) {
|
||||
StringBuilder sb = new StringBuilder(items.size() << 3);
|
||||
boolean first=true;
|
||||
|
@ -154,13 +157,7 @@ public class StrUtils {
|
|||
} else {
|
||||
sb.append(separator);
|
||||
}
|
||||
for (int i=0; i<item.length(); i++) {
|
||||
char ch = item.charAt(i);
|
||||
if (ch=='\\' || ch == separator) {
|
||||
sb.append('\\');
|
||||
}
|
||||
sb.append(ch);
|
||||
}
|
||||
appendEscapedTextToBuilder(sb, item, separator);
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
@ -283,4 +280,31 @@ public class StrUtils {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new copy of the string with the separator backslash escaped.
|
||||
* @see #join
|
||||
*/
|
||||
public static String escapeTextWithSeparator(String item, char separator) {
|
||||
StringBuilder sb = new StringBuilder(item.length() * 2);
|
||||
appendEscapedTextToBuilder(sb, item, separator);
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* writes chars from item to out, backslash escaping as needed based on separator --
|
||||
* but does not append the seperator itself
|
||||
*/
|
||||
public static void appendEscapedTextToBuilder(StringBuilder out,
|
||||
String item,
|
||||
char separator) {
|
||||
for (int i = 0; i < item.length(); i++) {
|
||||
char ch = item.charAt(i);
|
||||
if (ch == '\\' || ch == separator) {
|
||||
out.append('\\');
|
||||
}
|
||||
out.append(ch);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -58,6 +58,7 @@ import org.apache.solr.request.SolrQueryRequest;
|
|||
import org.apache.solr.request.SolrRequestHandler;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.schema.TrieDateField;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.servlet.DirectSolrConnection;
|
||||
import org.apache.solr.util.AbstractSolrTestCase;
|
||||
|
@ -93,11 +94,13 @@ import java.net.URL;
|
|||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Comparator;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
import java.util.logging.ConsoleHandler;
|
||||
|
@ -2050,5 +2053,44 @@ public abstract class SolrTestCaseJ4 extends LuceneTestCase {
|
|||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns <code>likely</code> most (1/10) of the time, otherwise <code>unlikely</code>
|
||||
*/
|
||||
public static Object skewed(Object likely, Object unlikely) {
|
||||
return (0 == TestUtil.nextInt(random(), 0, 9)) ? unlikely : likely;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a randomly generated Date in the appropriate Solr external (input) format
|
||||
* @see #randomSkewedDate
|
||||
*/
|
||||
public static String randomDate() {
|
||||
return TrieDateField.formatExternal(new Date(random().nextLong()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a Date such that all results from this method always have the same values for
|
||||
* year+month+day+hour+minute but the seconds are randomized. This can be helpful for
|
||||
* indexing documents with random date values that are biased for a narrow window
|
||||
* (one day) to test collisions/overlaps
|
||||
*
|
||||
* @see #randomDate
|
||||
*/
|
||||
public static String randomSkewedDate() {
|
||||
return String.format(Locale.ROOT, "2010-10-31T10:31:%02d.000Z",
|
||||
TestUtil.nextInt(random(), 0, 59));
|
||||
}
|
||||
|
||||
/**
|
||||
* We want "realistic" unicode strings beyond simple ascii, but because our
|
||||
* updates use XML we need to ensure we don't get "special" code block.
|
||||
*/
|
||||
public static String randomXmlUsableUnicodeString() {
|
||||
String result = TestUtil.randomRealisticUnicodeString(random());
|
||||
if (result.matches(".*\\p{InSpecials}.*")) {
|
||||
result = TestUtil.randomSimpleString(random());
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue