SOLR-12954: fix facet.pivot refinement bugs when using facet.sort=index and facet.mincount>1

This commit is contained in:
Chris Hostetter 2018-11-02 20:32:32 -07:00
parent 31d7dfe6b1
commit c5ff4a4444
4 changed files with 119 additions and 8 deletions

View File

@ -233,6 +233,8 @@ Bug Fixes
* SOLR-12875: fix ArrayIndexOutOfBoundsException when unique(field) or uniqueBlock(_root_) is
used with DVHASH method in json.facet. (Tim Underwood via Mikhail Khludnev)
* SOLR-12954: fix facet.pivot refinement bugs when using facet.sort=index and facet.mincount>1 (hossman)
Improvements
----------------------

View File

@ -650,7 +650,7 @@ public class FacetComponent extends SearchComponent {
(fieldToOverRequest, FacetParams.FACET_SORT, defaultSort);
int shardLimit = requestedLimit + offset;
int shardMinCount = requestedMinCount;
int shardMinCount = Math.min(requestedMinCount, 1);
// per-shard mincount & overrequest
if ( FacetParams.FACET_SORT_INDEX.equals(sort) &&
@ -670,7 +670,6 @@ public class FacetComponent extends SearchComponent {
if ( 0 < requestedLimit ) {
shardLimit = doOverRequestMath(shardLimit, overRequestRatio, overRequestCount);
}
shardMinCount = Math.min(requestedMinCount, 1);
}
sreq.params.set(paramStart + FacetParams.FACET_LIMIT, shardLimit);
sreq.params.set(paramStart + FacetParams.FACET_PIVOT_MINCOUNT, shardMinCount);

View File

@ -118,14 +118,36 @@ public class PivotFacetFieldValueCollection implements Iterable<PivotFacetValue>
*/
public List<PivotFacetValue> getNextLevelValuesToRefine() {
final int numRefinableValues = getExplicitValuesListSize();
if (facetFieldOffset < numRefinableValues) {
final int offsetPlusCount = (facetFieldLimit >= 0)
? Math.min(facetFieldLimit + facetFieldOffset, numRefinableValues)
: numRefinableValues;
return getExplicitValuesList().subList(facetFieldOffset, offsetPlusCount);
} else {
if (numRefinableValues < facetFieldOffset) {
return Collections.<PivotFacetValue>emptyList();
}
final int offsetPlusCount = (facetFieldLimit >= 0)
? Math.min(facetFieldLimit + facetFieldOffset, numRefinableValues)
: numRefinableValues;
if (1 < facetFieldMinimumCount && facetFieldSort.equals(FacetParams.FACET_SORT_INDEX)) {
// we have to skip any values that (still) don't meet the mincount
//
// TODO: in theory we could avoid this extra check by trimming sooner (SOLR-6331)
// but since that's a destructive op that blows away the `valuesMap` which we (might?) still need
// (and pre-emptively skips the offsets) we're avoiding re-working that optimization
// for now until/unless someone gives it more careful thought...
final List<PivotFacetValue> results = new ArrayList<>(numRefinableValues);
for (PivotFacetValue pivotValue : explicitValues) {
if (pivotValue.getCount() >= facetFieldMinimumCount) {
results.add(pivotValue);
if (numRefinableValues <= results.size()) {
break;
}
}
}
return results;
}
// in the non "sort==count OR mincount==1" situation, we can just return the first N values
// because any viable candidate is already in the top N
return getExplicitValuesList().subList(facetFieldOffset, offsetPlusCount);
}
/**

View File

@ -17,6 +17,7 @@
package org.apache.solr.handler.component;
import java.io.IOException;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
@ -264,6 +265,93 @@ public class DistributedFacetPivotLargeTest extends BaseDistributedSearchTestCas
}
}
// check of a single level pivot using sort=index w/mincount big enough
// to triggers per-shard mincount > num docs on one shard
// (beefed up test of same with nested pivot below)
for (int limit : Arrays.asList(4, 444444, -1)) {
SolrParams p = params("q", "*:*",
"rows", "0",
// skip place_s:Nplaceholder buckets
"fq","-hiredate_dt:\"2012-10-01T12:30:00Z\"",
// skip company_t:compHolderN buckets from twoShard
"fq","-(+company_t:compHolder* +real_b:true)",
"facet","true",
"facet.pivot","place_s",
FacetParams.FACET_PIVOT_MINCOUNT, "50",
FacetParams.FACET_LIMIT, ""+limit,
"facet.sort", "index");
rsp = null;
try {
rsp = query( p );
assertPivot("place_s", "cardiff", 107, rsp.getFacetPivot().get("place_s").get(0));
// - zeroShard = 50 ... above per-shard min of 50/(numShards=4)
// - oneShard = 5 ... below per-shard min of 50/(numShards=4) .. should be refined
// - twoShard = 52 ... above per-shard min of 50/(numShards=4)
// = threeShard = 0 ... should be refined and still match nothing
} catch (AssertionError ae) {
throw new AssertionError(ae.getMessage() + ": " + p.toString() + " ==> " + rsp, ae);
}
}
// test permutations of mincount & limit with sort=index
// (there is a per-shard optimization on mincount when sort=index is used)
for (int limit : Arrays.asList(4, 444444, -1)) {
SolrParams p = params("q", "*:*",
"rows", "0",
// skip place_s:Nplaceholder buckets
"fq","-hiredate_dt:\"2012-10-01T12:30:00Z\"",
// skip company_t:compHolderN buckets from twoShard
"fq","-(+company_t:compHolder* +real_b:true)",
"facet","true",
"facet.pivot","place_s,company_t",
FacetParams.FACET_PIVOT_MINCOUNT, "50",
FacetParams.FACET_LIMIT, ""+limit,
"facet.sort", "index");
rsp = null;
try {
rsp = query( p );
pivots = rsp.getFacetPivot().get("place_s,company_t");
firstPlace = pivots.get(0);
assertPivot("place_s", "cardiff", 107, firstPlace);
//
assertPivot("company_t", "bbc", 101, firstPlace.getPivot().get(0));
assertPivot("company_t", "honda", 50, firstPlace.getPivot().get(1));
assertPivot("company_t", "microsoft", 56, firstPlace.getPivot().get(2));
assertPivot("company_t", "polecat", 52, firstPlace.getPivot().get(3));
} catch (AssertionError ae) {
throw new AssertionError(ae.getMessage() + ": " + p.toString() + " ==> " + rsp, ae);
}
}
{ // similar to the test above, but now force a restriction on the over request and allow
// terms that are early in index sort -- but don't meet the mincount overall -- to be considered
// in the first phase. (SOLR-12954)
SolrParams p = params("q", "*:*",
"rows", "0",
// skip company_t:compHolderN buckets from twoShard
"fq","-(+company_t:compHolder* +real_b:true)",
"facet","true",
"facet.pivot","place_s,company_t",
// the (50) Nplaceholder place_s values exist in 6 each on oneShard
FacetParams.FACET_PIVOT_MINCOUNT, ""+(6 * shardsArr.length),
FacetParams.FACET_LIMIT, "4",
"facet.sort", "index");
rsp = null;
try {
rsp = query( p );
pivots = rsp.getFacetPivot().get("place_s,company_t");
firstPlace = pivots.get(0);
assertPivot("place_s", "cardiff", 107, firstPlace);
//
assertPivot("company_t", "bbc", 101, firstPlace.getPivot().get(0));
assertPivot("company_t", "honda", 50, firstPlace.getPivot().get(1));
assertPivot("company_t", "microsoft", 56, firstPlace.getPivot().get(2));
assertPivot("company_t", "polecat", 52, firstPlace.getPivot().get(3));
} catch (AssertionError ae) {
throw new AssertionError(ae.getMessage() + ": " + p.toString() + " ==> " + rsp, ae);
}
}
// Pivot Faceting (combined wtih Field Faceting)
for (SolrParams facetParams :
// with and w/o an excluded fq