mirror of https://github.com/apache/lucene.git
SOLR-12954: fix facet.pivot refinement bugs when using facet.sort=index and facet.mincount>1
This commit is contained in:
parent
31d7dfe6b1
commit
c5ff4a4444
|
@ -233,6 +233,8 @@ Bug Fixes
|
|||
* SOLR-12875: fix ArrayIndexOutOfBoundsException when unique(field) or uniqueBlock(_root_) is
|
||||
used with DVHASH method in json.facet. (Tim Underwood via Mikhail Khludnev)
|
||||
|
||||
* SOLR-12954: fix facet.pivot refinement bugs when using facet.sort=index and facet.mincount>1 (hossman)
|
||||
|
||||
Improvements
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -650,7 +650,7 @@ public class FacetComponent extends SearchComponent {
|
|||
(fieldToOverRequest, FacetParams.FACET_SORT, defaultSort);
|
||||
|
||||
int shardLimit = requestedLimit + offset;
|
||||
int shardMinCount = requestedMinCount;
|
||||
int shardMinCount = Math.min(requestedMinCount, 1);
|
||||
|
||||
// per-shard mincount & overrequest
|
||||
if ( FacetParams.FACET_SORT_INDEX.equals(sort) &&
|
||||
|
@ -670,7 +670,6 @@ public class FacetComponent extends SearchComponent {
|
|||
if ( 0 < requestedLimit ) {
|
||||
shardLimit = doOverRequestMath(shardLimit, overRequestRatio, overRequestCount);
|
||||
}
|
||||
shardMinCount = Math.min(requestedMinCount, 1);
|
||||
}
|
||||
sreq.params.set(paramStart + FacetParams.FACET_LIMIT, shardLimit);
|
||||
sreq.params.set(paramStart + FacetParams.FACET_PIVOT_MINCOUNT, shardMinCount);
|
||||
|
|
|
@ -118,14 +118,36 @@ public class PivotFacetFieldValueCollection implements Iterable<PivotFacetValue>
|
|||
*/
|
||||
public List<PivotFacetValue> getNextLevelValuesToRefine() {
|
||||
final int numRefinableValues = getExplicitValuesListSize();
|
||||
if (facetFieldOffset < numRefinableValues) {
|
||||
final int offsetPlusCount = (facetFieldLimit >= 0)
|
||||
? Math.min(facetFieldLimit + facetFieldOffset, numRefinableValues)
|
||||
: numRefinableValues;
|
||||
return getExplicitValuesList().subList(facetFieldOffset, offsetPlusCount);
|
||||
} else {
|
||||
if (numRefinableValues < facetFieldOffset) {
|
||||
return Collections.<PivotFacetValue>emptyList();
|
||||
}
|
||||
|
||||
final int offsetPlusCount = (facetFieldLimit >= 0)
|
||||
? Math.min(facetFieldLimit + facetFieldOffset, numRefinableValues)
|
||||
: numRefinableValues;
|
||||
|
||||
if (1 < facetFieldMinimumCount && facetFieldSort.equals(FacetParams.FACET_SORT_INDEX)) {
|
||||
// we have to skip any values that (still) don't meet the mincount
|
||||
//
|
||||
// TODO: in theory we could avoid this extra check by trimming sooner (SOLR-6331)
|
||||
// but since that's a destructive op that blows away the `valuesMap` which we (might?) still need
|
||||
// (and pre-emptively skips the offsets) we're avoiding re-working that optimization
|
||||
// for now until/unless someone gives it more careful thought...
|
||||
final List<PivotFacetValue> results = new ArrayList<>(numRefinableValues);
|
||||
for (PivotFacetValue pivotValue : explicitValues) {
|
||||
if (pivotValue.getCount() >= facetFieldMinimumCount) {
|
||||
results.add(pivotValue);
|
||||
if (numRefinableValues <= results.size()) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
// in the non "sort==count OR mincount==1" situation, we can just return the first N values
|
||||
// because any viable candidate is already in the top N
|
||||
return getExplicitValuesList().subList(facetFieldOffset, offsetPlusCount);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
package org.apache.solr.handler.component;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
|
||||
|
@ -264,6 +265,93 @@ public class DistributedFacetPivotLargeTest extends BaseDistributedSearchTestCas
|
|||
}
|
||||
}
|
||||
|
||||
// check of a single level pivot using sort=index w/mincount big enough
|
||||
// to triggers per-shard mincount > num docs on one shard
|
||||
// (beefed up test of same with nested pivot below)
|
||||
for (int limit : Arrays.asList(4, 444444, -1)) {
|
||||
SolrParams p = params("q", "*:*",
|
||||
"rows", "0",
|
||||
// skip place_s:Nplaceholder buckets
|
||||
"fq","-hiredate_dt:\"2012-10-01T12:30:00Z\"",
|
||||
// skip company_t:compHolderN buckets from twoShard
|
||||
"fq","-(+company_t:compHolder* +real_b:true)",
|
||||
"facet","true",
|
||||
"facet.pivot","place_s",
|
||||
FacetParams.FACET_PIVOT_MINCOUNT, "50",
|
||||
FacetParams.FACET_LIMIT, ""+limit,
|
||||
"facet.sort", "index");
|
||||
rsp = null;
|
||||
try {
|
||||
rsp = query( p );
|
||||
assertPivot("place_s", "cardiff", 107, rsp.getFacetPivot().get("place_s").get(0));
|
||||
// - zeroShard = 50 ... above per-shard min of 50/(numShards=4)
|
||||
// - oneShard = 5 ... below per-shard min of 50/(numShards=4) .. should be refined
|
||||
// - twoShard = 52 ... above per-shard min of 50/(numShards=4)
|
||||
// = threeShard = 0 ... should be refined and still match nothing
|
||||
} catch (AssertionError ae) {
|
||||
throw new AssertionError(ae.getMessage() + ": " + p.toString() + " ==> " + rsp, ae);
|
||||
}
|
||||
}
|
||||
|
||||
// test permutations of mincount & limit with sort=index
|
||||
// (there is a per-shard optimization on mincount when sort=index is used)
|
||||
for (int limit : Arrays.asList(4, 444444, -1)) {
|
||||
SolrParams p = params("q", "*:*",
|
||||
"rows", "0",
|
||||
// skip place_s:Nplaceholder buckets
|
||||
"fq","-hiredate_dt:\"2012-10-01T12:30:00Z\"",
|
||||
// skip company_t:compHolderN buckets from twoShard
|
||||
"fq","-(+company_t:compHolder* +real_b:true)",
|
||||
"facet","true",
|
||||
"facet.pivot","place_s,company_t",
|
||||
FacetParams.FACET_PIVOT_MINCOUNT, "50",
|
||||
FacetParams.FACET_LIMIT, ""+limit,
|
||||
"facet.sort", "index");
|
||||
rsp = null;
|
||||
try {
|
||||
rsp = query( p );
|
||||
pivots = rsp.getFacetPivot().get("place_s,company_t");
|
||||
firstPlace = pivots.get(0);
|
||||
assertPivot("place_s", "cardiff", 107, firstPlace);
|
||||
//
|
||||
assertPivot("company_t", "bbc", 101, firstPlace.getPivot().get(0));
|
||||
assertPivot("company_t", "honda", 50, firstPlace.getPivot().get(1));
|
||||
assertPivot("company_t", "microsoft", 56, firstPlace.getPivot().get(2));
|
||||
assertPivot("company_t", "polecat", 52, firstPlace.getPivot().get(3));
|
||||
} catch (AssertionError ae) {
|
||||
throw new AssertionError(ae.getMessage() + ": " + p.toString() + " ==> " + rsp, ae);
|
||||
}
|
||||
}
|
||||
|
||||
{ // similar to the test above, but now force a restriction on the over request and allow
|
||||
// terms that are early in index sort -- but don't meet the mincount overall -- to be considered
|
||||
// in the first phase. (SOLR-12954)
|
||||
SolrParams p = params("q", "*:*",
|
||||
"rows", "0",
|
||||
// skip company_t:compHolderN buckets from twoShard
|
||||
"fq","-(+company_t:compHolder* +real_b:true)",
|
||||
"facet","true",
|
||||
"facet.pivot","place_s,company_t",
|
||||
// the (50) Nplaceholder place_s values exist in 6 each on oneShard
|
||||
FacetParams.FACET_PIVOT_MINCOUNT, ""+(6 * shardsArr.length),
|
||||
FacetParams.FACET_LIMIT, "4",
|
||||
"facet.sort", "index");
|
||||
rsp = null;
|
||||
try {
|
||||
rsp = query( p );
|
||||
pivots = rsp.getFacetPivot().get("place_s,company_t");
|
||||
firstPlace = pivots.get(0);
|
||||
assertPivot("place_s", "cardiff", 107, firstPlace);
|
||||
//
|
||||
assertPivot("company_t", "bbc", 101, firstPlace.getPivot().get(0));
|
||||
assertPivot("company_t", "honda", 50, firstPlace.getPivot().get(1));
|
||||
assertPivot("company_t", "microsoft", 56, firstPlace.getPivot().get(2));
|
||||
assertPivot("company_t", "polecat", 52, firstPlace.getPivot().get(3));
|
||||
} catch (AssertionError ae) {
|
||||
throw new AssertionError(ae.getMessage() + ": " + p.toString() + " ==> " + rsp, ae);
|
||||
}
|
||||
}
|
||||
|
||||
// Pivot Faceting (combined wtih Field Faceting)
|
||||
for (SolrParams facetParams :
|
||||
// with and w/o an excluded fq
|
||||
|
|
Loading…
Reference in New Issue