mirror of https://github.com/apache/lucene.git

SOLR-12839: JSON 'terms' Faceting now supports a 'prelim_sort' option to use when initially selecting the top ranking buckets, prior to the final 'sort' option used after refinement.

parent 5c4ab188eb
commit 5dc988f5ee
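
For context, a terms facet using the new option looks like the following (the field and stat names here are illustrative, mirroring the tests added below). The cheap prelim_sort is used while selecting candidate buckets during collection, and the more expensive sort is only computed for those top candidates after refinement:

    json.facet = {
      categories : {
        type  : terms,
        field : cat_s,
        limit : 5,
        prelim_sort : 'count desc',   // cheap sort used while collecting candidate buckets
        sort  : 'skg desc',           // final sort, computed only for the top candidate buckets
        facet : { skg : 'relatedness($fore,$back)' }
      }
    }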
@@ -130,7 +130,8 @@ Upgrade Notes

 New Features
 ----------------------

-(No Changes)
+* SOLR-12839: JSON 'terms' Faceting now supports a 'prelim_sort' option to use when initially selecting
+  the top ranking buckets, prior to the final 'sort' option used after refinement. (hossman)

 Bug Fixes
 ----------------------

@@ -41,8 +41,18 @@ abstract class FacetRequestSorted extends FacetRequest {
    */
   int overrefine = -1;
   long mincount;
-  String sortVariable;
-  SortDirection sortDirection;
+  /**
+   * The basic sorting to do on buckets, defaults to {@link FacetRequest.FacetSort#COUNT_DESC}
+   * @see #prelim_sort
+   */
+  FacetSort sort;
+  /**
+   * An optional "Pre-Sort" that defaults to null.
+   * If specified, then the <code>prelim_sort</code> is used as an optimization in place of {@link #sort}
+   * during collection, and the full {@link #sort} values are only computed for the top candidate buckets
+   * (after refinement)
+   */
+  FacetSort prelim_sort;
   RefineMethod refine; // null, NONE, or SIMPLE

   @Override

@@ -137,8 +147,15 @@ public class FacetField extends FacetRequestSorted {
     if (method == FacetMethod.ENUM) {// at the moment these two are the same
       method = FacetMethod.STREAM;
     }
-    if (method == FacetMethod.STREAM && sf.indexed() &&
-        "index".equals(sortVariable) && sortDirection == SortDirection.asc && !ft.isPointField()) {
+    if (method == FacetMethod.STREAM && sf.indexed() && !ft.isPointField() &&
+        // whether we can use stream processing depends on whether this is a shard request, whether
+        // re-sorting has been requested, and if the effective sort during collection is "index asc"
+        ( fcontext.isShard()
+          // for a shard request, the effective per-shard sort must be index asc
+          ? FacetSort.INDEX_ASC.equals(null == prelim_sort ? sort : prelim_sort)
+          // for a non-shard request, we can only use streaming if there is no pre-sorting
+          : (null == prelim_sort && FacetSort.INDEX_ASC.equals( sort ) ) ) ) {
+
       return new FacetFieldProcessorByEnumTermsStream(fcontext, this, sf);
     }

@@ -102,7 +102,7 @@ public class FacetFieldMerger extends FacetRequestSortedMerger<FacetField> {
       result.add("numBuckets", ((Number)numBuckets.getMergedResult()).longValue());
     }

-    sortBuckets();
+    sortBuckets(freq.sort);

     long first = freq.offset;
     long end = freq.limit >= 0 ? first + (int) freq.limit : Integer.MAX_VALUE;

@@ -18,8 +18,10 @@
 package org.apache.solr.search.facet;

 import java.io.IOException;
+import java.util.Arrays;
+import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.Date;
 import java.util.HashMap;
 import java.util.LinkedHashMap;

@@ -48,8 +50,11 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
   SchemaField sf;
   SlotAcc indexOrderAcc;
   int effectiveMincount;

-  Map<String,AggValueSource> deferredAggs;  // null if none
+  final boolean singlePassSlotAccCollection;
+  final FacetRequest.FacetSort sort;    // never null (may be the user's requested sort, or the prelim_sort)
+  final FacetRequest.FacetSort resort;  // typically null (unless the user specified a prelim_sort)
+
+  final Map<String,AggValueSource> deferredAggs = new HashMap<String,AggValueSource>();

   // TODO: push any of this down to base class?

@@ -67,6 +72,37 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
     super(fcontext, freq);
     this.sf = sf;
     this.effectiveMincount = (int)(fcontext.isShard() ? Math.min(1 , freq.mincount) : freq.mincount);
+    this.singlePassSlotAccCollection = (freq.limit == -1 && freq.subFacets.size() == 0);
+
+    if ( null == freq.prelim_sort ) {
+      // If the user has not specified any preliminary sort, then things are very simple.
+      // Just use the "sort" as is w/o needing any re-sorting
+      this.sort = freq.sort;
+      this.resort = null;
+    } else {
+      assert null != freq.prelim_sort;
+
+      if ( fcontext.isShard() ) {
+        // for a shard request, we can ignore the user's requested "sort" and focus solely on the prelim_sort
+        // the merger will worry about the final sorting -- we don't need to resort anything...
+        this.sort = freq.prelim_sort;
+        this.resort = null;
+
+      } else { // non shard...
+        if ( singlePassSlotAccCollection ) { // special case situation...
+          // when we can do a single pass SlotAcc collection on non-shard request, there is
+          // no point re-sorting. Ignore the freq.prelim_sort and use the freq.sort option as is...
+          this.sort = freq.sort;
+          this.resort = null;
+        } else {
+          // for a non-shard request, we will use the prelim_sort as our initial sort option if it exists
+          // then later we will re-sort on the final desired sort...
+          this.sort = freq.prelim_sort;
+          this.resort = freq.sort;
+        }
+      }
+    }
+    assert null != this.sort;
   }

   /** This is used to create accs for second phase (or to create accs for all aggs) */

@@ -86,17 +122,7 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
       // reuse these accs, but reset them first and resize since size could be different
       for (SlotAcc acc : accs) {
         acc.reset();
-        acc.resize(new SlotAcc.Resizer() {
-          @Override
-          public int getNewSize() {
-            return slotCount;
-          }
-
-          @Override
-          public int getNewSlot(int oldSlot) {
-            return 0;
-          }
-        });
+        acc.resize(new FlatteningResizer(slotCount));
       }
       return;
     } else {

@@ -121,33 +147,47 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
     }
   }

-  void createCollectAcc(int numDocs, int numSlots) throws IOException {
-    accMap = new LinkedHashMap<>();
-
-    // we always count...
-    // allow a subclass to set a custom counter.
-    if (countAcc == null) {
-      countAcc = new CountSlotArrAcc(fcontext, numSlots);
-    }
-
-    if ("count".equals(freq.sortVariable)) {
-      sortAcc = countAcc;
-      deferredAggs = freq.getFacetStats();
-    } else if ("index".equals(freq.sortVariable)) {
+  /**
+   * Simple helper for checking if a {@FacetRequest.FacetSort} is on "count" or "index" and picking
+   * the existing SlotAcc
+   * @return an existing SlotAcc for sorting, else null if it should be built from the Aggs
+   */
+  private SlotAcc getTrivialSortingSlotAcc(FacetRequest.FacetSort fsort) {
+    if ("count".equals(fsort.sortVariable)) {
+      assert null != countAcc;
+      return countAcc;
+    } else if ("index".equals(fsort.sortVariable)) {
       // allow subclass to set indexOrderAcc first
       if (indexOrderAcc == null) {
         // This sorting accumulator just goes by the slot number, so does not need to be collected
         // and hence does not need to find its way into the accMap or accs array.
         indexOrderAcc = new SortSlotAcc(fcontext);
       }
-      sortAcc = indexOrderAcc;
-      deferredAggs = freq.getFacetStats();
+      return indexOrderAcc;
     }
+    return null;
+  }

+  void createCollectAcc(int numDocs, int numSlots) throws IOException {
+    accMap = new LinkedHashMap<>();
+
+    // start with the assumption that we're going to defer the computation of all stats
+    deferredAggs.putAll(freq.getFacetStats());
+
+    // we always count...
+    // allow a subclass to set a custom counter.
+    if (countAcc == null) {
+      countAcc = new CountSlotArrAcc(fcontext, numSlots);
+    }
+
-    // If we are going to return all buckets and if there are no subfacets (that would need a domain), then don't defer
-    // any aggregation calculations to a second phase. This way we can avoid calculating domains for each bucket, which
-    // can be expensive.
-    if (freq.limit == -1 && freq.subFacets.size() == 0) {
+    sortAcc = getTrivialSortingSlotAcc(this.sort);
+
+    if (this.singlePassSlotAccCollection) {
+      // If we are going to return all buckets, and if there are no subfacets (that would need a domain),
+      // then don't defer any aggregation calculations to a second phase.
+      // This way we can avoid calculating domains for each bucket, which can be expensive.
+
       // TODO: BEGIN: why can't we just call createAccs here ?
       accs = new SlotAcc[ freq.getFacetStats().size() ];
       int otherAccIdx = 0;
       for (Map.Entry<String,AggValueSource> entry : freq.getFacetStats().entrySet()) {

@@ -157,6 +197,7 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
         accMap.put(acc.key, acc);
         accs[otherAccIdx++] = acc;
       }
+      // TODO: END: why can't we just call createAccs here ?
       if (accs.length == 1) {
         collectAcc = accs[0];
       } else {

@@ -164,26 +205,21 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
       }

       if (sortAcc == null) {
-        sortAcc = accMap.get(freq.sortVariable);
+        sortAcc = accMap.get(sort.sortVariable);
         assert sortAcc != null;
       }

-      deferredAggs = null;
+      deferredAggs.clear();
     }

     if (sortAcc == null) {
-      AggValueSource sortAgg = freq.getFacetStats().get(freq.sortVariable);
+      AggValueSource sortAgg = freq.getFacetStats().get(sort.sortVariable);
       if (sortAgg != null) {
         collectAcc = sortAgg.createSlotAcc(fcontext, numDocs, numSlots);
-        collectAcc.key = freq.sortVariable; // TODO: improve this
+        collectAcc.key = sort.sortVariable; // TODO: improve this
       }
       sortAcc = collectAcc;
-      deferredAggs = new HashMap<>(freq.getFacetStats());
-      deferredAggs.remove(freq.sortVariable);
+      deferredAggs.remove(sort.sortVariable);
     }

-    if (deferredAggs == null || deferredAggs.size() == 0) {
-      deferredAggs = null;
-    }
-
     boolean needOtherAccs = freq.allBuckets;  // TODO: use for missing too...

@@ -207,7 +243,7 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
       return;
     }

-    int numDeferred = deferredAggs == null ? 0 : deferredAggs.size();
+    final int numDeferred = deferredAggs.size();
     if (numDeferred <= 0) return;

     otherAccs = new SlotAcc[ numDeferred ];

@@ -267,11 +303,13 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
       } else {
         effectiveLimit += freq.overrequest;
       }
+    } else if (null != resort && 0 < freq.overrequest) {
+      // in non-shard situations, if we have a 'resort' we check for explicit overrequest > 0
+      effectiveLimit += freq.overrequest;
     }
   }

-    final int sortMul = freq.sortDirection.getMultiplier();
+    final int sortMul = sort.sortDirection.getMultiplier();

     int maxTopVals = (int) (effectiveLimit >= 0 ? Math.min(freq.offset + effectiveLimit, Integer.MAX_VALUE - 1) : Integer.MAX_VALUE - 1);
     maxTopVals = Math.min(maxTopVals, slotCardinality);

@@ -358,31 +396,53 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
       // moved missing fillBucket after we fill facet since it will reset all the accumulators.
     }

-    // if we are deep paging, we don't have to order the highest "offset" counts.
-    int collectCount = Math.max(0, queue.size() - off);
-    assert collectCount <= maxTopVals;
-    int[] sortedSlots = new int[collectCount];
-    for (int i = collectCount - 1; i >= 0; i--) {
-      sortedSlots[i] = queue.pop().slot;
+    final boolean needFilter = (!deferredAggs.isEmpty()) || freq.getSubFacets().size() > 0;
+    if (needFilter) {
+      createOtherAccs(-1, 1);
     }

-    ArrayList<SimpleOrderedMap> bucketList = new ArrayList<>(collectCount);
-    res.add("buckets", bucketList);
+    // if we are deep paging, we don't have to order the highest "offset" counts...
+    // ...unless we need to resort.
+    int collectCount = Math.max(0, queue.size() - (null == this.resort ? off : 0));
+    //
+    assert collectCount <= maxTopVals;
+    Slot[] sortedSlots = new Slot[collectCount];
+    for (int i = collectCount - 1; i >= 0; i--) {
+      Slot slot = sortedSlots[i] = queue.pop();
+      // At this point we know we're either returning this Slot as a Bucket, or resorting it,
+      // so definitely fill in the bucket value -- we'll need it either way
+      slot.bucketVal = bucketValFromSlotNumFunc.apply(slot.slot);
+
+      if (needFilter || null != this.resort) {
+        slot.bucketFilter = makeBucketQuery(fieldQueryValFunc.apply(slot.bucketVal));
+      }
+    }
+
+    final SlotAcc resortAccForFill = resortSlots(sortedSlots); // No-Op if not needed
+
+    if (null != this.resort) {
+      // now that we've completely resorted, throw away extra docs from possible offset/overrequest...
+      final int endOffset = (int)Math.min((long) sortedSlots.length,
+                                          // NOTE: freq.limit is long, so no risk of overflow here
+                                          off + (freq.limit < 0 ? Integer.MAX_VALUE : freq.limit));
+      if (0 < off || endOffset < sortedSlots.length) {
+        sortedSlots = Arrays.copyOfRange(sortedSlots, off, endOffset);
+      }
+    }
+    List<SimpleOrderedMap> bucketList = new ArrayList<>(sortedSlots.length);

-    boolean needFilter = deferredAggs != null || freq.getSubFacets().size() > 0;
-
-    for (int slotNum : sortedSlots) {
+    for (Slot slot : sortedSlots) {
       SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>();
-      Comparable val = bucketValFromSlotNumFunc.apply(slotNum);
-      bucket.add("val", val);
+      bucket.add("val", slot.bucketVal);

-      Query filter = needFilter ? makeBucketQuery(fieldQueryValFunc.apply(val)) : null;
-
-      fillBucket(bucket, countAcc.getCount(slotNum), slotNum, null, filter);
+      fillBucketFromSlot(bucket, slot, resortAccForFill);

       bucketList.add(bucket);
     }

+    res.add("buckets", bucketList);
+

     if (fcontext.isShard() && shardHasMoreBuckets) {
       // Currently, "more" is an internal implementation detail and only returned for distributed sub-requests
       res.add("more", true);

@@ -420,24 +480,38 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
   }

   private static class Slot {
+    /** The Slot number used during collection */
     int slot;
+
+    /** filled in only once we know the bucket will either be involved in resorting, or returned */
+    Comparable bucketVal;
+
+    /** Filled in if and only if needed for resorting, deferred stats, or subfacets */
+    Query bucketFilter;
+    // TODO: we could potentially store the bucket's (DocSet)subDomain as well,
+    // but that's a much bigger object to hang onto for every slot at the same time
+    // Probably best to just trust the filterCache to do its job
+
+    /** The Slot number used during resorting */
+    int resortSlotNum;
   }

-  private void fillBucket(SimpleOrderedMap<Object> target, int count, int slotNum, DocSet subDomain, Query filter) throws IOException {
+  /** Helper method used solely when looping over buckets to be returned in findTopSlots */
+  private void fillBucketFromSlot(SimpleOrderedMap<Object> target, Slot slot,
+                                  SlotAcc resortAcc) throws IOException {
+    final int count = countAcc.getCount(slot.slot);
     target.add("count", count);
     if (count <= 0 && !freq.processEmpty) return;

-    if (collectAcc != null && slotNum >= 0) {
-      collectAcc.setValues(target, slotNum);
+    if (collectAcc != null && slot.slot >= 0) {
+      collectAcc.setValues(target, slot.slot);
     }

-    createOtherAccs(-1, 1);
-
     if (otherAccs == null && freq.subFacets.isEmpty()) return;

-    if (subDomain == null) {
-      subDomain = fcontext.searcher.getDocSet(filter, fcontext.base);
-    }
+    assert null != slot.bucketFilter;
+    final Query filter = slot.bucketFilter;
+    final DocSet subDomain = fcontext.searcher.getDocSet(filter, fcontext.base);

     // if no subFacets, we only need a DocSet
     // otherwise we need more?

@@ -449,15 +523,119 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
     if (otherAccs != null) {
       // do acc at a time (traversing domain each time) or do all accs for each doc?
       for (SlotAcc acc : otherAccs) {
-        acc.reset(); // TODO: only needed if we previously used for allBuckets or missing
-        acc.collect(subDomain, 0, slot -> { return new SlotContext(filter); });
-        acc.setValues(target, 0);
+        if (acc == resortAcc) {
+          // already collected, just need to get the value from the correct slot
+          acc.setValues(target, slot.resortSlotNum);
+        } else {
+          acc.reset(); // TODO: only needed if we previously used for allBuckets or missing
+          acc.collect(subDomain, 0, s -> { return new SlotContext(filter); });
+          acc.setValues(target, 0);
+        }
       }
     }

     processSubs(target, filter, subDomain, false, null);
   }

+  /**
+   * Helper method that resorts the slots (if needed).
+   *
+   * @return a SlotAcc that should be used {@link SlotAcc#setValues} on the final buckets via
+   * {@link Slot#resortSlotNum} or null if no special SlotAcc was needed (ie: no resorting, or resorting
+   * on something already known/collected)
+   */
+  private SlotAcc resortSlots(Slot[] slots) throws IOException {
+    if (null == this.resort) {
+      return null; // Nothing to do.
+    }
+    assert ! fcontext.isShard();
+
+    // NOTE: getMultiplier() is confusing and weird and meant for use in PriorityQueue.lessThan,
+    // so it's backwards from what you'd expect in a Comparator...
+    final int resortMul = -1 * resort.sortDirection.getMultiplier();
+
+    SlotAcc resortAcc = getTrivialSortingSlotAcc(this.resort);
+    if (null != resortAcc) {
+      // resorting on count or index is rare (and not particularly useful) but if someone chooses to do
+      // either of these we don't need to re-collect ... instead just re-sort the slots based on
+      // the previously collected values using the originally collected slot numbers...
+      if (resortAcc.equals(countAcc)) {
+        final Comparator<Slot> comparator = null != indexOrderAcc ?
+          (new Comparator<Slot>() {
+            public int compare(Slot x, Slot y) {
+              final int cmp = resortMul * countAcc.compare(x.slot, y.slot);
+              return cmp != 0 ? cmp : indexOrderAcc.compare(x.slot, y.slot);
+            }
+          })
+          : (new Comparator<Slot>() {
+            public int compare(Slot x, Slot y) {
+              final int cmp = resortMul * countAcc.compare(x.slot, y.slot);
+              return cmp != 0 ? cmp : Integer.compare(x.slot, y.slot);
+            }
+          });
+        Arrays.sort(slots, comparator);
+        return null;
+      }
+      if (resortAcc.equals(indexOrderAcc)) {
+        // obviously indexOrderAcc is not null, and no need for a fancy tie breaker...
+        Arrays.sort(slots, new Comparator<Slot>() {
+          public int compare(Slot x, Slot y) {
+            return resortMul * indexOrderAcc.compare(x.slot, y.slot);
+          }
+        });
+        return null;
+      }
+      // nothing else should be possible
+      assert false : "trivial resort isn't count or index: " + this.resort;
+    }
+
+    assert null == resortAcc;
+    for (SlotAcc acc : otherAccs) {
+      if (acc.key.equals(this.resort.sortVariable)) {
+        resortAcc = acc;
+        break;
+      }
+    }
+    // TODO: what if resortAcc is still null, ie: bad input? ... throw an error? (see SOLR-13022)
+    // looks like the equivalent sort code path silently ignores sorting if sortVariable isn't in accMap...
+    // ...and we get a deferred NPE when trying to collect.
+    assert null != resortAcc;
+
+    final SlotAcc acc = resortAcc;
+
+    // reset resortAcc to be (just) big enough for all the slots we care about...
+    acc.reset();
+    acc.resize(new FlatteningResizer(slots.length));
+
+    // give each existing Slot a new resortSlotNum and let the resortAcc collect it...
+    for (int slotNum = 0; slotNum < slots.length; slotNum++) {
+      Slot slot = slots[slotNum];
+      slot.resortSlotNum = slotNum;
+
+      assert null != slot.bucketFilter : "null filter for slot=" + slot.bucketVal;
+
+      final DocSet subDomain = fcontext.searcher.getDocSet(slot.bucketFilter, fcontext.base);
+      acc.collect(subDomain, slotNum, s -> { return new SlotContext(slot.bucketFilter); } );
+    }
+
+    // now resort all the Slots according to the new collected values...
+    final Comparator<Slot> comparator = null != indexOrderAcc ?
+      (new Comparator<Slot>() {
+        public int compare(Slot x, Slot y) {
+          final int cmp = resortMul * acc.compare(x.resortSlotNum, y.resortSlotNum);
+          return cmp != 0 ? cmp : indexOrderAcc.compare(x.slot, y.slot);
+        }
+      })
+      : (new Comparator<Slot>() {
+        public int compare(Slot x, Slot y) {
+          final int cmp = resortMul * acc.compare(x.resortSlotNum, y.resortSlotNum);
+          return cmp != 0 ? cmp : Integer.compare(x.slot, y.slot);
+        }
+      });
+    Arrays.sort(slots, comparator);
+    return acc;
+  }
+
   @Override
   protected void processStats(SimpleOrderedMap<Object> bucket, Query bucketQ, DocSet docs, int docCount) throws IOException {
     if (docCount == 0 && !freq.processEmpty || freq.getFacetStats().size() == 0) {

@@ -733,4 +911,20 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
     return bucket;
   }

+  /** Resizes to the specified size, remapping all existing slots to slot 0 */
+  private static final class FlatteningResizer extends SlotAcc.Resizer {
+    private final int slotCount;
+    public FlatteningResizer(int slotCount) {
+      this.slotCount = slotCount;
+    }
+    @Override
+    public int getNewSize() {
+      return slotCount;
+    }
+
+    @Override
+    public int getNewSlot(int oldSlot) {
+      return 0;
+    }
+  }
 }

@@ -44,8 +44,8 @@ public class FacetRangeMerger extends FacetRequestSortedMerger<FacetRange> {
   }

   @Override
-  public void sortBuckets() {
-    // regardless of mincount, every shard returns a consistent set of buckets which are already in the correct order
+  public void sortBuckets(final FacetRequest.FacetSort sort) {
+    // regardless of sort or mincount, every shard returns a consistent set of buckets which are already in the correct order
     sortedBuckets = new ArrayList<>( buckets.values() );
   }

@@ -21,6 +21,7 @@ import java.util.ArrayList;
 import java.util.EnumSet;
 import java.util.LinkedHashMap;
 import java.util.List;
+import java.util.Objects;
 import java.util.Map;

 import org.apache.lucene.search.Query;

@@ -54,8 +55,40 @@ import static org.apache.solr.search.facet.FacetRequest.RefineMethod.NONE;
  */
 public abstract class FacetRequest {

+  /** Simple structure for encapsulating a sort variable and a direction */
+  public static final class FacetSort {
+    final String sortVariable;
+    final SortDirection sortDirection;
+    public FacetSort(final String sortVariable, final SortDirection sortDirection) {
+      assert null != sortVariable;
+      assert null != sortDirection;
+
+      this.sortVariable = sortVariable;
+      this.sortDirection = sortDirection;
+    }
+    public boolean equals(Object other) {
+      if (other instanceof FacetSort) {
+        final FacetSort that = (FacetSort)other;
+        return this.sortVariable.equals(that.sortVariable)
+            && this.sortDirection.equals(that.sortDirection);
+      }
+      return false;
+    }
+    public int hashCode() {
+      return Objects.hash(sortVariable, sortDirection);
+    }
+    public String toString() {
+      return sortVariable + " " + sortDirection;
+    }
+
+    /** Commonly re-used "count desc" (default) */
+    public static final FacetSort COUNT_DESC = new FacetSort("count", SortDirection.desc);
+    /** Commonly re-used "index asc" (index order / streaming) */
+    public static final FacetSort INDEX_ASC = new FacetSort("index", SortDirection.asc);
+  }
+
   public static enum SortDirection {
-    asc(-1) ,
+    asc(-1),
     desc(1);

     private final int multiplier;

@@ -893,8 +926,7 @@ class FacetFieldParser extends FacetParser<FacetField> {
     if (arg instanceof String) {
       // just the field name...
       facet.field = (String)arg;
-      parseSort( null );  // TODO: defaults

     } else if (arg instanceof Map) {
       Map<String, Object> m = (Map<String, Object>) arg;
       facet.field = getField(m);

@@ -921,7 +953,13 @@ class FacetFieldParser extends FacetParser<FacetField> {
       Object o = m.get("facet");
       parseSubs(o);

-      parseSort( m.get(SORT) );
+      // TODO: SOLR-13022 ... validate the sortVariables against the subs.
+      facet.sort = parseSort( m.get(SORT) );
+      facet.prelim_sort = parseSort( m.get("prelim_sort") );
     }

+    if (null == facet.sort) {
+      facet.sort = FacetRequest.FacetSort.COUNT_DESC;
+    }
+
     return facet;

@@ -932,21 +970,23 @@ class FacetFieldParser extends FacetParser<FacetField> {
   // sort : 'mystat desc'
   // OR
   // sort : { mystat : 'desc' }
-  private void parseSort(Object sort) {
+  private static FacetRequest.FacetSort parseSort(Object sort) {
     if (sort == null) {
-      facet.sortVariable = "count";
-      facet.sortDirection = FacetRequest.SortDirection.desc;
+      return null;
     } else if (sort instanceof String) {
       String sortStr = (String)sort;
       if (sortStr.endsWith(" asc")) {
-        facet.sortVariable = sortStr.substring(0, sortStr.length()-" asc".length());
-        facet.sortDirection = FacetRequest.SortDirection.asc;
+        return new FacetRequest.FacetSort(sortStr.substring(0, sortStr.length()-" asc".length()),
+                                          FacetRequest.SortDirection.asc);
       } else if (sortStr.endsWith(" desc")) {
-        facet.sortVariable = sortStr.substring(0, sortStr.length()-" desc".length());
-        facet.sortDirection = FacetRequest.SortDirection.desc;
+        return new FacetRequest.FacetSort(sortStr.substring(0, sortStr.length()-" desc".length()),
+                                          FacetRequest.SortDirection.desc);
       } else {
-        facet.sortVariable = sortStr;
-        facet.sortDirection = "index".equals(facet.sortVariable) ? FacetRequest.SortDirection.asc : FacetRequest.SortDirection.desc; // default direction for "index" is ascending
+        return new FacetRequest.FacetSort(sortStr,
+                                          // default direction for "index" is ascending
+                                          ("index".equals(sortStr)
+                                           ? FacetRequest.SortDirection.asc
+                                           : FacetRequest.SortDirection.desc));
       }
     } else {
       // sort : { myvar : 'desc' }

@@ -955,10 +995,8 @@ class FacetFieldParser extends FacetParser<FacetField> {
       Map.Entry<String,Object> entry = map.entrySet().iterator().next();
       String k = entry.getKey();
       Object v = entry.getValue();
-      facet.sortVariable = k;
-      facet.sortDirection = FacetRequest.SortDirection.valueOf(v.toString());
+      return new FacetRequest.FacetSort(k, FacetRequest.SortDirection.valueOf(v.toString()));
     }
-
   }
 }
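
As the comments above note, parseSort now returns a FacetSort for either accepted input syntax; for example (the stat name is illustrative) these two are equivalent:

    sort : 'mystat desc'
    sort : { mystat : 'desc' }

A bare variable name such as sort:'index' defaults to ascending, while any other bare name defaults to descending; the same parsing applies to the new prelim_sort param.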
@@ -79,25 +79,27 @@ abstract class FacetRequestSortedMerger<FacetRequestT extends FacetRequestSorted
     }
   }

-  public void sortBuckets() {
+
+  public void sortBuckets(final FacetRequest.FacetSort sort) {
+    // NOTE: we *always* re-init from buckets, because it may have been modified post-refinement
     sortedBuckets = new ArrayList<>( buckets.values() );

     Comparator<FacetBucket> comparator = null;

-    final FacetRequest.SortDirection direction = freq.sortDirection;
+    final FacetRequest.SortDirection direction = sort.sortDirection;
     final int sortMul = direction.getMultiplier();

-    if ("count".equals(freq.sortVariable)) {
+    if ("count".equals(sort.sortVariable)) {
       comparator = (o1, o2) -> {
         int v = -Long.compare(o1.count, o2.count) * sortMul;
         return v == 0 ? o1.bucketValue.compareTo(o2.bucketValue) : v;
       };
       Collections.sort(sortedBuckets, comparator);
-    } else if ("index".equals(freq.sortVariable)) {
+    } else if ("index".equals(sort.sortVariable)) {
       comparator = (o1, o2) -> -o1.bucketValue.compareTo(o2.bucketValue) * sortMul;
       Collections.sort(sortedBuckets, comparator);
     } else {
-      final String key = freq.sortVariable;
+      final String key = sort.sortVariable;

       /**
       final FacetSortableMerger[] arr = new FacetSortableMerger[buckets.size()];

@@ -154,6 +156,7 @@ abstract class FacetRequestSortedMerger<FacetRequestT extends FacetRequestSorted
       out.addAll(nulls);
       sortedBuckets = out;
     }
+    assert null != sortedBuckets;
   }

   boolean isBucketComplete(FacetBucket bucket, Context mcontext) {

@@ -181,6 +184,8 @@ abstract class FacetRequestSortedMerger<FacetRequestT extends FacetRequestSorted
       return null;
     }

+    final FacetRequest.FacetSort initial_sort = null == freq.prelim_sort ? freq.sort : freq.prelim_sort;
+
     // Tags for sub facets that have partial facets somewhere in their children.
     // If we are missing a bucket for this shard, we'll need to get the specific buckets that need refining.
     Collection<String> tagsWithPartial = mcontext.getSubsWithPartial(freq);

@@ -206,9 +211,9 @@ abstract class FacetRequestSortedMerger<FacetRequestT extends FacetRequestSorted

     // when we don't have to worry about mincount pruning, there is no need for any
     // over refinement for these sorts...
-    if (freq.mincount <= 1 && ("index".equals(freq.sortVariable)
-                               || ("count".equals(freq.sortVariable)
-                                   && FacetRequest.SortDirection.desc == freq.sortDirection))) {
+    if (freq.mincount <= 1 && ("index".equals(initial_sort.sortVariable)
+                               || ("count".equals(initial_sort.sortVariable)
+                                   && FacetRequest.SortDirection.desc == initial_sort.sortDirection))) {
       // No-Op
     } else if (0 <= freq.overrequest) {
       // if user asked for an explicit amount of overrequesting,

@@ -241,9 +246,9 @@ abstract class FacetRequestSortedMerger<FacetRequestT extends FacetRequestSorted
       // todo: but we may need to filter.... simplify by always sorting?
       bucketList = buckets.values();
     } else {
-      // only sort once
+      // don't re-sort (the pre-refinement values) if our subclass already did it
       if (sortedBuckets == null) {
-        sortBuckets(); // todo: make sure this filters buckets as well
+        sortBuckets(initial_sort); // todo: make sure this filters buckets as well
       }
       bucketList = sortedBuckets;
     }

@@ -39,10 +39,10 @@ class DebugAgg extends AggValueSource {
   @Override
   public ValueSource parse(FunctionQParser fp) throws SyntaxError {
     parses.incrementAndGet();
-    final String what = fp.hasMoreArguments() ? fp.parseId() : "debug";
+    final String what = fp.hasMoreArguments() ? fp.parseId() : "wrap";

     switch (what) {
-      case "debug": return new DebugAgg(fp.getLocalParams());
+      case "wrap": return new DebugAgg(fp);
       case "numShards": return new DebugAggNumShards();
       default: /* No-Op */
     }

@@ -59,14 +59,17 @@ class DebugAgg extends AggValueSource {
    * wrap them in defaults from the request
    */
   public final SolrParams localParams;
-  public DebugAgg(SolrParams localParams) {
+  public final AggValueSource inner;
+
+  public DebugAgg(FunctionQParser fp) throws SyntaxError {
     super("debug");
-    this.localParams = localParams;
+    this.localParams = fp.getLocalParams();
+    this.inner = fp.hasMoreArguments() ? fp.parseAgg(FunctionQParser.FLAG_IS_AGG) : new CountAgg();
   }

   @Override
-  public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) {
-    return new Acc(fcontext, numDocs, numSlots);
+  public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException {
+    return new Acc(fcontext, numDocs, numSlots, inner.createSlotAcc(fcontext, numDocs, numSlots));
   }

   @Override

@@ -83,26 +86,35 @@ class DebugAgg extends AggValueSource {
     public static AtomicLong creates = new AtomicLong(0);
     public static AtomicLong resets = new AtomicLong(0);
     public static AtomicLong resizes = new AtomicLong(0);
+    public static AtomicLong collectDocs = new AtomicLong(0);
+    public static AtomicLong collectDocSets = new AtomicLong(0);
     public static Acc last;

-    public CountSlotAcc sub;
+    public SlotAcc sub;
     public int numDocs;
     public int numSlots;

-    public Acc(FacetContext fcontext, int numDocs, int numSlots) {
+    public Acc(FacetContext fcontext, int numDocs, int numSlots, SlotAcc sub) {
       super(fcontext);
       this.last = this;
       this.numDocs = numDocs;
       this.numSlots = numSlots;
+      this.sub = sub;
       creates.addAndGet(1);
-      sub = new CountSlotArrAcc(fcontext, numSlots);
-      // new RuntimeException("DEBUG Acc numSlots=" + numSlots).printStackTrace();
     }

     @Override
     public void collect(int doc, int slot, IntFunction<SlotContext> slotContext) throws IOException {
+      collectDocs.addAndGet(1);
       sub.collect(doc, slot, slotContext);
     }

+    @Override
+    public int collect(DocSet docs, int slot, IntFunction<SlotContext> slotContext) throws IOException {
+      collectDocSets.addAndGet(1);
+      return sub.collect(docs, slot, slotContext);
+    }
+
     @Override
     public int compare(int slotA, int slotB) {

@@ -137,11 +149,6 @@ class DebugAgg extends AggValueSource {
       sub.setNextReader(readerContext);
     }

-    @Override
-    public int collect(DocSet docs, int slot, IntFunction<SlotContext> slotContext) throws IOException {
-      return sub.collect(docs, slot, slotContext);
-    }
-
     @Override
     public void setValues(SimpleOrderedMap<Object> bucket, int slotNum) throws IOException {
       sub.key = this.key; // TODO: Blech... this should be fixed

@@ -156,7 +163,7 @@ class DebugAgg extends AggValueSource {

   @Override
   public FacetMerger createFacetMerger(Object prototype) {
-    return new FacetLongMerger();
+    return inner.createFacetMerger(prototype);
   }

   /** A simple agg that just returns the number of shards contributing to a bucket */
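
With this change the test-only debug() agg becomes a transparent wrapper around an arbitrary inner aggregation (defaulting to count when no inner agg is given), tracking collect() calls so the tests below can assert exactly how often stats were computed, e.g.: skg : 'debug(wrap,relatedness($fore,$back))'.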
@@ -426,6 +426,133 @@ public class TestJsonFacetRefinement extends SolrTestCaseHS {


   }

+  /**
+   * When <code>prelim_sort</code> is used, all 'top bucket' choices for refinement should still be based on
+   * it, not the <code>sort</code> param, so this test is just some sanity checks that the presence of
+   * these params doesn't break anything in the refinement / merging logic.
+   */
+  @Test
+  public void testRefinementMergingWithPrelimSort() throws Exception {
+
+    doTestRefine("{x : { type:terms, field:X, limit:2, refine:true, prelim_sort:'count desc', sort:'y asc'," +
+                 "      facet:{ y:'sum(y_i)' } } }",
+                 // shard0 response
+                 "{x: {buckets:[{val:x1, count:5, y:73}, {val:x2, count:3, y:13}], more:true } }",
+                 // shard1 response
+                 "{x: {buckets:[{val:x2, count:4, y:4}, {val:x3, count:2, y:22}], more:true } }",
+                 // shard0 expected refinement info
+                 null,
+                 // shard1 expected refinement info
+                 "=={x:{_l:[x1]}}");
+
+    // same test as above, but shard1 indicates it doesn't have any more results,
+    // so there shouldn't be any refinement
+    doTestRefine("{x : { type:terms, field:X, limit:2, refine:true, prelim_sort:'count desc', sort:'y asc'," +
+                 "      facet:{ y:'sum(y_i)' } } }",
+                 // shard0 response
+                 "{x: {buckets:[{val:x1, count:5, y:73}, {val:x2, count:3, y:13}], more:true } }",
+                 // shard1 response
+                 "{x: {buckets:[{val:x2, count:4, y:4}, {val:x3, count:2, y:22}] } }",
+                 // shard0 expected refinement info
+                 null,
+                 // shard1 expected refinement info
+                 null);
+  }
+
+  @Test
+  public void testPrelimSortingWithRefinement() throws Exception {
+    // NOTE: distributed prelim_sort testing in TestJsonFacets uses identical shards, so it never needs
+    // refinement; here we focus on the (re)sorting of different topN refined buckets
+    // after the prelim_sorting from diff shards
+
+    initServers();
+    final Client client = servers.getClient(random().nextInt());
+    client.queryDefaults().set("shards", servers.getShards(), "debugQuery", Boolean.toString(random().nextBoolean()));
+
+    List<SolrClient> clients = client.getClientProvider().all();
+    assertTrue(clients.size() >= 3); // we only use 2, but assert 3 to also test empty shard
+    final SolrClient c0 = clients.get(0);
+    final SolrClient c1 = clients.get(1);
+
+    client.deleteByQuery("*:*", null);
+    int id = 0;
+
+    // client 0 // shard1: A=1,B=1,C=2 ...
+    c0.add(sdoc("id", id++, "cat_s","A", "price_i","1"));
+    c0.add(sdoc("id", id++, "cat_s","B", "price_i","1"));
+    c0.add(sdoc("id", id++, "cat_s","C", "price_i","1"));
+    c0.add(sdoc("id", id++, "cat_s","C", "price_i","1"));
+    // ... X=3,Y=3
+    c0.add(sdoc("id", id++, "cat_s","X", "price_i","1"));
+    c0.add(sdoc("id", id++, "cat_s","X", "price_i","1"));
+    c0.add(sdoc("id", id++, "cat_s","X", "price_i","1"));
+    c0.add(sdoc("id", id++, "cat_s","Y", "price_i","1"));
+    c0.add(sdoc("id", id++, "cat_s","Y", "price_i","1"));
+    c0.add(sdoc("id", id++, "cat_s","Y", "price_i","1"));
+
+    // client 1 // shard2: X=1,Y=2,Z=2 ...
+    c1.add(sdoc("id", id++, "cat_s","X", "price_i","1"));
+    c1.add(sdoc("id", id++, "cat_s","Y", "price_i","1"));
+    c1.add(sdoc("id", id++, "cat_s","Y", "price_i","1"));
+    c1.add(sdoc("id", id++, "cat_s","Z", "price_i","1"));
+    c1.add(sdoc("id", id++, "cat_s","Z", "price_i","1"));
+    // ... C=4
+    c1.add(sdoc("id", id++, "cat_s","C", "price_i","1"));
+    c1.add(sdoc("id", id++, "cat_s","C", "price_i","1"));
+    c1.add(sdoc("id", id++, "cat_s","C", "price_i","1"));
+    c1.add(sdoc("id", id++, "cat_s","C", "price_i","1"));
+
+    // Whole Collection: A=1,B=1,Z=2,X=4,Y=5,C=6
+    client.commit();
+
+    // in both cases, neither C nor Z make the cut for the top3 buckets in phase#1 (due to tie breaker),
+    // so they aren't refined -- after refinement the re-sorting re-orders the buckets
+    client.testJQ(params("q", "*:*", "rows", "0", "json.facet", "{"
+                         + " cat_1 : { type:terms, field:cat_s, limit:3, overrequest:0"
+                         + "           , refine:true, prelim_sort:'count asc', sort:'index desc' }, "
+                         + " cat_2 : { type:terms, field:cat_s, limit:3, overrequest:0"
+                         + "           , refine:true, prelim_sort:'sum_p asc', sort:'count desc' "
+                         + "           , facet: { sum_p: 'sum(price_i)' } }"
+                         + "}")
+                  , "facets=={ count: "+id+","
+                  + "  cat_1:{ buckets:[ "
+                  + "    {val:X,count:4}," // index desc
+                  + "    {val:B,count:1},"
+                  + "    {val:A,count:1},"
+                  + "  ] },"
+                  + "  cat_2:{ buckets:[ "
+                  + "    {val:X,count:4,sum_p:4.0}," // count desc
+                  + "    {val:A,count:1,sum_p:1.0}," // index order tie break
+                  + "    {val:B,count:1,sum_p:1.0},"
+                  + "  ] }"
+                  + "}"
+                  );
+
+    // with some explicit overrefinement=2, we also refine C and Y, giving us those additional
+    // (fully populated) buckets to consider during re-sorting...
+    client.testJQ(params("q", "*:*", "rows", "0", "json.facet", "{"
+                         + " cat_1 : { type:terms, field:cat_s, limit:3, overrequest:0, overrefine:2"
+                         + "           , refine:true, prelim_sort:'count asc', sort:'index desc' }, "
+                         + " cat_2 : { type:terms, field:cat_s, limit:3, overrequest:0, overrefine:2"
+                         + "           , refine:true, prelim_sort:'sum_p asc', sort:'count desc' "
+                         + "           , facet: { sum_p: 'sum(price_i)' } }"
+                         + "}")
+                  , "facets=={ count: "+id+","
+                  + "  cat_1:{ buckets:[ "
+                  + "    {val:Y,count:5}," // index desc
+                  + "    {val:X,count:4},"
+                  + "    {val:C,count:6},"
+                  + "  ] },"
+                  + "  cat_2:{ buckets:[ "
+                  + "    {val:C,count:6,sum_p:6.0}," // count desc
+                  + "    {val:Y,count:5,sum_p:5.0},"
+                  + "    {val:X,count:4,sum_p:4.0},"
+                  + "  ] }"
+                  + "}"
+                  );
+  }
+

   @Test
   public void testSortedFacetRefinementPushingNonRefinedBucketBackIntoTopN() throws Exception {

@ -25,6 +25,7 @@ import java.util.List;
|
|||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Random;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
|
||||
import com.tdunning.math.stats.AVLTreeDigest;
|
||||
|
@ -50,7 +51,7 @@ import org.junit.Test;
|
|||
|
||||
@LuceneTestCase.SuppressCodecs({"Lucene3x","Lucene40","Lucene41","Lucene42","Lucene45","Appending"})
|
||||
public class TestJsonFacets extends SolrTestCaseHS {
|
||||
|
||||
|
||||
private static SolrInstances servers; // for distributed testing
|
||||
private static int origTableSize;
|
||||
private static FacetField.FacetMethod origDefaultFacetMethod;
|
||||
|
@ -89,13 +90,20 @@ public class TestJsonFacets extends SolrTestCaseHS {
|
|||
}
|
||||
}
|
||||
|
||||
// tip: when debugging a test, comment out the @ParametersFactory and edit the constructor to be no-arg
|
||||
// tip: when debugging failures, change this variable to DEFAULT_METHOD
|
||||
// (or if only one method is problematic, set to that explicitly)
|
||||
private static final FacetField.FacetMethod TEST_ONLY_ONE_FACET_METHOD
|
||||
= null; // FacetField.FacetMethod.DEFAULT_METHOD;
|
||||
|
||||
@ParametersFactory
|
||||
public static Iterable<Object[]> parameters() {
|
||||
if (null != TEST_ONLY_ONE_FACET_METHOD) {
|
||||
return Arrays.<Object[]>asList(new Object[] { TEST_ONLY_ONE_FACET_METHOD });
|
||||
}
|
||||
|
||||
// wrap each enum val in an Object[] and return as Iterable
|
||||
return () -> Arrays.stream(FacetField.FacetMethod.values())
|
||||
.map(it -> new Object[]{it}).iterator();
|
||||
.map(it -> new Object[]{it}).iterator();
|
||||
}
|
||||
|
||||
public TestJsonFacets(FacetField.FacetMethod defMethod) {
|
||||
|
@ -435,18 +443,28 @@ public class TestJsonFacets extends SolrTestCaseHS {
|
|||
+ " } }"
|
||||
);
|
||||
|
||||
// simple single level facet w/skg stat & sorting
|
||||
for (String sort : Arrays.asList("index asc", "skg desc")) {
|
||||
// the relatedness score of each of our cat_s values is (conviniently) also alphabetical order
|
||||
// so both of these sort options should produce identical output
|
||||
// and testinging "index" sort allows the randomized use of "stream" processor as default to be tested
|
||||
// simple single level facet w/skg stat & (re)sorting
|
||||
for (String sort : Arrays.asList("sort:'index asc'",
|
||||
"sort:'y desc'",
|
||||
"sort:'z desc'",
|
||||
"sort:'skg desc'",
|
||||
"prelim_sort:'count desc', sort:'index asc'",
|
||||
"prelim_sort:'count desc', sort:'y desc'",
|
||||
"prelim_sort:'count desc', sort:'z desc'",
|
||||
"prelim_sort:'count desc', sort:'skg desc'")) {
|
||||
// the relatedness score of each of our cat_s values is (conviniently) also alphabetical order,
|
||||
// (and the same order as 'sum(num_i) desc' & 'min(num_i) desc')
|
||||
//
|
||||
// So all of these re/sort options should produce identical output (since the num buckets is < limit)
|
||||
// - Testing "index" sort allows the randomized use of "stream" processor as default to be tested.
|
||||
// - Testing (re)sorts on other stats sanity checks code paths where relatedness() is a "defered" Agg
|
||||
assertJQ(req("q", "cat_s:[* TO *]", "rows", "0",
|
||||
"fore", "where_s:NY", "back", "*:*",
|
||||
"json.facet", ""
|
||||
+ "{x: { type: terms, field: 'cat_s', sort: '"+sort+"', "
|
||||
+ " facet: { skg: 'relatedness($fore,$back)' } } }")
|
||||
+ "{x: { type: terms, field: 'cat_s', "+sort+", "
|
||||
+ " facet: { skg: 'relatedness($fore,$back)', y:'sum(num_i)', z:'min(num_i)' } } }")
|
||||
, "facets=={count:5, x:{ buckets:["
|
||||
+ " { val:'A', count:2, "
|
||||
+ " { val:'A', count:2, y:5.0, z:2, "
|
||||
+ " skg : { relatedness: 0.00554, "
|
||||
//+ " foreground_count: 1, "
|
||||
//+ " foreground_size: 2, "
|
||||
|
@ -455,7 +473,7 @@ public class TestJsonFacets extends SolrTestCaseHS {
|
|||
+ " foreground_popularity: 0.16667,"
|
||||
+ " background_popularity: 0.33333, },"
|
||||
+ " }, "
|
||||
+ " { val:'B', count:3, "
|
||||
+ " { val:'B', count:3, y:-3.0, z:-5, "
|
||||
+ " skg : { relatedness: 0.0, " // perfectly average and uncorrolated
|
||||
//+ " foreground_count: 1, "
|
||||
//+ " foreground_size: 2, "
|
||||
|
@ -467,6 +485,37 @@ public class TestJsonFacets extends SolrTestCaseHS {
|
|||
);
|
||||
}
|
||||
|
||||
// trivial sanity check that we can (re)sort on SKG after pre-sorting on count...
|
||||
// ...and it's only computed for the top N buckets (based on our pre-sort)
|
||||
for (int overrequest : Arrays.asList(0, 1, 42)) {
|
||||
// based on our counts & relatedness values, the blackbox output should be the same for both
|
||||
// overrequest values ... only DebugAgg stats should change...
|
||||
DebugAgg.Acc.collectDocs.set(0);
|
||||
DebugAgg.Acc.collectDocSets.set(0);
|
||||
|
||||
assertJQ(req("q", "cat_s:[* TO *]", "rows", "0",
|
||||
"fore", "where_s:NJ", "back", "*:*",
|
||||
"json.facet", ""
|
||||
+ "{x: { type: terms, field: 'cat_s', prelim_sort: 'count desc', sort:'skg desc', "
|
||||
+ " limit: 1, overrequest: " + overrequest + ", "
|
||||
+ " facet: { skg: 'debug(wrap,relatedness($fore,$back))' } } }")
|
||||
, "facets=={count:5, x:{ buckets:["
|
||||
+ " { val:'B', count:3, "
|
||||
+ " skg : { relatedness: 0.00638, "
|
||||
//+ " foreground_count: 2, "
|
||||
//+ " foreground_size: 3, "
|
||||
//+ " background_count: 3, "
|
||||
//+ " background_size: 6,"
|
||||
+ " foreground_popularity: 0.33333,"
|
||||
+ " background_popularity: 0.5 },"
|
||||
+ " }, "
|
||||
+ " ] } } "
|
||||
);
|
||||
// at most 2 buckets, regardless of overrequest...
|
||||
assertEqualsAndReset(0 < overrequest ? 2 : 1, DebugAgg.Acc.collectDocSets);
|
||||
assertEqualsAndReset(0, DebugAgg.Acc.collectDocs);
|
||||
}
|
||||
|
||||
// SKG used in multiple nested facets
|
||||
//
|
||||
// we'll re-use these params in 2 requests, one will simulate a shard request
|
||||
|
@ -936,7 +985,6 @@ public class TestJsonFacets extends SolrTestCaseHS {
|
|||
}
|
||||
|
||||
public void doStats(Client client, ModifiableSolrParams p) throws Exception {
|
||||
|
||||
Map<String, List<String>> fieldLists = new HashMap<>();
|
||||
fieldLists.put("noexist", getAlternatives("noexist_s"));
|
||||
fieldLists.put("cat_s", getAlternatives("cat_s"));
|
||||
|
@ -1165,6 +1213,31 @@ public class TestJsonFacets extends SolrTestCaseHS {
|
|||
", f2:{ 'buckets':[{ val:'B', count:3, n1:-3.0}, { val:'A', count:2, n1:6.0 }]} }"
|
||||
);
|
||||
|
||||
// test trivial re-sorting by stats
|
||||
// (there are other more indepth tests of this in doTestPrelimSorting, but this let's us sanity check
|
||||
// small responses with multiple templatized params of diff real types)
|
||||
client.testJQ(params(p, "q", "*:*", "json.facet" // num_d
|
||||
, "{f1:{terms:{${terms} field:'${cat_s}', "
|
||||
+ " prelim_sort:'count desc', sort:'n1 desc', facet:{n1:'sum(${num_d})'} }},"
|
||||
+ " f2:{terms:{${terms} field:'${cat_s}', "
|
||||
+ " prelim_sort:'count asc', sort:'n1 asc', facet:{n1:'sum(${num_d})'} }} }"
|
||||
)
|
||||
, "facets=={ 'count':6 "
|
||||
+ ", f1:{ 'buckets':[{ val:'A', count:2, n1:6.0 }, { val:'B', count:3, n1:-3.0}]}"
|
||||
+ ", f2:{ 'buckets':[{ val:'B', count:3, n1:-3.0}, { val:'A', count:2, n1:6.0 }]} }"
|
||||
);
|
||||
client.testJQ(params(p, "q", "*:*", "json.facet" // num_i
|
||||
, "{f1:{terms:{${terms} field:'${cat_s}', "
|
||||
+ " prelim_sort:'count desc', sort:'n1 desc', facet:{n1:'sum(${num_i})'} }},"
|
||||
+ " f2:{terms:{${terms} field:'${cat_s}', "
|
||||
+ " prelim_sort:'count asc', sort:'n1 asc', facet:{n1:'sum(${num_i})'} }} }"
|
||||
)
|
||||
, "facets=={ 'count':6 "
|
||||
+ ", f1:{ 'buckets':[{ val:'A', count:2, n1:5.0 }, { val:'B', count:3, n1:-3.0}]}"
|
||||
+ ", f2:{ 'buckets':[{ val:'B', count:3, n1:-3.0}, { val:'A', count:2, n1:5.0 }]} }"
|
||||
);
|
||||
|
||||
|
||||
// test sorting by other stats and more than one facet
|
||||
client.testJQ(params(p, "q", "*:*"
|
||||
, "json.facet", "{f1:{terms:{${terms} field:'${cat_s}', sort:'n1 desc', facet:{n1:'sum(${num_d})', n2:'avg(${num_d})'} }}" +
|
||||
|
@ -2193,10 +2266,390 @@ public class TestJsonFacets extends SolrTestCaseHS {
|
|||
long refineParses = DebugAgg.parses.get() - startParses;
|
||||
assertEquals(noRefineParses, refineParses);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
public void testPrelimSortingSingleNode() throws Exception {
|
||||
doTestPrelimSortingSingleNode(false, false);
|
||||
}
|
||||
|
||||
public void testPrelimSortingSingleNodeExtraStat() throws Exception {
|
||||
doTestPrelimSortingSingleNode(true, false);
|
||||
}
|
||||
|
||||
public void testPrelimSortingSingleNodeExtraFacet() throws Exception {
|
||||
doTestPrelimSortingSingleNode(false, true);
|
||||
}
|
||||
|
||||
public void testPrelimSortingSingleNodeExtraStatAndFacet() throws Exception {
|
||||
doTestPrelimSortingSingleNode(true, true);
|
||||
}
|
||||
|
||||
/** @see #doTestPrelimSorting */
|
||||
public void doTestPrelimSortingSingleNode(final boolean extraAgg, final boolean extraSubFacet) throws Exception {
|
||||
// we're not using Client.localClient because it doesn't provide a SolrClient to
|
||||
// use in doTestPrelimSorting -- so instead we make a single node, and don't use any shards param...
|
||||
final SolrInstances nodes = new SolrInstances(1, "solrconfig-tlog.xml", "schema_latest.xml");
|
||||
try {
|
||||
final Client client = nodes.getClient(random().nextInt());
|
||||
client.queryDefaults().set("debugQuery", Boolean.toString(random().nextBoolean()) );
|
||||
doTestPrelimSorting(client, extraAgg, extraSubFacet);
|
||||
} finally {
|
||||
nodes.stop();
|
||||
}
|
||||
}
|
||||
|
||||
public void testPrelimSortingDistrib() throws Exception {
|
||||
doTestPrelimSortingDistrib(false, false);
|
||||
}
|
||||
|
||||
public void testPrelimSortingDistribExtraStat() throws Exception {
|
||||
doTestPrelimSortingDistrib(true, false);
|
||||
}
|
||||
|
||||
public void testPrelimSortingDistribExtraFacet() throws Exception {
|
||||
doTestPrelimSortingDistrib(false, true);
|
||||
}
|
||||
|
||||
public void testPrelimSortingDistribExtraStatAndFacet() throws Exception {
|
||||
doTestPrelimSortingDistrib(true, true);
|
||||
}
|
||||
|
||||
/** @see #doTestPrelimSorting */
|
||||
public void doTestPrelimSortingDistrib(final boolean extraAgg, final boolean extraSubFacet) throws Exception {
|
||||
// we only use 2 shards, but we also want to to sanity check code paths if one (additional) shard is empty
|
||||
final int totalShards = random().nextBoolean() ? 2 : 3;
|
||||
final SolrInstances nodes = new SolrInstances(totalShards, "solrconfig-tlog.xml", "schema_latest.xml");
|
||||
try {
|
||||
final Client client = nodes.getClient(random().nextInt());
|
||||
client.queryDefaults().set( "shards", nodes.getShards(),
|
||||
"debugQuery", Boolean.toString(random().nextBoolean()) );
|
||||
doTestPrelimSorting(client, extraAgg, extraSubFacet);
|
||||
} finally {
|
||||
nodes.stop();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper method that indexes a fixed set of docs to exactly <em>two</em> of the SolrClients
|
||||
* involved in the current Client such that each shard is identical for the purposes of simplified
|
||||
* doc/facet counting/assertions -- if there is only one SolrClient (Client.local) then it sends that
|
||||
* single shard twice as many docs so the counts/assertions will be consistent.
|
||||
*
|
||||
* Note: this test doesn't demonstrate practical uses of prelim_sort.
|
||||
* The scenerios it tests are actualy fairly absurd, but help to ensure that edge cases are covered.
|
||||
*
|
||||
* @param client client to use -- may be local or multishard
|
||||
* @param extraAgg if an extra aggregation function should be included, this hits slightly diff code paths
|
||||
* @param extraSubFacet if an extra sub facet should be included, this hits slightly diff code paths
|
||||
*/
|
||||
public void doTestPrelimSorting(final Client client,
|
||||
final boolean extraAgg,
|
||||
final boolean extraSubFacet) throws Exception {
|
||||
|
||||
client.deleteByQuery("*:*", null);
|
||||
|
||||
List<SolrClient> clients = client.getClientProvider().all();
|
||||
|
||||
// carefully craft two balanced shards (assuming we have at least two) and leave any other shards
|
||||
// empty to help check the code paths of some shards returning no buckets.
|
||||
//
|
||||
// if we are in a single node sitaution, these clients will be the same, and we'll have the same
|
||||
// total docs in our collection, but the numShardsWithData will be diff
|
||||
// (which will affect some assertions)
|
||||
final SolrClient shardA = clients.get(0);
|
||||
final SolrClient shardB = clients.get(clients.size()-1);
|
||||
final int numShardsWithData = (shardA == shardB) ? 1 : 2;
|
||||
|
||||
// for simplicity, each foo_s "term" exists on each shard in the same number of docs as it's numeric
|
||||
// value (so count should be double the term) and bar_i is always 1 per doc (so sum(bar_i)
|
||||
// should always be the same as count)
|
||||
int id = 0;
|
||||
for (int i = 1; i <= 20; i++) {
|
||||
for (int j = 1; j <= i; j++) {
|
||||
shardA.add(new SolrInputDocument("id", ""+(++id), "foo_s", "foo_" + i, "bar_i", "1"));
|
||||
shardB.add(new SolrInputDocument("id", ""+(++id), "foo_s", "foo_" + i, "bar_i", "1"));
|
||||
}
|
||||
}
|
||||
assertEquals(420, id); // sanity check
|
||||
client.commit();
|
||||
DebugAgg.Acc.collectDocs.set(0);
|
||||
DebugAgg.Acc.collectDocSets.set(0);
|
||||
|
||||
// NOTE: sorting by index can cause some optimizations when using type=enum|stream
|
||||
// that cause our stat to be collected differently, so we have to account for that when
|
||||
// looking at DebugAdd collect stats if/when the test framework picks those
|
||||
// ...BUT... this only affects cloud, for single node prelim_sort overrides streaming
|
||||
final boolean indexSortDebugAggFudge = ( 1 < numShardsWithData ) &&
|
||||
(FacetField.FacetMethod.DEFAULT_METHOD.equals(FacetField.FacetMethod.STREAM) ||
|
||||
FacetField.FacetMethod.DEFAULT_METHOD.equals(FacetField.FacetMethod.ENUM));
|
||||
|
||||
|
||||
final String common = "refine:true, type:field, field:'foo_s', facet: { "
|
||||
+ "x: 'debug(wrap,sum(bar_i))' "
|
||||
+ (extraAgg ? ", y:'min(bar_i)'" : "")
|
||||
+ (extraSubFacet ? ", z:{type:query, q:'bar_i:0'}" : "")
|
||||
+ "}";
|
||||
final String yz = (extraAgg ? "y:1, " : "") + (extraSubFacet ? "z:{count:0}, " : "");
|
||||
|
||||
    // really basic: top 5 by (prelim_sort) count, (re)sorted by a stat
    client.testJQ(params("q", "*:*", "rows", "0", "json.facet"
        , "{ foo_a:{ "+ common+", limit:5, overrequest:0, "
        + "          prelim_sort:'count desc', sort:'x asc' }"
        + "  foo_b:{ "+ common+", limit:5, overrequest:0, "
        + "          prelim_sort:'count asc', sort:'x desc' } }")
        , "facets=={ 'count':420, "
        + " 'foo_a':{ 'buckets':["
        + "   { val:foo_16, count:32, " + yz + "x:32.0},"
        + "   { val:foo_17, count:34, " + yz + "x:34.0},"
        + "   { val:foo_18, count:36, " + yz + "x:36.0},"
        + "   { val:foo_19, count:38, " + yz + "x:38.0},"
        + "   { val:foo_20, count:40, " + yz + "x:40.0},"
        + "] },"
        + " 'foo_b':{ 'buckets':["
        + "   { val:foo_5, count:10, " + yz + "x:10.0},"
        + "   { val:foo_4, count:8, " + yz + "x:8.0},"
        + "   { val:foo_3, count:6, " + yz + "x:6.0},"
        + "   { val:foo_2, count:4, " + yz + "x:4.0},"
        + "   { val:foo_1, count:2, " + yz + "x:2.0},"
        + "] },"
        + "}"
        );
    // (re)sorting should prevent 'sum(bar_i)' from being computed for every doc
    // only the chosen buckets should be collected (as a set) once per node...
    assertEqualsAndReset(0, DebugAgg.Acc.collectDocs);
    // 2 facets, 5 buckets each, on each shard
    assertEqualsAndReset(numShardsWithData * 2 * 5, DebugAgg.Acc.collectDocSets);

    { // same really basic top 5 by (prelim_sort) count, (re)sorted by a stat -- w/allBuckets:true
      // check code paths with and w/o allBuckets
      // NOTE: allBuckets includes stats, but not other sub-facets...
      final String aout = "allBuckets:{ count:420, "+ (extraAgg ? "y:1, " : "") + "x:420.0 }";
      client.testJQ(params("q", "*:*", "rows", "0", "json.facet"
          , "{ foo_a:{ " + common+", allBuckets:true, limit:5, overrequest:0, "
          + "          prelim_sort:'count desc', sort:'x asc' }"
          + "  foo_b:{ " + common+", allBuckets:true, limit:5, overrequest:0, "
          + "          prelim_sort:'count asc', sort:'x desc' } }")
          , "facets=={ 'count':420, "
          + " 'foo_a':{ " + aout + " 'buckets':["
          + "   { val:foo_16, count:32, " + yz + "x:32.0},"
          + "   { val:foo_17, count:34, " + yz + "x:34.0},"
          + "   { val:foo_18, count:36, " + yz + "x:36.0},"
          + "   { val:foo_19, count:38, " + yz + "x:38.0},"
          + "   { val:foo_20, count:40, " + yz + "x:40.0},"
          + "] },"
          + " 'foo_b':{ " + aout + " 'buckets':["
          + "   { val:foo_5, count:10, " + yz + "x:10.0},"
          + "   { val:foo_4, count:8, " + yz + "x:8.0},"
          + "   { val:foo_3, count:6, " + yz + "x:6.0},"
          + "   { val:foo_2, count:4, " + yz + "x:4.0},"
          + "   { val:foo_1, count:2, " + yz + "x:2.0},"
          + "] },"
          + "}"
          );
      // because of allBuckets, we collect every doc on every shard (x2 facets) in a single "all" slot...
      assertEqualsAndReset(2 * 420, DebugAgg.Acc.collectDocs);
      // ... in addition to collecting each of the chosen buckets (as sets) once per node...
      // 2 facets, 5 buckets each, on each shard
      assertEqualsAndReset(numShardsWithData * 2 * 5, DebugAgg.Acc.collectDocSets);
    }

    // pagination (with offset) should happen against the re-sorted list (up to the effective limit)
    client.testJQ(params("q", "*:*", "rows", "0", "json.facet"
        , "{ foo_a:{ "+common+", offset:2, limit:3, overrequest:0, "
        + "          prelim_sort:'count desc', sort:'x asc' }"
        + "  foo_b:{ "+common+", offset:2, limit:3, overrequest:0, "
        + "          prelim_sort:'count asc', sort:'x desc' } }")
        , "facets=={ 'count':420, "
        + " 'foo_a':{ 'buckets':["
        + "   { val:foo_18, count:36, " + yz + "x:36.0},"
        + "   { val:foo_19, count:38, " + yz + "x:38.0},"
        + "   { val:foo_20, count:40, " + yz + "x:40.0},"
        + "] },"
        + " 'foo_b':{ 'buckets':["
        + "   { val:foo_3, count:6, " + yz + "x:6.0},"
        + "   { val:foo_2, count:4, " + yz + "x:4.0},"
        + "   { val:foo_1, count:2, " + yz + "x:2.0},"
        + "] },"
        + "}"
        );
    assertEqualsAndReset(0, DebugAgg.Acc.collectDocs);
    // 2 facets, 5 buckets (including offset), on each shard
    assertEqualsAndReset(numShardsWithData * 2 * 5, DebugAgg.Acc.collectDocSets);

    // when overrequesting is used, the full list of candidate buckets should be considered
    client.testJQ(params("q", "*:*", "rows", "0", "json.facet"
        , "{ foo_a:{ "+common+", limit:5, overrequest:5, "
        + "          prelim_sort:'count desc', sort:'x asc' }"
        + "  foo_b:{ "+common+", limit:5, overrequest:5, "
        + "          prelim_sort:'count asc', sort:'x desc' } }")
        , "facets=={ 'count':420, "
        + " 'foo_a':{ 'buckets':["
        + "   { val:foo_11, count:22, " + yz + "x:22.0},"
        + "   { val:foo_12, count:24, " + yz + "x:24.0},"
        + "   { val:foo_13, count:26, " + yz + "x:26.0},"
        + "   { val:foo_14, count:28, " + yz + "x:28.0},"
        + "   { val:foo_15, count:30, " + yz + "x:30.0},"
        + "] },"
        + " 'foo_b':{ 'buckets':["
        + "   { val:foo_10, count:20, " + yz + "x:20.0},"
        + "   { val:foo_9, count:18, " + yz + "x:18.0},"
        + "   { val:foo_8, count:16, " + yz + "x:16.0},"
        + "   { val:foo_7, count:14, " + yz + "x:14.0},"
        + "   { val:foo_6, count:12, " + yz + "x:12.0},"
        + "] },"
        + "}"
        );
    assertEqualsAndReset(0, DebugAgg.Acc.collectDocs);
    // 2 facets, 10 buckets (including overrequest), on each shard
    assertEqualsAndReset(numShardsWithData * 2 * 10, DebugAgg.Acc.collectDocSets);

    { // for an (effectively) unlimited facet, from the black box perspective of the client,
      // preliminary sorting should be completely ignored...
      final StringBuilder expected = new StringBuilder("facets=={ 'count':420, 'foo_a':{ 'buckets':[\n");
      for (int i = 20; 0 < i; i--) {
        final int x = i * 2;
        expected.append("{ val:foo_"+i+", count:"+x+", " + yz + "x:"+x+".0},\n");
      }
      expected.append("] } }");
      for (int limit : Arrays.asList(-1, 100000)) {
        for (String sortOpts : Arrays.asList("sort:'x desc'",
                                             "prelim_sort:'count asc', sort:'x desc'",
                                             "prelim_sort:'index asc', sort:'x desc'")) {
          final String snippet = "limit: " + limit + ", " + sortOpts;
          client.testJQ(params("q", "*:*", "rows", "0", "json.facet"
              , "{ foo_a:{ "+common+", " + snippet + "}}")
              , expected.toString());

          // the only difference from a white box perspective is when/if we are
          // optimized to use the sort SlotAcc during collection instead of the prelim_sort SlotAcc...
          // (i.e.: sub facet preventing single pass (re)sort in single node mode)
          if (((0 < limit || extraSubFacet) && snippet.contains("prelim_sort")) &&
              ! (indexSortDebugAggFudge && snippet.contains("index asc"))) {
            // bypass single pass collection, do everything as sets...
            assertEqualsAndReset(snippet, numShardsWithData * 20, DebugAgg.Acc.collectDocSets);
            assertEqualsAndReset(snippet, 0, DebugAgg.Acc.collectDocs);
          } else { // simple sort on x, or optimized single pass (re)sort, or indexSortDebugAggFudge
            // no sets should have been (post) collected for our stat
            assertEqualsAndReset(snippet, 0, DebugAgg.Acc.collectDocSets);
            // every doc should be collected...
            assertEqualsAndReset(snippet, 420, DebugAgg.Acc.collectDocs);
          }
        }
      }
    }

    // test all permutations of (prelim_sort | sort) on (index | count | stat) since there are
    // custom sort codepaths for index & count that work differently than general stats
    //
    // NOTE: there's very little value in re-sorting by count/index after a prelim_sort on something more complex,
    // typically better to just ignore the prelim_sort, but we're testing it for completeness
    // (and because you *might* want to prelim_sort by some function, for the purpose of "sampling" the
    // top results and then (re)sorting by count/index)
    for (String numSort : Arrays.asList("count", "x")) { // equivalent ordering
      client.testJQ(params("q", "*:*", "rows", "0", "json.facet"
          , "{ foo_a:{ "+common+", limit:10, overrequest:0, "
          + "          prelim_sort:'"+numSort+" asc', sort:'index desc' }"
          + "  foo_b:{ "+common+", limit:10, overrequest:0, "
          + "          prelim_sort:'index asc', sort:'"+numSort+" desc' } }")
          , "facets=={ 'count':420, "
          + " 'foo_a':{ 'buckets':["
          + "   { val:foo_9, count:18, " + yz + "x:18.0},"
          + "   { val:foo_8, count:16, " + yz + "x:16.0},"
          + "   { val:foo_7, count:14, " + yz + "x:14.0},"
          + "   { val:foo_6, count:12, " + yz + "x:12.0},"
          + "   { val:foo_5, count:10, " + yz + "x:10.0},"
          + "   { val:foo_4, count:8, " + yz + "x:8.0},"
          + "   { val:foo_3, count:6, " + yz + "x:6.0},"
          + "   { val:foo_2, count:4, " + yz + "x:4.0},"
          + "   { val:foo_10, count:20, " + yz + "x:20.0},"
          + "   { val:foo_1, count:2, " + yz + "x:2.0},"
          + "] },"
          + " 'foo_b':{ 'buckets':["
          + "   { val:foo_18, count:36, " + yz + "x:36.0},"
          + "   { val:foo_17, count:34, " + yz + "x:34.0},"
          + "   { val:foo_16, count:32, " + yz + "x:32.0},"
          + "   { val:foo_15, count:30, " + yz + "x:30.0},"
          + "   { val:foo_14, count:28, " + yz + "x:28.0},"
          + "   { val:foo_13, count:26, " + yz + "x:26.0},"
          + "   { val:foo_12, count:24, " + yz + "x:24.0},"
          + "   { val:foo_11, count:22, " + yz + "x:22.0},"
          + "   { val:foo_10, count:20, " + yz + "x:20.0},"
          + "   { val:foo_1, count:2, " + yz + "x:2.0},"
          + "] },"
          + "}"
          );
      // since these behave differently, defer DebugAgg counter checks until all are done...
    }
    // These 3 permutations defer the computation of x as docsets,
    // so it's 3 x (10 buckets on each shard) (but 0 direct docs)
    //    prelim_sort:count, sort:index
    //    prelim_sort:index, sort:x
    //    prelim_sort:index, sort:count
    // ...except when streaming, prelim_sort:index does no docsets.
    assertEqualsAndReset((indexSortDebugAggFudge ? 1 : 3) * numShardsWithData * 10,
                         DebugAgg.Acc.collectDocSets);
    // This is the only situation that should (always) result in every doc being collected (but 0 docsets)...
    //    prelim_sort:x, sort:index
    // ...but the (2) prelim_sort:index streaming situations above will also cause all the docs in the first
    // 10+1 buckets to be collected (enum checks limit+1 to know if there are "more")...
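    // (per shard, the first 10+1 buckets in index order are foo_1, foo_10..foo_19,
    //  holding 1, 10..19 docs respectively -- hence the sum in the fudge term below)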
    assertEqualsAndReset(420 + (indexSortDebugAggFudge ?
                                2 * numShardsWithData * (1+10+11+12+13+14+15+16+17+18+19) : 0),
                         DebugAgg.Acc.collectDocs);

    // sanity check of prelim_sorting in a sub facet
    client.testJQ(params("q", "*:*", "rows", "0", "json.facet"
        , "{ bar:{ type:query, query:'foo_s:[foo_10 TO foo_19]', facet: {"
        + "    foo:{ "+ common+", limit:5, overrequest:0, "
        + "          prelim_sort:'count desc', sort:'x asc' } } } }")
        , "facets=={ 'count':420, "
        + " 'bar':{ 'count':290, "
        + "   'foo':{ 'buckets':["
        + "     { val:foo_15, count:30, " + yz + "x:30.0},"
        + "     { val:foo_16, count:32, " + yz + "x:32.0},"
        + "     { val:foo_17, count:34, " + yz + "x:34.0},"
        + "     { val:foo_18, count:36, " + yz + "x:36.0},"
        + "     { val:foo_19, count:38, " + yz + "x:38.0},"
        + "   ] },"
        + " },"
        + "}"
        );
    // the prelim_sort should prevent 'sum(bar_i)' from being computed for every doc
    // only the chosen buckets should be collected (as a set) once per node...
    assertEqualsAndReset(0, DebugAgg.Acc.collectDocs);
    // 5 buckets, on each shard
    assertEqualsAndReset(numShardsWithData * 5, DebugAgg.Acc.collectDocSets);

    { // sanity check how deferred stats are handled

      // here we'll prelim_sort & sort on things that are both "not x" and using the debug() counters
      // (wrapping x) to assert that 'x' is correctly deferred and only collected for the final top buckets
      final List<String> sorts = new ArrayList<String>(Arrays.asList("index asc", "count asc"));
      if (extraAgg) {
        sorts.add("y asc"); // same for every bucket, but index order tie breaker should kick in
      }
      for (String s : sorts) {
        client.testJQ(params("q", "*:*", "rows", "0", "json.facet"
            , "{ foo:{ "+ common+", limit:5, overrequest:0, "
            + "        prelim_sort:'count desc', sort:'"+s+"' } }")
            , "facets=={ 'count':420, "
            + " 'foo':{ 'buckets':["
            + "   { val:foo_16, count:32, " + yz + "x:32.0},"
            + "   { val:foo_17, count:34, " + yz + "x:34.0},"
            + "   { val:foo_18, count:36, " + yz + "x:36.0},"
            + "   { val:foo_19, count:38, " + yz + "x:38.0},"
            + "   { val:foo_20, count:40, " + yz + "x:40.0},"
            + "] } }"
            );
        // Neither prelim_sort nor sort should need 'sum(bar_i)' to be computed for every doc
        // only the chosen buckets should be collected (as a set) once per node...
        assertEqualsAndReset(0, DebugAgg.Acc.collectDocs);
        // 5 buckets, on each shard
        assertEqualsAndReset(numShardsWithData * 5, DebugAgg.Acc.collectDocSets);
      }
    }
  }


  @Test
  public void testOverrequest() throws Exception {
    initServers();

@@ -2796,4 +3249,16 @@ public class TestJsonFacets extends SolrTestCaseHS {
    hll.addRaw(987654321);
  }


  /** Atomically asserts that the actual AtomicLong value matches the expected value, then resets it to 0 */
  private static final void assertEqualsAndReset(String msg, long expected, AtomicLong actual) {
    final long current = actual.getAndSet(0);
    assertEquals(msg, expected, current);
  }
  /** Atomically asserts that the actual AtomicLong value matches the expected value, then resets it to 0 */
  private static final void assertEqualsAndReset(long expected, AtomicLong actual) {
    final long current = actual.getAndSet(0);
    assertEquals(expected, current);
  }

}

@@ -24,6 +24,7 @@ import org.apache.lucene.queries.function.valuesource.IntFieldSource;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.request.SolrQueryRequest;
import org.junit.BeforeClass;
import static org.hamcrest.CoreMatchers.not;
import static org.hamcrest.core.IsInstanceOf.instanceOf;

import org.noggit.ObjectBuilder;
@@ -36,6 +37,20 @@ public class TestJsonFacetsStatsParsing extends SolrTestCaseJ4 {
    initCore("solrconfig-tlog.xml","schema15.xml");
  }

  public void testSortEquality() throws Exception {
    assertEquals(new FacetRequest.FacetSort("count", FacetRequest.SortDirection.desc),
                 FacetRequest.FacetSort.COUNT_DESC);
    assertEquals(new FacetRequest.FacetSort("index", FacetRequest.SortDirection.asc),
                 FacetRequest.FacetSort.INDEX_ASC);
    assertEquals(new FacetRequest.FacetSort("foo", FacetRequest.SortDirection.asc),
                 new FacetRequest.FacetSort("foo", FacetRequest.SortDirection.asc));
    // negative assertions...
    assertThat(new FacetRequest.FacetSort("foo", FacetRequest.SortDirection.desc),
               not(new FacetRequest.FacetSort("foo", FacetRequest.SortDirection.asc)));
    assertThat(new FacetRequest.FacetSort("bar", FacetRequest.SortDirection.desc),
               not(new FacetRequest.FacetSort("foo", FacetRequest.SortDirection.desc)));
  }

  public void testEquality() throws IOException {
    try (SolrQueryRequest req = req("custom_req_param","foo_i",
                                    "overridden_param","xxxxx_i")) {

@@ -218,7 +218,9 @@ json.facet={
|field |The field name to facet over.
|offset |Used for paging, this skips the first N buckets. Defaults to 0.
|limit |Limits the number of buckets returned. Defaults to 10.
|sort |Specifies how to sort the buckets produced. “count” specifies document count, “index” sorts by the index (natural) order of the bucket value. One can also sort by any <<json-facet-api.adoc#aggregation-functions,facet function / statistic>> that occurs in the bucket. The default is “count desc”. This parameter may also be specified in JSON like `sort:{count:desc}`. The sort order may either be “asc” or “desc”
|sort |Specifies how to sort the buckets produced.

“count” specifies document count, “index” sorts by the index (natural) order of the bucket value. One can also sort by any <<json-facet-api.adoc#aggregation-functions,facet function / statistic>> that occurs in the bucket. The default is “count desc”. This parameter may also be specified in JSON like `sort:{count:desc}`. The sort order may either be “asc” or “desc”
|overrequest a|
Number of buckets beyond the `limit` to internally request from shards during a distributed search.

@@ -248,6 +250,7 @@ This parameter indicates the facet algorithm to use:
* "stream" Presently equivalent to "enum"
* "smart" Pick the best method for the field type (this is the default)

|prelim_sort |An optional parameter for specifying an approximation of the final `sort` to use during initial collection of top buckets when the <<json-facet-api.adoc#sorting-facets-by-nested-functions,`sort` param is very costly>>.
|===

== Query Facet

@@ -532,13 +535,13 @@ By default "top authors" is defined by simple document count descending, but we

=== Sorting Facets By Nested Functions

The default sort for a field or terms facet is by bucket count descending. We can optionally sort ascending or descending by any facet function that appears in each bucket.
The default sort for a field or terms facet is by bucket count descending. We can optionally `sort` ascending or descending by any facet function that appears in each bucket.

[source,java]
----
{
  categories:{
    type : terms     // terms facet creates a bucket for each indexed term in the field
    type : terms,    // terms facet creates a bucket for each indexed term in the field
    field : cat,
    sort : "x desc",   // can also use sort:{x:desc}
    facet : {

@@ -549,6 +552,28 @@ The default sort for a field or terms facet is by bucket count descending. We can
  }
----

In some situations the desired `sort` may be an aggregation function that is very costly to compute for every bucket. A `prelim_sort` option can be used to specify an approximation of the `sort`, for initially ranking the buckets to determine the top candidates (based on the `limit` and `overrequest`). Only after the top candidate buckets have been refined, will the actual `sort` be used.

[source,java]
----
{
  categories:{
    type : terms,
    field : cat,
    refine: true,
    limit: 10,
    overrequest: 100,
    prelim_sort: "sales_rank desc",
    sort : "prod_quality desc",
    facet : {
      prod_quality : "avg(div(prod(rating,sales_rank),prod(num_returns,price)))",
      sales_rank : "sum(sales_rank)"
    }
  }
}
----
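
In this example, the relatively cheap `sum(sales_rank)` aggregation is used to rank the candidate buckets during initial collection (the top 10 plus up to 100 overrequested buckets), and the much more expensive `prod_quality` function is only computed for the final top buckets once they have been refined.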


== Changing the Domain

As discussed above, facets compute buckets or statistics based on a "domain" which is typically implicit:

@@ -805,6 +830,11 @@ When using the extended `type:func` syntax for specifying a `relatedness()` aggregation

This can be particularly useful when using a descending sorting on `relatedness()` with foreground and background queries that are disjoint, to ensure the "top buckets" are all relevant to both sets.

[TIP]
====
When sorting on `relatedness(...)`, requests can be processed much more quickly by adding a `prelim_sort: "count desc"` option. Increasing the `overrequest` can help improve the accuracy of the top buckets.
====
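
For example, a request along these lines (a hypothetical sketch -- the `body_text` field and the `fore`/`back` request parameters defining the foreground and background queries are assumptions) collects buckets cheaply by `count` and only computes the costly `relatedness()` for the refined top candidates:

[source,java]
----
{
  top_terms : {
    type : terms,
    field : body_text,
    refine: true,
    limit: 10,
    overrequest: 100,
    prelim_sort: "count desc",   // cheap approximation used during collection
    sort : "r desc",             // relatedness computed only for top candidate buckets
    facet : {
      r : "relatedness($fore,$back)"
    }
  }
}
----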

=== Semantic Knowledge Graph Example

.Sample Documents