SOLR-12839: JSON 'terms' Faceting now supports a 'prelim_sort' option to use when initially selecting the top ranking buckets, prior to the final 'sort' option used after refinement.

This commit is contained in:
Chris Hostetter 2018-11-30 15:49:06 -07:00
parent 5c4ab188eb
commit 5dc988f5ee
12 changed files with 1039 additions and 140 deletions
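For context, a minimal sketch of the new request syntax (field and stat names here are hypothetical, mirroring the ones used in the tests below): candidate buckets are first ranked by the cheap prelim_sort, and only the refined top buckets have the sort stat computed before the final ordering is applied.

final String jsonFacet
    = "{ cats: { type:terms, field:'cat_s', limit:5, refine:true, "  // hypothetical field/facet names
    + "          prelim_sort:'count desc', "                         // cheap initial ranking of candidate buckets
    + "          sort:'sum_p asc', "                                 // final ordering, computed only for the refined top buckets
    + "          facet: { sum_p: 'sum(price_i)' } } }";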

View File

@ -130,7 +130,8 @@ Upgrade Notes
New Features
----------------------
(No Changes)
* SOLR-12839: JSON 'terms' Faceting now supports a 'prelim_sort' option to use when initially selecting
the top ranking buckets, prior to the final 'sort' option used after refinement. (hossman)
Bug Fixes
----------------------

View File

@ -41,8 +41,18 @@ abstract class FacetRequestSorted extends FacetRequest {
*/
int overrefine = -1;
long mincount;
String sortVariable;
SortDirection sortDirection;
/**
* The basic sorting to do on buckets, defaults to {@link FacetRequest.FacetSort#COUNT_DESC}
* @see #prelim_sort
*/
FacetSort sort;
/**
* An optional "Pre-Sort" that defaults to null.
* If specified, then the <code>prelim_sort</code> is used as an optimization in place of {@link #sort}
* during collection, and the full {@link #sort} values are only computed for the top candidate buckets
* (after refinement)
*/
FacetSort prelim_sort;
RefineMethod refine; // null, NONE, or SIMPLE
@Override
@ -137,8 +147,15 @@ public class FacetField extends FacetRequestSorted {
if (method == FacetMethod.ENUM) {// at the moment these two are the same
method = FacetMethod.STREAM;
}
if (method == FacetMethod.STREAM && sf.indexed() &&
"index".equals(sortVariable) && sortDirection == SortDirection.asc && !ft.isPointField()) {
if (method == FacetMethod.STREAM && sf.indexed() && !ft.isPointField() &&
// whether we can use stream processing depends on whether this is a shard request, whether
// re-sorting has been requested, and whether the effective sort during collection is "index asc"
( fcontext.isShard()
// for a shard request, the effective per-shard sort must be index asc
? FacetSort.INDEX_ASC.equals(null == prelim_sort ? sort : prelim_sort)
// for a non-shard request, we can only use streaming if there is no pre-sorting
: (null == prelim_sort && FacetSort.INDEX_ASC.equals( sort ) ) ) ) {
return new FacetFieldProcessorByEnumTermsStream(fcontext, this, sf);
}
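Restated as a hedged sketch (the helper below is hypothetical, not part of this commit): the sort that matters for the streaming check is whichever sort actually drives collection.

  // hypothetical helper, for illustration only: the sort that drives collection
  static FacetRequest.FacetSort effectiveCollectionSort(FacetRequest.FacetSort sort,
                                                        FacetRequest.FacetSort prelimSort) {
    return (null == prelimSort) ? sort : prelimSort;
  }
  // shard request:     streaming requires INDEX_ASC.equals(effectiveCollectionSort(sort, prelimSort))
  // non-shard request: streaming additionally requires prelimSort == null (no re-sorting afterwards)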

View File

@ -102,7 +102,7 @@ public class FacetFieldMerger extends FacetRequestSortedMerger<FacetField> {
result.add("numBuckets", ((Number)numBuckets.getMergedResult()).longValue());
}
sortBuckets();
sortBuckets(freq.sort);
long first = freq.offset;
long end = freq.limit >=0 ? first + (int) freq.limit : Integer.MAX_VALUE;

View File

@ -18,8 +18,10 @@
package org.apache.solr.search.facet;
import java.io.IOException;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.LinkedHashMap;
@ -48,8 +50,11 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
SchemaField sf;
SlotAcc indexOrderAcc;
int effectiveMincount;
Map<String,AggValueSource> deferredAggs; // null if none
final boolean singlePassSlotAccCollection;
final FacetRequest.FacetSort sort; // never null (may be the user's requested sort, or the prelim_sort)
final FacetRequest.FacetSort resort; // typically null (unless the user specified a prelim_sort)
final Map<String,AggValueSource> deferredAggs = new HashMap<String,AggValueSource>();
// TODO: push any of this down to base class?
@ -67,6 +72,37 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
super(fcontext, freq);
this.sf = sf;
this.effectiveMincount = (int)(fcontext.isShard() ? Math.min(1 , freq.mincount) : freq.mincount);
this.singlePassSlotAccCollection = (freq.limit == -1 && freq.subFacets.size() == 0);
if ( null == freq.prelim_sort ) {
// If the user has not specified any preliminary sort, then things are very simple.
// Just use the "sort" as is w/o needing any re-sorting
this.sort = freq.sort;
this.resort = null;
} else {
assert null != freq.prelim_sort;
if ( fcontext.isShard() ) {
// for a shard request, we can ignore the user's requested "sort" and focus solely on the prelim_sort
// the merger will worry about the final sorting -- we don't need to resort anything...
this.sort = freq.prelim_sort;
this.resort = null;
} else { // non shard...
if ( singlePassSlotAccCollection ) { // special case situation...
// when we can do a single pass SlotAcc collection on non-shard request, there is
// no point re-sorting. Ignore the freq.prelim_sort and use the freq.sort option as is...
this.sort = freq.sort;
this.resort = null;
} else {
// for a non-shard request, we will use the prelim_sort as our initial sort option if it exists
// then later we will re-sort on the final desired sort...
this.sort = freq.prelim_sort;
this.resort = freq.sort;
}
}
}
assert null != this.sort;
}
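In summary, the branches above reduce to the following assignments (a restatement sketch, not additional behavior):

  // prelim_sort == null                     -> sort = freq.sort,        resort = null
  // prelim_sort != null, shard request      -> sort = freq.prelim_sort, resort = null      (merger applies the final sort)
  // prelim_sort != null, non-shard, 1-pass  -> sort = freq.sort,        resort = null      (nothing gained by pre-sorting)
  // prelim_sort != null, non-shard, 2-pass  -> sort = freq.prelim_sort, resort = freq.sort (re-sort the top buckets later)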
/** This is used to create accs for second phase (or to create accs for all aggs) */
@ -86,17 +122,7 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
// reuse these accs, but reset them first and resize since size could be different
for (SlotAcc acc : accs) {
acc.reset();
acc.resize(new SlotAcc.Resizer() {
@Override
public int getNewSize() {
return slotCount;
}
@Override
public int getNewSlot(int oldSlot) {
return 0;
}
});
acc.resize(new FlatteningResizer(slotCount));
}
return;
} else {
@ -121,33 +147,47 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
}
}
void createCollectAcc(int numDocs, int numSlots) throws IOException {
accMap = new LinkedHashMap<>();
// we always count...
// allow a subclass to set a custom counter.
if (countAcc == null) {
countAcc = new CountSlotArrAcc(fcontext, numSlots);
}
if ("count".equals(freq.sortVariable)) {
sortAcc = countAcc;
deferredAggs = freq.getFacetStats();
} else if ("index".equals(freq.sortVariable)) {
/**
* Simple helper for checking if a {@link FacetRequest.FacetSort} is on "count" or "index" and picking
* the existing SlotAcc
* @return an existing SlotAcc for sorting, else null if it should be built from the Aggs
*/
private SlotAcc getTrivialSortingSlotAcc(FacetRequest.FacetSort fsort) {
if ("count".equals(fsort.sortVariable)) {
assert null != countAcc;
return countAcc;
} else if ("index".equals(fsort.sortVariable)) {
// allow subclass to set indexOrderAcc first
if (indexOrderAcc == null) {
// This sorting accumulator just goes by the slot number, so does not need to be collected
// and hence does not need to find its way into the accMap or accs array.
indexOrderAcc = new SortSlotAcc(fcontext);
}
sortAcc = indexOrderAcc;
deferredAggs = freq.getFacetStats();
return indexOrderAcc;
}
return null;
}
void createCollectAcc(int numDocs, int numSlots) throws IOException {
accMap = new LinkedHashMap<>();
// start with the assumption that we're going to defer the computation of all stats
deferredAggs.putAll(freq.getFacetStats());
// we always count...
// allow a subclass to set a custom counter.
if (countAcc == null) {
countAcc = new CountSlotArrAcc(fcontext, numSlots);
}
// If we are going to return all buckets and if there are no subfacets (that would need a domain), then don't defer
// any aggregation calculations to a second phase. This way we can avoid calculating domains for each bucket, which
// can be expensive.
if (freq.limit == -1 && freq.subFacets.size() == 0) {
sortAcc = getTrivialSortingSlotAcc(this.sort);
if (this.singlePassSlotAccCollection) {
// If we are going to return all buckets, and if there are no subfacets (that would need a domain),
// then don't defer any aggregation calculations to a second phase.
// This way we can avoid calculating domains for each bucket, which can be expensive.
// TODO: BEGIN: why can't we just call createAccs here ?
accs = new SlotAcc[ freq.getFacetStats().size() ];
int otherAccIdx = 0;
for (Map.Entry<String,AggValueSource> entry : freq.getFacetStats().entrySet()) {
@ -157,6 +197,7 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
accMap.put(acc.key, acc);
accs[otherAccIdx++] = acc;
}
// TODO: END: why can't we just call createAccs here ?
if (accs.length == 1) {
collectAcc = accs[0];
} else {
@ -164,26 +205,21 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
}
if (sortAcc == null) {
sortAcc = accMap.get(freq.sortVariable);
sortAcc = accMap.get(sort.sortVariable);
assert sortAcc != null;
}
deferredAggs = null;
deferredAggs.clear();
}
if (sortAcc == null) {
AggValueSource sortAgg = freq.getFacetStats().get(freq.sortVariable);
AggValueSource sortAgg = freq.getFacetStats().get(sort.sortVariable);
if (sortAgg != null) {
collectAcc = sortAgg.createSlotAcc(fcontext, numDocs, numSlots);
collectAcc.key = freq.sortVariable; // TODO: improve this
collectAcc.key = sort.sortVariable; // TODO: improve this
}
sortAcc = collectAcc;
deferredAggs = new HashMap<>(freq.getFacetStats());
deferredAggs.remove(freq.sortVariable);
}
if (deferredAggs == null || deferredAggs.size() == 0) {
deferredAggs = null;
deferredAggs.remove(sort.sortVariable);
}
boolean needOtherAccs = freq.allBuckets; // TODO: use for missing too...
@ -207,7 +243,7 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
return;
}
int numDeferred = deferredAggs == null ? 0 : deferredAggs.size();
final int numDeferred = deferredAggs.size();
if (numDeferred <= 0) return;
otherAccs = new SlotAcc[ numDeferred ];
@ -267,11 +303,13 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
} else {
effectiveLimit += freq.overrequest;
}
} else if (null != resort && 0 < freq.overrequest) {
// in non-shard situations, if we have a 'resort' we check for explicit overrequest > 0
effectiveLimit += freq.overrequest;
}
}
final int sortMul = freq.sortDirection.getMultiplier();
final int sortMul = sort.sortDirection.getMultiplier();
int maxTopVals = (int) (effectiveLimit >= 0 ? Math.min(freq.offset + effectiveLimit, Integer.MAX_VALUE - 1) : Integer.MAX_VALUE - 1);
maxTopVals = Math.min(maxTopVals, slotCardinality);
@ -358,31 +396,53 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
// moved missing fillBucket after we fill facet since it will reset all the accumulators.
}
// if we are deep paging, we don't have to order the highest "offset" counts.
int collectCount = Math.max(0, queue.size() - off);
assert collectCount <= maxTopVals;
int[] sortedSlots = new int[collectCount];
for (int i = collectCount - 1; i >= 0; i--) {
sortedSlots[i] = queue.pop().slot;
final boolean needFilter = (!deferredAggs.isEmpty()) || freq.getSubFacets().size() > 0;
if (needFilter) {
createOtherAccs(-1, 1);
}
ArrayList<SimpleOrderedMap> bucketList = new ArrayList<>(collectCount);
res.add("buckets", bucketList);
// if we are deep paging, we don't have to order the highest "offset" counts...
// ...unless we need to resort.
int collectCount = Math.max(0, queue.size() - (null == this.resort ? off : 0));
//
assert collectCount <= maxTopVals;
Slot[] sortedSlots = new Slot[collectCount];
for (int i = collectCount - 1; i >= 0; i--) {
Slot slot = sortedSlots[i] = queue.pop();
// At this point we know we're either returning this Slot as a Bucket, or resorting it,
// so definitely fill in the bucket value -- we'll need it either way
slot.bucketVal = bucketValFromSlotNumFunc.apply(slot.slot);
if (needFilter || null != this.resort) {
slot.bucketFilter = makeBucketQuery(fieldQueryValFunc.apply(slot.bucketVal));
}
}
final SlotAcc resortAccForFill = resortSlots(sortedSlots); // No-Op if not needed
if (null != this.resort) {
// now that we've completely resorted, throw away extra docs from possible offset/overrequest...
final int endOffset = (int)Math.min((long) sortedSlots.length,
// NOTE: freq.limit is long, so no risk of overflow here
off + (freq.limit < 0 ? Integer.MAX_VALUE : freq.limit));
if (0 < off || endOffset < sortedSlots.length) {
sortedSlots = Arrays.copyOfRange(sortedSlots, off, endOffset);
}
}
List<SimpleOrderedMap> bucketList = new ArrayList<>(sortedSlots.length);
boolean needFilter = deferredAggs != null || freq.getSubFacets().size() > 0;
for (int slotNum : sortedSlots) {
for (Slot slot : sortedSlots) {
SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>();
Comparable val = bucketValFromSlotNumFunc.apply(slotNum);
bucket.add("val", val);
bucket.add("val", slot.bucketVal);
Query filter = needFilter ? makeBucketQuery(fieldQueryValFunc.apply(val)) : null;
fillBucket(bucket, countAcc.getCount(slotNum), slotNum, null, filter);
fillBucketFromSlot(bucket, slot, resortAccForFill);
bucketList.add(bucket);
}
res.add("buckets", bucketList);
if (fcontext.isShard() && shardHasMoreBuckets) {
// Currently, "more" is an internal implementation detail and only returned for distributed sub-requests
res.add("more", true);
@ -420,24 +480,38 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
}
private static class Slot {
/** The Slot number used during collection */
int slot;
/** filled in only once we know the bucket will either be involved in resorting, or returned */
Comparable bucketVal;
/** Filled in if and only if needed for resorting, deferred stats, or subfacets */
Query bucketFilter;
// TODO: we could potentially store the bucket's (DocSet)subDomain as well,
// but that's a much bigger object to hang onto for every slot at the same time
// Probably best to just trust the filterCache to do its job
/** The Slot number used during resorting */
int resortSlotNum;
}
private void fillBucket(SimpleOrderedMap<Object> target, int count, int slotNum, DocSet subDomain, Query filter) throws IOException {
/** Helper method used solely when looping over buckets to be returned in findTopSlots */
private void fillBucketFromSlot(SimpleOrderedMap<Object> target, Slot slot,
SlotAcc resortAcc) throws IOException {
final int count = countAcc.getCount(slot.slot);
target.add("count", count);
if (count <= 0 && !freq.processEmpty) return;
if (collectAcc != null && slotNum >= 0) {
collectAcc.setValues(target, slotNum);
if (collectAcc != null && slot.slot >= 0) {
collectAcc.setValues(target, slot.slot);
}
createOtherAccs(-1, 1);
if (otherAccs == null && freq.subFacets.isEmpty()) return;
if (subDomain == null) {
subDomain = fcontext.searcher.getDocSet(filter, fcontext.base);
}
assert null != slot.bucketFilter;
final Query filter = slot.bucketFilter;
final DocSet subDomain = fcontext.searcher.getDocSet(filter, fcontext.base);
// if no subFacets, we only need a DocSet
// otherwise we need more?
@ -449,15 +523,119 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
if (otherAccs != null) {
// do acc at a time (traversing domain each time) or do all accs for each doc?
for (SlotAcc acc : otherAccs) {
acc.reset(); // TODO: only needed if we previously used for allBuckets or missing
acc.collect(subDomain, 0, slot -> { return new SlotContext(filter); });
acc.setValues(target, 0);
if (acc == resortAcc) {
// already collected, just need to get the value from the correct slot
acc.setValues(target, slot.resortSlotNum);
} else {
acc.reset(); // TODO: only needed if we previously used for allBuckets or missing
acc.collect(subDomain, 0, s -> { return new SlotContext(filter); });
acc.setValues(target, 0);
}
}
}
processSubs(target, filter, subDomain, false, null);
}
/**
* Helper method that resorts the slots (if needed).
*
* @return a SlotAcc whose {@link SlotAcc#setValues} should be used on the final buckets (via
* {@link Slot#resortSlotNum}), or null if no special SlotAcc was needed (ie: no resorting, or resorting
* on something already known/collected)
*/
private SlotAcc resortSlots(Slot[] slots) throws IOException {
if (null == this.resort) {
return null; // Nothing to do.
}
assert ! fcontext.isShard();
// NOTE: getMultiplier() is confusing and weird and meant for use in PriorityQueue.lessThan,
// so it's backwards from what you'd expect in a Comparator...
final int resortMul = -1 * resort.sortDirection.getMultiplier();
SlotAcc resortAcc = getTrivialSortingSlotAcc(this.resort);
if (null != resortAcc) {
// resorting on count or index is rare (and not particularly useful) but if someone chooses to do
// either of these we don't need to re-collect ... instead just re-sort the slots based on
// the previously collected values using the originally collected slot numbers...
if (resortAcc.equals(countAcc)) {
final Comparator<Slot> comparator = null != indexOrderAcc ?
(new Comparator<Slot>() {
public int compare(Slot x, Slot y) {
final int cmp = resortMul * countAcc.compare(x.slot, y.slot);
return cmp != 0 ? cmp : indexOrderAcc.compare(x.slot, y.slot);
}
})
: (new Comparator<Slot>() {
public int compare(Slot x, Slot y) {
final int cmp = resortMul * countAcc.compare(x.slot, y.slot);
return cmp != 0 ? cmp : Integer.compare(x.slot, y.slot);
}
});
Arrays.sort(slots, comparator);
return null;
}
if (resortAcc.equals(indexOrderAcc)) {
// obviously indexOrderAcc is not null, and no need for a fancy tie breaker...
Arrays.sort(slots, new Comparator<Slot>() {
public int compare(Slot x, Slot y) {
return resortMul * indexOrderAcc.compare(x.slot, y.slot);
}
});
return null;
}
// nothing else should be possible
assert false : "trivial resort isn't count or index: " + this.resort;
}
assert null == resortAcc;
for (SlotAcc acc : otherAccs) {
if (acc.key.equals(this.resort.sortVariable)) {
resortAcc = acc;
break;
}
}
// TODO: what if resortAcc is still null, ie: bad input? ... throw an error? (see SOLR-13022)
// looks like the equivalent sort code path silently ignores sorting if sortVariable isn't in accMap...
// ...and we get a deferred NPE when trying to collect.
assert null != resortAcc;
final SlotAcc acc = resortAcc;
// reset resortAcc to be (just) big enough for all the slots we care about...
acc.reset();
acc.resize(new FlatteningResizer(slots.length));
// give each existing Slot a new resortSlotNum and let the resortAcc collect it...
for (int slotNum = 0; slotNum < slots.length; slotNum++) {
Slot slot = slots[slotNum];
slot.resortSlotNum = slotNum;
assert null != slot.bucketFilter : "null filter for slot=" +slot.bucketVal;
final DocSet subDomain = fcontext.searcher.getDocSet(slot.bucketFilter, fcontext.base);
acc.collect(subDomain, slotNum, s -> { return new SlotContext(slot.bucketFilter); } );
}
// now resort all the Slots according to the new collected values...
final Comparator<Slot> comparator = null != indexOrderAcc ?
(new Comparator<Slot>() {
public int compare(Slot x, Slot y) {
final int cmp = resortMul * acc.compare(x.resortSlotNum, y.resortSlotNum);
return cmp != 0 ? cmp : indexOrderAcc.compare(x.slot, y.slot);
}
})
: (new Comparator<Slot>() {
public int compare(Slot x, Slot y) {
final int cmp = resortMul * acc.compare(x.resortSlotNum, y.resortSlotNum);
return cmp != 0 ? cmp : Integer.compare(x.slot, y.slot);
}
});
Arrays.sort(slots, comparator);
return acc;
}
@Override
protected void processStats(SimpleOrderedMap<Object> bucket, Query bucketQ, DocSet docs, int docCount) throws IOException {
if (docCount == 0 && !freq.processEmpty || freq.getFacetStats().size() == 0) {
@ -733,4 +911,20 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
return bucket;
}
/** Resizes to the specified size, remapping all existing slots to slot 0 */
private static final class FlatteningResizer extends SlotAcc.Resizer {
private final int slotCount;
public FlatteningResizer(int slotCount) {
this.slotCount = slotCount;
}
@Override
public int getNewSize() {
return slotCount;
}
@Override
public int getNewSlot(int oldSlot) {
return 0;
}
}
}

View File

@ -44,8 +44,8 @@ public class FacetRangeMerger extends FacetRequestSortedMerger<FacetRange> {
}
@Override
public void sortBuckets() {
// regardless of mincount, every shard returns a consistent set of buckets which are already in the correct order
public void sortBuckets(final FacetRequest.FacetSort sort) {
// regardless of sort or mincount, every shard returns a consistent set of buckets which are already in the correct order
sortedBuckets = new ArrayList<>( buckets.values() );
}

View File

@ -21,6 +21,7 @@ import java.util.ArrayList;
import java.util.EnumSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Objects;
import java.util.Map;
import org.apache.lucene.search.Query;
@ -54,8 +55,40 @@ import static org.apache.solr.search.facet.FacetRequest.RefineMethod.NONE;
*/
public abstract class FacetRequest {
/** Simple structure for encapsulating a sort variable and a direction */
public static final class FacetSort {
final String sortVariable;
final SortDirection sortDirection;
public FacetSort(final String sortVariable, final SortDirection sortDirection) {
assert null != sortVariable;
assert null != sortDirection;
this.sortVariable = sortVariable;
this.sortDirection = sortDirection;
}
public boolean equals(Object other) {
if (other instanceof FacetSort) {
final FacetSort that = (FacetSort)other;
return this.sortVariable.equals(that.sortVariable)
&& this.sortDirection.equals(that.sortDirection);
}
return false;
}
public int hashCode() {
return Objects.hash(sortVariable, sortDirection);
}
public String toString() {
return sortVariable + " " + sortDirection;
}
/** Commonly Re-used "count desc" (default) */
public static final FacetSort COUNT_DESC = new FacetSort("count", SortDirection.desc);
/** Commonly Re-used "index asc" (index order / streaming) */
public static final FacetSort INDEX_ASC = new FacetSort("index", SortDirection.asc);
}
public static enum SortDirection {
asc(-1) ,
asc(-1),
desc(1);
private final int multiplier;
@ -893,8 +926,7 @@ class FacetFieldParser extends FacetParser<FacetField> {
if (arg instanceof String) {
// just the field name...
facet.field = (String)arg;
parseSort( null ); // TODO: defaults
} else if (arg instanceof Map) {
Map<String, Object> m = (Map<String, Object>) arg;
facet.field = getField(m);
@ -921,7 +953,13 @@ class FacetFieldParser extends FacetParser<FacetField> {
Object o = m.get("facet");
parseSubs(o);
parseSort( m.get(SORT) );
// TODO: SOLR-13022 ... validate the sortVariables against the subs.
facet.sort = parseSort( m.get(SORT) );
facet.prelim_sort = parseSort( m.get("prelim_sort") );
}
if (null == facet.sort) {
facet.sort = FacetRequest.FacetSort.COUNT_DESC;
}
return facet;
@ -932,21 +970,23 @@ class FacetFieldParser extends FacetParser<FacetField> {
// sort : 'mystat desc'
// OR
// sort : { mystat : 'desc' }
private void parseSort(Object sort) {
private static FacetRequest.FacetSort parseSort(Object sort) {
if (sort == null) {
facet.sortVariable = "count";
facet.sortDirection = FacetRequest.SortDirection.desc;
return null;
} else if (sort instanceof String) {
String sortStr = (String)sort;
if (sortStr.endsWith(" asc")) {
facet.sortVariable = sortStr.substring(0, sortStr.length()-" asc".length());
facet.sortDirection = FacetRequest.SortDirection.asc;
return new FacetRequest.FacetSort(sortStr.substring(0, sortStr.length()-" asc".length()),
FacetRequest.SortDirection.asc);
} else if (sortStr.endsWith(" desc")) {
facet.sortVariable = sortStr.substring(0, sortStr.length()-" desc".length());
facet.sortDirection = FacetRequest.SortDirection.desc;
return new FacetRequest.FacetSort(sortStr.substring(0, sortStr.length()-" desc".length()),
FacetRequest.SortDirection.desc);
} else {
facet.sortVariable = sortStr;
facet.sortDirection = "index".equals(facet.sortVariable) ? FacetRequest.SortDirection.asc : FacetRequest.SortDirection.desc; // default direction for "index" is ascending
return new FacetRequest.FacetSort(sortStr,
// default direction for "index" is ascending
("index".equals(sortStr)
? FacetRequest.SortDirection.asc
: FacetRequest.SortDirection.desc));
}
} else {
// sort : { myvar : 'desc' }
@ -955,10 +995,8 @@ class FacetFieldParser extends FacetParser<FacetField> {
Map.Entry<String,Object> entry = map.entrySet().iterator().next();
String k = entry.getKey();
Object v = entry.getValue();
facet.sortVariable = k;
facet.sortDirection = FacetRequest.SortDirection.valueOf(v.toString());
return new FacetRequest.FacetSort(k, FacetRequest.SortDirection.valueOf(v.toString()));
}
}
}
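A few input/output pairs for the reworked parseSort, sketched from the code above (the Map.of call is just shorthand for the single-entry map form):

  // parseSort(null)                     -> null (FacetFieldParser then falls back to FacetSort.COUNT_DESC)
  // parseSort("mystat desc")            -> new FacetSort("mystat", SortDirection.desc)
  // parseSort("index")                  -> new FacetSort("index", SortDirection.asc)   // bare "index" defaults to asc
  // parseSort(Map.of("mystat", "asc"))  -> new FacetSort("mystat", SortDirection.asc)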

View File

@ -79,25 +79,27 @@ abstract class FacetRequestSortedMerger<FacetRequestT extends FacetRequestSorted
}
}
public void sortBuckets() {
public void sortBuckets(final FacetRequest.FacetSort sort) {
// NOTE: we *always* re-init from buckets, because it may have been modified post-refinement
sortedBuckets = new ArrayList<>( buckets.values() );
Comparator<FacetBucket> comparator = null;
final FacetRequest.SortDirection direction = freq.sortDirection;
final FacetRequest.SortDirection direction = sort.sortDirection;
final int sortMul = direction.getMultiplier();
if ("count".equals(freq.sortVariable)) {
if ("count".equals(sort.sortVariable)) {
comparator = (o1, o2) -> {
int v = -Long.compare(o1.count, o2.count) * sortMul;
return v == 0 ? o1.bucketValue.compareTo(o2.bucketValue) : v;
};
Collections.sort(sortedBuckets, comparator);
} else if ("index".equals(freq.sortVariable)) {
} else if ("index".equals(sort.sortVariable)) {
comparator = (o1, o2) -> -o1.bucketValue.compareTo(o2.bucketValue) * sortMul;
Collections.sort(sortedBuckets, comparator);
} else {
final String key = freq.sortVariable;
final String key = sort.sortVariable;
/**
final FacetSortableMerger[] arr = new FacetSortableMerger[buckets.size()];
@ -154,6 +156,7 @@ abstract class FacetRequestSortedMerger<FacetRequestT extends FacetRequestSorted
out.addAll(nulls);
sortedBuckets = out;
}
assert null != sortedBuckets;
}
boolean isBucketComplete(FacetBucket bucket, Context mcontext) {
@ -181,6 +184,8 @@ abstract class FacetRequestSortedMerger<FacetRequestT extends FacetRequestSorted
return null;
}
final FacetRequest.FacetSort initial_sort = null == freq.prelim_sort ? freq.sort : freq.prelim_sort;
// Tags for sub facets that have partial facets somewhere in their children.
// If we are missing a bucket for this shard, we'll need to get the specific buckets that need refining.
Collection<String> tagsWithPartial = mcontext.getSubsWithPartial(freq);
@ -206,9 +211,9 @@ abstract class FacetRequestSortedMerger<FacetRequestT extends FacetRequestSorted
// when we don't have to worry about mincount pruning, there is no need for any
// over refinement for these sorts..
if (freq.mincount <= 1 && ("index".equals(freq.sortVariable)
|| ("count".equals(freq.sortVariable)
&& FacetRequest.SortDirection.desc == freq.sortDirection))) {
if (freq.mincount <= 1 && ("index".equals(initial_sort.sortVariable)
|| ("count".equals(initial_sort.sortVariable)
&& FacetRequest.SortDirection.desc == initial_sort.sortDirection))) {
// No-Op
} else if (0 <= freq.overrequest) {
// if user asked for an explicit amount of overrequesting,
@ -241,9 +246,9 @@ abstract class FacetRequestSortedMerger<FacetRequestT extends FacetRequestSorted
// todo: but we may need to filter.... simplify by always sorting?
bucketList = buckets.values();
} else {
// only sort once
// don't re-sort (the prerefinement values) if our subclass already did it
if (sortedBuckets == null) {
sortBuckets(); // todo: make sure this filters buckets as well
sortBuckets(initial_sort); // todo: make sure this filters buckets as well
}
bucketList = sortedBuckets;
}

View File

@ -39,10 +39,10 @@ class DebugAgg extends AggValueSource {
@Override
public ValueSource parse(FunctionQParser fp) throws SyntaxError {
parses.incrementAndGet();
final String what = fp.hasMoreArguments() ? fp.parseId() : "debug";
final String what = fp.hasMoreArguments() ? fp.parseId() : "wrap";
switch (what) {
case "debug": return new DebugAgg(fp.getLocalParams());
case "wrap": return new DebugAgg(fp);
case "numShards": return new DebugAggNumShards();
default: /* No-Op */
}
@ -59,14 +59,17 @@ class DebugAgg extends AggValueSource {
* wrap them in defaults from the request
*/
public final SolrParams localParams;
public DebugAgg(SolrParams localParams) {
public final AggValueSource inner;
public DebugAgg(FunctionQParser fp) throws SyntaxError {
super("debug");
this.localParams = localParams;
this.localParams = fp.getLocalParams();
this.inner = fp.hasMoreArguments() ? fp.parseAgg(FunctionQParser.FLAG_IS_AGG) : new CountAgg();
}
@Override
public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) {
return new Acc(fcontext, numDocs, numSlots);
public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException {
return new Acc(fcontext, numDocs, numSlots, inner.createSlotAcc(fcontext, numDocs, numSlots));
}
@Override
@ -83,26 +86,35 @@ class DebugAgg extends AggValueSource {
public static AtomicLong creates = new AtomicLong(0);
public static AtomicLong resets = new AtomicLong(0);
public static AtomicLong resizes = new AtomicLong(0);
public static AtomicLong collectDocs = new AtomicLong(0);
public static AtomicLong collectDocSets = new AtomicLong(0);
public static Acc last;
public CountSlotAcc sub;
public SlotAcc sub;
public int numDocs;
public int numSlots;
public Acc(FacetContext fcontext, int numDocs, int numSlots) {
public Acc(FacetContext fcontext, int numDocs, int numSlots, SlotAcc sub) {
super(fcontext);
this.last = this;
this.numDocs = numDocs;
this.numSlots = numSlots;
this.sub = sub;
creates.addAndGet(1);
sub = new CountSlotArrAcc(fcontext, numSlots);
// new RuntimeException("DEBUG Acc numSlots=" + numSlots).printStackTrace();
}
@Override
public void collect(int doc, int slot, IntFunction<SlotContext> slotContext) throws IOException {
collectDocs.addAndGet(1);
sub.collect(doc, slot, slotContext);
}
@Override
public int collect(DocSet docs, int slot, IntFunction<SlotContext> slotContext) throws IOException {
collectDocSets.addAndGet(1);
return sub.collect(docs, slot, slotContext);
}
@Override
public int compare(int slotA, int slotB) {
@ -137,11 +149,6 @@ class DebugAgg extends AggValueSource {
sub.setNextReader(readerContext);
}
@Override
public int collect(DocSet docs, int slot, IntFunction<SlotContext> slotContext) throws IOException {
return sub.collect(docs, slot, slotContext);
}
@Override
public void setValues(SimpleOrderedMap<Object> bucket, int slotNum) throws IOException {
sub.key = this.key; // TODO: Blech... this should be fixed
@ -156,7 +163,7 @@ class DebugAgg extends AggValueSource {
@Override
public FacetMerger createFacetMerger(Object prototype) {
return new FacetLongMerger();
return inner.createFacetMerger(prototype);
}
/** A simple agg that just returns the number of shards contributing to a bucket */

View File

@ -426,6 +426,133 @@ public class TestJsonFacetRefinement extends SolrTestCaseHS {
}
/**
* When <code>prelim_sort</code> is used, all 'top bucket' choices for refinement should still be based on
* it, not the <code>sort</code> param, so this test is just some sanity checks that the presence of
* these params doesn't break anything in the refinement logic.
*/
@Test
public void testRefinementMergingWithPrelimSort() throws Exception {
doTestRefine("{x : { type:terms, field:X, limit:2, refine:true, prelim_sort:'count desc', sort:'y asc'," +
" facet:{ y:'sum(y_i)' } } }",
// shard0 response
"{x: {buckets:[{val:x1, count:5, y:73}, {val:x2, count:3, y:13}], more:true } }",
// shard1 response
"{x: {buckets:[{val:x2, count:4, y:4}, {val:x3, count:2, y:22}], more:true } }",
// shard0 expected refinement info
null,
// shard1 expected refinement info
"=={x:{_l:[x1]}}");
// same test as above, but shard1 indicates it doesn't have any more results,
// so there shouldn't be any refinement
doTestRefine("{x : { type:terms, field:X, limit:2, refine:true, prelim_sort:'count desc', sort:'y asc'," +
" facet:{ y:'sum(y_i)' } } }",
// shard0 response
"{x: {buckets:[{val:x1, count:5, y:73}, {val:x2, count:3, y:13}], more:true } }",
// shard1 response
"{x: {buckets:[{val:x2, count:4, y:4}, {val:x3, count:2, y:22}] } }",
// shard0 expected refinement info
null,
// shard1 expected refinement info
null);
}
@Test
public void testPrelimSortingWithRefinement() throws Exception {
// NOTE: distributed prelim_sort testing in TestJsonFacets uses identical shards, so never needs
// refinement, so here we focus on the (re)sorting of different topN refined buckets
// after the prelim_sorting from different shards
initServers();
final Client client = servers.getClient(random().nextInt());
client.queryDefaults().set("shards", servers.getShards(), "debugQuery", Boolean.toString(random().nextBoolean()));
List<SolrClient> clients = client.getClientProvider().all();
assertTrue(clients.size() >= 3); // we only use 2, but assert 3 to also test empty shard
final SolrClient c0 = clients.get(0);
final SolrClient c1 = clients.get(1);
client.deleteByQuery("*:*", null);
int id = 0;
// client 0 // shard1: A=1,B=1,C=2 ...
c0.add(sdoc("id", id++, "cat_s","A", "price_i","1"));
c0.add(sdoc("id", id++, "cat_s","B", "price_i","1"));
c0.add(sdoc("id", id++, "cat_s","C", "price_i","1"));
c0.add(sdoc("id", id++, "cat_s","C", "price_i","1"));
// ... X=3,Y=3
c0.add(sdoc("id", id++, "cat_s","X", "price_i","1"));
c0.add(sdoc("id", id++, "cat_s","X", "price_i","1"));
c0.add(sdoc("id", id++, "cat_s","X", "price_i","1"));
c0.add(sdoc("id", id++, "cat_s","Y", "price_i","1"));
c0.add(sdoc("id", id++, "cat_s","Y", "price_i","1"));
c0.add(sdoc("id", id++, "cat_s","Y", "price_i","1"));
// client 1 // shard2: X=1,Y=2,Z=2 ...
c1.add(sdoc("id", id++, "cat_s","X", "price_i","1"));
c1.add(sdoc("id", id++, "cat_s","Y", "price_i","1"));
c1.add(sdoc("id", id++, "cat_s","Y", "price_i","1"));
c1.add(sdoc("id", id++, "cat_s","Z", "price_i","1"));
c1.add(sdoc("id", id++, "cat_s","Z", "price_i","1"));
// ... C=4
c1.add(sdoc("id", id++, "cat_s","C", "price_i","1"));
c1.add(sdoc("id", id++, "cat_s","C", "price_i","1"));
c1.add(sdoc("id", id++, "cat_s","C", "price_i","1"));
c1.add(sdoc("id", id++, "cat_s","C", "price_i","1"));
// Whole Collection: A=1,B=1,Z=2,X=4,Y=5,C=6
client.commit();
// in both cases, neither C nor Z make the cut for the top3 buckets in phase#1 (due to tie breaker),
// so they aren't refined -- after refinement the re-sorting re-orders the buckets
client.testJQ(params("q", "*:*", "rows", "0", "json.facet", "{"
+ " cat_1 : { type:terms, field:cat_s, limit:3, overrequest:0"
+ " , refine:true, prelim_sort:'count asc', sort:'index desc' }, "
+ " cat_2 : { type:terms, field:cat_s, limit:3, overrequest:0"
+ " , refine:true, prelim_sort:'sum_p asc', sort:'count desc' "
+ " , facet: { sum_p: 'sum(price_i)' } }"
+ "}")
, "facets=={ count: "+id+","
+ " cat_1:{ buckets:[ "
+ " {val:X,count:4}," // index desc
+ " {val:B,count:1},"
+ " {val:A,count:1},"
+ " ] },"
+ " cat_2:{ buckets:[ "
+ " {val:X,count:4,sum_p:4.0}," // count desc
+ " {val:A,count:1,sum_p:1.0}," // index order tie break
+ " {val:B,count:1,sum_p:1.0},"
+ " ] }"
+ "}"
);
// with some explicit overrefinement=2, we also refine C and Y, giving us those additional
// (fully populated) buckets to consider during re-sorting...
client.testJQ(params("q", "*:*", "rows", "0", "json.facet", "{"
+ " cat_1 : { type:terms, field:cat_s, limit:3, overrequest:0, overrefine:2"
+ " , refine:true, prelim_sort:'count asc', sort:'index desc' }, "
+ " cat_2 : { type:terms, field:cat_s, limit:3, overrequest:0, overrefine:2"
+ " , refine:true, prelim_sort:'sum_p asc', sort:'count desc' "
+ " , facet: { sum_p: 'sum(price_i)' } }"
+ "}")
, "facets=={ count: "+id+","
+ " cat_1:{ buckets:[ "
+ " {val:Y,count:5}," // index desc
+ " {val:X,count:4},"
+ " {val:C,count:6},"
+ " ] },"
+ " cat_2:{ buckets:[ "
+ " {val:C,count:6,sum_p:6.0}," // count desc
+ " {val:Y,count:5,sum_p:5.0},"
+ " {val:X,count:4,sum_p:4.0},"
+ " ] }"
+ "}"
);
}
@Test
public void testSortedFacetRefinementPushingNonRefinedBucketBackIntoTopN() throws Exception {

View File

@ -25,6 +25,7 @@ import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.atomic.AtomicLong;
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
import com.tdunning.math.stats.AVLTreeDigest;
@ -50,7 +51,7 @@ import org.junit.Test;
@LuceneTestCase.SuppressCodecs({"Lucene3x","Lucene40","Lucene41","Lucene42","Lucene45","Appending"})
public class TestJsonFacets extends SolrTestCaseHS {
private static SolrInstances servers; // for distributed testing
private static int origTableSize;
private static FacetField.FacetMethod origDefaultFacetMethod;
@ -89,13 +90,20 @@ public class TestJsonFacets extends SolrTestCaseHS {
}
}
// tip: when debugging a test, comment out the @ParametersFactory and edit the constructor to be no-arg
// tip: when debugging failures, change this variable to DEFAULT_METHOD
// (or if only one method is problematic, set to that explicitly)
private static final FacetField.FacetMethod TEST_ONLY_ONE_FACET_METHOD
= null; // FacetField.FacetMethod.DEFAULT_METHOD;
@ParametersFactory
public static Iterable<Object[]> parameters() {
if (null != TEST_ONLY_ONE_FACET_METHOD) {
return Arrays.<Object[]>asList(new Object[] { TEST_ONLY_ONE_FACET_METHOD });
}
// wrap each enum val in an Object[] and return as Iterable
return () -> Arrays.stream(FacetField.FacetMethod.values())
.map(it -> new Object[]{it}).iterator();
.map(it -> new Object[]{it}).iterator();
}
public TestJsonFacets(FacetField.FacetMethod defMethod) {
@ -435,18 +443,28 @@ public class TestJsonFacets extends SolrTestCaseHS {
+ " } }"
);
// simple single level facet w/skg stat & sorting
for (String sort : Arrays.asList("index asc", "skg desc")) {
// the relatedness score of each of our cat_s values is (conveniently) also alphabetical order
// so both of these sort options should produce identical output
// and testinging "index" sort allows the randomized use of "stream" processor as default to be tested
// simple single level facet w/skg stat & (re)sorting
for (String sort : Arrays.asList("sort:'index asc'",
"sort:'y desc'",
"sort:'z desc'",
"sort:'skg desc'",
"prelim_sort:'count desc', sort:'index asc'",
"prelim_sort:'count desc', sort:'y desc'",
"prelim_sort:'count desc', sort:'z desc'",
"prelim_sort:'count desc', sort:'skg desc'")) {
// the relatedness score of each of our cat_s values is (conveniently) also alphabetical order,
// (and the same order as 'sum(num_i) desc' & 'min(num_i) desc')
//
// So all of these re/sort options should produce identical output (since the num buckets is < limit)
// - Testing "index" sort allows the randomized use of "stream" processor as default to be tested.
// - Testing (re)sorts on other stats sanity checks code paths where relatedness() is a "deferred" Agg
assertJQ(req("q", "cat_s:[* TO *]", "rows", "0",
"fore", "where_s:NY", "back", "*:*",
"json.facet", ""
+ "{x: { type: terms, field: 'cat_s', sort: '"+sort+"', "
+ " facet: { skg: 'relatedness($fore,$back)' } } }")
+ "{x: { type: terms, field: 'cat_s', "+sort+", "
+ " facet: { skg: 'relatedness($fore,$back)', y:'sum(num_i)', z:'min(num_i)' } } }")
, "facets=={count:5, x:{ buckets:["
+ " { val:'A', count:2, "
+ " { val:'A', count:2, y:5.0, z:2, "
+ " skg : { relatedness: 0.00554, "
//+ " foreground_count: 1, "
//+ " foreground_size: 2, "
@ -455,7 +473,7 @@ public class TestJsonFacets extends SolrTestCaseHS {
+ " foreground_popularity: 0.16667,"
+ " background_popularity: 0.33333, },"
+ " }, "
+ " { val:'B', count:3, "
+ " { val:'B', count:3, y:-3.0, z:-5, "
+ " skg : { relatedness: 0.0, " // perfectly average and uncorrolated
//+ " foreground_count: 1, "
//+ " foreground_size: 2, "
@ -467,6 +485,37 @@ public class TestJsonFacets extends SolrTestCaseHS {
);
}
// trivial sanity check that we can (re)sort on SKG after pre-sorting on count...
// ...and it's only computed for the top N buckets (based on our pre-sort)
for (int overrequest : Arrays.asList(0, 1, 42)) {
// based on our counts & relatedness values, the blackbox output should be the same for both
// overrequest values ... only DebugAgg stats should change...
DebugAgg.Acc.collectDocs.set(0);
DebugAgg.Acc.collectDocSets.set(0);
assertJQ(req("q", "cat_s:[* TO *]", "rows", "0",
"fore", "where_s:NJ", "back", "*:*",
"json.facet", ""
+ "{x: { type: terms, field: 'cat_s', prelim_sort: 'count desc', sort:'skg desc', "
+ " limit: 1, overrequest: " + overrequest + ", "
+ " facet: { skg: 'debug(wrap,relatedness($fore,$back))' } } }")
, "facets=={count:5, x:{ buckets:["
+ " { val:'B', count:3, "
+ " skg : { relatedness: 0.00638, "
//+ " foreground_count: 2, "
//+ " foreground_size: 3, "
//+ " background_count: 3, "
//+ " background_size: 6,"
+ " foreground_popularity: 0.33333,"
+ " background_popularity: 0.5 },"
+ " }, "
+ " ] } } "
);
// at most 2 buckets, regardless of overrequest...
assertEqualsAndReset(0 < overrequest ? 2 : 1, DebugAgg.Acc.collectDocSets);
assertEqualsAndReset(0, DebugAgg.Acc.collectDocs);
}
// SKG used in multiple nested facets
//
// we'll re-use these params in 2 requests, one will simulate a shard request
@ -936,7 +985,6 @@ public class TestJsonFacets extends SolrTestCaseHS {
}
public void doStats(Client client, ModifiableSolrParams p) throws Exception {
Map<String, List<String>> fieldLists = new HashMap<>();
fieldLists.put("noexist", getAlternatives("noexist_s"));
fieldLists.put("cat_s", getAlternatives("cat_s"));
@ -1165,6 +1213,31 @@ public class TestJsonFacets extends SolrTestCaseHS {
", f2:{ 'buckets':[{ val:'B', count:3, n1:-3.0}, { val:'A', count:2, n1:6.0 }]} }"
);
// test trivial re-sorting by stats
// (there are other more in-depth tests of this in doTestPrelimSorting, but this lets us sanity check
// small responses with multiple templatized params of different numeric types)
client.testJQ(params(p, "q", "*:*", "json.facet" // num_d
, "{f1:{terms:{${terms} field:'${cat_s}', "
+ " prelim_sort:'count desc', sort:'n1 desc', facet:{n1:'sum(${num_d})'} }},"
+ " f2:{terms:{${terms} field:'${cat_s}', "
+ " prelim_sort:'count asc', sort:'n1 asc', facet:{n1:'sum(${num_d})'} }} }"
)
, "facets=={ 'count':6 "
+ ", f1:{ 'buckets':[{ val:'A', count:2, n1:6.0 }, { val:'B', count:3, n1:-3.0}]}"
+ ", f2:{ 'buckets':[{ val:'B', count:3, n1:-3.0}, { val:'A', count:2, n1:6.0 }]} }"
);
client.testJQ(params(p, "q", "*:*", "json.facet" // num_i
, "{f1:{terms:{${terms} field:'${cat_s}', "
+ " prelim_sort:'count desc', sort:'n1 desc', facet:{n1:'sum(${num_i})'} }},"
+ " f2:{terms:{${terms} field:'${cat_s}', "
+ " prelim_sort:'count asc', sort:'n1 asc', facet:{n1:'sum(${num_i})'} }} }"
)
, "facets=={ 'count':6 "
+ ", f1:{ 'buckets':[{ val:'A', count:2, n1:5.0 }, { val:'B', count:3, n1:-3.0}]}"
+ ", f2:{ 'buckets':[{ val:'B', count:3, n1:-3.0}, { val:'A', count:2, n1:5.0 }]} }"
);
// test sorting by other stats and more than one facet
client.testJQ(params(p, "q", "*:*"
, "json.facet", "{f1:{terms:{${terms} field:'${cat_s}', sort:'n1 desc', facet:{n1:'sum(${num_d})', n2:'avg(${num_d})'} }}" +
@ -2193,10 +2266,390 @@ public class TestJsonFacets extends SolrTestCaseHS {
long refineParses = DebugAgg.parses.get() - startParses;
assertEquals(noRefineParses, refineParses);
}
}
public void testPrelimSortingSingleNode() throws Exception {
doTestPrelimSortingSingleNode(false, false);
}
public void testPrelimSortingSingleNodeExtraStat() throws Exception {
doTestPrelimSortingSingleNode(true, false);
}
public void testPrelimSortingSingleNodeExtraFacet() throws Exception {
doTestPrelimSortingSingleNode(false, true);
}
public void testPrelimSortingSingleNodeExtraStatAndFacet() throws Exception {
doTestPrelimSortingSingleNode(true, true);
}
/** @see #doTestPrelimSorting */
public void doTestPrelimSortingSingleNode(final boolean extraAgg, final boolean extraSubFacet) throws Exception {
// we're not using Client.localClient because it doesn't provide a SolrClient to
// use in doTestPrelimSorting -- so instead we make a single node, and don't use any shards param...
final SolrInstances nodes = new SolrInstances(1, "solrconfig-tlog.xml", "schema_latest.xml");
try {
final Client client = nodes.getClient(random().nextInt());
client.queryDefaults().set("debugQuery", Boolean.toString(random().nextBoolean()) );
doTestPrelimSorting(client, extraAgg, extraSubFacet);
} finally {
nodes.stop();
}
}
public void testPrelimSortingDistrib() throws Exception {
doTestPrelimSortingDistrib(false, false);
}
public void testPrelimSortingDistribExtraStat() throws Exception {
doTestPrelimSortingDistrib(true, false);
}
public void testPrelimSortingDistribExtraFacet() throws Exception {
doTestPrelimSortingDistrib(false, true);
}
public void testPrelimSortingDistribExtraStatAndFacet() throws Exception {
doTestPrelimSortingDistrib(true, true);
}
/** @see #doTestPrelimSorting */
public void doTestPrelimSortingDistrib(final boolean extraAgg, final boolean extraSubFacet) throws Exception {
// we only use 2 shards, but we also want to sanity check code paths if one (additional) shard is empty
final int totalShards = random().nextBoolean() ? 2 : 3;
final SolrInstances nodes = new SolrInstances(totalShards, "solrconfig-tlog.xml", "schema_latest.xml");
try {
final Client client = nodes.getClient(random().nextInt());
client.queryDefaults().set( "shards", nodes.getShards(),
"debugQuery", Boolean.toString(random().nextBoolean()) );
doTestPrelimSorting(client, extraAgg, extraSubFacet);
} finally {
nodes.stop();
}
}
/**
* Helper method that indexes a fixed set of docs to exactly <em>two</em> of the SolrClients
* involved in the current Client such that each shard is identical for the purposes of simplified
* doc/facet counting/assertions -- if there is only one SolrClient (Client.local) then it sends that
* single shard twice as many docs so the counts/assertions will be consistent.
*
* Note: this test doesn't demonstrate practical uses of prelim_sort.
* The scenarios it tests are actually fairly absurd, but help to ensure that edge cases are covered.
*
* @param client client to use -- may be local or multishard
* @param extraAgg if an extra aggregation function should be included, this hits slightly diff code paths
* @param extraSubFacet if an extra sub facet should be included, this hits slightly diff code paths
*/
public void doTestPrelimSorting(final Client client,
final boolean extraAgg,
final boolean extraSubFacet) throws Exception {
client.deleteByQuery("*:*", null);
List<SolrClient> clients = client.getClientProvider().all();
// carefully craft two balanced shards (assuming we have at least two) and leave any other shards
// empty to help check the code paths of some shards returning no buckets.
//
// if we are in a single node situation, these clients will be the same, and we'll have the same
// total docs in our collection, but the numShardsWithData will be different
// (which will affect some assertions)
final SolrClient shardA = clients.get(0);
final SolrClient shardB = clients.get(clients.size()-1);
final int numShardsWithData = (shardA == shardB) ? 1 : 2;
// for simplicity, each foo_s "term" exists on each shard in the same number of docs as its numeric
// value (so count should be double the term) and bar_i is always 1 per doc (so sum(bar_i)
// should always be the same as count)
int id = 0;
for (int i = 1; i <= 20; i++) {
for (int j = 1; j <= i; j++) {
shardA.add(new SolrInputDocument("id", ""+(++id), "foo_s", "foo_" + i, "bar_i", "1"));
shardB.add(new SolrInputDocument("id", ""+(++id), "foo_s", "foo_" + i, "bar_i", "1"));
}
}
assertEquals(420, id); // sanity check
client.commit();
DebugAgg.Acc.collectDocs.set(0);
DebugAgg.Acc.collectDocSets.set(0);
// NOTE: sorting by index can cause some optimizations when using type=enum|stream
// that cause our stat to be collected differently, so we have to account for that when
// looking at DebugAgg collect stats if/when the test framework picks those methods
// ...BUT... this only affects cloud; for single node, prelim_sort overrides streaming
final boolean indexSortDebugAggFudge = ( 1 < numShardsWithData ) &&
(FacetField.FacetMethod.DEFAULT_METHOD.equals(FacetField.FacetMethod.STREAM) ||
FacetField.FacetMethod.DEFAULT_METHOD.equals(FacetField.FacetMethod.ENUM));
final String common = "refine:true, type:field, field:'foo_s', facet: { "
+ "x: 'debug(wrap,sum(bar_i))' "
+ (extraAgg ? ", y:'min(bar_i)'" : "")
+ (extraSubFacet ? ", z:{type:query, q:'bar_i:0'}" : "")
+ "}";
final String yz = (extraAgg ? "y:1, " : "") + (extraSubFacet ? "z:{count:0}, " : "");
// really basic: top 5 by (prelim_sort) count, (re)sorted by a stat
client.testJQ(params("q", "*:*", "rows", "0", "json.facet"
, "{ foo_a:{ "+ common+", limit:5, overrequest:0, "
+ " prelim_sort:'count desc', sort:'x asc' }"
+ " foo_b:{ "+ common+", limit:5, overrequest:0, "
+ " prelim_sort:'count asc', sort:'x desc' } }")
, "facets=={ 'count':420, "
+ " 'foo_a':{ 'buckets':["
+ " { val:foo_16, count:32, " + yz + "x:32.0},"
+ " { val:foo_17, count:34, " + yz + "x:34.0},"
+ " { val:foo_18, count:36, " + yz + "x:36.0},"
+ " { val:foo_19, count:38, " + yz + "x:38.0},"
+ " { val:foo_20, count:40, " + yz + "x:40.0},"
+ "] },"
+ " 'foo_b':{ 'buckets':["
+ " { val:foo_5, count:10, " + yz + "x:10.0},"
+ " { val:foo_4, count:8, " + yz + "x:8.0},"
+ " { val:foo_3, count:6, " + yz + "x:6.0},"
+ " { val:foo_2, count:4, " + yz + "x:4.0},"
+ " { val:foo_1, count:2, " + yz + "x:2.0},"
+ "] },"
+ "}"
);
// (re)sorting should prevent 'sum(bar_i)' from being computed for every doc
// only the chosen buckets should be collected (as a set) once per node...
assertEqualsAndReset(0, DebugAgg.Acc.collectDocs);
// 2 facets, 5 buckets, on each shard
assertEqualsAndReset(numShardsWithData * 2 * 5, DebugAgg.Acc.collectDocSets);
{ // same really basic top 5 by (prelim_sort) count, (re)sorted by a stat -- w/allBuckets:true
// check code paths with and w/o allBuckets
// NOTE: allBuckets includes stats, but not other sub-facets...
final String aout = "allBuckets:{ count:420, "+ (extraAgg ? "y:1, " : "") + "x:420.0 }";
client.testJQ(params("q", "*:*", "rows", "0", "json.facet"
, "{ foo_a:{ " + common+", allBuckets:true, limit:5, overrequest:0, "
+ " prelim_sort:'count desc', sort:'x asc' }"
+ " foo_b:{ " + common+", allBuckets:true, limit:5, overrequest:0, "
+ " prelim_sort:'count asc', sort:'x desc' } }")
, "facets=={ 'count':420, "
+ " 'foo_a':{ " + aout + " 'buckets':["
+ " { val:foo_16, count:32, " + yz + "x:32.0},"
+ " { val:foo_17, count:34, " + yz + "x:34.0},"
+ " { val:foo_18, count:36, " + yz + "x:36.0},"
+ " { val:foo_19, count:38, " + yz + "x:38.0},"
+ " { val:foo_20, count:40, " + yz + "x:40.0},"
+ "] },"
+ " 'foo_b':{ " + aout + " 'buckets':["
+ " { val:foo_5, count:10, " + yz + "x:10.0},"
+ " { val:foo_4, count:8, " + yz + "x:8.0},"
+ " { val:foo_3, count:6, " + yz + "x:6.0},"
+ " { val:foo_2, count:4, " + yz + "x:4.0},"
+ " { val:foo_1, count:2, " + yz + "x:2.0},"
+ "] },"
+ "}"
);
// because of allBuckets, we collect every doc on every shard (x2 facets) in a single "all" slot...
assertEqualsAndReset(2 * 420, DebugAgg.Acc.collectDocs);
// ... in addition to collecting each of the chosen buckets (as sets) once per node...
// 2 facets, 5 buckets, on each shard
assertEqualsAndReset(numShardsWithData * 2 * 5, DebugAgg.Acc.collectDocSets);
}
// pagination (with offset) should happen against the re-sorted list (up to the effective limit)
client.testJQ(params("q", "*:*", "rows", "0", "json.facet"
, "{ foo_a:{ "+common+", offset:2, limit:3, overrequest:0, "
+ " prelim_sort:'count desc', sort:'x asc' }"
+ " foo_b:{ "+common+", offset:2, limit:3, overrequest:0, "
+ " prelim_sort:'count asc', sort:'x desc' } }")
, "facets=={ 'count':420, "
+ " 'foo_a':{ 'buckets':["
+ " { val:foo_18, count:36, " + yz + "x:36.0},"
+ " { val:foo_19, count:38, " + yz + "x:38.0},"
+ " { val:foo_20, count:40, " + yz + "x:40.0},"
+ "] },"
+ " 'foo_b':{ 'buckets':["
+ " { val:foo_3, count:6, " + yz + "x:6.0},"
+ " { val:foo_2, count:4, " + yz + "x:4.0},"
+ " { val:foo_1, count:2, " + yz + "x:2.0},"
+ "] },"
+ "}"
);
assertEqualsAndReset(0, DebugAgg.Acc.collectDocs);
// 2 facets, 5 buckets (including offset), on each shard
assertEqualsAndReset(numShardsWithData * 2 * 5, DebugAgg.Acc.collectDocSets);
// when overrequesting is used, the full list of candidate buckets should be considered
client.testJQ(params("q", "*:*", "rows", "0", "json.facet"
, "{ foo_a:{ "+common+", limit:5, overrequest:5, "
+ " prelim_sort:'count desc', sort:'x asc' }"
+ " foo_b:{ "+common+", limit:5, overrequest:5, "
+ " prelim_sort:'count asc', sort:'x desc' } }")
, "facets=={ 'count':420, "
+ " 'foo_a':{ 'buckets':["
+ " { val:foo_11, count:22, " + yz + "x:22.0},"
+ " { val:foo_12, count:24, " + yz + "x:24.0},"
+ " { val:foo_13, count:26, " + yz + "x:26.0},"
+ " { val:foo_14, count:28, " + yz + "x:28.0},"
+ " { val:foo_15, count:30, " + yz + "x:30.0},"
+ "] },"
+ " 'foo_b':{ 'buckets':["
+ " { val:foo_10, count:20, " + yz + "x:20.0},"
+ " { val:foo_9, count:18, " + yz + "x:18.0},"
+ " { val:foo_8, count:16, " + yz + "x:16.0},"
+ " { val:foo_7, count:14, " + yz + "x:14.0},"
+ " { val:foo_6, count:12, " + yz + "x:12.0},"
+ "] },"
+ "}"
);
assertEqualsAndReset(0, DebugAgg.Acc.collectDocs);
// 2 facets, 10 buckets (including overrequest), on each shard
assertEqualsAndReset(numShardsWithData * 2 * 10, DebugAgg.Acc.collectDocSets);
{ // for an (effectively) unlimited facet, from the black box perspective of the client,
// preliminary sorting should be completely ignored...
final StringBuilder expected = new StringBuilder("facets=={ 'count':420, 'foo_a':{ 'buckets':[\n");
for (int i = 20; 0 < i; i--) {
final int x = i * 2;
expected.append("{ val:foo_"+i+", count:"+x+", " + yz + "x:"+x+".0},\n");
}
expected.append("] } }");
for (int limit : Arrays.asList(-1, 100000)) {
for (String sortOpts : Arrays.asList("sort:'x desc'",
"prelim_sort:'count asc', sort:'x desc'",
"prelim_sort:'index asc', sort:'x desc'")) {
final String snippet = "limit: " + limit + ", " + sortOpts;
client.testJQ(params("q", "*:*", "rows", "0", "json.facet"
, "{ foo_a:{ "+common+", " + snippet + "}}")
, expected.toString());
// the only difference from a white box perspective, is when/if we are
// optimized to use the sort SlotAcc during collection instead of the prelim_sort SlotAcc..
// (ie: sub facet preventing single pass (re)sort in single node mode)
if (((0 < limit || extraSubFacet) && snippet.contains("prelim_sort")) &&
! (indexSortDebugAggFudge && snippet.contains("index asc"))) {
// by-pass single pass collection, do everything as sets...
assertEqualsAndReset(snippet, numShardsWithData * 20, DebugAgg.Acc.collectDocSets);
assertEqualsAndReset(snippet, 0, DebugAgg.Acc.collectDocs);
} else { // simple sort on x, or optimized single pass (re)sort, or indexSortDebugAggFudge
// no sets should have been (post) collected for our stat
assertEqualsAndReset(snippet, 0, DebugAgg.Acc.collectDocSets);
// every doc should be collected...
assertEqualsAndReset(snippet, 420, DebugAgg.Acc.collectDocs);
}
}
}
}
// test all permutations of (prelim_sort | sort) on (index | count | stat) since there are
// custom sort codepaths for index & count that work differnetly then general stats
//
// NOTE: there's very little value in re-sorting by count/index after prelim_sort on something more complex,
// typically better to just ignore the prelim_sort, but we're testing it for completeness
// (and because you *might* want to prelim_sort by some function, for the purpose of "sampling" the
// top results and then (re)sorting by count/index)
for (String numSort : Arrays.asList("count", "x")) { // equivilent ordering
client.testJQ(params("q", "*:*", "rows", "0", "json.facet"
, "{ foo_a:{ "+common+", limit:10, overrequest:0, "
+ " prelim_sort:'"+numSort+" asc', sort:'index desc' }"
+ " foo_b:{ "+common+", limit:10, overrequest:0, "
+ " prelim_sort:'index asc', sort:'"+numSort+" desc' } }")
, "facets=={ 'count':420, "
+ " 'foo_a':{ 'buckets':["
+ " { val:foo_9, count:18, " + yz + "x:18.0},"
+ " { val:foo_8, count:16, " + yz + "x:16.0},"
+ " { val:foo_7, count:14, " + yz + "x:14.0},"
+ " { val:foo_6, count:12, " + yz + "x:12.0},"
+ " { val:foo_5, count:10, " + yz + "x:10.0},"
+ " { val:foo_4, count:8, " + yz + "x:8.0},"
+ " { val:foo_3, count:6, " + yz + "x:6.0},"
+ " { val:foo_2, count:4, " + yz + "x:4.0},"
+ " { val:foo_10, count:20, " + yz + "x:20.0},"
+ " { val:foo_1, count:2, " + yz + "x:2.0},"
+ "] },"
+ " 'foo_b':{ 'buckets':["
+ " { val:foo_18, count:36, " + yz + "x:36.0},"
+ " { val:foo_17, count:34, " + yz + "x:34.0},"
+ " { val:foo_16, count:32, " + yz + "x:32.0},"
+ " { val:foo_15, count:30, " + yz + "x:30.0},"
+ " { val:foo_14, count:28, " + yz + "x:28.0},"
+ " { val:foo_13, count:26, " + yz + "x:26.0},"
+ " { val:foo_12, count:24, " + yz + "x:24.0},"
+ " { val:foo_11, count:22, " + yz + "x:22.0},"
+ " { val:foo_10, count:20, " + yz + "x:20.0},"
+ " { val:foo_1, count:2, " + yz + "x:2.0},"
+ "] },"
+ "}"
);
// since these behave differently, defer DebugAgg counter checks until all are done...
}
// These 3 permutations defer the computation of x as docsets,
// so it's 3 x (10 buckets on each shard) (but 0 direct docs)
// prelim_sort:count, sort:index
// prelim_sort:index, sort:x
// prelim_sort:index, sort:count
// ...except when streaming, prelim_sort:index does no docsets.
assertEqualsAndReset((indexSortDebugAggFudge ? 1 : 3) * numShardsWithData * 10,
DebugAgg.Acc.collectDocSets);
// This is the only situation that should (always) result in every doc being collected (but 0 docsets)...
// prelim_sort:x, sort:index
// ...but the (2) prelim_sort:index streaming situations above will also cause all the docs in the first
// 10+1 buckets to be collected (enum checks limit+1 to know if there are "more")...
assertEqualsAndReset(420 + (indexSortDebugAggFudge ?
2 * numShardsWithData * (1+10+11+12+13+14+15+16+17+18+19) : 0),
DebugAgg.Acc.collectDocs);
// sanity check of prelim_sorting in a sub facet
client.testJQ(params("q", "*:*", "rows", "0", "json.facet"
, "{ bar:{ type:query, query:'foo_s:[foo_10 TO foo_19]', facet: {"
+ " foo:{ "+ common+", limit:5, overrequest:0, "
+ " prelim_sort:'count desc', sort:'x asc' } } } }")
, "facets=={ 'count':420, "
+ " 'bar':{ 'count':290, "
+ " 'foo':{ 'buckets':["
+ " { val:foo_15, count:30, " + yz + "x:30.0},"
+ " { val:foo_16, count:32, " + yz + "x:32.0},"
+ " { val:foo_17, count:34, " + yz + "x:34.0},"
+ " { val:foo_18, count:36, " + yz + "x:36.0},"
+ " { val:foo_19, count:38, " + yz + "x:38.0},"
+ " ] },"
+ " },"
+ "}"
);
// the prelim_sort should prevent 'sum(bar_i)' from being computed for every doc
// only the chosen buckets should be collected (as a set) once per node...
assertEqualsAndReset(0, DebugAgg.Acc.collectDocs);
// 5 buckets, on each shard
assertEqualsAndReset(numShardsWithData * 5, DebugAgg.Acc.collectDocSets);
{ // sanity check how deferred stats are handled
// here we'll prelim_sort & sort on things that are both "not x", using the debug() counters
// (wrapping x) to assert that 'x' is correctly deferred and only collected for the final top buckets
final List<String> sorts = new ArrayList<String>(Arrays.asList("index asc", "count asc"));
if (extraAgg) {
sorts.add("y asc"); // same for every bucket, but index order tie breaker should kick in
}
for (String s : sorts) {
client.testJQ(params("q", "*:*", "rows", "0", "json.facet"
, "{ foo:{ "+ common+", limit:5, overrequest:0, "
+ " prelim_sort:'count desc', sort:'"+s+"' } }")
, "facets=={ 'count':420, "
+ " 'foo':{ 'buckets':["
+ " { val:foo_16, count:32, " + yz + "x:32.0},"
+ " { val:foo_17, count:34, " + yz + "x:34.0},"
+ " { val:foo_18, count:36, " + yz + "x:36.0},"
+ " { val:foo_19, count:38, " + yz + "x:38.0},"
+ " { val:foo_20, count:40, " + yz + "x:40.0},"
+ "] } }"
);
// Neither prelim_sort nor sort should need 'sum(bar_i)' to be computed for every doc
// only the chosen buckets should be collected (as a set) once per node...
assertEqualsAndReset(0, DebugAgg.Acc.collectDocs);
// 5 buckets, on each shard
assertEqualsAndReset(numShardsWithData * 5, DebugAgg.Acc.collectDocSets);
}
}
}
@Test
public void testOverrequest() throws Exception {
initServers();
@ -2796,4 +3249,16 @@ public class TestJsonFacets extends SolrTestCaseHS {
hll.addRaw(987654321);
}
/** Atomically asserts that the actual AtomicLong value matches the expected value, and resets it to 0 */
private static final void assertEqualsAndReset(String msg, long expected, AtomicLong actual) {
final long current = actual.getAndSet(0);
assertEquals(msg, expected, current);
}
/** Atomically asserts that the actual AtomicLong value matches the expected value, and resets it to 0 */
private static final void assertEqualsAndReset(long expected, AtomicLong actual) {
final long current = actual.getAndSet(0);
assertEquals(expected, current);
}
}

View File

@ -24,6 +24,7 @@ import org.apache.lucene.queries.function.valuesource.IntFieldSource;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.request.SolrQueryRequest;
import org.junit.BeforeClass;
import static org.hamcrest.CoreMatchers.not;
import static org.hamcrest.core.IsInstanceOf.instanceOf;
import org.noggit.ObjectBuilder;
@ -36,6 +37,20 @@ public class TestJsonFacetsStatsParsing extends SolrTestCaseJ4 {
initCore("solrconfig-tlog.xml","schema15.xml");
}
public void testSortEquality() throws Exception {
assertEquals(new FacetRequest.FacetSort("count", FacetRequest.SortDirection.desc),
FacetRequest.FacetSort.COUNT_DESC);
assertEquals(new FacetRequest.FacetSort("index", FacetRequest.SortDirection.asc),
FacetRequest.FacetSort.INDEX_ASC);
assertEquals(new FacetRequest.FacetSort("foo", FacetRequest.SortDirection.asc),
new FacetRequest.FacetSort("foo", FacetRequest.SortDirection.asc));
// negative assertions...
assertThat(new FacetRequest.FacetSort("foo", FacetRequest.SortDirection.desc),
not(new FacetRequest.FacetSort("foo", FacetRequest.SortDirection.asc)));
assertThat(new FacetRequest.FacetSort("bar", FacetRequest.SortDirection.desc),
not(new FacetRequest.FacetSort("foo", FacetRequest.SortDirection.desc)));
}
public void testEquality() throws IOException {
try (SolrQueryRequest req = req("custom_req_param","foo_i",
"overridden_param","xxxxx_i")) {

View File

@ -218,7 +218,9 @@ json.facet={
|field |The field name to facet over.
|offset |Used for paging, this skips the first N buckets. Defaults to 0.
|limit |Limits the number of buckets returned. Defaults to 10.
|sort |Specifies how to sort the buckets produced. “count” specifies document count, “index” sorts by the index (natural) order of the bucket value. One can also sort by any <<json-facet-api.adoc#aggregation-functions,facet function / statistic>> that occurs in the bucket. The default is “count desc”. This parameter may also be specified in JSON like `sort:{count:desc}`. The sort order may either be “asc” or “desc”
|sort |Specifies how to sort the buckets produced.
“count” specifies document count, “index” sorts by the index (natural) order of the bucket value. One can also sort by any <<json-facet-api.adoc#aggregation-functions,facet function / statistic>> that occurs in the bucket. The default is “count desc”. This parameter may also be specified in JSON like `sort:{count:desc}`. The sort order may either be “asc” or “desc”
|overrequest a|
Number of buckets beyond the `limit` to internally request from shards during a distributed search.
@ -248,6 +250,7 @@ This parameter indicates the facet algorithm to use:
* "stream" Presently equivalent to "enum"
* "smart" Pick the best method for the field type (this is the default)
|prelim_sort |An optional parameter for specifying an approximation of the final `sort` to use during initial collection of top buckets when the <<json-facet-api.adoc#sorting-facets-by-nested-functions,`sort` param is very costly>>.
|===
== Query Facet
@ -532,13 +535,13 @@ By default "top authors" is defined by simple document count descending, but we
=== Sorting Facets By Nested Functions
The default sort for a field or terms facet is by bucket count descending. We can optionally sort ascending or descending by any facet function that appears in each bucket.
The default sort for a field or terms facet is by bucket count descending. We can optionally `sort` ascending or descending by any facet function that appears in each bucket.
[source,java]
----
{
categories:{
type : terms // terms facet creates a bucket for each indexed term in the field
type : terms, // terms facet creates a bucket for each indexed term in the field
field : cat,
sort : "x desc", // can also use sort:{x:desc}
facet : {
@ -549,6 +552,28 @@ The default sort for a field or terms facet is by bucket count descending. We ca
}
----
In some situations the desired `sort` may be an aggregation function that is very costly to compute for every bucket. A `prelim_sort` option can be used to specify an approximation of the `sort`, for initially ranking the buckets to determine the top candidates (based on the `limit` and `overrequest`). Only after the top candidate buckets have been refined will the actual `sort` be used.
[source,java]
----
{
categories:{
type : terms,
field : cat,
refine: true,
limit: 10,
overrequest: 100,
prelim_sort: "sales_rank desc",
sort : "prod_quality desc",
facet : {
prod_quality : "avg(div(prod(rating,sales_rank),prod(num_returns,price)))",
sales_rank : "sum(sales_rank)"
}
}
}
----
== Changing the Domain
As discussed above, facets compute buckets or statistics based on a "domain" which is typically implicit:
@ -805,6 +830,11 @@ When using the extended `type:func` syntax for specifying a `relatedness()` aggr
This can be particularly useful when using a descending sort on `relatedness()` with foreground and background queries that are disjoint, to ensure the "top buckets" are all relevant to both sets.
[TIP]
====
When sorting on `relatedness(...)`, requests can be processed much more quickly by adding a `prelim_sort: "count desc"` option. Increasing the `overrequest` can help improve the accuracy of the top buckets.
====
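As a rough sketch of that tip (the field name, `$fore`/`$back` parameters, and limits below are illustrative, not part of this commit), the initial ranking of candidate buckets uses cheap document counts, and `relatedness(...)` is only computed for the refined top buckets:

[source,java]
----
{
  related_terms : {
    type : terms,
    field : body_text,          // hypothetical text field
    limit : 10,
    overrequest : 100,          // a larger overrequest improves accuracy of the top buckets
    prelim_sort : "count desc", // cheap approximation used while collecting candidates
    sort : { r : desc },        // costly sort, only computed after refinement
    facet : {
      r : "relatedness($fore,$back)"
    }
  }
}
----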
=== Semantic Knowledge Graph Example
.Sample Documents