SOLR-3240: add "spellcheck.collateMaxCollectDocs" for estimating collation hit-counts.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1479638 13f79535-47bb-0310-9956-ffa450edef68
2013-05-06 16:49:46 +00:00 · 2013-05-06 16:49:46 +00:00 · 7141cb35a7
parent a57bc586e2
commit 7141cb35a7
8 changed files with 345 additions and 26 deletions
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@ -66,6 +66,12 @@ New Features
 * SOLR-4761: Add option to plugin a merged segment warmer into solrconfig.xml
  (Mark Miller, Mike McCandless, Robert Muir)

+* SOLR-3240: Add "spellcheck.collateMaxCollectDocs" option so that when testing
+  potential Collations against the index, SpellCheckComponent will only collect
+  n documents, thereby estimating the hit-count.  This is a performance optimization
+  in cases where exact hit-counts are unnecessary.  Also, when "collateExtendedResults"
+  is false, this optimization is always made (James Dyer).
+
 Bug Fixes
 ----------------------

--- a/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
@ -214,10 +214,20 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
    int maxCollationTries = params.getInt(SPELLCHECK_MAX_COLLATION_TRIES, 0);
    int maxCollationEvaluations = params.getInt(SPELLCHECK_MAX_COLLATION_EVALUATIONS, 10000);
    boolean collationExtendedResults = params.getBool(SPELLCHECK_COLLATE_EXTENDED_RESULTS, false);
+    int maxCollationCollectDocs = params.getInt(SPELLCHECK_COLLATE_MAX_COLLECT_DOCS, 0);
+    // If not reporting hits counts, don't bother collecting more than 1 document per try.
+    if (!collationExtendedResults) {
+      maxCollationCollectDocs = 1;
+    }
    boolean shard = params.getBool(ShardParams.IS_SHARD, false);
-
-    SpellCheckCollator collator = new SpellCheckCollator();
-    List<SpellCheckCollation> collations = collator.collate(spellingResult, q, rb, maxCollations, maxCollationTries, maxCollationEvaluations, suggestionsMayOverlap);
+    SpellCheckCollator collator = new SpellCheckCollator()
+        .setMaxCollations(maxCollations)
+        .setMaxCollationTries(maxCollationTries)
+        .setMaxCollationEvaluations(maxCollationEvaluations)
+        .setSuggestionsMayOverlap(suggestionsMayOverlap)
+        .setDocCollectionLimit(maxCollationCollectDocs)
+        .setReportHits(collationExtendedResults);
+    List<SpellCheckCollation> collations = collator.collate(spellingResult, q, rb);
    //by sorting here we guarantee a non-distributed request returns all 
    //results in the same order as a distributed request would,
    //even in cases when the internal rank is the same.
--- a/solr/core/src/java/org/apache/solr/search/EarlyTerminatingCollector.java
+++ b/solr/core/src/java/org/apache/solr/search/EarlyTerminatingCollector.java
@ -0,0 +1,85 @@
+package org.apache.solr.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.Scorer;
+/**
+ * <p>
+ *  A wrapper {@link Collector} that throws {@link EarlyTerminatingCollectorException}) 
+ *  once a specified maximum number of documents are collected.
+ * </p>
+ */
+public class EarlyTerminatingCollector extends Collector {
+  private int numCollected;
+  private int lastDocId = -1;
+  private int maxDocsToCollect;
+  private Collector delegate;
+  
+  /**
+   * <p>
+   *  Wraps a {@link Collector}, throwing {@link EarlyTerminatingCollectorException} 
+   *  once the specified maximum is reached.
+   * </p>
+   * @param delegate - the Collector to wrap.
+   * @param maxDocsToCollect - the maximum number of documents to Collect
+   * 
+   */
+  public EarlyTerminatingCollector(Collector delegate, int maxDocsToCollect) {
+    this.delegate = delegate;
+    this.maxDocsToCollect = maxDocsToCollect;
+  }
+
+  @Override
+  public boolean acceptsDocsOutOfOrder() {
+    return delegate.acceptsDocsOutOfOrder();
+  }
+
+  @Override
+  public void collect(int doc) throws IOException {
+    delegate.collect(doc);
+    lastDocId = doc;    
+    numCollected++;  
+    if(numCollected==maxDocsToCollect) {
+      throw new EarlyTerminatingCollectorException(numCollected, lastDocId);
+    }
+  }
+  @Override
+  public void setNextReader(AtomicReaderContext context) throws IOException {
+    delegate.setNextReader(context);    
+  }
+  @Override
+  public void setScorer(Scorer scorer) throws IOException {
+    delegate.setScorer(scorer);    
+  }
+  public int getNumCollected() {
+    return numCollected;
+  }
+  public void setNumCollected(int numCollected) {
+    this.numCollected = numCollected;
+  }
+  public int getLastDocId() {
+    return lastDocId;
+  }
+  public void setLastDocId(int lastDocId) {
+    this.lastDocId = lastDocId;
+  }
+}
--- a/solr/core/src/java/org/apache/solr/search/EarlyTerminatingCollectorException.java
+++ b/solr/core/src/java/org/apache/solr/search/EarlyTerminatingCollectorException.java
@ -0,0 +1,45 @@
+package org.apache.solr.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * Thrown by {@link EarlyTerminatingCollector} when the maximum to abort
+ * the scoring / collection process early, when the specified maximum number
+ * of documents were collected.
+ */
+public class EarlyTerminatingCollectorException extends RuntimeException {
+  private static final long serialVersionUID = 5939241340763428118L;  
+  private int lastDocId = -1;
+  private int numberCollected;
+  
+  public EarlyTerminatingCollectorException(int numberCollected, int lastDocId) {
+    this.numberCollected = numberCollected;
+    this.lastDocId = lastDocId;
+  }
+  public int getLastDocId() {
+    return lastDocId;
+  }
+  public void setLastDocId(int lastDocId) {
+    this.lastDocId = lastDocId;
+  }
+  public int getNumberCollected() {
+    return numberCollected;
+  }
+  public void setNumberCollected(int numberCollected) {
+    this.numberCollected = numberCollected;
+  }
+}
--- a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
+++ b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
@ -99,6 +99,7 @@ import org.apache.solr.request.UnInvertedField;
 import org.apache.solr.response.SolrQueryResponse;
 import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.schema.SchemaField;
+import org.apache.solr.spelling.QueryConverter;
 import org.apache.solr.update.SolrIndexConfig;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@ -1235,7 +1236,8 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
  public static final int GET_DOCSET            = 0x40000000;
  static final int NO_CHECK_FILTERCACHE  = 0x20000000;
  static final int NO_SET_QCACHE         = 0x10000000;
-
+  public static final int TERMINATE_EARLY = 0x04;
+  public static final int FORCE_INORDER_COLLECTION = 0x08;
  public static final int GET_DOCLIST           =        0x02; // get the documents actually returned in a response
  public static final int GET_SCORES             =       0x01;

@ -1394,7 +1396,8 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
    float[] scores;

    boolean needScores = (cmd.getFlags() & GET_SCORES) != 0;
-
+    boolean terminateEarly = (cmd.getFlags() & TERMINATE_EARLY) == TERMINATE_EARLY;
+    
    Query query = QueryUtils.makeQueryable(cmd.getQuery());

    ProcessedFilter pf = getProcessedFilter(cmd.getFilter(), cmd.getFilterList());
@ -1446,7 +1449,9 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
          }
        };
      }
-      
+      if (terminateEarly) {
+        collector = new EarlyTerminatingCollector(collector, cmd.len);
+      }
      if( timeAllowed > 0 ) {
        collector = new TimeLimitingCollector(collector, TimeLimitingCollector.getGlobalCounter(), timeAllowed);
      }
@ -1481,6 +1486,9 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
        topCollector = TopFieldCollector.create(weightSort(cmd.getSort()), len, false, needScores, needScores, true);
      }
      Collector collector = topCollector;
+      if (terminateEarly) {
+        collector = new EarlyTerminatingCollector(collector, cmd.len);
+      }
      if( timeAllowed > 0 ) {
        collector = new TimeLimitingCollector(collector, TimeLimitingCollector.getGlobalCounter(), timeAllowed);
      }
@ -1529,6 +1537,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
    DocSet set;

    boolean needScores = (cmd.getFlags() & GET_SCORES) != 0;
+    boolean terminateEarly = (cmd.getFlags() & TERMINATE_EARLY) == TERMINATE_EARLY;
    int maxDoc = maxDoc();
    int smallSetSize = maxDoc>>6;

@ -1568,7 +1577,9 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
           }
         });
       }
-
+       if (terminateEarly) {
+         collector = new EarlyTerminatingCollector(collector, cmd.len);
+       }
       if( timeAllowed > 0 ) {
         collector = new TimeLimitingCollector(collector, TimeLimitingCollector.getGlobalCounter(), timeAllowed);
       }
@ -1604,7 +1615,9 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn

      DocSetCollector setCollector = new DocSetDelegateCollector(maxDoc>>6, maxDoc, topCollector);
      Collector collector = setCollector;
-
+      if (terminateEarly) {
+        collector = new EarlyTerminatingCollector(collector, cmd.len);
+      }
      if( timeAllowed > 0 ) {
        collector = new TimeLimitingCollector(collector, TimeLimitingCollector.getGlobalCounter(), timeAllowed );
      }
--- a/solr/core/src/java/org/apache/solr/spelling/SpellCheckCollator.java
+++ b/solr/core/src/java/org/apache/solr/spelling/SpellCheckCollator.java
@ -22,7 +22,9 @@ import java.util.Iterator;
 import java.util.List;

 import org.apache.lucene.analysis.Token;
+import org.apache.lucene.index.IndexReader;
 import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.common.params.DisMaxParams;
 import org.apache.solr.common.params.GroupParams;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.params.SolrParams;
@ -33,15 +35,23 @@ import org.apache.solr.handler.component.ResponseBuilder;
 import org.apache.solr.handler.component.SearchComponent;
 import org.apache.solr.request.LocalSolrQueryRequest;
 import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.search.EarlyTerminatingCollectorException;
+import org.apache.solr.search.SolrIndexSearcher;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

 public class SpellCheckCollator {
  private static final Logger LOG = LoggerFactory.getLogger(SpellCheckCollator.class);
+  private int maxCollations = 1;
+  private int maxCollationTries = 0;
+  private int maxCollationEvaluations = 10000;
+  private boolean suggestionsMayOverlap = false;
+  private int docCollectionLimit = 0;
+  private boolean reportHits = true;

-  public List<SpellCheckCollation> collate(SpellingResult result, String originalQuery, ResponseBuilder ultimateResponse,
-                                           int maxCollations, int maxTries, int maxEvaluations, boolean suggestionsMayOverlap) {
-    List<SpellCheckCollation> collations = new ArrayList<SpellCheckCollation>();
+  public List<SpellCheckCollation> collate(SpellingResult result,
+      String originalQuery, ResponseBuilder ultimateResponse) {
+  List<SpellCheckCollation> collations = new ArrayList<SpellCheckCollation>();

    QueryComponent queryComponent = null;
    if (ultimateResponse.components != null) {
@ -54,6 +64,7 @@ public class SpellCheckCollator {
    }

    boolean verifyCandidateWithQuery = true;
+    int maxTries = maxCollationTries;
    int maxNumberToIterate = maxTries;
    if (maxTries < 1) {
      maxTries = 1;
@ -65,10 +76,17 @@ public class SpellCheckCollator {
      maxTries = 1;
      verifyCandidateWithQuery = false;
    }
+    docCollectionLimit = docCollectionLimit > 0 ? docCollectionLimit : 0;
+    int maxDocId = -1;
+    if (verifyCandidateWithQuery && docCollectionLimit > 0) {
+      IndexReader reader = ultimateResponse.req.getSearcher().getIndexReader();
+      maxDocId = reader.maxDoc();
+    }

    int tryNo = 0;
    int collNo = 0;
-    PossibilityIterator possibilityIter = new PossibilityIterator(result.getSuggestions(), maxNumberToIterate, maxEvaluations, suggestionsMayOverlap);
+    PossibilityIterator possibilityIter = new PossibilityIterator(result.getSuggestions(), 
+        maxNumberToIterate, maxCollationEvaluations, suggestionsMayOverlap);
    while (tryNo < maxTries && collNo < maxCollations && possibilityIter.hasNext()) {

      PossibilityIterator.RankedSpellPossibility possibility = possibilityIter.next();
@ -96,12 +114,25 @@ public class SpellCheckCollator {
        }
        params.set(CommonParams.Q, collationQueryStr);
        params.remove(CommonParams.START);
+        params.set(CommonParams.ROWS, "" + docCollectionLimit);
+        // we don't want any stored fields
        params.set(CommonParams.FL, "id");
-        params.set(CommonParams.ROWS, "0");
+        // we'll sort by doc id to ensure no scoring is done.
+        params.set(CommonParams.SORT, "_docid_ asc");
+        // If a dismax query, don't add unnecessary clauses for scoring
+        params.remove(DisMaxParams.TIE);
+        params.remove(DisMaxParams.PF);
+        params.remove(DisMaxParams.PF2);
+        params.remove(DisMaxParams.PF3);
+        params.remove(DisMaxParams.BQ);
+        params.remove(DisMaxParams.BF);
+        // Collate testing does not support Grouping (see SOLR-2577)
        params.remove(GroupParams.GROUP);

        // creating a request here... make sure to close it!
-        ResponseBuilder checkResponse = new ResponseBuilder(new LocalSolrQueryRequest(ultimateResponse.req.getCore(), params),new SolrQueryResponse(), Arrays.<SearchComponent>asList(queryComponent));
+        ResponseBuilder checkResponse = new ResponseBuilder(
+            new LocalSolrQueryRequest(ultimateResponse.req.getCore(), params),
+            new SolrQueryResponse(), Arrays.<SearchComponent> asList(queryComponent)); 
        checkResponse.setQparser(ultimateResponse.getQparser());
        checkResponse.setFilters(ultimateResponse.getFilters());
        checkResponse.setQueryString(collationQueryStr);
@ -109,8 +140,23 @@ public class SpellCheckCollator {

        try {
          queryComponent.prepare(checkResponse);
+          if (docCollectionLimit > 0) {
+            int f = checkResponse.getFieldFlags();
+            checkResponse.setFieldFlags(f |= SolrIndexSearcher.TERMINATE_EARLY);
+            if (reportHits) {
+              f = checkResponse.getFieldFlags();
+              checkResponse.setFieldFlags(f |= SolrIndexSearcher.FORCE_INORDER_COLLECTION);
+            }
+          }
          queryComponent.process(checkResponse);
          hits = (Integer) checkResponse.rsp.getToLog().get("hits");
+        } catch (EarlyTerminatingCollectorException etce) {
+          assert (docCollectionLimit > 0);
+          if (etce.getLastDocId() + 1 == maxDocId) {
+            hits = docCollectionLimit;
+          } else {
+            hits = maxDocId / ((etce.getLastDocId() + 1) / docCollectionLimit);
+          }
        } catch (Exception e) {
          LOG.warn("Exception trying to re-query to check if a spell check possibility would return any hits.", e);
        } finally {
@ -191,6 +237,31 @@ public class SpellCheckCollator {
      offset += corr.length() - oneForReqOrProhib - (tok.endOffset() - tok.startOffset());      
    }
    return collation.toString();
+  }  
+  public SpellCheckCollator setMaxCollations(int maxCollations) {
+    this.maxCollations = maxCollations;
+    return this;
+  }  
+  public SpellCheckCollator setMaxCollationTries(int maxCollationTries) {
+    this.maxCollationTries = maxCollationTries;
+    return this;
+  }  
+  public SpellCheckCollator setMaxCollationEvaluations(
+      int maxCollationEvaluations) {
+    this.maxCollationEvaluations = maxCollationEvaluations;
+    return this;
+  }  
+  public SpellCheckCollator setSuggestionsMayOverlap(
+      boolean suggestionsMayOverlap) {
+    this.suggestionsMayOverlap = suggestionsMayOverlap;
+    return this;
+  }  
+  public SpellCheckCollator setDocCollectionLimit(int docCollectionLimit) {
+    this.docCollectionLimit = docCollectionLimit;
+    return this;
+  }  
+  public SpellCheckCollator setReportHits(boolean reportHits) {
+    this.reportHits = reportHits;
+    return this;
  }
-
 }
--- a/solr/core/src/test/org/apache/solr/spelling/SpellCheckCollatorTest.java
+++ b/solr/core/src/test/org/apache/solr/spelling/SpellCheckCollatorTest.java
@ -42,21 +42,24 @@ import org.junit.Test;
 public class SpellCheckCollatorTest extends SolrTestCaseJ4 {
  @BeforeClass
  public static void beforeClass() throws Exception {
-     initCore("solrconfig-spellcheckcomponent.xml", "schema.xml");
-    assertNull(h.validateUpdate(adoc("id", "0", "lowerfilt", "faith hope and love")));
+    initCore("solrconfig-spellcheckcomponent.xml", "schema.xml");
+    assertNull(h.validateUpdate(adoc("id", "0", "lowerfilt", "faith hope and love", "teststop", "metanoia")));
    assertNull(h.validateUpdate(adoc("id", "1", "lowerfilt", "faith hope and loaves")));
    assertNull(h.validateUpdate(adoc("id", "2", "lowerfilt", "fat hops and loaves")));
-    assertNull(h.validateUpdate(adoc("id", "3", "lowerfilt", "faith of homer")));
+    assertNull(h.validateUpdate(adoc("id", "3", "lowerfilt", "faith of homer", "teststop", "metanoia")));
    assertNull(h.validateUpdate(adoc("id", "4", "lowerfilt", "fat of homer")));
    assertNull(h.validateUpdate(adoc("id", "5", "lowerfilt1", "peace")));
    assertNull(h.validateUpdate(adoc("id", "6", "lowerfilt", "hyphenated word")));
-     assertNull(h.validateUpdate(adoc("id", "7", "teststop", "Jane filled out a form at Charles De Gaulle")));
-     assertNull(h.validateUpdate(adoc("id", "8", "teststop", "Dick flew from Heathrow")));
-     assertNull(h.validateUpdate(adoc("id", "9", "teststop", "Jane is stuck in customs because Spot chewed up the form")));
-     assertNull(h.validateUpdate(adoc("id", "10", "teststop", "Once in Paris Dick built a fire on the hearth")));
-     assertNull(h.validateUpdate(adoc("id", "11", "teststop", "Dick waited for Jane as he watched the sparks flow upward")));
-     assertNull(h.validateUpdate(adoc("id", "12", "teststop", "This June parisian rendez-vous is ruined because of a customs snafu")));
-     assertNull(h.validateUpdate(adoc("id", "13", "teststop", "partisan political machine")));
+    assertNull(h.validateUpdate(adoc("id", "7", "teststop", "Jane filled out a form at Charles De Gaulle")));
+    assertNull(h.validateUpdate(adoc("id", "8", "teststop", "Dick flew from Heathrow")));
+    assertNull(h.validateUpdate(adoc("id", "9", "teststop", "Jane is stuck in customs because Spot chewed up the form")));
+    assertNull(h.validateUpdate(adoc("id", "10", "teststop", "Once in Paris Dick built a fire on the hearth")));
+    assertNull(h.validateUpdate(adoc("id", "11", "teststop", "Dick waited for Jane as he watched the sparks flow upward")));
+    assertNull(h.validateUpdate(adoc("id", "12", "teststop", "This June parisian rendez-vous is ruined because of a customs snafu")));
+    assertNull(h.validateUpdate(adoc("id", "13", "teststop", "partisan political machine", "teststop", "metanoia")));
+    assertNull(h.validateUpdate(adoc("id", "14", "teststop", "metanoia")));
+    assertNull(h.validateUpdate(adoc("id", "15", "teststop", "metanoia")));
+    assertNull(h.validateUpdate(adoc("id", "16", "teststop", "metanoia")));
    assertNull(h.validateUpdate(commit()));
  }

@ -430,4 +433,71 @@ public class SpellCheckCollatorTest extends SolrTestCaseJ4 {
      );
    }
  }
+  @Test
+  public void testEstimatedHitCounts() throws Exception {
+   assertQ(
+        req(
+          SpellCheckComponent.COMPONENT_NAME, "true",
+          SpellCheckComponent.SPELLCHECK_DICT, "direct",
+          SpellingParams.SPELLCHECK_COUNT, "1",   
+          SpellingParams.SPELLCHECK_COLLATE, "true",
+          SpellingParams.SPELLCHECK_MAX_COLLATION_TRIES, "1",
+          SpellingParams.SPELLCHECK_MAX_COLLATIONS, "1",
+          SpellingParams.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true",          
+          "qt", "spellCheckCompRH",          
+          CommonParams.Q, "teststop:metnoia"
+        ),
+        "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='collation']/str[@name='collationQuery']='teststop:metanoia'",
+        "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='collation']/int[@name='hits']=6"        
+      );
+    assertQ(
+        req(
+          SpellCheckComponent.COMPONENT_NAME, "true",
+          SpellCheckComponent.SPELLCHECK_DICT, "direct",
+          SpellingParams.SPELLCHECK_COUNT, "1",   
+          SpellingParams.SPELLCHECK_COLLATE, "true",
+          SpellingParams.SPELLCHECK_MAX_COLLATION_TRIES, "1",
+          SpellingParams.SPELLCHECK_MAX_COLLATIONS, "1",
+          SpellingParams.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true",
+          SpellingParams.SPELLCHECK_COLLATE_MAX_COLLECT_DOCS, "1",
+          "qt", "spellCheckCompRH",          
+          CommonParams.Q, "teststop:metnoia"
+        ),
+        "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='collation']/str[@name='collationQuery']='teststop:metanoia'",
+        "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='collation']/int[@name='hits']=17"        
+      );
+    assertQ(
+        req(
+          SpellCheckComponent.COMPONENT_NAME, "true",
+          SpellCheckComponent.SPELLCHECK_DICT, "direct",
+          SpellingParams.SPELLCHECK_COUNT, "1",   
+          SpellingParams.SPELLCHECK_COLLATE, "true",
+          SpellingParams.SPELLCHECK_MAX_COLLATION_TRIES, "1",
+          SpellingParams.SPELLCHECK_MAX_COLLATIONS, "1",
+          SpellingParams.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true",
+          SpellingParams.SPELLCHECK_COLLATE_MAX_COLLECT_DOCS, "3",
+          "qt", "spellCheckCompRH",          
+          CommonParams.Q, "teststop:metnoia"
+        ),
+        "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='collation']/str[@name='collationQuery']='teststop:metanoia'",
+        "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='collation']/int[@name='hits']=4"        
+      );
+    assertQ(
+        req(
+          SpellCheckComponent.COMPONENT_NAME, "true",
+          SpellCheckComponent.SPELLCHECK_DICT, "direct",
+          SpellingParams.SPELLCHECK_COUNT, "1",   
+          SpellingParams.SPELLCHECK_COLLATE, "true",
+          SpellingParams.SPELLCHECK_MAX_COLLATION_TRIES, "1",
+          SpellingParams.SPELLCHECK_MAX_COLLATIONS, "1",
+          SpellingParams.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true",
+          SpellingParams.SPELLCHECK_COLLATE_MAX_COLLECT_DOCS, "100",
+          "qt", "spellCheckCompRH",          
+          CommonParams.Q, "teststop:metnoia"
+        ),
+        "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='collation']/str[@name='collationQuery']='teststop:metanoia'",
+        "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='collation']/int[@name='hits']=6"        
+      );
+  }  
+  
 }
--- a/solr/solrj/src/java/org/apache/solr/common/params/SpellingParams.java
+++ b/solr/solrj/src/java/org/apache/solr/common/params/SpellingParams.java
@ -129,13 +129,32 @@ public interface SpellingParams {
   * </p>
   */
  public static final String SPELLCHECK_MAX_COLLATION_EVALUATIONS = SPELLCHECK_PREFIX + "maxCollationEvaluations";
-  
+  /**
+   * <p>
+   * For use with {@link SpellingParams#SPELLCHECK_MAX_COLLATION_TRIES} and 
+   * {@link SpellingParams#SPELLCHECK_COLLATE_EXTENDED_RESULTS}.
+   * A performance optimization in cases where the exact number of hits a collation would return is not needed.  
+   * Specify "0" to return the exact # of hits, otherwise give the maximum documents Lucene should collect 
+   * with which to base an estimate.  The higher the value the more likely the estimates will be accurate 
+   * (at expense of performance). 
+   * </p>
+   * 
+   * <p>
+   * The default is 0 (report exact hit-counts) when {@link SpellingParams#SPELLCHECK_COLLATE_EXTENDED_RESULTS} is TRUE.
+   * When {@link SpellingParams#SPELLCHECK_COLLATE_EXTENDED_RESULTS} is FALSE, this optimization is always performed.
+   * </p>
+   */
+  public static final String SPELLCHECK_COLLATE_MAX_COLLECT_DOCS = SPELLCHECK_PREFIX + "collateMaxCollectDocs";
  /**
   * <p>
   * Whether to use the Extended Results Format for collations. 
   * Includes "before>after" pairs to easily allow clients to generate messages like "no results for PORK.  did you mean POLK?"
   * Also indicates the # of hits each collation will return on re-query.  Default=false, which retains 1.4-compatible output.
   * </p>
+   * <p>
+   * Note: that if {@link SpellingParams#SPELLCHECK_COLLATE_MAX_COLLECT_DOCS} is set to a value greater than 0, 
+   * then the hit counts returned by this will be estimated.
+   * </p>
   */
  public static final String SPELLCHECK_COLLATE_EXTENDED_RESULTS = SPELLCHECK_PREFIX + "collateExtendedResults";