SOLR-1904: use HashDocSet.exists in facet.enum

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@941029 13f79535-47bb-0310-9956-ffa450edef68
2010-05-04 20:39:38 +00:00 · 2010-05-04 20:39:38 +00:00 · 26f4756511
parent 49631091d6
commit 26f4756511
2 changed files with 12 additions and 1 deletions
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@ -167,6 +167,9 @@ Optimizations
  the "threads" local param on the facet.field param.  This algorithm will
  only be faster in the presence of rapid index changes.  (yonik)

+* SOLR-1904: When facet.enum.cache.minDf > 0 and the base doc set is a
+  SortedIntDocSet, convert to HashDocSet for better performance. (yonik)
+
 Bug Fixes
 ----------------------

--- a/solr/src/java/org/apache/solr/request/SimpleFacets.java
+++ b/solr/src/java/org/apache/solr/request/SimpleFacets.java
@ -493,6 +493,14 @@ public class SimpleFacets {
    // Minimum term docFreq in order to use the filterCache for that term.
    int minDfFilterCache = params.getFieldInt(field, FacetParams.FACET_ENUM_CACHE_MINDF, 0);

+    // make sure we have a set that is fast for random access, if we will use it for that
+    DocSet fastForRandomSet = docs;
+    if (minDfFilterCache>0 && docs instanceof SortedIntDocSet) {
+      SortedIntDocSet sset = (SortedIntDocSet)docs;
+      fastForRandomSet = new HashDocSet(sset.getDocs(), 0, sset.size());
+    }
+
+
    IndexSchema schema = searcher.getSchema();
    IndexReader r = searcher.getReader();
    FieldType ft = schema.getFieldType(field);
@ -576,7 +584,7 @@ public class SimpleFacets {
              int[] docArr = bulk.docs.ints;  // this might be movable outside the loop, but perhaps not worth the risk.
              int end = bulk.docs.offset + nDocs;
              for (int i=bulk.docs.offset; i<end; i++) {
-                if (docs.exists(docArr[i])) c++;
+                if (fastForRandomSet.exists(docArr[i])) c++;
              }
            }
          }