LUCENE-3102: add no-wrap ability to CachingCollector

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1124379 13f79535-47bb-0310-9956-ffa450edef68
2011-05-18 18:51:39 +00:00 · 2011-05-18 18:51:39 +00:00 · fa5da66404
parent 731e619a7c
commit fa5da66404
4 changed files with 91 additions and 9 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -412,7 +412,7 @@ New features
  bytes in RAM. (Mike McCandless)

 * LUCENE-1421, LUCENE-3102: added CachingCollector which allow you to cache 
-  document IDs and scores encountered during the search, and "reply" them to 
+  document IDs and scores encountered during the search, and "replay" them to 
  another Collector. (Mike McCandless, Shai Erera)

 Optimizations
--- a/lucene/src/java/org/apache/lucene/search/CachingCollector.java
+++ b/lucene/src/java/org/apache/lucene/search/CachingCollector.java
@ -308,6 +308,48 @@ public abstract class CachingCollector extends Collector {
  protected int base;
  protected int lastDocBase;
  
+  /**
+   * Creates a {@link CachingCollector} which does not wrap another collector.
+   * The cached documents and scores can later be {@link #replay(Collector)
+   * replayed}.
+   * 
+   * @param acceptDocsOutOfOrder
+   *          whether documents are allowed to be collected out-of-order
+   */
+  public static CachingCollector create(final boolean acceptDocsOutOfOrder, boolean cacheScores, double maxRAMMB) {
+    Collector other = new Collector() {
+      @Override
+      public boolean acceptsDocsOutOfOrder() {
+        return acceptDocsOutOfOrder;
+      }
+      
+      @Override
+      public void setScorer(Scorer scorer) throws IOException {}
+
+      @Override
+      public void collect(int doc) throws IOException {}
+
+      @Override
+      public void setNextReader(AtomicReaderContext context) throws IOException {}
+
+    };
+    return create(other, cacheScores, maxRAMMB);
+  }
+
+  /**
+   * Create a new {@link CachingCollector} that wraps the given collector and
+   * caches documents and scores up to the specified RAM threshold.
+   * 
+   * @param other
+   *          the Collector to wrap and delegate calls to.
+   * @param cacheScores
+   *          whether to cache scores in addition to document IDs. Note that
+   *          this increases the RAM consumed per doc
+   * @param maxRAMMB
+   *          the maximum RAM in MB to consume for caching the documents and
+   *          scores. If the collector exceeds the threshold, no documents and
+   *          scores are cached.
+   */
  public static CachingCollector create(Collector other, boolean cacheScores, double maxRAMMB) {
    return cacheScores ? new ScoreCachingCollector(other, maxRAMMB) : new NoScoreCachingCollector(other, maxRAMMB);
    }
--- a/lucene/src/test/org/apache/lucene/search/TestCachingCollector.java
+++ b/lucene/src/test/org/apache/lucene/search/TestCachingCollector.java
@ -171,5 +171,18 @@ public class TestCachingCollector extends LuceneTestCase {
      assertFalse(cc.isCached());
    }
  }
+
+  public void testNoWrappedCollector() throws Exception {
+    for (boolean cacheScores : new boolean[] { false, true }) {
+      // create w/ null wrapped collector, and test that the methods work
+      CachingCollector cc = CachingCollector.create(true, cacheScores, 50 * ONE_BYTE);
+      cc.setNextReader(null);
+      cc.setScorer(new MockScorer());
+      cc.collect(0);
+      
+      assertTrue(cc.isCached());
+      cc.replay(new NoOpCollector(true));
+    }
+  }
  
 }
--- a/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java
+++ b/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java
@ -445,27 +445,54 @@ public class TestGrouping extends LuceneTestCase {
        final FirstPassGroupingCollector c1 = new FirstPassGroupingCollector("group", groupSort, groupOffset+topNGroups);
        final CachingCollector cCache;
        final Collector c;
+        
+        final boolean useWrappingCollector = random.nextBoolean();
+        
        if (doCache) {
          final double maxCacheMB = random.nextDouble();
          if (VERBOSE) {
            System.out.println("TEST: maxCacheMB=" + maxCacheMB);
          }

-          if (doAllGroups) {
-            cCache = CachingCollector.create(c1, true, maxCacheMB);
-            c = MultiCollector.wrap(cCache, allGroupsCollector);
+          if (useWrappingCollector) {
+            if (doAllGroups) {
+              cCache = CachingCollector.create(c1, true, maxCacheMB);              
+              c = MultiCollector.wrap(cCache, allGroupsCollector);
+            } else {
+              c = cCache = CachingCollector.create(c1, true, maxCacheMB);              
+            }
          } else {
-            c = cCache = CachingCollector.create(c1, true, maxCacheMB);
+            // Collect only into cache, then replay multiple times:
+            c = cCache = CachingCollector.create(false, true, maxCacheMB);
          }
-        } else if (doAllGroups) {
-          c = MultiCollector.wrap(c1, allGroupsCollector);
-          cCache = null;
        } else {
-          c = c1;
          cCache = null;
+          if (doAllGroups) {
+            c = MultiCollector.wrap(c1, allGroupsCollector);
+          } else {
+            c = c1;
+          }
        }
+        
        s.search(new TermQuery(new Term("content", searchTerm)), c);

+        if (doCache && !useWrappingCollector) {
+          if (cCache.isCached()) {
+            // Replay for first-pass grouping
+            cCache.replay(c1);
+            if (doAllGroups) {
+              // Replay for all groups:
+              cCache.replay(allGroupsCollector);
+            }
+          } else {
+            // Replay by re-running search:
+            s.search(new TermQuery(new Term("content", searchTerm)), c1);
+            if (doAllGroups) {
+              s.search(new TermQuery(new Term("content", searchTerm)), allGroupsCollector);
+            }
+          }
+        }
+
        final Collection<SearchGroup> topGroups = c1.getTopGroups(groupOffset, fillFields);
        final TopGroups groupsResult;