LUCENE-3102: add no-wrap ability to CachingCollector

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1124379 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Shai Erera 2011-05-18 18:51:39 +00:00
parent 731e619a7c
commit fa5da66404
4 changed files with 91 additions and 9 deletions

View File

@@ -412,7 +412,7 @@ New features
bytes in RAM. (Mike McCandless)
* LUCENE-1421, LUCENE-3102: added CachingCollector which allow you to cache
document IDs and scores encountered during the search, and "reply" them to
document IDs and scores encountered during the search, and "replay" them to
another Collector. (Mike McCandless, Shai Erera)
Optimizations

View File

@@ -308,6 +308,48 @@ public abstract class CachingCollector extends Collector {
protected int base;
protected int lastDocBase;
/**
 * Creates a {@link CachingCollector} which does not wrap another collector.
 * The cached documents and scores can later be {@link #replay(Collector)
 * replayed}.
 *
 * @param acceptDocsOutOfOrder
 *          whether documents are allowed to be collected out-of-order
 * @param cacheScores
 *          whether to cache scores in addition to document IDs
 * @param maxRAMMB
 *          the maximum RAM in MB to consume for caching the documents and
 *          scores; presumably once the threshold is exceeded caching stops —
 *          see {@link #create(Collector, boolean, double)} for the documented
 *          contract
 */
public static CachingCollector create(final boolean acceptDocsOutOfOrder, boolean cacheScores, double maxRAMMB) {
  // A no-op delegate: it only answers acceptsDocsOutOfOrder() per the caller's
  // flag and ignores every other event, so the CachingCollector caches without
  // forwarding to a real collector.
  Collector other = new Collector() {
    @Override
    public boolean acceptsDocsOutOfOrder() {
      return acceptDocsOutOfOrder;
    }
    @Override
    public void setScorer(Scorer scorer) throws IOException {}
    @Override
    public void collect(int doc) throws IOException {}
    @Override
    public void setNextReader(AtomicReaderContext context) throws IOException {}
  };
  // Delegate to the wrapping factory with the no-op collector.
  return create(other, cacheScores, maxRAMMB);
}
/**
 * Create a new {@link CachingCollector} that wraps the given collector and
 * caches documents and scores up to the specified RAM threshold.
 *
 * @param other
 *          the Collector to wrap and delegate calls to.
 * @param cacheScores
 *          whether to cache scores in addition to document IDs. Note that
 *          this increases the RAM consumed per doc
 * @param maxRAMMB
 *          the maximum RAM in MB to consume for caching the documents and
 *          scores. If the collector exceeds the threshold, no documents and
 *          scores are cached.
 */
public static CachingCollector create(Collector other, boolean cacheScores, double maxRAMMB) {
  // Pick the concrete implementation based on whether scores must be cached
  // alongside the document IDs.
  if (cacheScores) {
    return new ScoreCachingCollector(other, maxRAMMB);
  }
  return new NoScoreCachingCollector(other, maxRAMMB);
}

View File

@@ -172,4 +172,17 @@ public class TestCachingCollector extends LuceneTestCase {
}
}
/**
 * Verifies that a {@link CachingCollector} created without a wrapped
 * collector still accepts the full Collector API, caches the hits, and can
 * replay them into another collector.
 */
public void testNoWrappedCollector() throws Exception {
  final boolean[] scoreModes = { false, true };
  for (final boolean cacheScores : scoreModes) {
    // Create with no delegate collector and exercise every Collector method.
    final CachingCollector collector = CachingCollector.create(true, cacheScores, 50 * ONE_BYTE);
    collector.setNextReader(null);
    collector.setScorer(new MockScorer());
    collector.collect(0);
    assertTrue(collector.isCached());
    collector.replay(new NoOpCollector(true));
  }
}
}

View File

@@ -445,27 +445,54 @@ public class TestGrouping extends LuceneTestCase {
final FirstPassGroupingCollector c1 = new FirstPassGroupingCollector("group", groupSort, groupOffset+topNGroups);
final CachingCollector cCache;
final Collector c;
final boolean useWrappingCollector = random.nextBoolean();
if (doCache) {
final double maxCacheMB = random.nextDouble();
if (VERBOSE) {
System.out.println("TEST: maxCacheMB=" + maxCacheMB);
}
if (doAllGroups) {
cCache = CachingCollector.create(c1, true, maxCacheMB);
c = MultiCollector.wrap(cCache, allGroupsCollector);
if (useWrappingCollector) {
if (doAllGroups) {
cCache = CachingCollector.create(c1, true, maxCacheMB);
c = MultiCollector.wrap(cCache, allGroupsCollector);
} else {
c = cCache = CachingCollector.create(c1, true, maxCacheMB);
}
} else {
c = cCache = CachingCollector.create(c1, true, maxCacheMB);
// Collect only into cache, then replay multiple times:
c = cCache = CachingCollector.create(false, true, maxCacheMB);
}
} else if (doAllGroups) {
c = MultiCollector.wrap(c1, allGroupsCollector);
cCache = null;
} else {
c = c1;
cCache = null;
if (doAllGroups) {
c = MultiCollector.wrap(c1, allGroupsCollector);
} else {
c = c1;
}
}
s.search(new TermQuery(new Term("content", searchTerm)), c);
if (doCache && !useWrappingCollector) {
if (cCache.isCached()) {
// Replay for first-pass grouping
cCache.replay(c1);
if (doAllGroups) {
// Replay for all groups:
cCache.replay(allGroupsCollector);
}
} else {
// Replay by re-running search:
s.search(new TermQuery(new Term("content", searchTerm)), c1);
if (doAllGroups) {
s.search(new TermQuery(new Term("content", searchTerm)), allGroupsCollector);
}
}
}
final Collection<SearchGroup> topGroups = c1.getTopGroups(groupOffset, fillFields);
final TopGroups groupsResult;