mirror of https://github.com/apache/lucene.git

LUCENE-3102: add no-wrap ability to CachingCollector

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1124379 13f79535-47bb-0310-9956-ffa450edef68

parent 731e619a7c
commit fa5da66404
CHANGES.txt:

@@ -412,7 +412,7 @@ New features
   bytes in RAM.  (Mike McCandless)
 
 * LUCENE-1421, LUCENE-3102: added CachingCollector which allow you to cache
-  document IDs and scores encountered during the search, and "reply" them to
+  document IDs and scores encountered during the search, and "replay" them to
   another Collector.  (Mike McCandless, Shai Erera)
 
 Optimizations
CachingCollector.java:

@@ -308,6 +308,48 @@ public abstract class CachingCollector extends Collector {
   protected int base;
   protected int lastDocBase;
 
+  /**
+   * Creates a {@link CachingCollector} which does not wrap another collector.
+   * The cached documents and scores can later be {@link #replay(Collector)
+   * replayed}.
+   *
+   * @param acceptDocsOutOfOrder
+   *          whether documents are allowed to be collected out-of-order
+   */
+  public static CachingCollector create(final boolean acceptDocsOutOfOrder, boolean cacheScores, double maxRAMMB) {
+    Collector other = new Collector() {
+      @Override
+      public boolean acceptsDocsOutOfOrder() {
+        return acceptDocsOutOfOrder;
+      }
+
+      @Override
+      public void setScorer(Scorer scorer) throws IOException {}
+
+      @Override
+      public void collect(int doc) throws IOException {}
+
+      @Override
+      public void setNextReader(AtomicReaderContext context) throws IOException {}
+
+    };
+    return create(other, cacheScores, maxRAMMB);
+  }
+
+  /**
+   * Create a new {@link CachingCollector} that wraps the given collector and
+   * caches documents and scores up to the specified RAM threshold.
+   *
+   * @param other
+   *          the Collector to wrap and delegate calls to.
+   * @param cacheScores
+   *          whether to cache scores in addition to document IDs. Note that
+   *          this increases the RAM consumed per doc
+   * @param maxRAMMB
+   *          the maximum RAM in MB to consume for caching the documents and
+   *          scores. If the collector exceeds the threshold, no documents and
+   *          scores are cached.
+   */
   public static CachingCollector create(Collector other, boolean cacheScores, double maxRAMMB) {
     return cacheScores ? new ScoreCachingCollector(other, maxRAMMB) : new NoScoreCachingCollector(other, maxRAMMB);
   }
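The no-wrap factory above lets a search run purely to fill the cache, deferring the decision of where the hits go until afterwards. A minimal usage sketch, not part of the commit: the class name, `searcher`, `query`, and the 16 MB budget are illustrative, and `acceptDocsOutOfOrder=false` keeps the cached hits in docID order so they can be replayed into an in-order collector.

import java.io.IOException;

import org.apache.lucene.search.CachingCollector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopScoreDocCollector;

public class CacheThenReplay {
  // Collect once into an unwrapped cache, then decide where the hits go.
  static void cacheThenReplay(IndexSearcher searcher, Query query) throws IOException {
    // acceptDocsOutOfOrder=false, cacheScores=true, 16 MB RAM budget
    CachingCollector cache = CachingCollector.create(false, true, 16.0);
    searcher.search(query, cache);

    TopScoreDocCollector top = TopScoreDocCollector.create(10, true);
    if (cache.isCached()) {
      // Hits fit under maxRAMMB: replay them without touching the index.
      cache.replay(top);
    } else {
      // Cache overflowed and was discarded: re-run the search instead.
      searcher.search(query, top);
    }
  }
}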
TestCachingCollector.java:

@@ -171,5 +171,18 @@ public class TestCachingCollector extends LuceneTestCase {
       assertFalse(cc.isCached());
     }
   }
+
+  public void testNoWrappedCollector() throws Exception {
+    for (boolean cacheScores : new boolean[] { false, true }) {
+      // create w/ null wrapped collector, and test that the methods work
+      CachingCollector cc = CachingCollector.create(true, cacheScores, 50 * ONE_BYTE);
+      cc.setNextReader(null);
+      cc.setScorer(new MockScorer());
+      cc.collect(0);
+
+      assertTrue(cc.isCached());
+      cc.replay(new NoOpCollector(true));
+    }
+  }
 
 }
TestGrouping.java:

@@ -445,27 +445,54 @@ public class TestGrouping extends LuceneTestCase {
       final FirstPassGroupingCollector c1 = new FirstPassGroupingCollector("group", groupSort, groupOffset+topNGroups);
       final CachingCollector cCache;
       final Collector c;
+
+      final boolean useWrappingCollector = random.nextBoolean();
+
       if (doCache) {
         final double maxCacheMB = random.nextDouble();
         if (VERBOSE) {
           System.out.println("TEST: maxCacheMB=" + maxCacheMB);
         }
 
-        if (doAllGroups) {
-          cCache = CachingCollector.create(c1, true, maxCacheMB);
-          c = MultiCollector.wrap(cCache, allGroupsCollector);
+        if (useWrappingCollector) {
+          if (doAllGroups) {
+            cCache = CachingCollector.create(c1, true, maxCacheMB);
+            c = MultiCollector.wrap(cCache, allGroupsCollector);
+          } else {
+            c = cCache = CachingCollector.create(c1, true, maxCacheMB);
+          }
         } else {
-          c = cCache = CachingCollector.create(c1, true, maxCacheMB);
+          // Collect only into cache, then replay multiple times:
+          c = cCache = CachingCollector.create(false, true, maxCacheMB);
         }
-      } else if (doAllGroups) {
-        c = MultiCollector.wrap(c1, allGroupsCollector);
-        cCache = null;
       } else {
-        c = c1;
         cCache = null;
+        if (doAllGroups) {
+          c = MultiCollector.wrap(c1, allGroupsCollector);
+        } else {
+          c = c1;
+        }
       }
 
       s.search(new TermQuery(new Term("content", searchTerm)), c);
 
+      if (doCache && !useWrappingCollector) {
+        if (cCache.isCached()) {
+          // Replay for first-pass grouping
+          cCache.replay(c1);
+          if (doAllGroups) {
+            // Replay for all groups:
+            cCache.replay(allGroupsCollector);
+          }
+        } else {
+          // Replay by re-running search:
+          s.search(new TermQuery(new Term("content", searchTerm)), c1);
+          if (doAllGroups) {
+            s.search(new TermQuery(new Term("content", searchTerm)), allGroupsCollector);
+          }
+        }
+      }
+
       final Collection<SearchGroup> topGroups = c1.getTopGroups(groupOffset, fillFields);
       final TopGroups groupsResult;
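Note the `CachingCollector.create(false, true, maxCacheMB)` call in the non-wrapping branch above: passing `false` for `acceptDocsOutOfOrder` forces in-order collection, so the cached hits can safely be replayed into order-sensitive collectors such as FirstPassGroupingCollector. Condensed to its essentials, the fan-out pattern the test exercises looks like the following sketch; it is not commit code, and the method and parameter names are placeholders.

import java.io.IOException;

import org.apache.lucene.search.CachingCollector;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;

public class ReplayFanOut {
  // One search feeds several consumers, with a fallback when the
  // RAM budget was exceeded and nothing could be cached.
  static void searchOnceReplayMany(IndexSearcher searcher, Query query,
                                   Collector firstPass, Collector allGroups,
                                   double maxCacheMB) throws IOException {
    CachingCollector cache = CachingCollector.create(false, true, maxCacheMB);
    searcher.search(query, cache);

    if (cache.isCached()) {
      cache.replay(firstPass);   // replay is repeatable...
      cache.replay(allGroups);   // ...each consumer sees every cached hit
    } else {
      // Nothing was cached: run the query once per consumer instead.
      searcher.search(query, firstPass);
      searcher.search(query, allGroups);
    }
  }
}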