mirror of https://github.com/apache/lucene.git
LUCENE-1911: Add a new DocIdSet.isCacheable() method that defaults to false, but returns true for DocIdSet impls that are efficient and use no disk I/O during iteration (OpenBitSet, SortedVIntList, DocIdBitSet, EMPTY_DOCIDSET, some FieldCache filter impls). CachingWrapperFilter now copies all DocIdSets that are not cacheable into an OpenBitSetDISI for caching.
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@816154 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent dac29b3f49
commit 8934cbd804
CHANGES.txt
@@ -434,6 +434,14 @@ API Changes
   NativeFSLockFactory, we strongly recommend not to mix deprecated
   and new API. (Uwe Schindler, Mike McCandless)
 
+* LUCENE-1911: Added a new method isCacheable() to DocIdSet. This method
+  should return true, if the underlying implementation does not use disk
+  I/O and is fast enough to be directly cached by CachingWrapperFilter.
+  OpenBitSet, SortedVIntList, and DocIdBitSet are such candidates.
+  The default implementation of the abstract DocIdSet class returns false.
+  In this case, CachingWrapperFilter copies the DocIdSetIterator into an
+  OpenBitSet for caching. (Uwe Schindler, Thomas Becker)
+
 Bug fixes
 
 * LUCENE-1415: MultiPhraseQuery has incorrect hashCode() and equals()
CachingWrapperFilter.java
@@ -19,6 +19,7 @@ package org.apache.lucene.search;
 
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.util.DocIdBitSet;
+import org.apache.lucene.util.OpenBitSetDISI;
 import java.util.BitSet;
 import java.util.WeakHashMap;
 import java.util.Map;
@@ -75,10 +76,20 @@ public class CachingWrapperFilter extends Filter {
 
   /** Provide the DocIdSet to be cached, using the DocIdSet provided
    *  by the wrapped Filter.
-   *  This implementation returns the given DocIdSet.
+   *  <p>This implementation returns the given {@link DocIdSet}, if {@link DocIdSet#isCacheable}
+   *  returns <code>true</code>, else it copies the {@link DocIdSetIterator} into
+   *  an {@link OpenBitSetDISI}.
    */
-  protected DocIdSet docIdSetToCache(DocIdSet docIdSet, IndexReader reader) {
-    return docIdSet;
+  protected DocIdSet docIdSetToCache(DocIdSet docIdSet, IndexReader reader) throws IOException {
+    if (docIdSet.isCacheable()) {
+      return docIdSet;
+    } else {
+      final DocIdSetIterator it = docIdSet.iterator();
+      // null is allowed to be returned by iterator(),
+      // in this case we wrap with the empty set,
+      // which is cacheable.
+      return (it == null) ? DocIdSet.EMPTY_DOCIDSET : new OpenBitSetDISI(it, reader.maxDoc());
+    }
   }
 
   public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
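Because docIdSetToCache() stays protected, the caching policy is an extension point. The following is a hypothetical sketch, not part of this commit: a subclass that always materializes the wrapped set into an OpenBitSetDISI, even when the set reports itself cacheable, trading a one-time copy for a uniform cached representation.

import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.CachingWrapperFilter;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.OpenBitSetDISI;

public class ForceBitSetCachingFilter extends CachingWrapperFilter {
  public ForceBitSetCachingFilter(Filter filter) {
    super(filter);
  }

  protected DocIdSet docIdSetToCache(DocIdSet docIdSet, IndexReader reader) throws IOException {
    // ignore isCacheable() and always copy; a null iterator is represented
    // by the (cacheable) empty set, mirroring the superclass logic
    final DocIdSetIterator it = docIdSet.iterator();
    return (it == null) ? DocIdSet.EMPTY_DOCIDSET : new OpenBitSetDISI(it, reader.maxDoc());
  }
}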
DocIdSet.java
@@ -37,6 +37,10 @@ public abstract class DocIdSet {
     public DocIdSetIterator iterator() {
       return iterator;
     }
+
+    public boolean isCacheable() {
+      return true;
+    }
   };
 
   /** Provides a {@link DocIdSetIterator} to access the set.
@@ -44,4 +48,15 @@ public abstract class DocIdSet {
    * <code>{@linkplain #EMPTY_DOCIDSET}.iterator()</code> if there
    * are no docs that match. */
   public abstract DocIdSetIterator iterator() throws IOException;
+
+  /**
+   * This method is a hint for {@link CachingWrapperFilter}, if this <code>DocIdSet</code>
+   * should be cached without copying it into a BitSet. The default is to return
+   * <code>false</code>. If you have your own <code>DocIdSet</code> implementation
+   * that does its iteration very efficiently and fast without doing disk I/O,
+   * override this method and return <code>true</code>.
+   */
+  public boolean isCacheable() {
+    return false;
+  }
 }
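As a hedged illustration of the override the javadoc above describes (the class SortedIntArrayDocIdSet is hypothetical, not part of this commit): a DocIdSet backed by a sorted in-memory int array iterates without any disk I/O, so it may report isCacheable() == true, and CachingWrapperFilter will then cache it as-is instead of copying it into a bit set.

import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;

public class SortedIntArrayDocIdSet extends DocIdSet {
  private final int[] docs; // sorted, distinct doc ids

  public SortedIntArrayDocIdSet(int[] sortedDocs) {
    this.docs = sortedDocs;
  }

  public boolean isCacheable() {
    return true; // pure in-memory iteration, no disk I/O
  }

  public DocIdSetIterator iterator() {
    return new DocIdSetIterator() {
      private int i = -1; // cursor, -1 before the first nextDoc()

      public int docID() {
        if (i < 0) return -1;
        return (i < docs.length) ? docs[i] : NO_MORE_DOCS;
      }

      public int nextDoc() {
        i++;
        return docID();
      }

      public int advance(int target) {
        // linear scan is enough for a sketch; a real impl could binary search
        while (nextDoc() != NO_MORE_DOCS) {
          if (docs[i] >= target) return docs[i];
        }
        return NO_MORE_DOCS;
      }
    };
  }
}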
FieldCacheRangeFilter.java
@@ -476,6 +476,11 @@ public abstract class FieldCacheRangeFilter extends Filter {
 
       /** this method checks, if a doc is a hit, should throw AIOBE, when position invalid */
       abstract boolean matchDoc(int doc) throws ArrayIndexOutOfBoundsException;
+
+      /** this DocIdSet is cacheable, if it works solely with FieldCache and no TermDocs */
+      public boolean isCacheable() {
+        return !(mayUseTermDocs && reader.hasDeletions());
+      }
 
       public DocIdSetIterator iterator() throws IOException {
         // Synchronization needed because deleted docs BitVector
@@ -484,7 +489,7 @@ public abstract class FieldCacheRangeFilter extends Filter {
         // and the index has deletions
         final TermDocs termDocs;
         synchronized(reader) {
-          termDocs = (mayUseTermDocs && reader.hasDeletions()) ? reader.termDocs(null) : null;
+          termDocs = isCacheable() ? null : reader.termDocs(null);
         }
         if (termDocs != null) {
           // a DocIdSetIterator using TermDocs to iterate valid docIds
FieldCacheTermsFilter.java
@@ -130,6 +130,11 @@ public class FieldCacheTermsFilter extends Filter {
       return new FieldCacheTermsFilterDocIdSetIterator();
     }
+
+    /** This DocIdSet implementation is cacheable. */
+    public boolean isCacheable() {
+      return true;
+    }
 
     protected class FieldCacheTermsFilterDocIdSetIterator extends DocIdSetIterator {
       private int doc = -1;
FilteredDocIdSet.java
@@ -49,6 +49,11 @@ public abstract class FilteredDocIdSet extends DocIdSet {
     _innerSet = innerSet;
   }
+
+  /** This DocIdSet implementation is cacheable if the inner set is cacheable. */
+  public boolean isCacheable() {
+    return _innerSet.isCacheable();
+  }
 
   /**
    * Validation method to determine whether a docid should be in the result set.
    * @param docid docid to be tested
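With this delegation, a FilteredDocIdSet subclass inherits the caching hint of the set it decorates, since match() adds only an in-memory test per doc id. A minimal hypothetical sketch (EvenDocIdSet is not part of this commit):

import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.FilteredDocIdSet;

public class EvenDocIdSet extends FilteredDocIdSet {
  public EvenDocIdSet(DocIdSet innerSet) {
    super(innerSet);
  }

  protected boolean match(int docid) {
    // purely computational check, so the inner set's isCacheable() verdict still holds
    return (docid & 1) == 0;
  }
}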
QueryWrapperFilter.java
@@ -74,6 +74,7 @@ public class QueryWrapperFilter extends Filter {
       public DocIdSetIterator iterator() throws IOException {
         return weight.scorer(reader, true, false);
       }
+      public boolean isCacheable() { return false; }
     };
   }
 
DocIdBitSet.java
@@ -34,6 +34,11 @@ public class DocIdBitSet extends DocIdSet {
   public DocIdSetIterator iterator() {
     return new DocIdBitSetIterator(bitSet);
   }
+
+  /** This DocIdSet implementation is cacheable. */
+  public boolean isCacheable() {
+    return true;
+  }
 
   /**
    * Returns the underlying BitSet.
OpenBitSet.java
@@ -116,6 +116,11 @@ public class OpenBitSet extends DocIdSet implements Cloneable, Serializable {
     return new OpenBitSetIterator(bits, wlen);
   }
+
+  /** This DocIdSet implementation is cacheable. */
+  public boolean isCacheable() {
+    return true;
+  }
 
   /** Returns the current capacity in bits (1 greater than the index of the last bit) */
   public long capacity() { return bits.length << 6; }
SortedVIntList.java
@@ -180,6 +180,11 @@ public class SortedVIntList extends DocIdSet {
     return bytes.length;
   }
+
+  /** This DocIdSet implementation is cacheable. */
+  public boolean isCacheable() {
+    return true;
+  }
 
   /**
    * @return An iterator over the sorted integers.
    */
TestCachingWrapperFilter.java
@@ -18,12 +18,18 @@ package org.apache.lucene.search;
  */
 
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.OpenBitSet;
+import org.apache.lucene.util.OpenBitSetDISI;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
 
+import java.io.IOException;
+import java.util.BitSet;
+
 public class TestCachingWrapperFilter extends LuceneTestCase {
   public void testCachingWorks() throws Exception {
     Directory dir = new RAMDirectory();
@@ -50,4 +56,47 @@ public class TestCachingWrapperFilter extends LuceneTestCase {
 
     reader.close();
   }
+
+  private static void assertDocIdSetCacheable(IndexReader reader, Filter filter, boolean shouldCacheable) throws IOException {
+    final CachingWrapperFilter cacher = new CachingWrapperFilter(filter);
+    final DocIdSet originalSet = filter.getDocIdSet(reader);
+    final DocIdSet cachedSet = cacher.getDocIdSet(reader);
+    assertTrue(cachedSet.isCacheable());
+    assertEquals(shouldCacheable, originalSet.isCacheable());
+    //System.out.println("Original: "+originalSet.getClass().getName()+" -- cached: "+cachedSet.getClass().getName());
+    if (originalSet.isCacheable()) {
+      assertEquals("Cached DocIdSet must be of same class as uncached, if cacheable", originalSet.getClass(), cachedSet.getClass());
+    } else {
+      assertTrue("Cached DocIdSet must be an OpenBitSet if the original one was not cacheable", cachedSet instanceof OpenBitSetDISI);
+    }
+  }
+
+  public void testIsCacheAble() throws Exception {
+    Directory dir = new RAMDirectory();
+    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
+    writer.close();
+
+    IndexReader reader = IndexReader.open(dir);
+
+    // not cacheable:
+    assertDocIdSetCacheable(reader, new QueryWrapperFilter(new TermQuery(new Term("test","value"))), false);
+    // returns default empty docidset, always cacheable:
+    assertDocIdSetCacheable(reader, NumericRangeFilter.newIntRange("test", new Integer(10000), new Integer(-10000), true, true), true);
+    // is cacheable:
+    assertDocIdSetCacheable(reader, FieldCacheRangeFilter.newIntRange("test", new Integer(10), new Integer(20), true, true), true);
+    // an OpenBitSet filter is always cacheable
+    assertDocIdSetCacheable(reader, new Filter() {
+      public DocIdSet getDocIdSet(IndexReader reader) {
+        return new OpenBitSet();
+      }
+    }, true);
+    // a deprecated filter is always cacheable
+    assertDocIdSetCacheable(reader, new Filter() {
+      public BitSet bits(IndexReader reader) {
+        return new BitSet();
+      }
+    }, true);
+
+    reader.close();
+  }
 }
TestFieldCacheRangeFilter.java
@@ -66,8 +66,9 @@ public class TestFieldCacheRangeFilter extends BaseTestRangeFilter {
     Query q = new TermQuery(new Term("body","body"));
 
     // test id, bounded on both ends
-
-    result = search.search(q,FieldCacheRangeFilter.newStringRange("id",minIP,maxIP,T,T), numDocs).scoreDocs;
+    FieldCacheRangeFilter fcrf;
+    result = search.search(q,fcrf = FieldCacheRangeFilter.newStringRange("id",minIP,maxIP,T,T), numDocs).scoreDocs;
+    assertTrue(fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable());
     assertEquals("find all", numDocs, result.length);
 
     result = search.search(q,FieldCacheRangeFilter.newStringRange("id",minIP,maxIP,T,F), numDocs).scoreDocs;
@@ -212,8 +213,9 @@ public class TestFieldCacheRangeFilter extends BaseTestRangeFilter {
     Query q = new TermQuery(new Term("body","body"));
 
     // test id, bounded on both ends
-
-    result = search.search(q,FieldCacheRangeFilter.newShortRange("id",minIdO,maxIdO,T,T), numDocs).scoreDocs;
+    FieldCacheRangeFilter fcrf;
+    result = search.search(q,fcrf=FieldCacheRangeFilter.newShortRange("id",minIdO,maxIdO,T,T), numDocs).scoreDocs;
+    assertTrue(fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable());
     assertEquals("find all", numDocs, result.length);
 
     result = search.search(q,FieldCacheRangeFilter.newShortRange("id",minIdO,maxIdO,T,F), numDocs).scoreDocs;
@@ -303,7 +305,9 @@ public class TestFieldCacheRangeFilter extends BaseTestRangeFilter {
 
     // test id, bounded on both ends
 
-    result = search.search(q,FieldCacheRangeFilter.newIntRange("id",minIdO,maxIdO,T,T), numDocs).scoreDocs;
+    FieldCacheRangeFilter fcrf;
+    result = search.search(q,fcrf=FieldCacheRangeFilter.newIntRange("id",minIdO,maxIdO,T,T), numDocs).scoreDocs;
+    assertTrue(fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable());
     assertEquals("find all", numDocs, result.length);
 
     result = search.search(q,FieldCacheRangeFilter.newIntRange("id",minIdO,maxIdO,T,F), numDocs).scoreDocs;
@@ -393,7 +397,9 @@ public class TestFieldCacheRangeFilter extends BaseTestRangeFilter {
 
     // test id, bounded on both ends
 
-    result = search.search(q,FieldCacheRangeFilter.newLongRange("id",minIdO,maxIdO,T,T), numDocs).scoreDocs;
+    FieldCacheRangeFilter fcrf;
+    result = search.search(q,fcrf=FieldCacheRangeFilter.newLongRange("id",minIdO,maxIdO,T,T), numDocs).scoreDocs;
+    assertTrue(fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable());
     assertEquals("find all", numDocs, result.length);
 
     result = search.search(q,FieldCacheRangeFilter.newLongRange("id",minIdO,maxIdO,T,F), numDocs).scoreDocs;
@@ -523,4 +529,49 @@ public class TestFieldCacheRangeFilter extends BaseTestRangeFilter {
     assertEquals("infinity special case", 0, result.length);
   }
 
+  // test using a sparse index (with deleted docs). The DocIdSet should not be cacheable, as it uses TermDocs if the range contains 0
+  public void testSparseIndex() throws IOException {
+    RAMDirectory dir = new RAMDirectory();
+    IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), T, IndexWriter.MaxFieldLength.LIMITED);
+
+    for (int d = -20; d <= 20; d++) {
+      Document doc = new Document();
+      doc.add(new Field("id",Integer.toString(d), Field.Store.NO, Field.Index.NOT_ANALYZED));
+      doc.add(new Field("body","body", Field.Store.NO, Field.Index.NOT_ANALYZED));
+      writer.addDocument(doc);
+    }
+
+    writer.optimize();
+    writer.deleteDocuments(new Term("id","0"));
+    writer.close();
+
+    IndexReader reader = IndexReader.open(dir);
+    IndexSearcher search = new IndexSearcher(reader);
+    assertTrue(reader.hasDeletions());
+
+    ScoreDoc[] result;
+    FieldCacheRangeFilter fcrf;
+    Query q = new TermQuery(new Term("body","body"));
+
+    result = search.search(q,fcrf=FieldCacheRangeFilter.newByteRange("id",new Byte((byte) -20),new Byte((byte) 20),T,T), 100).scoreDocs;
+    assertFalse("DocIdSet must not be cacheable", fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable());
+    assertEquals("find all", 40, result.length);
+
+    result = search.search(q,fcrf=FieldCacheRangeFilter.newByteRange("id",new Byte((byte) 0),new Byte((byte) 20),T,T), 100).scoreDocs;
+    assertFalse("DocIdSet must not be cacheable", fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable());
+    assertEquals("find all", 20, result.length);
+
+    result = search.search(q,fcrf=FieldCacheRangeFilter.newByteRange("id",new Byte((byte) -20),new Byte((byte) 0),T,T), 100).scoreDocs;
+    assertFalse("DocIdSet must not be cacheable", fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable());
+    assertEquals("find all", 20, result.length);
+
+    result = search.search(q,fcrf=FieldCacheRangeFilter.newByteRange("id",new Byte((byte) 10),new Byte((byte) 20),T,T), 100).scoreDocs;
+    assertTrue("DocIdSet must be cacheable", fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable());
+    assertEquals("find all", 11, result.length);
+
+    result = search.search(q,fcrf=FieldCacheRangeFilter.newByteRange("id",new Byte((byte) -20),new Byte((byte) -10),T,T), 100).scoreDocs;
+    assertTrue("DocIdSet must be cacheable", fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable());
+    assertEquals("find all", 11, result.length);
+  }
+
 }
TestQueryWrapperFilter.java
@@ -48,6 +48,8 @@ public class TestQueryWrapperFilter extends LuceneTestCase {
     IndexSearcher searcher = new IndexSearcher(dir, true);
     TopDocs hits = searcher.search(new MatchAllDocsQuery(), qwf, 10);
     assertEquals(1, hits.totalHits);
+    hits = searcher.search(new MatchAllDocsQuery(), new CachingWrapperFilter(qwf), 10);
+    assertEquals(1, hits.totalHits);
 
     // should not throw exception with complex primitive query
     BooleanQuery booleanQuery = new BooleanQuery();
@@ -58,6 +60,8 @@ public class TestQueryWrapperFilter extends LuceneTestCase {
 
     hits = searcher.search(new MatchAllDocsQuery(), qwf, 10);
     assertEquals(1, hits.totalHits);
+    hits = searcher.search(new MatchAllDocsQuery(), new CachingWrapperFilter(qwf), 10);
+    assertEquals(1, hits.totalHits);
 
     // should not throw exception with non primitive Query (doesn't implement
     // Query#createWeight)
@@ -65,6 +69,15 @@ public class TestQueryWrapperFilter extends LuceneTestCase {
 
     hits = searcher.search(new MatchAllDocsQuery(), qwf, 10);
     assertEquals(1, hits.totalHits);
+    hits = searcher.search(new MatchAllDocsQuery(), new CachingWrapperFilter(qwf), 10);
+    assertEquals(1, hits.totalHits);
+
+    // test a query with no hits
+    termQuery = new TermQuery(new Term("field", "not_exist"));
+    qwf = new QueryWrapperFilter(termQuery);
+    hits = searcher.search(new MatchAllDocsQuery(), qwf, 10);
+    assertEquals(0, hits.totalHits);
+    hits = searcher.search(new MatchAllDocsQuery(), new CachingWrapperFilter(qwf), 10);
+    assertEquals(0, hits.totalHits);
   }
 }