LUCENE-1187: ChainedFilter and BooleanFilter now work with new Filter API and DocIdSetIterator-based filters.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@659635 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Busch 2008-05-23 19:25:05 +00:00
parent 08a2eb4665
commit 0ebfcc663e
12 changed files with 650 additions and 375 deletions

View File

@ -163,6 +163,10 @@ New features
13. LUCENE-1166: Decomposition tokenfilter for languages like German and Swedish (Thomas Peuss via Grant Ingersoll)
14. LUCENE-1187: ChainedFilter and BooleanFilter now work with new Filter API
and DocIdSetIterator-based filters. Backwards-compatibility with old
BitSet-based filters is ensured. (Paul Elschot via Michael Busch)
Optimizations
1. LUCENE-705: When building a compound file, use

View File

@ -58,7 +58,11 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Filter;
import java.io.IOException;
import java.util.BitSet;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.OpenBitSetDISI;
import org.apache.lucene.util.SortedVIntList;
/**
* <p>
@ -79,29 +83,13 @@ import java.util.BitSet;
*/
public class ChainedFilter extends Filter
{
/**
* {@link BitSet#or}.
*/
public static final int OR = 0;
/**
* {@link BitSet#and}.
*/
public static final int AND = 1;
/**
* {@link BitSet#andNot}.
*/
public static final int ANDNOT = 2;
/**
* {@link BitSet#xor}.
*/
public static final int XOR = 3;
/**
* Logical operation when none is declared. Defaults to
* {@link BitSet#or}.
* OR.
*/
public static int DEFAULT = OR;
@ -144,96 +132,95 @@ public class ChainedFilter extends Filter
}
/**
* {@link Filter#bits}.
* {@link Filter#getDocIdSet}.
*/
public BitSet bits(IndexReader reader) throws IOException
public DocIdSet getDocIdSet(IndexReader reader) throws IOException
{
int[] index = new int[1]; // use array as reference to modifiable int;
index[0] = 0; // an object attribute would not be thread safe.
if (logic != -1)
return bits(reader, logic);
return getDocIdSet(reader, logic, index);
else if (logicArray != null)
return bits(reader, logicArray);
return getDocIdSet(reader, logicArray, index);
else
return bits(reader, DEFAULT);
return getDocIdSet(reader, DEFAULT, index);
}
/**
* Delegates to each filter in the chain.
* @param reader IndexReader
* @param logic Logical operation
* @return BitSet
*/
private BitSet bits(IndexReader reader, int logic) throws IOException
private DocIdSetIterator getDISI(Filter filter, IndexReader reader)
throws IOException
{
BitSet result;
int i = 0;
return filter.getDocIdSet(reader).iterator();
}
private OpenBitSetDISI initialResult(IndexReader reader, int logic, int[] index)
throws IOException
{
OpenBitSetDISI result;
/**
* First AND operation takes place against a completely false
* bitset and will always return zero results. Thanks to
* Daniel Armbrust for pointing this out and suggesting workaround.
* bitset and will always return zero results.
*/
if (logic == AND)
{
result = (BitSet) chain[i].bits(reader).clone();
++i;
result = new OpenBitSetDISI(getDISI(chain[index[0]], reader), reader.maxDoc());
++index[0];
}
else if (logic == ANDNOT)
{
result = (BitSet) chain[i].bits(reader).clone();
result.flip(0,reader.maxDoc());
++i;
result = new OpenBitSetDISI(getDISI(chain[index[0]], reader), reader.maxDoc());
result.flip(0,reader.maxDoc()); // NOTE: may set bits for deleted docs.
++index[0];
}
else
{
result = new BitSet(reader.maxDoc());
}
for (; i < chain.length; i++)
{
doChain(result, reader, logic, chain[i]);
result = new OpenBitSetDISI(reader.maxDoc());
}
return result;
}
/**
 * Chooses the representation handed back to the caller: a
 * {@link SortedVIntList} when the result is sparse, otherwise the bit set
 * itself.
 *
 * @param result  accumulated result of the chain
 * @param maxDocs upper bound on document numbers (callers pass reader.maxDoc())
 * @return a SortedVIntList copy when fewer than maxDocs/9 bits are set,
 *         else <code>result</code> unchanged
 */
protected DocIdSet finalResult(OpenBitSetDISI result, int maxDocs) {
  // NOTE(review): the 1/9 threshold presumably reflects a bit set costing
  // about maxDocs/8 bytes versus at least one byte per entry in the list.
  final boolean sparse = result.cardinality() < (maxDocs / 9);
  if (sparse) {
    return new SortedVIntList(result);
  }
  return result;
}
/**
 * Combines every filter in the chain using one and the same logical
 * operation.
 *
 * @param reader IndexReader the filters are evaluated against
 * @param logic  logical operation applied to each filter
 * @param index  single-element array holding the position of the next
 *               filter to process; initialResult may already have advanced it
 * @return DocIdSet holding the combined result
 * @throws IOException if a filter fails while reading the index
 */
private DocIdSet getDocIdSet(IndexReader reader, int logic, int[] index)
    throws IOException
{
  OpenBitSetDISI accumulated = initialResult(reader, logic, index);
  while (index[0] < chain.length)
  {
    Filter next = chain[index[0]];
    doChain(accumulated, logic, next.getDocIdSet(reader));
    index[0]++;
  }
  return finalResult(accumulated, reader.maxDoc());
}
/**
* Delegates to each filter in the chain.
* @param reader IndexReader
* @param logic Logical operation
* @return BitSet
* @return DocIdSet
*/
private BitSet bits(IndexReader reader, int[] logic) throws IOException
private DocIdSet getDocIdSet(IndexReader reader, int[] logic, int[] index)
throws IOException
{
if (logic.length != chain.length)
throw new IllegalArgumentException("Invalid number of elements in logic array");
BitSet result;
int i = 0;
/**
* First AND operation takes place against a completely false
* bitset and will always return zero results. Thanks to
* Daniel Armbrust for pointing this out and suggesting workaround.
*/
if (logic[0] == AND)
OpenBitSetDISI result = initialResult(reader, logic[0], index);
for (; index[0] < chain.length; index[0]++)
{
result = (BitSet) chain[i].bits(reader).clone();
++i;
doChain(result, logic[index[0]], chain[index[0]].getDocIdSet(reader));
}
else if (logic[0] == ANDNOT)
{
result = (BitSet) chain[i].bits(reader).clone();
result.flip(0,reader.maxDoc());
++i;
}
else
{
result = new BitSet(reader.maxDoc());
}
for (; i < chain.length; i++)
{
doChain(result, reader, logic[i], chain[i]);
}
return result;
return finalResult(result, reader.maxDoc());
}
public String toString()
@ -249,26 +236,51 @@ public class ChainedFilter extends Filter
return sb.toString();
}
private void doChain(BitSet result, IndexReader reader,
int logic, Filter filter) throws IOException
private void doChain(OpenBitSetDISI result, int logic, DocIdSet dis)
throws IOException
{
if (dis instanceof OpenBitSet) {
// optimized case for OpenBitSets
switch (logic)
{
case OR:
result.or(filter.bits(reader));
result.or((OpenBitSet) dis);
break;
case AND:
result.and(filter.bits(reader));
result.and((OpenBitSet) dis);
break;
case ANDNOT:
result.andNot(filter.bits(reader));
result.andNot((OpenBitSet) dis);
break;
case XOR:
result.xor(filter.bits(reader));
result.xor((OpenBitSet) dis);
break;
default:
doChain(result, reader, DEFAULT, filter);
doChain(result, DEFAULT, dis);
break;
}
} else {
DocIdSetIterator disi = dis.iterator();
switch (logic)
{
case OR:
result.inPlaceOr(disi);
break;
case AND:
result.inPlaceAnd(disi);
break;
case ANDNOT:
result.inPlaceNot(disi);
break;
case XOR:
result.inPlaceXor(disi);
break;
default:
doChain(result, DEFAULT, dis);
break;
}
}
}
}

View File

@ -19,11 +19,17 @@ package org.apache.lucene.misc;
import junit.framework.TestCase;
import java.util.*;
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.NoLockFactory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@ -80,76 +86,149 @@ public class ChainedFilterTest extends TestCase {
new TermQuery(new Term("owner", "sue")));
}
/**
 * Builds a parallel array in which every filter of <code>chain</code> is
 * wrapped in an OldBitSetFilterWrapper; the input array is left untouched.
 */
private Filter[] getChainWithOldFilters(Filter[] chain) {
  final int count = chain.length;
  Filter[] wrapped = new Filter[count];
  int pos = 0;
  while (pos < count) {
    wrapped[pos] = new OldBitSetFilterWrapper(chain[pos]);
    pos++;
  }
  return wrapped;
}
/**
 * Creates the ChainedFilter under test from a per-filter logic array.
 *
 * @param chain filters to chain
 * @param logic one operation per filter, or null to use the constructor
 *              that takes no logic argument
 * @param old   when true, each filter is first wrapped via
 *              getChainWithOldFilters
 */
private ChainedFilter getChainedFilter(Filter[] chain, int[] logic, boolean old) {
  Filter[] filters = old ? getChainWithOldFilters(chain) : chain;
  return (logic == null)
      ? new ChainedFilter(filters)
      : new ChainedFilter(filters, logic);
}
/**
 * Creates the ChainedFilter under test with a single logical operation
 * for the whole chain.
 *
 * @param chain filters to chain
 * @param logic operation applied to every filter
 * @param old   when true, each filter is first wrapped via
 *              getChainWithOldFilters
 */
private ChainedFilter getChainedFilter(Filter[] chain, int logic, boolean old) {
  Filter[] filters = old ? getChainWithOldFilters(chain) : chain;
  return new ChainedFilter(filters, logic);
}
public void testSingleFilter() throws Exception {
ChainedFilter chain = new ChainedFilter(
new Filter[] {dateFilter});
for (int mode = 0; mode < 2; mode++) {
boolean old = (mode==0);
Hits hits = searcher.search(query, chain);
assertEquals(MAX, hits.length());
ChainedFilter chain = getChainedFilter(new Filter[] {dateFilter}, null, old);
chain = new ChainedFilter(new Filter[] {bobFilter});
hits = searcher.search(query, chain);
assertEquals(MAX / 2, hits.length());
Hits hits = searcher.search(query, chain);
assertEquals(MAX, hits.length());
chain = new ChainedFilter(new Filter[] {bobFilter}, new int[] {ChainedFilter.AND});
hits = searcher.search(query, chain);
assertEquals(MAX / 2, hits.length());
assertEquals("bob", hits.doc(0).get("owner"));
chain = new ChainedFilter(new Filter[] {bobFilter});
hits = searcher.search(query, chain);
assertEquals(MAX / 2, hits.length());
chain = new ChainedFilter(new Filter[] {bobFilter}, new int[] {ChainedFilter.ANDNOT});
hits = searcher.search(query, chain);
assertEquals(MAX / 2, hits.length());
assertEquals("sue", hits.doc(0).get("owner"));
chain = getChainedFilter(new Filter[] {bobFilter}, new int[] {ChainedFilter.AND}, old);
hits = searcher.search(query, chain);
assertEquals(MAX / 2, hits.length());
assertEquals("bob", hits.doc(0).get("owner"));
chain = getChainedFilter(new Filter[] {bobFilter}, new int[] {ChainedFilter.ANDNOT}, old);
hits = searcher.search(query, chain);
assertEquals(MAX / 2, hits.length());
assertEquals("sue", hits.doc(0).get("owner"));
}
}
public void testOR() throws Exception {
ChainedFilter chain = new ChainedFilter(
new Filter[] {sueFilter, bobFilter});
for (int mode = 0; mode < 2; mode++) {
boolean old = (mode==0);
ChainedFilter chain = getChainedFilter(
new Filter[] {sueFilter, bobFilter}, null, old);
Hits hits = searcher.search(query, chain);
assertEquals("OR matches all", MAX, hits.length());
Hits hits = searcher.search(query, chain);
assertEquals("OR matches all", MAX, hits.length());
}
}
public void testAND() throws Exception {
ChainedFilter chain = new ChainedFilter(
new Filter[] {dateFilter, bobFilter}, ChainedFilter.AND);
for (int mode = 0; mode < 2; mode++) {
boolean old = (mode==0);
ChainedFilter chain = getChainedFilter(
new Filter[] {dateFilter, bobFilter}, ChainedFilter.AND, old);
Hits hits = searcher.search(query, chain);
assertEquals("AND matches just bob", MAX / 2, hits.length());
assertEquals("bob", hits.doc(0).get("owner"));
Hits hits = searcher.search(query, chain);
assertEquals("AND matches just bob", MAX / 2, hits.length());
assertEquals("bob", hits.doc(0).get("owner"));
}
}
public void testXOR() throws Exception {
ChainedFilter chain = new ChainedFilter(
new Filter[]{dateFilter, bobFilter}, ChainedFilter.XOR);
for (int mode = 0; mode < 2; mode++) {
boolean old = (mode==0);
ChainedFilter chain = getChainedFilter(
new Filter[]{dateFilter, bobFilter}, ChainedFilter.XOR, old);
Hits hits = searcher.search(query, chain);
assertEquals("XOR matches sue", MAX / 2, hits.length());
assertEquals("sue", hits.doc(0).get("owner"));
Hits hits = searcher.search(query, chain);
assertEquals("XOR matches sue", MAX / 2, hits.length());
assertEquals("sue", hits.doc(0).get("owner"));
}
}
public void testANDNOT() throws Exception {
ChainedFilter chain = new ChainedFilter(
new Filter[]{dateFilter, sueFilter},
new int[] {ChainedFilter.AND, ChainedFilter.ANDNOT});
for (int mode = 0; mode < 2; mode++) {
boolean old = (mode==0);
ChainedFilter chain = getChainedFilter(
new Filter[]{dateFilter, sueFilter},
new int[] {ChainedFilter.AND, ChainedFilter.ANDNOT}, old);
Hits hits = searcher.search(query, chain);
assertEquals("ANDNOT matches just bob",
MAX / 2, hits.length());
assertEquals("bob", hits.doc(0).get("owner"));
chain = new ChainedFilter(
new Filter[]{bobFilter, bobFilter},
new int[] {ChainedFilter.ANDNOT, ChainedFilter.ANDNOT});
hits = searcher.search(query, chain);
assertEquals("ANDNOT bob ANDNOT bob matches all sues",
Hits hits = searcher.search(query, chain);
assertEquals("ANDNOT matches just bob",
MAX / 2, hits.length());
assertEquals("sue", hits.doc(0).get("owner"));
assertEquals("bob", hits.doc(0).get("owner"));
chain = getChainedFilter(
new Filter[]{bobFilter, bobFilter},
new int[] {ChainedFilter.ANDNOT, ChainedFilter.ANDNOT}, old);
hits = searcher.search(query, chain);
assertEquals("ANDNOT bob ANDNOT bob matches all sues",
MAX / 2, hits.length());
assertEquals("sue", hits.doc(0).get("owner"));
}
}
/**
 * Parses a date written as "yyyy MMM dd" using US locale month names.
 *
 * @throws ParseException if <code>s</code> does not match that pattern
 */
private Date parseDate(String s) throws ParseException {
  SimpleDateFormat format = new SimpleDateFormat("yyyy MMM dd", Locale.US);
  return format.parse(s);
}
/**
 * Regression test: chaining two CachingWrapperFilters must not fail.
 * Runs once with the filters wrapped as old BitSet-style filters and once
 * with the filters used directly.
 */
public void testWithCachingFilter() throws Exception {
  for (int mode = 0; mode < 2; mode++) {
    boolean old = (mode==0);
    Directory dir = new RAMDirectory();
    Analyzer analyzer = new WhitespaceAnalyzer();

    // Create an empty index; no documents are needed to trigger the failure.
    IndexWriter writer = new IndexWriter(dir, analyzer, true, MaxFieldLength.LIMITED);
    writer.close();

    Searcher searcher = new IndexSearcher(dir);
    try {
      Query query = new TermQuery(new Term("none", "none"));

      QueryWrapperFilter queryFilter = new QueryWrapperFilter(query);
      CachingWrapperFilter cachingFilter = new CachingWrapperFilter(queryFilter);

      // Run the first caching filter once on its own before chaining it.
      searcher.search(query, cachingFilter, 1);

      CachingWrapperFilter cachingFilter2 = new CachingWrapperFilter(queryFilter);
      Filter[] chain = new Filter[2];
      chain[0] = cachingFilter;
      chain[1] = cachingFilter2;

      // Honour the 'old' flag so both filter styles are actually exercised.
      ChainedFilter cf = getChainedFilter(chain, null, old);

      // Before the DocIdSet-based ChainedFilter this threw
      // java.lang.ClassCastException: org.apache.lucene.util.OpenBitSet cannot be cast to java.util.BitSet
      searcher.search(new MatchAllDocsQuery(), cf, 1);
    } finally {
      // Release the reader opened by the local searcher.
      searcher.close();
    }
  }
}
}

View File

@ -23,6 +23,10 @@ import java.util.BitSet;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.util.DocIdBitSet;
import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.OpenBitSetDISI;
import org.apache.lucene.util.SortedVIntList;
/**
* A container Filter that allows Boolean composition of Filters.
@ -37,184 +41,167 @@ import org.apache.lucene.search.BooleanClause.Occur;
public class BooleanFilter extends Filter
{
//ArrayList of SHOULD filters
ArrayList shouldFilters = null;
//ArrayList of NOT filters
ArrayList notFilters = null;
//ArrayList of MUST filters
ArrayList mustFilters = null;
ArrayList shouldFilters = null;
ArrayList notFilters = null;
ArrayList mustFilters = null;
/**
* Returns the a BitSet representing the Boolean composition
* of the filters that have been added.
*/
/**
 * Evaluates the filter stored at <code>index</code> in <code>filters</code>
 * against <code>reader</code> and returns an iterator over its DocIdSet.
 */
private DocIdSetIterator getDISI(ArrayList filters, int index, IndexReader reader)
    throws IOException
{
  Filter selected = (Filter) filters.get(index);
  DocIdSet docIds = selected.getDocIdSet(reader);
  return docIds.iterator();
}
public BitSet bits(IndexReader reader) throws IOException
{
//create a new bitSet
BitSet returnBits = null;
/**
* Returns a DocIdSet representing the Boolean composition
* of the filters that have been added.
*/
public DocIdSet getDocIdSet(IndexReader reader) throws IOException
{
OpenBitSetDISI res = null;
//SHOULD filters
if (shouldFilters!=null)
{
returnBits = ((Filter)shouldFilters.get(0)).bits(reader);
// avoid changing the original bitset - it may be cached
returnBits=(BitSet) returnBits.clone();
if (shouldFilters.size() > 1)
{
for (int i = 1; i < shouldFilters.size(); i++)
{
returnBits.or(((Filter)shouldFilters.get(i)).bits(reader));
}
}
}
if (shouldFilters != null) {
for (int i = 0; i < shouldFilters.size(); i++) {
if (res == null) {
res = new OpenBitSetDISI(getDISI(shouldFilters, i, reader), reader.maxDoc());
} else {
DocIdSet dis = ((Filter)shouldFilters.get(i)).getDocIdSet(reader);
if(dis instanceof OpenBitSet) {
// optimized case for OpenBitSets
res.or((OpenBitSet) dis);
} else {
res.inPlaceOr(getDISI(shouldFilters, i, reader));
}
}
}
}
//NOT filters
if (notFilters!=null)
{
for (int i = 0; i < notFilters.size(); i++)
{
BitSet notBits=((Filter)notFilters.get(i)).bits(reader);
if(returnBits==null)
{
returnBits=(BitSet) notBits.clone();
returnBits.flip(0,reader.maxDoc());
}
else
{
returnBits.andNot(notBits);
}
}
}
if (notFilters!=null) {
for (int i = 0; i < notFilters.size(); i++) {
if (res == null) {
res = new OpenBitSetDISI(getDISI(notFilters, i, reader), reader.maxDoc());
res.flip(0, reader.maxDoc()); // NOTE: may set bits on deleted docs
} else {
DocIdSet dis = ((Filter)notFilters.get(i)).getDocIdSet(reader);
if(dis instanceof OpenBitSet) {
// optimized case for OpenBitSets
res.andNot((OpenBitSet) dis);
} else {
res.inPlaceNot(getDISI(notFilters, i, reader));
}
}
}
}
//MUST filters
if (mustFilters!=null)
{
for (int i = 0; i < mustFilters.size(); i++)
{
BitSet mustBits=((Filter)mustFilters.get(i)).bits(reader);
if(returnBits==null)
{
if(mustFilters.size()==1)
{
returnBits=mustBits;
if (mustFilters!=null) {
for (int i = 0; i < mustFilters.size(); i++) {
if (res == null) {
res = new OpenBitSetDISI(getDISI(mustFilters, i, reader), reader.maxDoc());
} else {
DocIdSet dis = ((Filter)mustFilters.get(i)).getDocIdSet(reader);
if(dis instanceof OpenBitSet) {
// optimized case for OpenBitSets
res.and((OpenBitSet) dis);
} else {
res.inPlaceAnd(getDISI(mustFilters, i, reader));
}
}
}
}
}
else
{
//don't mangle the bitset
returnBits=(BitSet) mustBits.clone();
}
}
else
{
returnBits.and(mustBits);
}
}
}
if(returnBits==null)
{
returnBits=new BitSet(reader.maxDoc());
}
return returnBits;
}
if (res !=null)
return finalResult(res, reader.maxDoc());
/**
* Adds a new FilterClause to the Boolean Filter container
* @param filterClause A FilterClause object containing a Filter and an Occur parameter
*/
if (emptyDocIdSet == null)
emptyDocIdSet = new OpenBitSetDISI(1);
public void add(FilterClause filterClause)
{
if (filterClause.getOccur().equals(Occur.MUST))
{
if(mustFilters==null)
{
mustFilters=new ArrayList();
}
mustFilters.add(filterClause.getFilter());
}
if (filterClause.getOccur().equals(Occur.SHOULD))
{
if(shouldFilters==null)
{
shouldFilters=new ArrayList();
}
shouldFilters.add(filterClause.getFilter());
}
if (filterClause.getOccur().equals(Occur.MUST_NOT))
{
if(notFilters==null)
{
notFilters=new ArrayList();
}
notFilters.add(filterClause.getFilter());
}
}
return emptyDocIdSet;
}
public boolean equals(Object obj)
{
if(this == obj)
return true;
if((obj == null) || (obj.getClass() != this.getClass()))
return false;
BooleanFilter test = (BooleanFilter)obj;
return (notFilters == test.notFilters||
(notFilters!= null && notFilters.equals(test.notFilters)))
&&
(mustFilters == test.mustFilters||
(mustFilters!= null && mustFilters.equals(test.mustFilters)))
&&
(shouldFilters == test.shouldFilters||
(shouldFilters!= null && shouldFilters.equals(test.shouldFilters)));
}
/**
 * Chooses the representation handed back to the caller: a
 * {@link SortedVIntList} when the result is sparse, otherwise the bit set
 * itself.
 *
 * @param result  accumulated result of the Boolean composition
 * @param maxDocs upper bound on document numbers (callers pass reader.maxDoc())
 * @return a SortedVIntList copy when fewer than maxDocs/9 bits are set,
 *         else <code>result</code> unchanged
 */
protected DocIdSet finalResult(OpenBitSetDISI result, int maxDocs) {
  // NOTE(review): the 1/9 threshold presumably reflects a bit set costing
  // about maxDocs/8 bytes versus at least one byte per entry in the list.
  final boolean sparse = result.cardinality() < (maxDocs / 9);
  if (sparse) {
    return new SortedVIntList(result);
  }
  return result;
}
public int hashCode()
{
int hash=7;
hash = 31 * hash + (null == mustFilters ? 0 : mustFilters.hashCode());
hash = 31 * hash + (null == notFilters ? 0 : notFilters.hashCode());
hash = 31 * hash + (null == shouldFilters ? 0 : shouldFilters.hashCode());
return hash;
}
private static DocIdSet emptyDocIdSet = null;
/**
* Adds a new FilterClause to the Boolean Filter container
* @param filterClause A FilterClause object containing a Filter and an Occur parameter
*/
/** Prints a user-readable version of this filter. */
public String toString()
{
StringBuffer buffer = new StringBuffer();
public void add(FilterClause filterClause)
{
if (filterClause.getOccur().equals(Occur.MUST)) {
if (mustFilters==null) {
mustFilters=new ArrayList();
}
mustFilters.add(filterClause.getFilter());
}
if (filterClause.getOccur().equals(Occur.SHOULD)) {
if (shouldFilters==null) {
shouldFilters=new ArrayList();
}
shouldFilters.add(filterClause.getFilter());
}
if (filterClause.getOccur().equals(Occur.MUST_NOT)) {
if (notFilters==null) {
notFilters=new ArrayList();
}
notFilters.add(filterClause.getFilter());
}
}
buffer.append("BooleanFilter(");
/**
 * Null-safe equality for two filter lists: true when both are the same
 * reference (including both null) or when the first is non-null and
 * equals the second.
 */
private boolean equalFilters(ArrayList filters1, ArrayList filters2)
{
  if (filters1 == filters2) {
    return true;
  }
  if (filters1 == null) {
    return false;
  }
  return filters1.equals(filters2);
}
appendFilters(shouldFilters, null, buffer);
appendFilters(mustFilters, "+", buffer);
appendFilters(notFilters, "-", buffer);
public boolean equals(Object obj)
{
if (this == obj)
return true;
buffer.append(")");
if ((obj == null) || (obj.getClass() != this.getClass()))
return false;
return buffer.toString();
}
BooleanFilter other = (BooleanFilter)obj;
return equalFilters(notFilters, other.notFilters)
&& equalFilters(mustFilters, other.mustFilters)
&& equalFilters(shouldFilters, other.shouldFilters);
}
private void appendFilters(ArrayList filters, String occurString,
StringBuffer buffer)
{
if (filters == null)
return;
public int hashCode()
{
int hash=7;
hash = 31 * hash + (null == mustFilters ? 0 : mustFilters.hashCode());
hash = 31 * hash + (null == notFilters ? 0 : notFilters.hashCode());
hash = 31 * hash + (null == shouldFilters ? 0 : shouldFilters.hashCode());
return hash;
}
for (int i = 0; i < filters.size(); i++)
{
Filter filter = (Filter) filters.get(i);
if (occurString != null)
{
buffer.append(occurString);
}
/** Prints a user-readable version of this filter. */
public String toString()
{
StringBuffer buffer = new StringBuffer();
buffer.append("BooleanFilter(");
appendFilters(shouldFilters, "", buffer);
appendFilters(mustFilters, "+", buffer);
appendFilters(notFilters, "-", buffer);
buffer.append(")");
return buffer.toString();
}
buffer.append(filter);
if (i < filters.size() - 1)
{
buffer.append(' ');
}
}
}
/**
 * Appends each filter of <code>filters</code> to <code>buffer</code>, every
 * one preceded by a space and by <code>occurString</code>. Does nothing
 * when <code>filters</code> is null.
 */
private void appendFilters(ArrayList filters, String occurString, StringBuffer buffer)
{
  if (filters == null) {
    return;
  }
  for (int pos = 0; pos < filters.size(); pos++) {
    String text = filters.get(pos).toString();
    buffer.append(' ').append(occurString).append(text);
  }
}
}

View File

@ -22,6 +22,7 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.util.OpenBitSet;
public class DuplicateFilter extends Filter
{
@ -66,7 +67,7 @@ public class DuplicateFilter extends Filter
this.processingMode = processingMode;
}
public BitSet bits(IndexReader reader) throws IOException
public DocIdSet getDocIdSet(IndexReader reader) throws IOException
{
if(processingMode==PM_FAST_INVALIDATION)
{
@ -78,10 +79,10 @@ public class DuplicateFilter extends Filter
}
}
private BitSet correctBits(IndexReader reader) throws IOException
private OpenBitSet correctBits(IndexReader reader) throws IOException
{
BitSet bits=new BitSet(reader.maxDoc()); //assume all are INvalid
OpenBitSet bits=new OpenBitSet(reader.maxDoc()); //assume all are INvalid
Term startTerm=new Term(fieldName,"");
TermEnum te = reader.terms(startTerm);
if(te!=null)
@ -117,10 +118,10 @@ public class DuplicateFilter extends Filter
return bits;
}
private BitSet fastBits(IndexReader reader) throws IOException
private OpenBitSet fastBits(IndexReader reader) throws IOException
{
BitSet bits=new BitSet(reader.maxDoc());
OpenBitSet bits=new OpenBitSet(reader.maxDoc());
bits.set(0,reader.maxDoc()); //assume all are valid
Term startTerm=new Term(fieldName,"");
TermEnum te = reader.terms(startTerm);
@ -143,7 +144,7 @@ public class DuplicateFilter extends Filter
do
{
lastDoc=td.doc();
bits.set(lastDoc,false);
bits.clear(lastDoc);
}while(td.next());
if(keepMode==KM_USE_LAST_OCCURRENCE)
{

View File

@ -26,6 +26,7 @@ import java.util.TreeSet;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.util.OpenBitSet;
/**
* Constructs a filter for docs matching any of the terms added to this class.
@ -50,11 +51,11 @@ public class TermsFilter extends Filter
}
/* (non-Javadoc)
* @see org.apache.lucene.search.Filter#bits(org.apache.lucene.index.IndexReader)
* @see org.apache.lucene.search.Filter#getDocIdSet(org.apache.lucene.index.IndexReader)
*/
public BitSet bits(IndexReader reader) throws IOException
public DocIdSet getDocIdSet(IndexReader reader) throws IOException
{
BitSet result=new BitSet(reader.maxDoc());
OpenBitSet result=new OpenBitSet(reader.maxDoc());
TermDocs td = reader.termDocs();
try
{

View File

@ -32,6 +32,7 @@ import org.apache.lucene.search.Filter;
import org.apache.lucene.search.FilterClause;
import org.apache.lucene.search.RangeFilter;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.DocIdBitSet;
import junit.framework.TestCase;
@ -66,100 +67,141 @@ public class BooleanFilterTest extends TestCase
writer.addDocument(doc);
}
private Filter getRangeFilter(String field,String lowerPrice, String upperPrice)
private Filter getRangeFilter(String field,String lowerPrice, String upperPrice, boolean old)
{
return new RangeFilter(field,lowerPrice,upperPrice,true,true);
Filter f = new RangeFilter(field,lowerPrice,upperPrice,true,true);
if (old) {
return new OldBitSetFilterWrapper(f);
}
return f;
}
private TermsFilter getTermsFilter(String field,String text)
private Filter getTermsFilter(String field,String text, boolean old)
{
TermsFilter tf=new TermsFilter();
tf.addTerm(new Term(field,text));
if (old) {
return new OldBitSetFilterWrapper(tf);
}
return tf;
}
/**
 * Asserts that <code>filt</code> matches exactly <code>expected</code>
 * documents, counted by walking the iterator of its DocIdSet.
 *
 * @param mes      assertion message
 * @param expected expected number of matching documents
 * @param filt     filter under test, evaluated against the fixture reader
 */
private void tstFilterCard(String mes, int expected, Filter filt)
    throws Throwable
{
  int matches = 0;
  for (DocIdSetIterator it = filt.getDocIdSet(reader).iterator(); it.next(); ) {
    matches++;
  }
  assertEquals(mes, expected, matches);
}
public void testShould() throws Throwable
{
BooleanFilter booleanFilter = new BooleanFilter();
booleanFilter.add(new FilterClause(getTermsFilter("price","030"),BooleanClause.Occur.SHOULD));
BitSet bits = booleanFilter.bits(reader);
assertEquals("Should retrieves only 1 doc",1,bits.cardinality());
for (int i = 0; i < 2; i++) {
boolean old = (i==0);
BooleanFilter booleanFilter = new BooleanFilter();
booleanFilter.add(new FilterClause(getTermsFilter("price","030", old),BooleanClause.Occur.SHOULD));
tstFilterCard("Should retrieves only 1 doc",1,booleanFilter);
}
}
public void testShoulds() throws Throwable
{
BooleanFilter booleanFilter = new BooleanFilter();
booleanFilter.add(new FilterClause(getRangeFilter("price","010", "020"),BooleanClause.Occur.SHOULD));
booleanFilter.add(new FilterClause(getRangeFilter("price","020", "030"),BooleanClause.Occur.SHOULD));
BitSet bits = booleanFilter.bits(reader);
assertEquals("Shoulds are Ored together",5,bits.cardinality());
for (int i = 0; i < 2; i++) {
boolean old = (i==0);
BooleanFilter booleanFilter = new BooleanFilter();
booleanFilter.add(new FilterClause(getRangeFilter("price","010", "020", old),BooleanClause.Occur.SHOULD));
booleanFilter.add(new FilterClause(getRangeFilter("price","020", "030", old),BooleanClause.Occur.SHOULD));
tstFilterCard("Shoulds are Ored together",5,booleanFilter);
}
}
public void testShouldsAndMustNot() throws Throwable
{
BooleanFilter booleanFilter = new BooleanFilter();
booleanFilter.add(new FilterClause(getRangeFilter("price","010", "020"),BooleanClause.Occur.SHOULD));
booleanFilter.add(new FilterClause(getRangeFilter("price","020", "030"),BooleanClause.Occur.SHOULD));
booleanFilter.add(new FilterClause(getTermsFilter("inStock", "N"),BooleanClause.Occur.MUST_NOT));
BitSet bits = booleanFilter.bits(reader);
assertEquals("Shoulds Ored but AndNot",4,bits.cardinality());
for (int i = 0; i < 2; i++) {
boolean old = (i==0);
booleanFilter.add(new FilterClause(getTermsFilter("inStock", "Maybe"),BooleanClause.Occur.MUST_NOT));
bits = booleanFilter.bits(reader);
assertEquals("Shoulds Ored but AndNots",3,bits.cardinality());
BooleanFilter booleanFilter = new BooleanFilter();
booleanFilter.add(new FilterClause(getRangeFilter("price","010", "020", old),BooleanClause.Occur.SHOULD));
booleanFilter.add(new FilterClause(getRangeFilter("price","020", "030", old),BooleanClause.Occur.SHOULD));
booleanFilter.add(new FilterClause(getTermsFilter("inStock", "N", old),BooleanClause.Occur.MUST_NOT));
tstFilterCard("Shoulds Ored but AndNot",4,booleanFilter);
booleanFilter.add(new FilterClause(getTermsFilter("inStock", "Maybe", old),BooleanClause.Occur.MUST_NOT));
tstFilterCard("Shoulds Ored but AndNots",3,booleanFilter);
}
}
public void testShouldsAndMust() throws Throwable
{
BooleanFilter booleanFilter = new BooleanFilter();
booleanFilter.add(new FilterClause(getRangeFilter("price","010", "020"),BooleanClause.Occur.SHOULD));
booleanFilter.add(new FilterClause(getRangeFilter("price","020", "030"),BooleanClause.Occur.SHOULD));
booleanFilter.add(new FilterClause(getTermsFilter("accessRights", "admin"),BooleanClause.Occur.MUST));
BitSet bits = booleanFilter.bits(reader);
assertEquals("Shoulds Ored but MUST",3,bits.cardinality());
for (int i = 0; i < 2; i++) {
boolean old = (i==0);
BooleanFilter booleanFilter = new BooleanFilter();
booleanFilter.add(new FilterClause(getRangeFilter("price","010", "020", old),BooleanClause.Occur.SHOULD));
booleanFilter.add(new FilterClause(getRangeFilter("price","020", "030", old),BooleanClause.Occur.SHOULD));
booleanFilter.add(new FilterClause(getTermsFilter("accessRights", "admin", old),BooleanClause.Occur.MUST));
tstFilterCard("Shoulds Ored but MUST",3,booleanFilter);
}
}
public void testShouldsAndMusts() throws Throwable
{
BooleanFilter booleanFilter = new BooleanFilter();
booleanFilter.add(new FilterClause(getRangeFilter("price","010", "020"),BooleanClause.Occur.SHOULD));
booleanFilter.add(new FilterClause(getRangeFilter("price","020", "030"),BooleanClause.Occur.SHOULD));
booleanFilter.add(new FilterClause(getTermsFilter("accessRights", "admin"),BooleanClause.Occur.MUST));
booleanFilter.add(new FilterClause(getRangeFilter("date","20040101", "20041231"),BooleanClause.Occur.MUST));
BitSet bits = booleanFilter.bits(reader);
assertEquals("Shoulds Ored but MUSTs ANDED",1,bits.cardinality());
for (int i = 0; i < 2; i++) {
boolean old = (i==0);
BooleanFilter booleanFilter = new BooleanFilter();
booleanFilter.add(new FilterClause(getRangeFilter("price","010", "020", old),BooleanClause.Occur.SHOULD));
booleanFilter.add(new FilterClause(getRangeFilter("price","020", "030", old),BooleanClause.Occur.SHOULD));
booleanFilter.add(new FilterClause(getTermsFilter("accessRights", "admin", old),BooleanClause.Occur.MUST));
booleanFilter.add(new FilterClause(getRangeFilter("date","20040101", "20041231", old),BooleanClause.Occur.MUST));
tstFilterCard("Shoulds Ored but MUSTs ANDED",1,booleanFilter);
}
}
public void testShouldsAndMustsAndMustNot() throws Throwable
{
BooleanFilter booleanFilter = new BooleanFilter();
booleanFilter.add(new FilterClause(getRangeFilter("price","030", "040"),BooleanClause.Occur.SHOULD));
booleanFilter.add(new FilterClause(getTermsFilter("accessRights", "admin"),BooleanClause.Occur.MUST));
booleanFilter.add(new FilterClause(getRangeFilter("date","20050101", "20051231"),BooleanClause.Occur.MUST));
booleanFilter.add(new FilterClause(getTermsFilter("inStock","N"),BooleanClause.Occur.MUST_NOT));
BitSet bits = booleanFilter.bits(reader);
assertEquals("Shoulds Ored but MUSTs ANDED and MustNot",0,bits.cardinality());
for (int i = 0; i < 2; i++) {
boolean old = (i==0);
BooleanFilter booleanFilter = new BooleanFilter();
booleanFilter.add(new FilterClause(getRangeFilter("price","030", "040", old),BooleanClause.Occur.SHOULD));
booleanFilter.add(new FilterClause(getTermsFilter("accessRights", "admin", old),BooleanClause.Occur.MUST));
booleanFilter.add(new FilterClause(getRangeFilter("date","20050101", "20051231", old),BooleanClause.Occur.MUST));
booleanFilter.add(new FilterClause(getTermsFilter("inStock","N", old),BooleanClause.Occur.MUST_NOT));
tstFilterCard("Shoulds Ored but MUSTs ANDED and MustNot",0,booleanFilter);
}
}
/**
 * Uses a single MUST clause (accessRights term "admin") and expects
 * three matching documents.
 */
public void testJustMust() throws Throwable
{
// NOTE(review): this span shows both a BitSet-based body (booleanFilter.bits)
// and a loop-based body (tstFilterCard); booleanFilter is declared in both,
// so presumably only one of the two variants is meant to be present - confirm.
BooleanFilter booleanFilter = new BooleanFilter();
booleanFilter.add(new FilterClause(getTermsFilter("accessRights", "admin"),BooleanClause.Occur.MUST));
BitSet bits = booleanFilter.bits(reader);
assertEquals("MUST",3,bits.cardinality());
// Two passes: old is true on the first pass and false on the second.
// Presumably the flag selects old BitSet-based versus new DocIdSet-based
// filters inside getTermsFilter - verify against that helper.
for (int i = 0; i < 2; i++) {
boolean old = (i==0);
BooleanFilter booleanFilter = new BooleanFilter();
booleanFilter.add(new FilterClause(getTermsFilter("accessRights", "admin", old),BooleanClause.Occur.MUST));
// tstFilterCard is defined elsewhere; it receives the expected count 3.
tstFilterCard("MUST",3,booleanFilter);
}
}
/**
 * Uses a single MUST_NOT clause (inStock term "N") and expects four
 * matching documents.
 */
public void testJustMustNot() throws Throwable
{
// NOTE(review): this span shows both a BitSet-based body (booleanFilter.bits)
// and a loop-based body (tstFilterCard); booleanFilter is declared in both,
// so presumably only one of the two variants is meant to be present - confirm.
BooleanFilter booleanFilter = new BooleanFilter();
booleanFilter.add(new FilterClause(getTermsFilter("inStock","N"),BooleanClause.Occur.MUST_NOT));
BitSet bits = booleanFilter.bits(reader);
assertEquals("MUST_NOT",4,bits.cardinality());
// Two passes: old is true on the first pass and false on the second.
// Presumably the flag selects old BitSet-based versus new DocIdSet-based
// filters inside getTermsFilter - verify against that helper.
for (int i = 0; i < 2; i++) {
boolean old = (i==0);
BooleanFilter booleanFilter = new BooleanFilter();
booleanFilter.add(new FilterClause(getTermsFilter("inStock","N", old),BooleanClause.Occur.MUST_NOT));
// tstFilterCard is defined elsewhere; it receives the expected count 4.
tstFilterCard("MUST_NOT",4,booleanFilter);
}
}
/**
 * Combines a MUST clause (inStock term "N") with a MUST_NOT clause
 * (price term "030") and expects zero matching documents.
 */
public void testMustAndMustNot() throws Throwable
{
// NOTE(review): this span shows both a BitSet-based body (booleanFilter.bits)
// and a loop-based body (tstFilterCard); booleanFilter is declared in both,
// so presumably only one of the two variants is meant to be present - confirm.
BooleanFilter booleanFilter = new BooleanFilter();
booleanFilter.add(new FilterClause(getTermsFilter("inStock","N"),BooleanClause.Occur.MUST));
booleanFilter.add(new FilterClause(getTermsFilter("price","030"),BooleanClause.Occur.MUST_NOT));
BitSet bits = booleanFilter.bits(reader);
assertEquals("MUST_NOT wins over MUST for same docs",0,bits.cardinality());
// Two passes: old is true on the first pass and false on the second.
// Presumably the flag selects old BitSet-based versus new DocIdSet-based
// filters inside getTermsFilter - verify against that helper.
for (int i = 0; i < 2; i++) {
boolean old = (i==0);
BooleanFilter booleanFilter = new BooleanFilter();
booleanFilter.add(new FilterClause(getTermsFilter("inStock","N", old),BooleanClause.Occur.MUST));
booleanFilter.add(new FilterClause(getTermsFilter("price","030", old),BooleanClause.Occur.MUST_NOT));
// tstFilterCard is defined elsewhere; it receives the expected count 0.
tstFilterCard("MUST_NOT wins over MUST for same docs",0,booleanFilter);
}
}
}

View File

@ -150,8 +150,8 @@ public class ParallelMultiSearcher extends MultiSearcher {
/** Lower-level search API.
*
* <p>{@link HitCollector#collect(int,float)} is called for every non-zero
* scoring document.
* <p>{@link HitCollector#collect(int,float)} is called for every matching
* document.
*
* <p>Applications should only use this if they need <i>all</i> of the
* matching documents. The high-level search API ({@link

View File

@ -33,7 +33,7 @@ import java.io.IOException; // for javadoc
*
* <p>Queries, filters and sort criteria are designed to be compact so that
* they may be efficiently passed to a remote index, with only the top-scoring
* hits being returned, rather than every non-zero scoring hit.
* hits being returned, rather than every matching hit.
*/
public interface Searchable extends java.rmi.Remote {
/** Lower-level search API.

View File

@ -88,8 +88,8 @@ public abstract class Searcher implements Searchable {
/** Lower-level search API.
*
* <p>{@link HitCollector#collect(int,float)} is called for every non-zero
* scoring document.
* <p>{@link HitCollector#collect(int,float)} is called for every matching
* document.
*
* <p>Applications should only use this if they need <i>all</i> of the
* matching documents. The high-level search API ({@link
@ -107,8 +107,8 @@ public abstract class Searcher implements Searchable {
/** Lower-level search API.
*
* <p>{@link HitCollector#collect(int,float)} is called for every non-zero
* scoring document.
* <p>{@link HitCollector#collect(int,float)} is called for every matching
* document.
* <br>HitCollector-based access to remote indexes is discouraged.
*
* <p>Applications should only use this if they need <i>all</i> of the

View File

@ -0,0 +1,101 @@
package org.apache.lucene.util;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.search.DocIdSetIterator;
/**
 * An {@link OpenBitSet} that can be combined in place with the doc ids
 * delivered by a {@link DocIdSetIterator} (OR, AND, NOT and XOR).
 */
public class OpenBitSetDISI extends OpenBitSet {

  /** Construct an OpenBitSetDISI with its bits set
   * from the doc ids of the given DocIdSetIterator.
   * Also give a maximum size one larger than the largest doc id for which a
   * bit may ever be set on this OpenBitSetDISI.
   *
   * @param disi iterator supplying the doc ids whose bits are set initially
   * @param maxSize one larger than the largest doc id that may ever be set
   * @throws IOException if the iterator fails while being consumed
   */
  public OpenBitSetDISI(DocIdSetIterator disi, int maxSize) throws IOException {
    super(maxSize);
    inPlaceOr(disi);
  }

  /** Construct an OpenBitSetDISI with no bits set, and a given maximum size
   * one larger than the largest doc id for which a bit may ever be set
   * on this OpenBitSetDISI.
   *
   * @param maxSize one larger than the largest doc id that may ever be set
   */
  public OpenBitSetDISI(int maxSize) {
    super(maxSize);
  }

  /**
   * Perform an inplace OR with the doc ids from a given DocIdSetIterator,
   * setting the bit for each such doc id.
   * These doc ids should be smaller than the maximum size passed to the
   * constructor. Iteration stops at the first doc id that is not smaller
   * than {@link #size()}.
   *
   * @param disi iterator supplying the doc ids to set
   * @throws IOException if the iterator fails while being consumed
   */
  public void inPlaceOr(DocIdSetIterator disi) throws IOException {
    while (disi.next() && (disi.doc() < size())) {
      fastSet(disi.doc());
    }
  }

  /**
   * Perform an inplace AND with the doc ids from a given DocIdSetIterator,
   * leaving only the bits set for which the doc ids are in common.
   * These doc ids should be smaller than the maximum size passed to the
   * constructor.
   *
   * @param disi iterator supplying the doc ids to intersect with
   * @throws IOException if the iterator fails while being consumed
   */
  public void inPlaceAnd(DocIdSetIterator disi) throws IOException {
    int index = nextSetBit(0);
    int lastNotCleared = -1;
    while ((index != -1) && disi.skipTo(index)) {
      // Every set bit below the iterator's current doc has no partner: clear it.
      while ((index != -1) && (index < disi.doc())) {
        fastClear(index);
        index = nextSetBit(index + 1);
      }
      if (index == disi.doc()) {
        lastNotCleared = index;
        // Continue from the next bit that is actually set (or -1 when there
        // is none). A plain index + 1 could be an unset position, or even
        // size(), which the clearing loop above would then hand to
        // fastClear although it may lie outside the backing array.
        index = nextSetBit(index + 1);
      }
      assert (index == -1) || (index > disi.doc());
    }
    // The iterator is exhausted, or no set bits remain: nothing behind the
    // last common doc id can be part of the intersection.
    clear(lastNotCleared+1, size());
  }

  /**
   * Perform an inplace NOT with the doc ids from a given DocIdSetIterator,
   * clearing all the bits for each such doc id.
   * These doc ids should be smaller than the maximum size passed to the
   * constructor. Iteration stops at the first doc id that is not smaller
   * than {@link #size()}.
   *
   * @param disi iterator supplying the doc ids to clear
   * @throws IOException if the iterator fails while being consumed
   */
  public void inPlaceNot(DocIdSetIterator disi) throws IOException {
    while (disi.next() && (disi.doc() < size())) {
      fastClear(disi.doc());
    }
  }

  /**
   * Perform an inplace XOR with the doc ids from a given DocIdSetIterator,
   * flipping all the bits for each such doc id.
   * These doc ids should be smaller than the maximum size passed to the
   * constructor. Iteration stops at the first doc id that is not smaller
   * than {@link #size()}.
   *
   * @param disi iterator supplying the doc ids to flip
   * @throws IOException if the iterator fails while being consumed
   */
  public void inPlaceXor(DocIdSetIterator disi) throws IOException {
    while (disi.next() && (disi.doc() < size())) {
      fastFlip(disi.doc());
    }
  }
}

View File

@ -0,0 +1,48 @@
package org.apache.lucene.search;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.BitSet;
import org.apache.lucene.index.IndexReader;
/**
* Helper class used for testing compatibility with old BitSet-based filters.
* Does not override {@link Filter#getDocIdSet(IndexReader)} and thus ensures
* that {@link #bits(IndexReader)} is called.
*
* @deprecated This class will be removed together with the
* {@link Filter#bits(IndexReader)} method in Lucene 3.0.
*/
public class OldBitSetFilterWrapper extends Filter {
private Filter filter;
public OldBitSetFilterWrapper(Filter filter) {
this.filter = filter;
}
public BitSet bits(IndexReader reader) throws IOException {
BitSet bits = new BitSet(reader.maxDoc());
DocIdSetIterator it = filter.getDocIdSet(reader).iterator();
while(it.next()) {
bits.set(it.doc());
}
return bits;
}
}