LUCENE-1187: ChainedFilter and BooleanFilter now work with new Filter API and DocIdSetIterator-based filters.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@659635 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Busch 2008-05-23 19:25:05 +00:00
parent 08a2eb4665
commit 0ebfcc663e
12 changed files with 650 additions and 375 deletions

View File

@ -163,6 +163,10 @@ New features
13. LUCENE-1166: Decomposition tokenfilter for languages like German and Swedish (Thomas Peuss via Grant Ingersoll) 13. LUCENE-1166: Decomposition tokenfilter for languages like German and Swedish (Thomas Peuss via Grant Ingersoll)
14. LUCENE-1187: ChainedFilter and BooleanFilter now work with new Filter API
and DocIdSetIterator-based filters. Backwards-compatibility with old
BitSet-based filters is ensured. (Paul Elschot via Michael Busch)
Optimizations Optimizations
1. LUCENE-705: When building a compound file, use 1. LUCENE-705: When building a compound file, use

View File

@ -58,7 +58,11 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Filter; import org.apache.lucene.search.Filter;
import java.io.IOException; import java.io.IOException;
import java.util.BitSet; import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.OpenBitSetDISI;
import org.apache.lucene.util.SortedVIntList;
/** /**
* <p> * <p>
@ -79,29 +83,13 @@ import java.util.BitSet;
*/ */
public class ChainedFilter extends Filter public class ChainedFilter extends Filter
{ {
/**
* {@link BitSet#or}.
*/
public static final int OR = 0; public static final int OR = 0;
/**
* {@link BitSet#and}.
*/
public static final int AND = 1; public static final int AND = 1;
/**
* {@link BitSet#andNot}.
*/
public static final int ANDNOT = 2; public static final int ANDNOT = 2;
/**
* {@link BitSet#xor}.
*/
public static final int XOR = 3; public static final int XOR = 3;
/** /**
* Logical operation when none is declared. Defaults to * Logical operation when none is declared. Defaults to
* {@link BitSet#or}. * OR.
*/ */
public static int DEFAULT = OR; public static int DEFAULT = OR;
@ -144,96 +132,95 @@ public class ChainedFilter extends Filter
} }
/** /**
* {@link Filter#bits}. * {@link Filter#getDocIdSet}.
*/ */
public BitSet bits(IndexReader reader) throws IOException public DocIdSet getDocIdSet(IndexReader reader) throws IOException
{ {
int[] index = new int[1]; // use array as reference to modifiable int;
index[0] = 0; // an object attribute would not be thread safe.
if (logic != -1) if (logic != -1)
return bits(reader, logic); return getDocIdSet(reader, logic, index);
else if (logicArray != null) else if (logicArray != null)
return bits(reader, logicArray); return getDocIdSet(reader, logicArray, index);
else else
return bits(reader, DEFAULT); return getDocIdSet(reader, DEFAULT, index);
} }
/** private DocIdSetIterator getDISI(Filter filter, IndexReader reader)
* Delegates to each filter in the chain. throws IOException
* @param reader IndexReader
* @param logic Logical operation
* @return BitSet
*/
private BitSet bits(IndexReader reader, int logic) throws IOException
{ {
BitSet result; return filter.getDocIdSet(reader).iterator();
int i = 0; }
private OpenBitSetDISI initialResult(IndexReader reader, int logic, int[] index)
throws IOException
{
OpenBitSetDISI result;
/** /**
* First AND operation takes place against a completely false * First AND operation takes place against a completely false
* bitset and will always return zero results. Thanks to * bitset and will always return zero results.
* Daniel Armbrust for pointing this out and suggesting workaround.
*/ */
if (logic == AND) if (logic == AND)
{ {
result = (BitSet) chain[i].bits(reader).clone(); result = new OpenBitSetDISI(getDISI(chain[index[0]], reader), reader.maxDoc());
++i; ++index[0];
} }
else if (logic == ANDNOT) else if (logic == ANDNOT)
{ {
result = (BitSet) chain[i].bits(reader).clone(); result = new OpenBitSetDISI(getDISI(chain[index[0]], reader), reader.maxDoc());
result.flip(0,reader.maxDoc()); result.flip(0,reader.maxDoc()); // NOTE: may set bits for deleted docs.
++i; ++index[0];
} }
else else
{ {
result = new BitSet(reader.maxDoc()); result = new OpenBitSetDISI(reader.maxDoc());
}
for (; i < chain.length; i++)
{
doChain(result, reader, logic, chain[i]);
} }
return result; return result;
} }
/** Provide a SortedVIntList when it is definitely smaller than an OpenBitSet */
protected DocIdSet finalResult(OpenBitSetDISI result, int maxDocs) {
return (result.cardinality() < (maxDocs / 9))
? (DocIdSet) new SortedVIntList(result)
: (DocIdSet) result;
}
/**
* Delegates to each filter in the chain.
* @param reader IndexReader
* @param logic Logical operation
* @return DocIdSet
*/
private DocIdSet getDocIdSet(IndexReader reader, int logic, int[] index)
throws IOException
{
OpenBitSetDISI result = initialResult(reader, logic, index);
for (; index[0] < chain.length; index[0]++)
{
doChain(result, logic, chain[index[0]].getDocIdSet(reader));
}
return finalResult(result, reader.maxDoc());
}
/** /**
* Delegates to each filter in the chain. * Delegates to each filter in the chain.
* @param reader IndexReader * @param reader IndexReader
* @param logic Logical operation * @param logic Logical operation
* @return BitSet * @return DocIdSet
*/ */
private BitSet bits(IndexReader reader, int[] logic) throws IOException private DocIdSet getDocIdSet(IndexReader reader, int[] logic, int[] index)
throws IOException
{ {
if (logic.length != chain.length) if (logic.length != chain.length)
throw new IllegalArgumentException("Invalid number of elements in logic array"); throw new IllegalArgumentException("Invalid number of elements in logic array");
BitSet result;
int i = 0;
/** OpenBitSetDISI result = initialResult(reader, logic[0], index);
* First AND operation takes place against a completely false for (; index[0] < chain.length; index[0]++)
* bitset and will always return zero results. Thanks to
* Daniel Armbrust for pointing this out and suggesting workaround.
*/
if (logic[0] == AND)
{ {
result = (BitSet) chain[i].bits(reader).clone(); doChain(result, logic[index[0]], chain[index[0]].getDocIdSet(reader));
++i;
} }
else if (logic[0] == ANDNOT) return finalResult(result, reader.maxDoc());
{
result = (BitSet) chain[i].bits(reader).clone();
result.flip(0,reader.maxDoc());
++i;
}
else
{
result = new BitSet(reader.maxDoc());
}
for (; i < chain.length; i++)
{
doChain(result, reader, logic[i], chain[i]);
}
return result;
} }
public String toString() public String toString()
@ -249,26 +236,51 @@ public class ChainedFilter extends Filter
return sb.toString(); return sb.toString();
} }
private void doChain(BitSet result, IndexReader reader, private void doChain(OpenBitSetDISI result, int logic, DocIdSet dis)
int logic, Filter filter) throws IOException throws IOException
{ {
if (dis instanceof OpenBitSet) {
// optimized case for OpenBitSets
switch (logic) switch (logic)
{ {
case OR: case OR:
result.or(filter.bits(reader)); result.or((OpenBitSet) dis);
break; break;
case AND: case AND:
result.and(filter.bits(reader)); result.and((OpenBitSet) dis);
break; break;
case ANDNOT: case ANDNOT:
result.andNot(filter.bits(reader)); result.andNot((OpenBitSet) dis);
break; break;
case XOR: case XOR:
result.xor(filter.bits(reader)); result.xor((OpenBitSet) dis);
break; break;
default: default:
doChain(result, reader, DEFAULT, filter); doChain(result, DEFAULT, dis);
break; break;
} }
} else {
DocIdSetIterator disi = dis.iterator();
switch (logic)
{
case OR:
result.inPlaceOr(disi);
break;
case AND:
result.inPlaceAnd(disi);
break;
case ANDNOT:
result.inPlaceNot(disi);
break;
case XOR:
result.inPlaceXor(disi);
break;
default:
doChain(result, DEFAULT, dis);
break;
}
}
} }
} }

View File

@ -19,11 +19,17 @@ package org.apache.lucene.misc;
import junit.framework.TestCase; import junit.framework.TestCase;
import java.util.*; import java.util.*;
import java.io.IOException;
import java.text.ParseException; import java.text.ParseException;
import java.text.SimpleDateFormat; import java.text.SimpleDateFormat;
import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.NoLockFactory;
import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer; import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
@ -80,76 +86,149 @@ public class ChainedFilterTest extends TestCase {
new TermQuery(new Term("owner", "sue"))); new TermQuery(new Term("owner", "sue")));
} }
private Filter[] getChainWithOldFilters(Filter[] chain) {
Filter[] oldFilters = new Filter[chain.length];
for (int i = 0; i < chain.length; i++) {
oldFilters[i] = new OldBitSetFilterWrapper(chain[i]);
}
return oldFilters;
}
private ChainedFilter getChainedFilter(Filter[] chain, int[] logic, boolean old) {
if (old) {
chain = getChainWithOldFilters(chain);
}
if (logic == null) {
return new ChainedFilter(chain);
} else {
return new ChainedFilter(chain, logic);
}
}
private ChainedFilter getChainedFilter(Filter[] chain, int logic, boolean old) {
if (old) {
chain = getChainWithOldFilters(chain);
}
return new ChainedFilter(chain, logic);
}
public void testSingleFilter() throws Exception { public void testSingleFilter() throws Exception {
ChainedFilter chain = new ChainedFilter( for (int mode = 0; mode < 2; mode++) {
new Filter[] {dateFilter}); boolean old = (mode==0);
Hits hits = searcher.search(query, chain); ChainedFilter chain = getChainedFilter(new Filter[] {dateFilter}, null, old);
assertEquals(MAX, hits.length());
chain = new ChainedFilter(new Filter[] {bobFilter}); Hits hits = searcher.search(query, chain);
hits = searcher.search(query, chain); assertEquals(MAX, hits.length());
assertEquals(MAX / 2, hits.length());
chain = new ChainedFilter(new Filter[] {bobFilter}, new int[] {ChainedFilter.AND}); chain = new ChainedFilter(new Filter[] {bobFilter});
hits = searcher.search(query, chain); hits = searcher.search(query, chain);
assertEquals(MAX / 2, hits.length()); assertEquals(MAX / 2, hits.length());
assertEquals("bob", hits.doc(0).get("owner"));
chain = new ChainedFilter(new Filter[] {bobFilter}, new int[] {ChainedFilter.ANDNOT}); chain = getChainedFilter(new Filter[] {bobFilter}, new int[] {ChainedFilter.AND}, old);
hits = searcher.search(query, chain); hits = searcher.search(query, chain);
assertEquals(MAX / 2, hits.length()); assertEquals(MAX / 2, hits.length());
assertEquals("sue", hits.doc(0).get("owner")); assertEquals("bob", hits.doc(0).get("owner"));
chain = getChainedFilter(new Filter[] {bobFilter}, new int[] {ChainedFilter.ANDNOT}, old);
hits = searcher.search(query, chain);
assertEquals(MAX / 2, hits.length());
assertEquals("sue", hits.doc(0).get("owner"));
}
} }
public void testOR() throws Exception { public void testOR() throws Exception {
ChainedFilter chain = new ChainedFilter( for (int mode = 0; mode < 2; mode++) {
new Filter[] {sueFilter, bobFilter}); boolean old = (mode==0);
ChainedFilter chain = getChainedFilter(
new Filter[] {sueFilter, bobFilter}, null, old);
Hits hits = searcher.search(query, chain); Hits hits = searcher.search(query, chain);
assertEquals("OR matches all", MAX, hits.length()); assertEquals("OR matches all", MAX, hits.length());
}
} }
public void testAND() throws Exception { public void testAND() throws Exception {
ChainedFilter chain = new ChainedFilter( for (int mode = 0; mode < 2; mode++) {
new Filter[] {dateFilter, bobFilter}, ChainedFilter.AND); boolean old = (mode==0);
ChainedFilter chain = getChainedFilter(
new Filter[] {dateFilter, bobFilter}, ChainedFilter.AND, old);
Hits hits = searcher.search(query, chain); Hits hits = searcher.search(query, chain);
assertEquals("AND matches just bob", MAX / 2, hits.length()); assertEquals("AND matches just bob", MAX / 2, hits.length());
assertEquals("bob", hits.doc(0).get("owner")); assertEquals("bob", hits.doc(0).get("owner"));
}
} }
public void testXOR() throws Exception { public void testXOR() throws Exception {
ChainedFilter chain = new ChainedFilter( for (int mode = 0; mode < 2; mode++) {
new Filter[]{dateFilter, bobFilter}, ChainedFilter.XOR); boolean old = (mode==0);
ChainedFilter chain = getChainedFilter(
new Filter[]{dateFilter, bobFilter}, ChainedFilter.XOR, old);
Hits hits = searcher.search(query, chain); Hits hits = searcher.search(query, chain);
assertEquals("XOR matches sue", MAX / 2, hits.length()); assertEquals("XOR matches sue", MAX / 2, hits.length());
assertEquals("sue", hits.doc(0).get("owner")); assertEquals("sue", hits.doc(0).get("owner"));
}
} }
public void testANDNOT() throws Exception { public void testANDNOT() throws Exception {
ChainedFilter chain = new ChainedFilter( for (int mode = 0; mode < 2; mode++) {
new Filter[]{dateFilter, sueFilter}, boolean old = (mode==0);
new int[] {ChainedFilter.AND, ChainedFilter.ANDNOT}); ChainedFilter chain = getChainedFilter(
new Filter[]{dateFilter, sueFilter},
new int[] {ChainedFilter.AND, ChainedFilter.ANDNOT}, old);
Hits hits = searcher.search(query, chain); Hits hits = searcher.search(query, chain);
assertEquals("ANDNOT matches just bob", assertEquals("ANDNOT matches just bob",
MAX / 2, hits.length());
assertEquals("bob", hits.doc(0).get("owner"));
chain = new ChainedFilter(
new Filter[]{bobFilter, bobFilter},
new int[] {ChainedFilter.ANDNOT, ChainedFilter.ANDNOT});
hits = searcher.search(query, chain);
assertEquals("ANDNOT bob ANDNOT bob matches all sues",
MAX / 2, hits.length()); MAX / 2, hits.length());
assertEquals("sue", hits.doc(0).get("owner")); assertEquals("bob", hits.doc(0).get("owner"));
chain = getChainedFilter(
new Filter[]{bobFilter, bobFilter},
new int[] {ChainedFilter.ANDNOT, ChainedFilter.ANDNOT}, old);
hits = searcher.search(query, chain);
assertEquals("ANDNOT bob ANDNOT bob matches all sues",
MAX / 2, hits.length());
assertEquals("sue", hits.doc(0).get("owner"));
}
} }
private Date parseDate(String s) throws ParseException { private Date parseDate(String s) throws ParseException {
return new SimpleDateFormat("yyyy MMM dd", Locale.US).parse(s); return new SimpleDateFormat("yyyy MMM dd", Locale.US).parse(s);
} }
public void testWithCachingFilter() throws Exception {
for (int mode = 0; mode < 2; mode++) {
boolean old = (mode==0);
Directory dir = new RAMDirectory();
Analyzer analyzer = new WhitespaceAnalyzer();
IndexWriter writer = new IndexWriter(dir, analyzer, true, MaxFieldLength.LIMITED);
writer.close();
Searcher searcher = new IndexSearcher(dir);
Query query = new TermQuery(new Term("none", "none"));
QueryWrapperFilter queryFilter = new QueryWrapperFilter(query);
CachingWrapperFilter cachingFilter = new CachingWrapperFilter(queryFilter);
searcher.search(query, cachingFilter, 1);
CachingWrapperFilter cachingFilter2 = new CachingWrapperFilter(queryFilter);
Filter[] chain = new Filter[2];
chain[0] = cachingFilter;
chain[1] = cachingFilter2;
ChainedFilter cf = new ChainedFilter(chain);
// regression check for LUCENE-1187: this search previously threw java.lang.ClassCastException: org.apache.lucene.util.OpenBitSet cannot be cast to java.util.BitSet
searcher.search(new MatchAllDocsQuery(), cf, 1);
}
}
} }

View File

@ -23,6 +23,10 @@ import java.util.BitSet;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.util.DocIdBitSet;
import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.OpenBitSetDISI;
import org.apache.lucene.util.SortedVIntList;
/** /**
* A container Filter that allows Boolean composition of Filters. * A container Filter that allows Boolean composition of Filters.
@ -37,184 +41,167 @@ import org.apache.lucene.search.BooleanClause.Occur;
public class BooleanFilter extends Filter public class BooleanFilter extends Filter
{ {
//ArrayList of SHOULD filters ArrayList shouldFilters = null;
ArrayList shouldFilters = null; ArrayList notFilters = null;
//ArrayList of NOT filters ArrayList mustFilters = null;
ArrayList notFilters = null;
//ArrayList of MUST filters
ArrayList mustFilters = null;
/** private DocIdSetIterator getDISI(ArrayList filters, int index, IndexReader reader)
* Returns the a BitSet representing the Boolean composition throws IOException
* of the filters that have been added. {
*/ return ((Filter)filters.get(index)).getDocIdSet(reader).iterator();
}
public BitSet bits(IndexReader reader) throws IOException /**
{ * Returns a DocIdSet representing the Boolean composition
//create a new bitSet * of the filters that have been added.
BitSet returnBits = null; */
public DocIdSet getDocIdSet(IndexReader reader) throws IOException
{
OpenBitSetDISI res = null;
//SHOULD filters if (shouldFilters != null) {
if (shouldFilters!=null) for (int i = 0; i < shouldFilters.size(); i++) {
{ if (res == null) {
returnBits = ((Filter)shouldFilters.get(0)).bits(reader); res = new OpenBitSetDISI(getDISI(shouldFilters, i, reader), reader.maxDoc());
// avoid changing the original bitset - it may be cached } else {
returnBits=(BitSet) returnBits.clone(); DocIdSet dis = ((Filter)shouldFilters.get(i)).getDocIdSet(reader);
if (shouldFilters.size() > 1) if(dis instanceof OpenBitSet) {
{ // optimized case for OpenBitSets
for (int i = 1; i < shouldFilters.size(); i++) res.or((OpenBitSet) dis);
{ } else {
returnBits.or(((Filter)shouldFilters.get(i)).bits(reader)); res.inPlaceOr(getDISI(shouldFilters, i, reader));
} }
} }
} }
}
//NOT filters if (notFilters!=null) {
if (notFilters!=null) for (int i = 0; i < notFilters.size(); i++) {
{ if (res == null) {
for (int i = 0; i < notFilters.size(); i++) res = new OpenBitSetDISI(getDISI(notFilters, i, reader), reader.maxDoc());
{ res.flip(0, reader.maxDoc()); // NOTE: may set bits on deleted docs
BitSet notBits=((Filter)notFilters.get(i)).bits(reader); } else {
if(returnBits==null) DocIdSet dis = ((Filter)notFilters.get(i)).getDocIdSet(reader);
{ if(dis instanceof OpenBitSet) {
returnBits=(BitSet) notBits.clone(); // optimized case for OpenBitSets
returnBits.flip(0,reader.maxDoc()); res.andNot((OpenBitSet) dis);
} } else {
else res.inPlaceNot(getDISI(notFilters, i, reader));
{ }
returnBits.andNot(notBits); }
} }
} }
}
//MUST filters if (mustFilters!=null) {
if (mustFilters!=null) for (int i = 0; i < mustFilters.size(); i++) {
{ if (res == null) {
for (int i = 0; i < mustFilters.size(); i++) res = new OpenBitSetDISI(getDISI(mustFilters, i, reader), reader.maxDoc());
{ } else {
BitSet mustBits=((Filter)mustFilters.get(i)).bits(reader); DocIdSet dis = ((Filter)mustFilters.get(i)).getDocIdSet(reader);
if(returnBits==null) if(dis instanceof OpenBitSet) {
{ // optimized case for OpenBitSets
if(mustFilters.size()==1) res.and((OpenBitSet) dis);
{ } else {
returnBits=mustBits; res.inPlaceAnd(getDISI(mustFilters, i, reader));
}
}
}
}
} if (res !=null)
else return finalResult(res, reader.maxDoc());
{
//don't mangle the bitset
returnBits=(BitSet) mustBits.clone();
}
}
else
{
returnBits.and(mustBits);
}
}
}
if(returnBits==null)
{
returnBits=new BitSet(reader.maxDoc());
}
return returnBits;
}
/** if (emptyDocIdSet == null)
* Adds a new FilterClause to the Boolean Filter container emptyDocIdSet = new OpenBitSetDISI(1);
* @param filterClause A FilterClause object containing a Filter and an Occur parameter
*/
public void add(FilterClause filterClause) return emptyDocIdSet;
{ }
if (filterClause.getOccur().equals(Occur.MUST))
{
if(mustFilters==null)
{
mustFilters=new ArrayList();
}
mustFilters.add(filterClause.getFilter());
}
if (filterClause.getOccur().equals(Occur.SHOULD))
{
if(shouldFilters==null)
{
shouldFilters=new ArrayList();
}
shouldFilters.add(filterClause.getFilter());
}
if (filterClause.getOccur().equals(Occur.MUST_NOT))
{
if(notFilters==null)
{
notFilters=new ArrayList();
}
notFilters.add(filterClause.getFilter());
}
}
public boolean equals(Object obj) /** Provide a SortedVIntList when it is definitely smaller than an OpenBitSet */
{ protected DocIdSet finalResult(OpenBitSetDISI result, int maxDocs) {
if(this == obj) return (result.cardinality() < (maxDocs / 9))
return true; ? (DocIdSet) new SortedVIntList(result)
if((obj == null) || (obj.getClass() != this.getClass())) : (DocIdSet) result;
return false; }
BooleanFilter test = (BooleanFilter)obj;
return (notFilters == test.notFilters||
(notFilters!= null && notFilters.equals(test.notFilters)))
&&
(mustFilters == test.mustFilters||
(mustFilters!= null && mustFilters.equals(test.mustFilters)))
&&
(shouldFilters == test.shouldFilters||
(shouldFilters!= null && shouldFilters.equals(test.shouldFilters)));
}
public int hashCode() private static DocIdSet emptyDocIdSet = null;
{
int hash=7;
hash = 31 * hash + (null == mustFilters ? 0 : mustFilters.hashCode());
hash = 31 * hash + (null == notFilters ? 0 : notFilters.hashCode());
hash = 31 * hash + (null == shouldFilters ? 0 : shouldFilters.hashCode());
return hash;
}
/**
* Adds a new FilterClause to the Boolean Filter container
* @param filterClause A FilterClause object containing a Filter and an Occur parameter
*/
/** Prints a user-readable version of this query. */ public void add(FilterClause filterClause)
public String toString() {
{ if (filterClause.getOccur().equals(Occur.MUST)) {
StringBuffer buffer = new StringBuffer(); if (mustFilters==null) {
mustFilters=new ArrayList();
}
mustFilters.add(filterClause.getFilter());
}
if (filterClause.getOccur().equals(Occur.SHOULD)) {
if (shouldFilters==null) {
shouldFilters=new ArrayList();
}
shouldFilters.add(filterClause.getFilter());
}
if (filterClause.getOccur().equals(Occur.MUST_NOT)) {
if (notFilters==null) {
notFilters=new ArrayList();
}
notFilters.add(filterClause.getFilter());
}
}
buffer.append("BooleanFilter("); private boolean equalFilters(ArrayList filters1, ArrayList filters2)
{
return (filters1 == filters2) ||
((filters1 != null) && filters1.equals(filters2));
}
appendFilters(shouldFilters, null, buffer); public boolean equals(Object obj)
appendFilters(mustFilters, "+", buffer); {
appendFilters(notFilters, "-", buffer); if (this == obj)
return true;
buffer.append(")"); if ((obj == null) || (obj.getClass() != this.getClass()))
return false;
return buffer.toString(); BooleanFilter other = (BooleanFilter)obj;
} return equalFilters(notFilters, other.notFilters)
&& equalFilters(mustFilters, other.mustFilters)
&& equalFilters(shouldFilters, other.shouldFilters);
}
private void appendFilters(ArrayList filters, String occurString, public int hashCode()
StringBuffer buffer) {
{ int hash=7;
if (filters == null) hash = 31 * hash + (null == mustFilters ? 0 : mustFilters.hashCode());
return; hash = 31 * hash + (null == notFilters ? 0 : notFilters.hashCode());
hash = 31 * hash + (null == shouldFilters ? 0 : shouldFilters.hashCode());
return hash;
}
for (int i = 0; i < filters.size(); i++) /** Prints a user-readable version of this filter. */
{ public String toString()
Filter filter = (Filter) filters.get(i); {
if (occurString != null) StringBuffer buffer = new StringBuffer();
{ buffer.append("BooleanFilter(");
buffer.append(occurString); appendFilters(shouldFilters, "", buffer);
} appendFilters(mustFilters, "+", buffer);
appendFilters(notFilters, "-", buffer);
buffer.append(")");
return buffer.toString();
}
buffer.append(filter); private void appendFilters(ArrayList filters, String occurString, StringBuffer buffer)
{
if (i < filters.size() - 1) if (filters != null) {
{ for (int i = 0; i < filters.size(); i++) {
buffer.append(' '); buffer.append(' ');
} buffer.append(occurString);
} buffer.append(filters.get(i).toString());
} }
}
}
} }

View File

@ -22,6 +22,7 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs; import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum; import org.apache.lucene.index.TermEnum;
import org.apache.lucene.util.OpenBitSet;
public class DuplicateFilter extends Filter public class DuplicateFilter extends Filter
{ {
@ -66,7 +67,7 @@ public class DuplicateFilter extends Filter
this.processingMode = processingMode; this.processingMode = processingMode;
} }
public BitSet bits(IndexReader reader) throws IOException public DocIdSet getDocIdSet(IndexReader reader) throws IOException
{ {
if(processingMode==PM_FAST_INVALIDATION) if(processingMode==PM_FAST_INVALIDATION)
{ {
@ -78,10 +79,10 @@ public class DuplicateFilter extends Filter
} }
} }
private BitSet correctBits(IndexReader reader) throws IOException private OpenBitSet correctBits(IndexReader reader) throws IOException
{ {
BitSet bits=new BitSet(reader.maxDoc()); //assume all are INvalid OpenBitSet bits=new OpenBitSet(reader.maxDoc()); //assume all are INvalid
Term startTerm=new Term(fieldName,""); Term startTerm=new Term(fieldName,"");
TermEnum te = reader.terms(startTerm); TermEnum te = reader.terms(startTerm);
if(te!=null) if(te!=null)
@ -117,10 +118,10 @@ public class DuplicateFilter extends Filter
return bits; return bits;
} }
private BitSet fastBits(IndexReader reader) throws IOException private OpenBitSet fastBits(IndexReader reader) throws IOException
{ {
BitSet bits=new BitSet(reader.maxDoc()); OpenBitSet bits=new OpenBitSet(reader.maxDoc());
bits.set(0,reader.maxDoc()); //assume all are valid bits.set(0,reader.maxDoc()); //assume all are valid
Term startTerm=new Term(fieldName,""); Term startTerm=new Term(fieldName,"");
TermEnum te = reader.terms(startTerm); TermEnum te = reader.terms(startTerm);
@ -143,7 +144,7 @@ public class DuplicateFilter extends Filter
do do
{ {
lastDoc=td.doc(); lastDoc=td.doc();
bits.set(lastDoc,false); bits.clear(lastDoc);
}while(td.next()); }while(td.next());
if(keepMode==KM_USE_LAST_OCCURRENCE) if(keepMode==KM_USE_LAST_OCCURRENCE)
{ {

View File

@ -26,6 +26,7 @@ import java.util.TreeSet;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs; import org.apache.lucene.index.TermDocs;
import org.apache.lucene.util.OpenBitSet;
/** /**
* Constructs a filter for docs matching any of the terms added to this class. * Constructs a filter for docs matching any of the terms added to this class.
@ -50,11 +51,11 @@ public class TermsFilter extends Filter
} }
/* (non-Javadoc) /* (non-Javadoc)
* @see org.apache.lucene.search.Filter#bits(org.apache.lucene.index.IndexReader) * @see org.apache.lucene.search.Filter#getDocIdSet(org.apache.lucene.index.IndexReader)
*/ */
public BitSet bits(IndexReader reader) throws IOException public DocIdSet getDocIdSet(IndexReader reader) throws IOException
{ {
BitSet result=new BitSet(reader.maxDoc()); OpenBitSet result=new OpenBitSet(reader.maxDoc());
TermDocs td = reader.termDocs(); TermDocs td = reader.termDocs();
try try
{ {

View File

@ -32,6 +32,7 @@ import org.apache.lucene.search.Filter;
import org.apache.lucene.search.FilterClause; import org.apache.lucene.search.FilterClause;
import org.apache.lucene.search.RangeFilter; import org.apache.lucene.search.RangeFilter;
import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.DocIdBitSet;
import junit.framework.TestCase; import junit.framework.TestCase;
@ -66,100 +67,141 @@ public class BooleanFilterTest extends TestCase
writer.addDocument(doc); writer.addDocument(doc);
} }
private Filter getRangeFilter(String field,String lowerPrice, String upperPrice) private Filter getRangeFilter(String field,String lowerPrice, String upperPrice, boolean old)
{ {
return new RangeFilter(field,lowerPrice,upperPrice,true,true); Filter f = new RangeFilter(field,lowerPrice,upperPrice,true,true);
if (old) {
return new OldBitSetFilterWrapper(f);
}
return f;
} }
private TermsFilter getTermsFilter(String field,String text) private Filter getTermsFilter(String field,String text, boolean old)
{ {
TermsFilter tf=new TermsFilter(); TermsFilter tf=new TermsFilter();
tf.addTerm(new Term(field,text)); tf.addTerm(new Term(field,text));
if (old) {
return new OldBitSetFilterWrapper(tf);
}
return tf; return tf;
} }
private void tstFilterCard(String mes, int expected, Filter filt)
throws Throwable
{
DocIdSetIterator disi = filt.getDocIdSet(reader).iterator();
int actual = 0;
while (disi.next()) {
actual++;
}
assertEquals(mes, expected, actual);
}
public void testShould() throws Throwable public void testShould() throws Throwable
{ {
BooleanFilter booleanFilter = new BooleanFilter(); for (int i = 0; i < 2; i++) {
booleanFilter.add(new FilterClause(getTermsFilter("price","030"),BooleanClause.Occur.SHOULD)); boolean old = (i==0);
BitSet bits = booleanFilter.bits(reader); BooleanFilter booleanFilter = new BooleanFilter();
assertEquals("Should retrieves only 1 doc",1,bits.cardinality()); booleanFilter.add(new FilterClause(getTermsFilter("price","030", old),BooleanClause.Occur.SHOULD));
tstFilterCard("Should retrieves only 1 doc",1,booleanFilter);
}
} }
public void testShoulds() throws Throwable public void testShoulds() throws Throwable
{ {
BooleanFilter booleanFilter = new BooleanFilter(); for (int i = 0; i < 2; i++) {
booleanFilter.add(new FilterClause(getRangeFilter("price","010", "020"),BooleanClause.Occur.SHOULD)); boolean old = (i==0);
booleanFilter.add(new FilterClause(getRangeFilter("price","020", "030"),BooleanClause.Occur.SHOULD)); BooleanFilter booleanFilter = new BooleanFilter();
BitSet bits = booleanFilter.bits(reader); booleanFilter.add(new FilterClause(getRangeFilter("price","010", "020", old),BooleanClause.Occur.SHOULD));
assertEquals("Shoulds are Ored together",5,bits.cardinality()); booleanFilter.add(new FilterClause(getRangeFilter("price","020", "030", old),BooleanClause.Occur.SHOULD));
tstFilterCard("Shoulds are Ored together",5,booleanFilter);
}
} }
public void testShouldsAndMustNot() throws Throwable public void testShouldsAndMustNot() throws Throwable
{ {
BooleanFilter booleanFilter = new BooleanFilter(); for (int i = 0; i < 2; i++) {
booleanFilter.add(new FilterClause(getRangeFilter("price","010", "020"),BooleanClause.Occur.SHOULD)); boolean old = (i==0);
booleanFilter.add(new FilterClause(getRangeFilter("price","020", "030"),BooleanClause.Occur.SHOULD));
booleanFilter.add(new FilterClause(getTermsFilter("inStock", "N"),BooleanClause.Occur.MUST_NOT));
BitSet bits = booleanFilter.bits(reader);
assertEquals("Shoulds Ored but AndNot",4,bits.cardinality());
booleanFilter.add(new FilterClause(getTermsFilter("inStock", "Maybe"),BooleanClause.Occur.MUST_NOT)); BooleanFilter booleanFilter = new BooleanFilter();
bits = booleanFilter.bits(reader); booleanFilter.add(new FilterClause(getRangeFilter("price","010", "020", old),BooleanClause.Occur.SHOULD));
assertEquals("Shoulds Ored but AndNots",3,bits.cardinality()); booleanFilter.add(new FilterClause(getRangeFilter("price","020", "030", old),BooleanClause.Occur.SHOULD));
booleanFilter.add(new FilterClause(getTermsFilter("inStock", "N", old),BooleanClause.Occur.MUST_NOT));
tstFilterCard("Shoulds Ored but AndNot",4,booleanFilter);
booleanFilter.add(new FilterClause(getTermsFilter("inStock", "Maybe", old),BooleanClause.Occur.MUST_NOT));
tstFilterCard("Shoulds Ored but AndNots",3,booleanFilter);
}
} }
public void testShouldsAndMust() throws Throwable public void testShouldsAndMust() throws Throwable
{ {
BooleanFilter booleanFilter = new BooleanFilter(); for (int i = 0; i < 2; i++) {
booleanFilter.add(new FilterClause(getRangeFilter("price","010", "020"),BooleanClause.Occur.SHOULD)); boolean old = (i==0);
booleanFilter.add(new FilterClause(getRangeFilter("price","020", "030"),BooleanClause.Occur.SHOULD)); BooleanFilter booleanFilter = new BooleanFilter();
booleanFilter.add(new FilterClause(getTermsFilter("accessRights", "admin"),BooleanClause.Occur.MUST)); booleanFilter.add(new FilterClause(getRangeFilter("price","010", "020", old),BooleanClause.Occur.SHOULD));
BitSet bits = booleanFilter.bits(reader); booleanFilter.add(new FilterClause(getRangeFilter("price","020", "030", old),BooleanClause.Occur.SHOULD));
assertEquals("Shoulds Ored but MUST",3,bits.cardinality()); booleanFilter.add(new FilterClause(getTermsFilter("accessRights", "admin", old),BooleanClause.Occur.MUST));
tstFilterCard("Shoulds Ored but MUST",3,booleanFilter);
}
} }
public void testShouldsAndMusts() throws Throwable public void testShouldsAndMusts() throws Throwable
{ {
BooleanFilter booleanFilter = new BooleanFilter(); for (int i = 0; i < 2; i++) {
booleanFilter.add(new FilterClause(getRangeFilter("price","010", "020"),BooleanClause.Occur.SHOULD)); boolean old = (i==0);
booleanFilter.add(new FilterClause(getRangeFilter("price","020", "030"),BooleanClause.Occur.SHOULD));
booleanFilter.add(new FilterClause(getTermsFilter("accessRights", "admin"),BooleanClause.Occur.MUST)); BooleanFilter booleanFilter = new BooleanFilter();
booleanFilter.add(new FilterClause(getRangeFilter("date","20040101", "20041231"),BooleanClause.Occur.MUST)); booleanFilter.add(new FilterClause(getRangeFilter("price","010", "020", old),BooleanClause.Occur.SHOULD));
BitSet bits = booleanFilter.bits(reader); booleanFilter.add(new FilterClause(getRangeFilter("price","020", "030", old),BooleanClause.Occur.SHOULD));
assertEquals("Shoulds Ored but MUSTs ANDED",1,bits.cardinality()); booleanFilter.add(new FilterClause(getTermsFilter("accessRights", "admin", old),BooleanClause.Occur.MUST));
booleanFilter.add(new FilterClause(getRangeFilter("date","20040101", "20041231", old),BooleanClause.Occur.MUST));
tstFilterCard("Shoulds Ored but MUSTs ANDED",1,booleanFilter);
}
} }
public void testShouldsAndMustsAndMustNot() throws Throwable public void testShouldsAndMustsAndMustNot() throws Throwable
{ {
BooleanFilter booleanFilter = new BooleanFilter(); for (int i = 0; i < 2; i++) {
booleanFilter.add(new FilterClause(getRangeFilter("price","030", "040"),BooleanClause.Occur.SHOULD)); boolean old = (i==0);
booleanFilter.add(new FilterClause(getTermsFilter("accessRights", "admin"),BooleanClause.Occur.MUST));
booleanFilter.add(new FilterClause(getRangeFilter("date","20050101", "20051231"),BooleanClause.Occur.MUST)); BooleanFilter booleanFilter = new BooleanFilter();
booleanFilter.add(new FilterClause(getTermsFilter("inStock","N"),BooleanClause.Occur.MUST_NOT)); booleanFilter.add(new FilterClause(getRangeFilter("price","030", "040", old),BooleanClause.Occur.SHOULD));
BitSet bits = booleanFilter.bits(reader); booleanFilter.add(new FilterClause(getTermsFilter("accessRights", "admin", old),BooleanClause.Occur.MUST));
assertEquals("Shoulds Ored but MUSTs ANDED and MustNot",0,bits.cardinality()); booleanFilter.add(new FilterClause(getRangeFilter("date","20050101", "20051231", old),BooleanClause.Occur.MUST));
booleanFilter.add(new FilterClause(getTermsFilter("inStock","N", old),BooleanClause.Occur.MUST_NOT));
tstFilterCard("Shoulds Ored but MUSTs ANDED and MustNot",0,booleanFilter);
}
} }
public void testJustMust() throws Throwable public void testJustMust() throws Throwable
{ {
BooleanFilter booleanFilter = new BooleanFilter(); for (int i = 0; i < 2; i++) {
booleanFilter.add(new FilterClause(getTermsFilter("accessRights", "admin"),BooleanClause.Occur.MUST)); boolean old = (i==0);
BitSet bits = booleanFilter.bits(reader);
assertEquals("MUST",3,bits.cardinality()); BooleanFilter booleanFilter = new BooleanFilter();
booleanFilter.add(new FilterClause(getTermsFilter("accessRights", "admin", old),BooleanClause.Occur.MUST));
tstFilterCard("MUST",3,booleanFilter);
}
} }
public void testJustMustNot() throws Throwable public void testJustMustNot() throws Throwable
{ {
BooleanFilter booleanFilter = new BooleanFilter(); for (int i = 0; i < 2; i++) {
booleanFilter.add(new FilterClause(getTermsFilter("inStock","N"),BooleanClause.Occur.MUST_NOT)); boolean old = (i==0);
BitSet bits = booleanFilter.bits(reader);
assertEquals("MUST_NOT",4,bits.cardinality()); BooleanFilter booleanFilter = new BooleanFilter();
booleanFilter.add(new FilterClause(getTermsFilter("inStock","N", old),BooleanClause.Occur.MUST_NOT));
tstFilterCard("MUST_NOT",4,booleanFilter);
}
} }
public void testMustAndMustNot() throws Throwable public void testMustAndMustNot() throws Throwable
{ {
BooleanFilter booleanFilter = new BooleanFilter(); for (int i = 0; i < 2; i++) {
booleanFilter.add(new FilterClause(getTermsFilter("inStock","N"),BooleanClause.Occur.MUST)); boolean old = (i==0);
booleanFilter.add(new FilterClause(getTermsFilter("price","030"),BooleanClause.Occur.MUST_NOT));
BitSet bits = booleanFilter.bits(reader); BooleanFilter booleanFilter = new BooleanFilter();
assertEquals("MUST_NOT wins over MUST for same docs",0,bits.cardinality()); booleanFilter.add(new FilterClause(getTermsFilter("inStock","N", old),BooleanClause.Occur.MUST));
booleanFilter.add(new FilterClause(getTermsFilter("price","030", old),BooleanClause.Occur.MUST_NOT));
tstFilterCard("MUST_NOT wins over MUST for same docs",0,booleanFilter);
}
} }
} }

View File

@ -150,8 +150,8 @@ public class ParallelMultiSearcher extends MultiSearcher {
/** Lower-level search API. /** Lower-level search API.
* *
* <p>{@link HitCollector#collect(int,float)} is called for every non-zero * <p>{@link HitCollector#collect(int,float)} is called for every matching
* scoring document. * document.
* *
* <p>Applications should only use this if they need <i>all</i> of the * <p>Applications should only use this if they need <i>all</i> of the
* matching documents. The high-level search API ({@link * matching documents. The high-level search API ({@link

View File

@ -33,7 +33,7 @@ import java.io.IOException; // for javadoc
* *
* <p>Queries, filters and sort criteria are designed to be compact so that * <p>Queries, filters and sort criteria are designed to be compact so that
* they may be efficiently passed to a remote index, with only the top-scoring * they may be efficiently passed to a remote index, with only the top-scoring
* hits being returned, rather than every non-zero scoring hit. * hits being returned, rather than every matching hit.
*/ */
public interface Searchable extends java.rmi.Remote { public interface Searchable extends java.rmi.Remote {
/** Lower-level search API. /** Lower-level search API.

View File

@ -88,8 +88,8 @@ public abstract class Searcher implements Searchable {
/** Lower-level search API. /** Lower-level search API.
* *
* <p>{@link HitCollector#collect(int,float)} is called for every non-zero * <p>{@link HitCollector#collect(int,float)} is called for every matching
* scoring document. * document.
* *
* <p>Applications should only use this if they need <i>all</i> of the * <p>Applications should only use this if they need <i>all</i> of the
* matching documents. The high-level search API ({@link * matching documents. The high-level search API ({@link
@ -107,8 +107,8 @@ public abstract class Searcher implements Searchable {
/** Lower-level search API. /** Lower-level search API.
* *
* <p>{@link HitCollector#collect(int,float)} is called for every non-zero * <p>{@link HitCollector#collect(int,float)} is called for every matching
* scoring document. * document.
* <br>HitCollector-based access to remote indexes is discouraged. * <br>HitCollector-based access to remote indexes is discouraged.
* *
* <p>Applications should only use this if they need <i>all</i> of the * <p>Applications should only use this if they need <i>all</i> of the

View File

@ -0,0 +1,101 @@
package org.apache.lucene.util;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.search.DocIdSetIterator;
/**
 * An {@link OpenBitSet} that can be combined in place with the doc ids
 * delivered by a {@link DocIdSetIterator}.
 *
 * <p>Every <code>inPlace*</code> method consumes the iterator it is given.
 * Doc ids are expected to be smaller than the maximum size passed to the
 * constructor; doc ids at or beyond {@link #size()} are never applied to
 * this bit set.
 */
public class OpenBitSetDISI extends OpenBitSet {

  /** Construct an OpenBitSetDISI with its bits set
   * from the doc ids of the given DocIdSetIterator.
   * Also give a maximum size one larger than the largest doc id for which a
   * bit may ever be set on this OpenBitSetDISI.
   *
   * @param disi iterator providing the doc ids whose bits are set initially
   * @param maxSize one larger than the largest doc id that may ever be set
   * @throws IOException if the iterator fails while being consumed
   */
  public OpenBitSetDISI(DocIdSetIterator disi, int maxSize) throws IOException {
    super(maxSize);
    inPlaceOr(disi);
  }

  /** Construct an OpenBitSetDISI with no bits set, and a given maximum size
   * one larger than the largest doc id for which a bit may ever be set
   * on this OpenBitSetDISI.
   *
   * @param maxSize one larger than the largest doc id that may ever be set
   */
  public OpenBitSetDISI(int maxSize) {
    super(maxSize);
  }

  /**
   * Perform an inplace OR with the doc ids from a given DocIdSetIterator,
   * setting the bit for each such doc id.
   * These doc ids should be smaller than the maximum size passed to the
   * constructor. Iteration stops at the first doc id that is not smaller
   * than {@link #size()}.
   *
   * @param disi iterator providing the doc ids to set
   * @throws IOException if the iterator fails while being consumed
   */
  public void inPlaceOr(DocIdSetIterator disi) throws IOException {
    while (disi.next() && (disi.doc() < size())) {
      fastSet(disi.doc());
    }
  }

  /**
   * Perform an inplace AND with the doc ids from a given DocIdSetIterator,
   * leaving only the bits set for which the doc ids are in common.
   * These doc ids should be smaller than the maximum size passed to the
   * constructor.
   *
   * @param disi iterator providing the doc ids to intersect with
   * @throws IOException if the iterator fails while being consumed
   */
  public void inPlaceAnd(DocIdSetIterator disi) throws IOException {
    int index = nextSetBit(0);
    int lastNotCleared = -1; // highest bit confirmed to be present in both sets
    while ((index != -1) && disi.skipTo(index)) {
      final int doc = disi.doc();
      // Every set bit below the iterator's current doc has no counterpart.
      while ((index != -1) && (index < doc)) {
        fastClear(index);
        index = nextSetBit(index + 1);
      }
      if (index == doc) {
        lastNotCleared = index;
        // Continue from the next bit that is actually set rather than from
        // index + 1: that position may be unset and may even equal size(),
        // which would let the unchecked fastClear above touch an
        // out-of-range index if the iterator still returned a larger doc id.
        index = nextSetBit(index + 1);
      }
      assert (index == -1) || (index > doc);
    }
    // The iterator is exhausted (or no set bits remain): nothing above the
    // last common doc id can be part of the intersection.
    clear(lastNotCleared + 1, size());
  }

  /**
   * Perform an inplace NOT with the doc ids from a given DocIdSetIterator,
   * clearing all the bits for each such doc id.
   * These doc ids should be smaller than the maximum size passed to the
   * constructor. Iteration stops at the first doc id that is not smaller
   * than {@link #size()}.
   *
   * @param disi iterator providing the doc ids to clear
   * @throws IOException if the iterator fails while being consumed
   */
  public void inPlaceNot(DocIdSetIterator disi) throws IOException {
    while (disi.next() && (disi.doc() < size())) {
      fastClear(disi.doc());
    }
  }

  /**
   * Perform an inplace XOR with the doc ids from a given DocIdSetIterator,
   * flipping all the bits for each such doc id.
   * These doc ids should be smaller than the maximum size passed to the
   * constructor. Iteration stops at the first doc id that is not smaller
   * than {@link #size()}.
   *
   * @param disi iterator providing the doc ids to flip
   * @throws IOException if the iterator fails while being consumed
   */
  public void inPlaceXor(DocIdSetIterator disi) throws IOException {
    while (disi.next() && (disi.doc() < size())) {
      fastFlip(disi.doc());
    }
  }
}

View File

@ -0,0 +1,48 @@
package org.apache.lucene.search;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.BitSet;
import org.apache.lucene.index.IndexReader;
/**
 * Test helper that presents a wrapped {@link Filter} through the old
 * BitSet-based filter API only. {@link Filter#getDocIdSet(IndexReader)} is
 * intentionally left un-overridden so that users of this wrapper end up in
 * {@link #bits(IndexReader)}.
 *
 * @deprecated This class will be removed together with the
 * {@link Filter#bits(IndexReader)} method in Lucene 3.0.
 */
public class OldBitSetFilterWrapper extends Filter {
  /** The filter whose matching doc ids are copied into a BitSet. */
  private Filter filter;

  /**
   * @param filter the filter to expose through {@link #bits(IndexReader)}
   */
  public OldBitSetFilterWrapper(Filter filter) {
    this.filter = filter;
  }

  /**
   * Collects every doc id produced by the wrapped filter's DocIdSet
   * for the given reader into a freshly allocated BitSet.
   *
   * @param reader the index reader the wrapped filter is evaluated against
   * @return a new BitSet with one bit set per doc id the iterator returned
   * @throws IOException if the wrapped filter or its iterator fails
   */
  public BitSet bits(IndexReader reader) throws IOException {
    BitSet matches = new BitSet(reader.maxDoc());
    DocIdSetIterator docs = filter.getDocIdSet(reader).iterator();
    boolean hasDoc = docs.next();
    while (hasDoc) {
      matches.set(docs.doc());
      hasDoc = docs.next();
    }
    return matches;
  }
}