Added BooleanFilter with JUnit test, added BooleanFilter support to XMLQueryParser, Changed TermsFilterBuilder to only build a filter for a single choice of field - multiples can be combined using new BooleanFilter clauses. Added missing MatchAllDocsQueryBuilder source.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@389037 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Mark Harwood 2006-03-27 07:23:37 +00:00
parent fa0516f6ee
commit 0fa0e25de8
11 changed files with 443 additions and 30 deletions

View File

@ -0,0 +1,127 @@
package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.BitSet;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.BooleanClause.Occur;
/**
* A container Filter that allows Boolean composition of Filters.
* Filters are allocated into one of three logical constructs;
* SHOULD, MUST NOT, MUST
* The results Filter BitSet is constructed as follows:
* SHOULD Filters are OR'd together
* The resulting Filter is NOT'd with the NOT Filters
* The resulting Filter is AND'd with the MUST Filters
* @author BPDThebault
*/
public class BooleanFilter extends Filter
{
//ArrayList of SHOULD filters
ArrayList shouldFilters = null;
//ArrayList of NOT filters
ArrayList notFilters = null;
//ArrayList of MUST filters
ArrayList mustFilters = null;
/**
* Returns the a BitSet representing the Boolean composition
* of the filters that have been added.
*/
public BitSet bits(IndexReader reader) throws IOException
{
//create a new bitSet
BitSet returnBits = null;
//SHOULD filters
if (shouldFilters!=null)
{
returnBits = ((Filter)shouldFilters.get(0)).bits(reader);
if (shouldFilters.size() > 1)
{
for (int i = 1; i < shouldFilters.size(); i++)
{
returnBits.or(((Filter)shouldFilters.get(i)).bits(reader));
}
}
}
//NOT filters
if (notFilters!=null)
{
for (int i = 0; i < notFilters.size(); i++)
{
BitSet notBits=((Filter)notFilters.get(i)).bits(reader);
if(returnBits==null)
{
returnBits=notBits;
returnBits.flip(0,reader.maxDoc());
}
else
{
returnBits.andNot(notBits);
}
}
}
//MUST filters
if (mustFilters!=null)
{
for (int i = 0; i < mustFilters.size(); i++)
{
BitSet mustBits=((Filter)mustFilters.get(i)).bits(reader);
if(returnBits==null)
{
returnBits=mustBits;
}
else
{
returnBits.and(mustBits);
}
}
}
if(returnBits==null)
{
returnBits=new BitSet(reader.maxDoc());
}
return returnBits;
}
/**
* Adds a new FilterClause to the Boolean Filter container
* @param filterClause A FilterClause object containing a Filter and an Occur parameter
*/
public void add(FilterClause filterClause)
{
if (filterClause.getOccur().equals(Occur.MUST))
{
if(mustFilters==null)
{
mustFilters=new ArrayList();
}
mustFilters.add(filterClause.getFilter());
}
if (filterClause.getOccur().equals(Occur.SHOULD))
{
if(shouldFilters==null)
{
shouldFilters=new ArrayList();
}
shouldFilters.add(filterClause.getFilter());
}
if (filterClause.getOccur().equals(Occur.MUST_NOT))
{
if(notFilters==null)
{
notFilters=new ArrayList();
}
notFilters.add(filterClause.getFilter());
}
}
}

View File

@ -0,0 +1,50 @@
package org.apache.lucene.search;
import org.apache.lucene.search.BooleanClause.Occur;
/**
* A Filter that wrapped with an indication of how that filter
* is used when composed with another filter.
* (Follows the boolean logic in BooleanClause for composition
* of queries.)
* @author BPDThebault
*/
public class FilterClause implements java.io.Serializable
{
Occur occur = null;
Filter filter = null;
/**
* Create a new FilterClause
* @param filter A Filter object containing a BitSet
* @param occur A parameter implementation indicating SHOULD, MUST or MUST NOT
*/
public FilterClause( Filter filter,Occur occur)
{
this.occur = occur;
this.filter = filter;
}
/**
* Returns this FilterClause's filter
* @return A Filter object
*/
public Filter getFilter()
{
return filter;
}
/**
* Returns this FilterClause's occur parameter
* @return An Occur object
*/
public Occur getOccur()
{
return occur;
}
}

View File

@ -0,0 +1,148 @@
package org.apache.lucene.search;
import java.io.IOException;
import java.util.BitSet;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanFilter;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.FilterClause;
import org.apache.lucene.search.RangeFilter;
import org.apache.lucene.store.RAMDirectory;
import junit.framework.TestCase;
public class BooleanFilterTest extends TestCase
{
private RAMDirectory directory;
private IndexReader reader;
protected void setUp() throws Exception
{
directory = new RAMDirectory();
IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
//Add series of docs with filterable fields : acces rights, prices, dates and "in-stock" flags
addDoc(writer, "admin guest", "010", "20040101","Y");
addDoc(writer, "guest", "020", "20040101","Y");
addDoc(writer, "guest", "020", "20050101","Y");
addDoc(writer, "admin", "020", "20050101","Maybe");
addDoc(writer, "admin guest", "030", "20050101","N");
writer.close();
reader=IndexReader.open(directory);
}
private void addDoc(IndexWriter writer, String accessRights, String price, String date, String inStock) throws IOException
{
Document doc=new Document();
doc.add(new Field("accessRights",accessRights,Field.Store.YES,Field.Index.TOKENIZED));
doc.add(new Field("price",price,Field.Store.YES,Field.Index.TOKENIZED));
doc.add(new Field("date",date,Field.Store.YES,Field.Index.TOKENIZED));
doc.add(new Field("inStock",inStock,Field.Store.YES,Field.Index.TOKENIZED));
writer.addDocument(doc);
}
private Filter getRangeFilter(String field,String lowerPrice, String upperPrice)
{
return new RangeFilter(field,lowerPrice,upperPrice,true,true);
}
private TermsFilter getTermsFilter(String field,String text)
{
TermsFilter tf=new TermsFilter();
tf.addTerm(new Term(field,text));
return tf;
}
public void testShould() throws Throwable
{
BooleanFilter booleanFilter = new BooleanFilter();
booleanFilter.add(new FilterClause(getTermsFilter("price","030"),BooleanClause.Occur.SHOULD));
BitSet bits = booleanFilter.bits(reader);
assertEquals("Should retrieves only 1 doc",1,bits.cardinality());
}
public void testShoulds() throws Throwable
{
BooleanFilter booleanFilter = new BooleanFilter();
booleanFilter.add(new FilterClause(getRangeFilter("price","010", "020"),BooleanClause.Occur.SHOULD));
booleanFilter.add(new FilterClause(getRangeFilter("price","020", "030"),BooleanClause.Occur.SHOULD));
BitSet bits = booleanFilter.bits(reader);
assertEquals("Shoulds are Ored together",5,bits.cardinality());
}
public void testShouldsAndMustNot() throws Throwable
{
BooleanFilter booleanFilter = new BooleanFilter();
booleanFilter.add(new FilterClause(getRangeFilter("price","010", "020"),BooleanClause.Occur.SHOULD));
booleanFilter.add(new FilterClause(getRangeFilter("price","020", "030"),BooleanClause.Occur.SHOULD));
booleanFilter.add(new FilterClause(getTermsFilter("inStock", "N"),BooleanClause.Occur.MUST_NOT));
BitSet bits = booleanFilter.bits(reader);
assertEquals("Shoulds Ored but AndNot",4,bits.cardinality());
booleanFilter.add(new FilterClause(getTermsFilter("inStock", "Maybe"),BooleanClause.Occur.MUST_NOT));
bits = booleanFilter.bits(reader);
assertEquals("Shoulds Ored but AndNots",3,bits.cardinality());
}
public void testShouldsAndMust() throws Throwable
{
BooleanFilter booleanFilter = new BooleanFilter();
booleanFilter.add(new FilterClause(getRangeFilter("price","010", "020"),BooleanClause.Occur.SHOULD));
booleanFilter.add(new FilterClause(getRangeFilter("price","020", "030"),BooleanClause.Occur.SHOULD));
booleanFilter.add(new FilterClause(getTermsFilter("accessRights", "admin"),BooleanClause.Occur.MUST));
BitSet bits = booleanFilter.bits(reader);
assertEquals("Shoulds Ored but MUST",3,bits.cardinality());
}
public void testShouldsAndMusts() throws Throwable
{
BooleanFilter booleanFilter = new BooleanFilter();
booleanFilter.add(new FilterClause(getRangeFilter("price","010", "020"),BooleanClause.Occur.SHOULD));
booleanFilter.add(new FilterClause(getRangeFilter("price","020", "030"),BooleanClause.Occur.SHOULD));
booleanFilter.add(new FilterClause(getTermsFilter("accessRights", "admin"),BooleanClause.Occur.MUST));
booleanFilter.add(new FilterClause(getRangeFilter("date","20040101", "20041231"),BooleanClause.Occur.MUST));
BitSet bits = booleanFilter.bits(reader);
assertEquals("Shoulds Ored but MUSTs ANDED",1,bits.cardinality());
}
public void testShouldsAndMustsAndMustNot() throws Throwable
{
BooleanFilter booleanFilter = new BooleanFilter();
booleanFilter.add(new FilterClause(getRangeFilter("price","030", "040"),BooleanClause.Occur.SHOULD));
booleanFilter.add(new FilterClause(getTermsFilter("accessRights", "admin"),BooleanClause.Occur.MUST));
booleanFilter.add(new FilterClause(getRangeFilter("date","20050101", "20051231"),BooleanClause.Occur.MUST));
booleanFilter.add(new FilterClause(getTermsFilter("inStock","N"),BooleanClause.Occur.MUST_NOT));
BitSet bits = booleanFilter.bits(reader);
assertEquals("Shoulds Ored but MUSTs ANDED and MustNot",0,bits.cardinality());
}
public void testJustMust() throws Throwable
{
BooleanFilter booleanFilter = new BooleanFilter();
booleanFilter.add(new FilterClause(getTermsFilter("accessRights", "admin"),BooleanClause.Occur.MUST));
BitSet bits = booleanFilter.bits(reader);
assertEquals("MUST",3,bits.cardinality());
}
public void testJustMustNot() throws Throwable
{
BooleanFilter booleanFilter = new BooleanFilter();
booleanFilter.add(new FilterClause(getTermsFilter("inStock","N"),BooleanClause.Occur.MUST_NOT));
BitSet bits = booleanFilter.bits(reader);
assertEquals("MUST_NOT",4,bits.cardinality());
}
public void testMustAndMustNot() throws Throwable
{
BooleanFilter booleanFilter = new BooleanFilter();
booleanFilter.add(new FilterClause(getTermsFilter("inStock","N"),BooleanClause.Occur.MUST));
booleanFilter.add(new FilterClause(getTermsFilter("price","030"),BooleanClause.Occur.MUST_NOT));
BitSet bits = booleanFilter.bits(reader);
assertEquals("MUST_NOT wins over MUST for same docs",0,bits.cardinality());
}
}

View File

@ -2,6 +2,7 @@ package org.apache.lucene.xmlparser;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.xmlparser.builders.BooleanFilterBuilder;
import org.apache.lucene.xmlparser.builders.BoostingQueryBuilder;
import org.apache.lucene.xmlparser.builders.FuzzyLikeThisQueryBuilder;
import org.apache.lucene.xmlparser.builders.LikeThisQueryBuilder;
@ -14,6 +15,7 @@ public class CorePlusExtensionsParser extends CoreParser
{
super(analyzer, parser);
filterFactory.addBuilder("TermsFilter",new TermsFilterBuilder(analyzer));
filterFactory.addBuilder("BooleanFilter",new BooleanFilterBuilder(filterFactory));
String fields[]={"contents"};
queryFactory.addBuilder("LikeThisQuery",new LikeThisQueryBuilder(analyzer,fields));
queryFactory.addBuilder("BoostingQuery", new BoostingQueryBuilder(queryFactory));

View File

@ -0,0 +1,45 @@
/*
* Created on 25-Jan-2006
*/
package org.apache.lucene.xmlparser.builders;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanFilter;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.FilterClause;
import org.apache.lucene.xmlparser.DOMUtils;
import org.apache.lucene.xmlparser.FilterBuilder;
import org.apache.lucene.xmlparser.ParserException;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
/**
* @author maharwood
*/
public class BooleanFilterBuilder implements FilterBuilder {
private FilterBuilder factory;
public BooleanFilterBuilder(FilterBuilder factory)
{
this.factory=factory;
}
public Filter getFilter(Element e) throws ParserException {
BooleanFilter bf=new BooleanFilter();
NodeList nl = e.getElementsByTagName("Clause");
for(int i=0;i<nl.getLength();i++)
{
Element clauseElem=(Element) nl.item(i);
BooleanClause.Occur occurs=BooleanQueryBuilder.getOccursValue(clauseElem);
Element clauseFilter=DOMUtils.getFirstChildOrFail(clauseElem);
Filter f=factory.getFilter(clauseFilter);
bf.add(new FilterClause(f,occurs));
}
return bf;
}
}

View File

@ -44,7 +44,7 @@ public class BooleanQueryBuilder implements QueryBuilder {
return bq;
}
private BooleanClause.Occur getOccursValue(Element clauseElem) throws ParserException
static BooleanClause.Occur getOccursValue(Element clauseElem) throws ParserException
{
String occs=clauseElem.getAttribute("occurs");
BooleanClause.Occur occurs=BooleanClause.Occur.SHOULD;

View File

@ -0,0 +1,15 @@
package org.apache.lucene.xmlparser.builders;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.xmlparser.ParserException;
import org.apache.lucene.xmlparser.QueryBuilder;
import org.w3c.dom.Element;
public class MatchAllDocsQueryBuilder implements QueryBuilder
{
public Query getQuery(Element e) throws ParserException
{
return new MatchAllDocsQuery();
}
}

View File

@ -13,7 +13,6 @@ import org.apache.lucene.xmlparser.DOMUtils;
import org.apache.lucene.xmlparser.FilterBuilder;
import org.apache.lucene.xmlparser.ParserException;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
/**
@ -33,43 +32,40 @@ public class TermsFilterBuilder implements FilterBuilder
this.analyzer = analyzer;
}
/* (non-Javadoc)
/*
* (non-Javadoc)
*
* @see org.apache.lucene.xmlparser.FilterBuilder#process(org.w3c.dom.Element)
*/
public Filter getFilter(Element e) throws ParserException
{
TermsFilter tf=new TermsFilter();
NodeList nl = e.getElementsByTagName("Field");
for(int i=0;i<nl.getLength();i++)
TermsFilter tf = new TermsFilter();
String text = DOMUtils.getNonBlankTextOrFail(e);
String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(text));
try
{
Element fieldElem=(Element) nl.item(i);
String fieldName=DOMUtils.getAttributeWithInheritanceOrFail(fieldElem,"fieldName");
String text=DOMUtils.getNonBlankTextOrFail(fieldElem);
TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(text));
try
Token token = ts.next();
Term term = null;
while (token != null)
{
Token token=ts.next();
Term term=null;
while(token!=null)
{
if(term==null)
if (term == null)
{
term=new Term(fieldName,token.termText());
}
else
term = new Term(fieldName, token.termText());
} else
{
term=term.createTerm(token.termText()); //create from previous to save fieldName.intern overhead
// create from previous to save fieldName.intern overhead
term = term.createTerm(token.termText());
}
tf.addTerm(term);
token=ts.next();
}
}
catch(IOException ioe)
{
throw new RuntimeException("Error constructing terms from index:"+ioe);
token = ts.next();
}
}
catch (IOException ioe)
{
throw new RuntimeException("Error constructing terms from index:"
+ ioe);
}
return tf;
}

View File

@ -0,0 +1,25 @@
<?xml version="1.0" encoding="UTF-8"?>
<FilteredQuery>
<Query>
<MatchAllDocsQuery/>
</Query>
<Filter>
<!--
This query illustrates how a BooleanFilter can be used to combine
multiple filters in the same way BooleanQueries can be combined
with must, should and mustnot clauses
-->
<BooleanFilter>
<Clause occurs="should">
<RangeFilter fieldName="date" lowerTerm="19870409" upperTerm="19870412"/>
</Clause>
<Clause occurs="mustNot">
<TermsFilter fieldName="contents">Emcore</TermsFilter>
</Clause>
</BooleanFilter>
</Filter>
</FilteredQuery>

View File

@ -22,8 +22,8 @@
This example might be just a list of Saturdays ie not a contiguous range of values
which can be handled by rangefilter
-->
<TermsFilter>
<Field fieldName="date" >19870601 19870608 19870615</Field>
<TermsFilter fieldName="date">
19870601 19870608 19870615
</TermsFilter>
</Filter>

View File

@ -140,6 +140,11 @@ public class TestParser extends TestCase {
Query q=parse("MatchAllDocsQuery.xml");
dumpResults("MatchAllDocsQuery with range filter", q, 5);
}
public void testBooleanFilterXML() throws ParserException, IOException
{
Query q=parse("BooleanFilter.xml");
dumpResults("Boolean filter", q, 5);
}