Added new "CachedFilter" feature to XML syntax enabling any queries or filters to be cached for better repeat performance. Added JUnit test and example XML file. Also fixed ClassCastException in DOMUtils which occured when getAttributeWithInheritance reached the root of a document without finding the required attribute.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@492823 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Mark Harwood 2007-01-05 00:05:17 +00:00
parent 59b624efc6
commit 998908257b
7 changed files with 193 additions and 5 deletions

View File

@ -12,6 +12,7 @@ import org.apache.lucene.xmlparser.builders.BooleanQueryBuilder;
import org.apache.lucene.xmlparser.builders.ConstantScoreQueryBuilder;
import org.apache.lucene.xmlparser.builders.FilteredQueryBuilder;
import org.apache.lucene.xmlparser.builders.MatchAllDocsQueryBuilder;
import org.apache.lucene.xmlparser.builders.CachedFilterBuilder;
import org.apache.lucene.xmlparser.builders.RangeFilterBuilder;
import org.apache.lucene.xmlparser.builders.SpanFirstBuilder;
import org.apache.lucene.xmlparser.builders.SpanNearBuilder;
@ -38,6 +39,9 @@ public class CoreParser implements QueryBuilder
protected QueryParser parser;
protected QueryBuilderFactory queryFactory;
protected FilterBuilderFactory filterFactory;
//Controls the max size of the LRU cache used for QueryFilter objects parsed.
public static int maxNumCachedFilters=20;
public CoreParser(Analyzer analyzer, QueryParser parser)
{
@ -56,6 +60,10 @@ public class CoreParser implements QueryBuilder
queryFactory.addBuilder("FilteredQuery",new FilteredQueryBuilder(filterFactory,queryFactory));
queryFactory.addBuilder("ConstantScoreQuery",new ConstantScoreQueryBuilder(filterFactory));
filterFactory.addBuilder("CachedFilter",new CachedFilterBuilder(queryFactory,
filterFactory, maxNumCachedFilters));
SpanQueryBuilderFactory sqof=new SpanQueryBuilderFactory();
SpanNearBuilder snb=new SpanNearBuilder(sqof);

View File

@ -99,8 +99,12 @@ public class DOMUtils
{
return null;
}
Element parent=(Element) n;
return getAttributeWithInheritance(parent,attributeName);
if(n instanceof Element)
{
Element parent=(Element) n;
return getAttributeWithInheritance(parent,attributeName);
}
return null; //we reached the top level of the document without finding attribute
}
return result;
}
@ -250,7 +254,7 @@ public class DOMUtils
}
return doc;
}
}
}

View File

@ -27,5 +27,8 @@ public class FilterBuilderFactory implements FilterBuilder {
{
builders.put(nodeName,builder);
}
public FilterBuilder getFilterBuilder(String nodeName)
{
return (FilterBuilder) builders.get(nodeName);
}
}

View File

@ -27,5 +27,9 @@ public class QueryBuilderFactory implements QueryBuilder {
{
builders.put(nodeName,builder);
}
public QueryBuilder getQueryBuilder(String nodeName)
{
return (QueryBuilder) builders.get(nodeName);
}
}

View File

@ -0,0 +1,123 @@
/*
* Created on 25-Jan-2006
*/
package org.apache.lucene.xmlparser.builders;
import java.util.Map.Entry;
import org.apache.lucene.search.CachingWrapperFilter;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryFilter;
import org.apache.lucene.xmlparser.DOMUtils;
import org.apache.lucene.xmlparser.FilterBuilder;
import org.apache.lucene.xmlparser.FilterBuilderFactory;
import org.apache.lucene.xmlparser.ParserException;
import org.apache.lucene.xmlparser.QueryBuilder;
import org.apache.lucene.xmlparser.QueryBuilderFactory;
import org.w3c.dom.Element;
/**
* Filters are cached in an LRU Cache keyed on the contained query or filter object. Using this will
* speed up overall performance for repeated uses of the same expensive query/filter. The sorts of
* queries/filters likely to benefit from caching need not necessarily be complex - e.g. simple
* TermQuerys with a large DF (document frequency) can be expensive on large indexes.
* A good example of this might be a term query on a field with only 2 possible values -
* "true" or "false". In a large index, querying or filtering on this field requires reading
* millions of document ids from disk which can more usefully be cached as a filter bitset.
*
* For Queries/Filters to be cached and reused the object must implement hashcode and
* equals methods correctly so that duplicate queries/filters can be detected in the cache.
*
* The CoreParser.maxNumCachedFilters property can be used to control the size of the LRU
* Cache established during the construction of CoreParser instances.
*
* @author maharwood
*/
public class CachedFilterBuilder implements FilterBuilder {
private QueryBuilderFactory queryFactory;
private FilterBuilderFactory filterFactory;
private LRUCache filterCache = null;
private int cacheSize;
public CachedFilterBuilder(QueryBuilderFactory queryFactory,
FilterBuilderFactory filterFactory,int cacheSize)
{
this.queryFactory=queryFactory;
this.filterFactory=filterFactory;
this.cacheSize=cacheSize;
}
public Filter getFilter(Element e) throws ParserException
{
Element childElement = DOMUtils.getFirstChildOrFail(e);
if (filterCache == null)
{
filterCache = new LRUCache(cacheSize);
}
// Test to see if child Element is a query or filter that needs to be
// cached
QueryBuilder qb = queryFactory.getQueryBuilder(childElement
.getNodeName());
Object cacheKey = null;
Query q = null;
Filter f = null;
if (qb != null)
{
q = qb.getQuery(childElement);
cacheKey = q;
} else
{
f = filterFactory.getFilter(childElement);
cacheKey = f;
}
Filter cachedFilter = null;
synchronized (filterCache)
{ // check cache
cachedFilter = (Filter) filterCache.get(cacheKey);
if (cachedFilter != null)
{
return cachedFilter; // cache hit
}
}
//cache miss
if (qb != null)
{
cachedFilter = new QueryFilter(q);
} else
{
cachedFilter = new CachingWrapperFilter(f);
}
synchronized (filterCache)
{ // update cache
filterCache.put(cacheKey, cachedFilter);
}
return cachedFilter;
}
static class LRUCache extends java.util.LinkedHashMap
{
public LRUCache(int maxsize)
{
super(maxsize * 4 / 3 + 1, 0.75f, true);
this.maxsize = maxsize;
}
protected int maxsize;
protected boolean removeEldestEntry(Entry eldest)
{
return size() > maxsize;
}
}
}

View File

@ -0,0 +1,41 @@
<?xml version="1.0" encoding="UTF-8"?>
<FilteredQuery>
<Query>
<BooleanQuery fieldName="contents">
<Clause occurs="should">
<TermQuery>merger</TermQuery>
</Clause>
<Clause occurs="mustnot">
<TermQuery >sumitomo</TermQuery>
</Clause>
</BooleanQuery>
</Query>
<Filter>
<!--
CachedFilter elements can contain any Query or Filter.
CachedFilters are cached in an LRU Cache keyed on the contained query/filter object.
Using this will speed up overall performance for repeated uses of the same expensive
query/filter. The sorts of queries likely to benefit from caching need not necessarily be
complex - e.g. simple TermQuerys with a large DF (document frequency) can be expensive
on large indexes. A good example of this might be a term query on a field with only 2 possible
values - "true" or "false". In a large index, querying or filtering on this field requires
reading millions of document ids from disk which can more usefully be cached as a
QueryFilter bitset.
For Queries/Filters to be cached and reused the object must implement hashcode and
equals methods correctly so that duplicate queries/filters can be detected in the cache.
The CoreParser.maxNumCachedFilters property can be used to control the size
of the LRU Cache established during the construction of CoreParser instances.
-->
<CachedFilter>
<!-- Example query to be cached for fast, repeated use -->
<TermQuery fieldName="contents">bank</TermQuery>
<!-- Alternatively, a filter object can be cached ....
<RangeFilter fieldName="date" lowerTerm="19870409" upperTerm="19870412"/>
-->
</CachedFilter>
</Filter>
</FilteredQuery>

View File

@ -155,6 +155,11 @@ public class TestParser extends TestCase {
Query q=parse("NestedBooleanQuery.xml");
dumpResults("Nested Boolean query", q, 5);
}
public void testCachedFilterXML() throws ParserException, IOException
{
Query q=parse("CachedFilter.xml");
dumpResults("Cached filter", q, 5);
}