Added new "CachedFilter" feature to XML syntax enabling any queries or filters to be cached for better repeat performance. Added JUnit test and example XML file. Also fixed ClassCastException in DOMUtils which occured when getAttributeWithInheritance reached the root of a document without finding the required attribute.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@492823 13f79535-47bb-0310-9956-ffa450edef68
2007-01-05 00:05:17 +00:00 · 2007-01-05 00:05:17 +00:00 · 998908257b
parent 59b624efc6
commit 998908257b
7 changed files with 193 additions and 5 deletions
--- a/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/CoreParser.java
+++ b/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/CoreParser.java
@ -12,6 +12,7 @@ import org.apache.lucene.xmlparser.builders.BooleanQueryBuilder;
 import org.apache.lucene.xmlparser.builders.ConstantScoreQueryBuilder;
 import org.apache.lucene.xmlparser.builders.FilteredQueryBuilder;
 import org.apache.lucene.xmlparser.builders.MatchAllDocsQueryBuilder;
 import org.apache.lucene.xmlparser.builders.CachedFilterBuilder;
 import org.apache.lucene.xmlparser.builders.RangeFilterBuilder;
 import org.apache.lucene.xmlparser.builders.SpanFirstBuilder;
 import org.apache.lucene.xmlparser.builders.SpanNearBuilder;
@ -38,6 +39,9 @@ public class CoreParser implements QueryBuilder
 	protected QueryParser parser;
 	protected QueryBuilderFactory queryFactory;
 	protected FilterBuilderFactory filterFactory;
 	//Controls the max size of the LRU cache used for QueryFilter objects parsed.
 	public static int maxNumCachedFilters=20;
 	public CoreParser(Analyzer analyzer, QueryParser parser)
 	{
@ -56,6 +60,10 @@ public class CoreParser implements QueryBuilder
 		queryFactory.addBuilder("FilteredQuery",new FilteredQueryBuilder(filterFactory,queryFactory));
 		queryFactory.addBuilder("ConstantScoreQuery",new ConstantScoreQueryBuilder(filterFactory));
 		filterFactory.addBuilder("CachedFilter",new CachedFilterBuilder(queryFactory,
 							filterFactory, maxNumCachedFilters));
 		SpanQueryBuilderFactory sqof=new SpanQueryBuilderFactory();
 		SpanNearBuilder snb=new SpanNearBuilder(sqof);
--- a/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/DOMUtils.java
+++ b/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/DOMUtils.java
@ -99,9 +99,13 @@ public class DOMUtils
 			{
 				return null;
 			}
 			if(n instanceof Element)
 			{
 				Element parent=(Element) n;
 				return getAttributeWithInheritance(parent,attributeName);
 			}
 			return null; //we reached the top level of the document without finding attribute
 		}
 		return result;		
 	}
--- a/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/FilterBuilderFactory.java
+++ b/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/FilterBuilderFactory.java
@ -27,5 +27,8 @@ public class FilterBuilderFactory implements FilterBuilder {
 	{
 		builders.put(nodeName,builder);
 	}
-	
+	public FilterBuilder getFilterBuilder(String nodeName)
 	{
 		return (FilterBuilder) builders.get(nodeName);		
 	}	
 }
--- a/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/QueryBuilderFactory.java
+++ b/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/QueryBuilderFactory.java
@ -27,5 +27,9 @@ public class QueryBuilderFactory implements QueryBuilder {
 	{
 		builders.put(nodeName,builder);
 	}
 	public QueryBuilder getQueryBuilder(String nodeName)
 	{
 		return (QueryBuilder) builders.get(nodeName);		
 	}
 }
--- a/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/CachedFilterBuilder.java
+++ b/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/CachedFilterBuilder.java
@ -0,0 +1,123 @@
 /*
 * Created on 25-Jan-2006
 */
 package org.apache.lucene.xmlparser.builders;
 import java.util.Map.Entry;
 import org.apache.lucene.search.CachingWrapperFilter;
 import org.apache.lucene.search.Filter;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.QueryFilter;
 import org.apache.lucene.xmlparser.DOMUtils;
 import org.apache.lucene.xmlparser.FilterBuilder;
 import org.apache.lucene.xmlparser.FilterBuilderFactory;
 import org.apache.lucene.xmlparser.ParserException;
 import org.apache.lucene.xmlparser.QueryBuilder;
 import org.apache.lucene.xmlparser.QueryBuilderFactory;
 import org.w3c.dom.Element;
 /**
 * Filters are cached in an LRU Cache keyed on the contained query or filter object. Using this will 
 * speed up overall performance for repeated uses of the same expensive query/filter. The sorts of 
 * queries/filters likely to benefit from caching need not necessarily be complex - e.g. simple 
 * TermQuerys with a large DF (document frequency) can be expensive	on large indexes. 
 * A good example of this might be a term query on a field with only 2 possible	values - 
 * "true" or "false". In a large index, querying or filtering on this field requires reading 
 * millions	of document ids from disk which can more usefully be cached as a filter bitset.
 * 
 * For Queries/Filters to be cached and reused the object must implement hashcode and
 * equals methods correctly so that duplicate queries/filters can be detected in the cache.
 * 
 * The CoreParser.maxNumCachedFilters property can be used to control the size of the LRU 
 * Cache established during the construction of CoreParser instances.
 * 
 * @author maharwood
 */
 public class CachedFilterBuilder implements FilterBuilder {
 	private QueryBuilderFactory queryFactory;
 	private FilterBuilderFactory filterFactory;
    private  LRUCache filterCache = null;
 	private int cacheSize;
 	public CachedFilterBuilder(QueryBuilderFactory queryFactory, 
 			FilterBuilderFactory filterFactory,int cacheSize)
 	{
 		this.queryFactory=queryFactory;
 		this.filterFactory=filterFactory;
 		this.cacheSize=cacheSize;
 	}
 	public Filter getFilter(Element e) throws ParserException
 	{
 		Element childElement = DOMUtils.getFirstChildOrFail(e);
 		if (filterCache == null)
 		{
 			filterCache = new LRUCache(cacheSize);
 		}
 		// Test to see if child Element is a query or filter that needs to be
 		// cached
 		QueryBuilder qb = queryFactory.getQueryBuilder(childElement
 				.getNodeName());
 		Object cacheKey = null;
 		Query q = null;
 		Filter f = null;
 		if (qb != null)
 		{
 			q = qb.getQuery(childElement);
 			cacheKey = q;
 		} else
 		{
 			f = filterFactory.getFilter(childElement);
 			cacheKey = f;
 		}
 		Filter cachedFilter = null;
 		synchronized (filterCache)
 		{ // check cache
 			cachedFilter = (Filter) filterCache.get(cacheKey);
 			if (cachedFilter != null)
 			{
 				return cachedFilter; // cache hit
 			}
 		}
 		//cache miss
 		if (qb != null)
 		{
 			cachedFilter = new QueryFilter(q);
 		} else
 		{
 			cachedFilter = new CachingWrapperFilter(f);
 		}
 		synchronized (filterCache)
 		{ // update cache
 			filterCache.put(cacheKey, cachedFilter);
 		}
 		return cachedFilter;
 	}
 	static class LRUCache extends java.util.LinkedHashMap
 	{
 	    public LRUCache(int maxsize)
 	    {
 	        super(maxsize * 4 / 3 + 1, 0.75f, true);
 	        this.maxsize = maxsize;
 	    }
 	    protected int maxsize;
 	    protected boolean removeEldestEntry(Entry eldest)
 	    {
 	        return size() > maxsize;
 	    }
 	}
 }
--- a/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/CachedFilter.xml
+++ b/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/CachedFilter.xml
@ -0,0 +1,41 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <FilteredQuery>
 	<Query>
 		<BooleanQuery fieldName="contents">
 			<Clause occurs="should">
 				<TermQuery>merger</TermQuery>
 			</Clause>
 			<Clause occurs="mustnot">
 				<TermQuery >sumitomo</TermQuery>		
 			</Clause>
 		</BooleanQuery>
 	</Query>
 	<Filter>
 		<!--
 			CachedFilter elements can contain any Query or Filter. 
 			CachedFilters are cached in an LRU Cache keyed on the contained query/filter object. 
 			Using this will speed up overall performance for repeated uses of the same expensive 
 			query/filter. The sorts of queries likely to benefit from caching need not necessarily be 
 			complex - e.g. simple TermQuerys with a large DF (document frequency) can be expensive
 			on large indexes. A good example of this might be a term query on a field with only 2 possible 
 			values - "true" or "false". In a large index, querying or filtering on this field requires 
 			reading millions of document ids from disk which can more usefully be cached as a 
 			QueryFilter bitset.
 			For Queries/Filters to be cached and reused the object must implement hashcode and
 			equals methods correctly so that duplicate queries/filters can be detected in the cache.
 			The CoreParser.maxNumCachedFilters property can be used to control the size
 			of the LRU Cache established during the construction of CoreParser instances.
 			-->
 		<CachedFilter>
 			<!-- Example query to be cached for fast, repeated use -->
 			<TermQuery fieldName="contents">bank</TermQuery> 
 			<!-- Alternatively, a filter object can be cached ....
 				<RangeFilter fieldName="date" lowerTerm="19870409" upperTerm="19870412"/>
 			-->				
 		</CachedFilter>
 	</Filter>
 </FilteredQuery>
--- a/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestParser.java
+++ b/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestParser.java
@ -155,6 +155,11 @@ public class TestParser extends TestCase {
 			Query q=parse("NestedBooleanQuery.xml");
 			dumpResults("Nested Boolean query", q, 5);
 	}
 	public void testCachedFilterXML() throws ParserException, IOException
 	{
 			Query q=parse("CachedFilter.xml");
 			dumpResults("Cached filter", q, 5);
 	}