Added QueryTemplateManager.java to aid construction of XML queries from form input by using XSL templates. A Junit test provides examples of use. This approach offers a convenient way of externalizing and changing how user input is turned into Lucene queries. Database applications often adopt similar practices by externalizing SQL in template files that can be easily changed/optimized by a DBA.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@500053 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Mark Harwood 2007-01-25 23:31:02 +00:00
parent 9251a63e01
commit c02aed3b5e
5 changed files with 348 additions and 0 deletions

View File

@ -0,0 +1,102 @@
package org.apache.lucene.xmlparser;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Enumeration;
import java.util.Properties;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMResult;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.xml.sax.SAXException;
/**
* Provides utilities for turning query form input (such as from a web page or Swing gui) into
* Lucene XML queries by using XSL templates. This approach offers a convenient way of externalizing
* and changing how user input is turned into Lucene queries.
* Database applications often adopt similar practices by externalizing SQL in template files that can
* be easily changed/optimized by a DBA.
* @author Mark Harwood
*/
public class QueryTemplateManager
{
static DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance ();
static TransformerFactory tFactory = TransformerFactory.newInstance();
public static String getQueryAsXmlString(Properties formProperties, String templateName) throws SAXException, IOException, ParserConfigurationException, TransformerException
{
return getQueryAsXmlString(formProperties,
getDOMSource(QueryTemplateManager.class.getResourceAsStream(templateName)));
}
public static String getQueryAsXmlString(Properties formProperties, Source xslDs) throws SAXException, IOException, ParserConfigurationException, TransformerException
{
ByteArrayOutputStream baos=new ByteArrayOutputStream();
StreamResult result=new StreamResult(baos);
transformCriteria(formProperties,xslDs,result);
return baos.toString();
}
public static Document getQueryAsDOM(Properties formProperties, String templateName) throws SAXException, IOException, ParserConfigurationException, TransformerException
{
return getQueryAsDOM(formProperties, getDOMSource(QueryTemplateManager.class.getResourceAsStream(templateName)));
}
public static Document getQueryAsDOM(Properties formProperties, InputStream xslIs) throws SAXException, IOException, ParserConfigurationException, TransformerException
{
return getQueryAsDOM(formProperties, getDOMSource(xslIs));
}
public static Document getQueryAsDOM(Properties formProperties, Source xslDs) throws SAXException, IOException, ParserConfigurationException, TransformerException
{
DOMResult result=new DOMResult();
transformCriteria(formProperties,xslDs,result);
return (Document)result.getNode();
}
public static void transformCriteria(Properties formProperties, Source xslDs, Result result) throws SAXException, IOException, ParserConfigurationException, TransformerException
{
dbf.setNamespaceAware(true);
Transformer transformer = tFactory.newTransformer(xslDs);
//Create an XML document representing the search index document.
DocumentBuilder db = dbf.newDocumentBuilder ();
org.w3c.dom.Document doc = db.newDocument ();
Element root = doc.createElement ("Document");
doc.appendChild (root);
Enumeration keysEnum = formProperties.keys();
while(keysEnum.hasMoreElements())
{
String propName=(String) keysEnum.nextElement();
String value=formProperties.getProperty(propName);
if((value!=null)&&(value.length()>0))
{
DOMUtils.insertChild(root,propName,value);
}
}
//Use XSLT to to transform into an XML query string using the queryTemplate
DOMSource xml=new DOMSource(doc);
transformer.transform(xml,result);
}
public static DOMSource getDOMSource(InputStream xslIs) throws ParserConfigurationException, SAXException, IOException
{
dbf.setNamespaceAware(true);
DocumentBuilder builder = dbf.newDocumentBuilder();
org.w3c.dom.Document xslDoc = builder.parse(xslIs);
return new DOMSource(xslDoc);
}
}

View File

@ -0,0 +1,163 @@
package org.apache.lucene.xmlparser;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Properties;
import java.util.StringTokenizer;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Source;
import javax.xml.transform.TransformerException;
import javax.xml.transform.dom.DOMSource;
import junit.framework.TestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.RAMDirectory;
import org.w3c.dom.Document;
import org.xml.sax.SAXException;
/**
* This class illustrates how form input (such as from a web page or Swing gui) can be
* turned into Lucene queries using a choice of XSL templates for different styles of queries.
* @author maharwood
*/
public class TestQueryTemplateManager extends TestCase {
CoreParser builder;
Analyzer analyzer=new StandardAnalyzer();
HashMap templates=new HashMap();
private IndexSearcher searcher;
//A collection of documents' field values for use in our tests
String docFieldValues []=
{
"artist=Jeff Buckley \talbum=Grace \treleaseDate=1999 \tgenre=rock",
"artist=Fugazi \talbum=Repeater \treleaseDate=1990 \tgenre=alternative",
"artist=Fugazi \talbum=Red Medicine \treleaseDate=1995 \tgenre=alternative",
"artist=Peeping Tom \talbum=Peeping Tom \treleaseDate=2006 \tgenre=rock",
"artist=Red Snapper \talbum=Prince Blimey \treleaseDate=1996 \tgenre=electronic"
};
//A collection of example queries, consisting of name/value pairs representing form content plus
// a choice of query style template to use in the test, with expected number of hits
String queryForms[]=
{
"artist=Fugazi \texpectedMatches=2 \ttemplate=albumBooleanQuery.xsl",
"artist=Fugazi \treleaseDate=1990 \texpectedMatches=1 \ttemplate=albumBooleanQuery.xsl",
"artist=Buckley \tgenre=rock \texpectedMatches=1 \ttemplate=albumFilteredQuery.xsl",
"artist=Buckley \tgenre=electronic \texpectedMatches=0 \ttemplate=albumFilteredQuery.xsl",
"queryString=artist:buckly~ NOT genre:electronic \texpectedMatches=1 \ttemplate=albumLuceneClassicQuery.xsl"
};
public void testFormTransforms() throws SAXException, IOException, ParserConfigurationException, TransformerException, ParserException
{
//Run all of our test queries
for (int i = 0; i < queryForms.length; i++)
{
Properties queryFormProperties=getPropsFromString(queryForms[i]);
//Get the required query XSL template for this test
Source template=getTemplate(queryFormProperties.getProperty("template"));
//Transform the queryFormProperties into a Lucene XML query
Document doc=QueryTemplateManager.getQueryAsDOM(queryFormProperties,template);
//Parse the XML query using the XML parser
Query q=builder.getQuery(doc.getDocumentElement());
//Run the query
Hits h=searcher.search(q);
//Check we have the expected number of results
int expectedHits=Integer.parseInt(queryFormProperties.getProperty("expectedMatches"));
assertEquals("Number of results should match for query "+queryForms[i],expectedHits,h.length());
}
}
private Source getTemplate(String templateName) throws ParserConfigurationException, SAXException, IOException
{
Source result=(Source) templates.get(templateName);
if(result==null)
{
//Not yet loaded - load the stylesheet
result=QueryTemplateManager.getDOMSource(getClass().getResourceAsStream(templateName));
templates.put(templateName,result);
}
return result;
}
//Helper method to construct Lucene query forms used in our test
Properties getPropsFromString(String nameValuePairs)
{
Properties result=new Properties();
StringTokenizer st=new StringTokenizer(nameValuePairs,"\t=");
while(st.hasMoreTokens())
{
String name=st.nextToken().trim();
if(st.hasMoreTokens())
{
String value=st.nextToken().trim();
result.setProperty(name,value);
}
}
return result;
}
//Helper method to construct Lucene documents used in our tests
org.apache.lucene.document.Document getDocumentFromString(String nameValuePairs)
{
org.apache.lucene.document.Document result=new org.apache.lucene.document.Document();
StringTokenizer st=new StringTokenizer(nameValuePairs,"\t=");
while(st.hasMoreTokens())
{
String name=st.nextToken().trim();
if(st.hasMoreTokens())
{
String value=st.nextToken().trim();
result.add(new Field(name,value,Field.Store.YES,Field.Index.TOKENIZED));
}
}
return result;
}
/*
* @see TestCase#setUp()
*/
protected void setUp() throws Exception {
super.setUp();
//Create an index
RAMDirectory dir=new RAMDirectory();
IndexWriter w=new IndexWriter(dir,analyzer,true);
for (int i = 0; i < docFieldValues.length; i++)
{
w.addDocument(getDocumentFromString(docFieldValues[i]));
}
w.optimize();
w.close();
searcher=new IndexSearcher(dir);
//initialize the parser
builder=new CorePlusExtensionsParser(analyzer,new QueryParser("artist", analyzer));
}
protected void tearDown() throws Exception {
searcher.close();
}
}

View File

@ -0,0 +1,32 @@
<?xml version="1.0" encoding="ISO-8859-1"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:template match="/Document">
<!--This template ANDs all fields together. Within a single field all terms are ORed.
The query fields are fed directly through an analyzer and so do not need to adhere to
traditional Lucene query syntax.
-->
<BooleanQuery>
<xsl:if test="count(artist)>0">
<Clause occurs="must">
<TermsQuery fieldName="artist"><xsl:value-of select="artist"/></TermsQuery>
</Clause>
</xsl:if>
<xsl:if test="count(album)>0">
<Clause occurs="must">
<TermsQuery fieldName="album"><xsl:value-of select="album"/></TermsQuery>
</Clause>
</xsl:if>
<xsl:if test="count(genre)>0">
<Clause occurs="must">
<TermsQuery fieldName="genre"><xsl:value-of select="genre"/></TermsQuery>
</Clause>
</xsl:if>
<xsl:if test="count(releaseDate)>0">
<Clause occurs="must">
<TermsQuery fieldName="releaseDate"><xsl:value-of select="releaseDate"/></TermsQuery>
</Clause>
</xsl:if>
</BooleanQuery>
</xsl:template>
</xsl:stylesheet>

View File

@ -0,0 +1,38 @@
<?xml version="1.0" encoding="ISO-8859-1"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:template match="/Document">
<!-- This template uses an efficient, cached filter for the "genre" field".
Other query fields are fed directly through an analyzer and so do not need to adhere to
traditional Lucene query syntax. Terms within a field are ORed while different fields are ANDed
-->
<FilteredQuery>
<Query>
<BooleanQuery>
<xsl:if test="count(artist)>0">
<Clause occurs="must">
<TermsQuery fieldName="artist"><xsl:value-of select="artist"/></TermsQuery>
</Clause>
</xsl:if>
<xsl:if test="count(album)>0">
<Clause occurs="must">
<TermsQuery fieldName="album"><xsl:value-of select="album"/></TermsQuery>
</Clause>
</xsl:if>
<xsl:if test="count(releaseDate)>0">
<Clause occurs="must">
<TermsQuery fieldName="releaseDate"><xsl:value-of select="releaseDate"/></TermsQuery>
</Clause>
</xsl:if>
</BooleanQuery>
</Query>
<Filter>
<CachedFilter>
<!-- Example filter to be cached for fast, repeated use -->
<TermsFilter fieldName="genre">
<xsl:value-of select="genre"/>
</TermsFilter>
</CachedFilter>
</Filter>
</FilteredQuery>
</xsl:template>
</xsl:stylesheet>

View File

@ -0,0 +1,13 @@
<?xml version="1.0" encoding="ISO-8859-1"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:template match="/Document">
<!-- This template is designed to work with a google-like search form - one edit box and
uses the traditional Lucene query syntax
-->
<BooleanQuery>
<Clause occurs="must">
<UserQuery><xsl:value-of select="queryString"/></UserQuery>
</Clause>
</BooleanQuery>
</xsl:template>
</xsl:stylesheet>