mirror of https://github.com/apache/lucene.git
Added QueryTemplateManager.java to aid construction of XML queries from form input by using XSL templates. A Junit test provides examples of use. This approach offers a convenient way of externalizing and changing how user input is turned into Lucene queries. Database applications often adopt similar practices by externalizing SQL in template files that can be easily changed/optimized by a DBA.
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@500053 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
9251a63e01
commit
c02aed3b5e
|
@ -0,0 +1,102 @@
|
||||||
|
package org.apache.lucene.xmlparser;
|
||||||
|
|
||||||
|
import java.io.ByteArrayOutputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.util.Enumeration;
|
||||||
|
import java.util.Properties;
|
||||||
|
|
||||||
|
import javax.xml.parsers.DocumentBuilder;
|
||||||
|
import javax.xml.parsers.DocumentBuilderFactory;
|
||||||
|
import javax.xml.parsers.ParserConfigurationException;
|
||||||
|
import javax.xml.transform.Result;
|
||||||
|
import javax.xml.transform.Source;
|
||||||
|
import javax.xml.transform.Transformer;
|
||||||
|
import javax.xml.transform.TransformerException;
|
||||||
|
import javax.xml.transform.TransformerFactory;
|
||||||
|
import javax.xml.transform.dom.DOMResult;
|
||||||
|
import javax.xml.transform.dom.DOMSource;
|
||||||
|
import javax.xml.transform.stream.StreamResult;
|
||||||
|
|
||||||
|
import org.w3c.dom.Document;
|
||||||
|
import org.w3c.dom.Element;
|
||||||
|
import org.xml.sax.SAXException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Provides utilities for turning query form input (such as from a web page or Swing gui) into
|
||||||
|
* Lucene XML queries by using XSL templates. This approach offers a convenient way of externalizing
|
||||||
|
* and changing how user input is turned into Lucene queries.
|
||||||
|
* Database applications often adopt similar practices by externalizing SQL in template files that can
|
||||||
|
* be easily changed/optimized by a DBA.
|
||||||
|
* @author Mark Harwood
|
||||||
|
*/
|
||||||
|
public class QueryTemplateManager
|
||||||
|
{
|
||||||
|
static DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance ();
|
||||||
|
static TransformerFactory tFactory = TransformerFactory.newInstance();
|
||||||
|
|
||||||
|
public static String getQueryAsXmlString(Properties formProperties, String templateName) throws SAXException, IOException, ParserConfigurationException, TransformerException
|
||||||
|
{
|
||||||
|
return getQueryAsXmlString(formProperties,
|
||||||
|
getDOMSource(QueryTemplateManager.class.getResourceAsStream(templateName)));
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String getQueryAsXmlString(Properties formProperties, Source xslDs) throws SAXException, IOException, ParserConfigurationException, TransformerException
|
||||||
|
{
|
||||||
|
ByteArrayOutputStream baos=new ByteArrayOutputStream();
|
||||||
|
StreamResult result=new StreamResult(baos);
|
||||||
|
transformCriteria(formProperties,xslDs,result);
|
||||||
|
return baos.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Document getQueryAsDOM(Properties formProperties, String templateName) throws SAXException, IOException, ParserConfigurationException, TransformerException
|
||||||
|
{
|
||||||
|
return getQueryAsDOM(formProperties, getDOMSource(QueryTemplateManager.class.getResourceAsStream(templateName)));
|
||||||
|
}
|
||||||
|
public static Document getQueryAsDOM(Properties formProperties, InputStream xslIs) throws SAXException, IOException, ParserConfigurationException, TransformerException
|
||||||
|
{
|
||||||
|
return getQueryAsDOM(formProperties, getDOMSource(xslIs));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public static Document getQueryAsDOM(Properties formProperties, Source xslDs) throws SAXException, IOException, ParserConfigurationException, TransformerException
|
||||||
|
{
|
||||||
|
DOMResult result=new DOMResult();
|
||||||
|
transformCriteria(formProperties,xslDs,result);
|
||||||
|
return (Document)result.getNode();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void transformCriteria(Properties formProperties, Source xslDs, Result result) throws SAXException, IOException, ParserConfigurationException, TransformerException
|
||||||
|
{
|
||||||
|
dbf.setNamespaceAware(true);
|
||||||
|
|
||||||
|
Transformer transformer = tFactory.newTransformer(xslDs);
|
||||||
|
//Create an XML document representing the search index document.
|
||||||
|
DocumentBuilder db = dbf.newDocumentBuilder ();
|
||||||
|
org.w3c.dom.Document doc = db.newDocument ();
|
||||||
|
Element root = doc.createElement ("Document");
|
||||||
|
doc.appendChild (root);
|
||||||
|
|
||||||
|
Enumeration keysEnum = formProperties.keys();
|
||||||
|
while(keysEnum.hasMoreElements())
|
||||||
|
{
|
||||||
|
String propName=(String) keysEnum.nextElement();
|
||||||
|
String value=formProperties.getProperty(propName);
|
||||||
|
if((value!=null)&&(value.length()>0))
|
||||||
|
{
|
||||||
|
DOMUtils.insertChild(root,propName,value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
//Use XSLT to to transform into an XML query string using the queryTemplate
|
||||||
|
DOMSource xml=new DOMSource(doc);
|
||||||
|
transformer.transform(xml,result);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static DOMSource getDOMSource(InputStream xslIs) throws ParserConfigurationException, SAXException, IOException
|
||||||
|
{
|
||||||
|
dbf.setNamespaceAware(true);
|
||||||
|
DocumentBuilder builder = dbf.newDocumentBuilder();
|
||||||
|
org.w3c.dom.Document xslDoc = builder.parse(xslIs);
|
||||||
|
return new DOMSource(xslDoc);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,163 @@
|
||||||
|
package org.apache.lucene.xmlparser;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Properties;
|
||||||
|
import java.util.StringTokenizer;
|
||||||
|
|
||||||
|
import javax.xml.parsers.ParserConfigurationException;
|
||||||
|
import javax.xml.transform.Source;
|
||||||
|
import javax.xml.transform.TransformerException;
|
||||||
|
import javax.xml.transform.dom.DOMSource;
|
||||||
|
|
||||||
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.index.IndexWriter;
|
||||||
|
import org.apache.lucene.queryParser.QueryParser;
|
||||||
|
import org.apache.lucene.search.Hits;
|
||||||
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.store.RAMDirectory;
|
||||||
|
import org.w3c.dom.Document;
|
||||||
|
import org.xml.sax.SAXException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This class illustrates how form input (such as from a web page or Swing gui) can be
|
||||||
|
* turned into Lucene queries using a choice of XSL templates for different styles of queries.
|
||||||
|
* @author maharwood
|
||||||
|
*/
|
||||||
|
public class TestQueryTemplateManager extends TestCase {
|
||||||
|
|
||||||
|
CoreParser builder;
|
||||||
|
Analyzer analyzer=new StandardAnalyzer();
|
||||||
|
HashMap templates=new HashMap();
|
||||||
|
private IndexSearcher searcher;
|
||||||
|
|
||||||
|
//A collection of documents' field values for use in our tests
|
||||||
|
String docFieldValues []=
|
||||||
|
{
|
||||||
|
"artist=Jeff Buckley \talbum=Grace \treleaseDate=1999 \tgenre=rock",
|
||||||
|
"artist=Fugazi \talbum=Repeater \treleaseDate=1990 \tgenre=alternative",
|
||||||
|
"artist=Fugazi \talbum=Red Medicine \treleaseDate=1995 \tgenre=alternative",
|
||||||
|
"artist=Peeping Tom \talbum=Peeping Tom \treleaseDate=2006 \tgenre=rock",
|
||||||
|
"artist=Red Snapper \talbum=Prince Blimey \treleaseDate=1996 \tgenre=electronic"
|
||||||
|
};
|
||||||
|
|
||||||
|
//A collection of example queries, consisting of name/value pairs representing form content plus
|
||||||
|
// a choice of query style template to use in the test, with expected number of hits
|
||||||
|
String queryForms[]=
|
||||||
|
{
|
||||||
|
"artist=Fugazi \texpectedMatches=2 \ttemplate=albumBooleanQuery.xsl",
|
||||||
|
"artist=Fugazi \treleaseDate=1990 \texpectedMatches=1 \ttemplate=albumBooleanQuery.xsl",
|
||||||
|
"artist=Buckley \tgenre=rock \texpectedMatches=1 \ttemplate=albumFilteredQuery.xsl",
|
||||||
|
"artist=Buckley \tgenre=electronic \texpectedMatches=0 \ttemplate=albumFilteredQuery.xsl",
|
||||||
|
"queryString=artist:buckly~ NOT genre:electronic \texpectedMatches=1 \ttemplate=albumLuceneClassicQuery.xsl"
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
public void testFormTransforms() throws SAXException, IOException, ParserConfigurationException, TransformerException, ParserException
|
||||||
|
{
|
||||||
|
//Run all of our test queries
|
||||||
|
for (int i = 0; i < queryForms.length; i++)
|
||||||
|
{
|
||||||
|
Properties queryFormProperties=getPropsFromString(queryForms[i]);
|
||||||
|
|
||||||
|
//Get the required query XSL template for this test
|
||||||
|
Source template=getTemplate(queryFormProperties.getProperty("template"));
|
||||||
|
|
||||||
|
//Transform the queryFormProperties into a Lucene XML query
|
||||||
|
Document doc=QueryTemplateManager.getQueryAsDOM(queryFormProperties,template);
|
||||||
|
|
||||||
|
//Parse the XML query using the XML parser
|
||||||
|
Query q=builder.getQuery(doc.getDocumentElement());
|
||||||
|
|
||||||
|
//Run the query
|
||||||
|
Hits h=searcher.search(q);
|
||||||
|
|
||||||
|
//Check we have the expected number of results
|
||||||
|
int expectedHits=Integer.parseInt(queryFormProperties.getProperty("expectedMatches"));
|
||||||
|
assertEquals("Number of results should match for query "+queryForms[i],expectedHits,h.length());
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private Source getTemplate(String templateName) throws ParserConfigurationException, SAXException, IOException
|
||||||
|
{
|
||||||
|
Source result=(Source) templates.get(templateName);
|
||||||
|
if(result==null)
|
||||||
|
{
|
||||||
|
//Not yet loaded - load the stylesheet
|
||||||
|
result=QueryTemplateManager.getDOMSource(getClass().getResourceAsStream(templateName));
|
||||||
|
templates.put(templateName,result);
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//Helper method to construct Lucene query forms used in our test
|
||||||
|
Properties getPropsFromString(String nameValuePairs)
|
||||||
|
{
|
||||||
|
Properties result=new Properties();
|
||||||
|
StringTokenizer st=new StringTokenizer(nameValuePairs,"\t=");
|
||||||
|
while(st.hasMoreTokens())
|
||||||
|
{
|
||||||
|
String name=st.nextToken().trim();
|
||||||
|
if(st.hasMoreTokens())
|
||||||
|
{
|
||||||
|
String value=st.nextToken().trim();
|
||||||
|
result.setProperty(name,value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
//Helper method to construct Lucene documents used in our tests
|
||||||
|
org.apache.lucene.document.Document getDocumentFromString(String nameValuePairs)
|
||||||
|
{
|
||||||
|
org.apache.lucene.document.Document result=new org.apache.lucene.document.Document();
|
||||||
|
StringTokenizer st=new StringTokenizer(nameValuePairs,"\t=");
|
||||||
|
while(st.hasMoreTokens())
|
||||||
|
{
|
||||||
|
String name=st.nextToken().trim();
|
||||||
|
if(st.hasMoreTokens())
|
||||||
|
{
|
||||||
|
String value=st.nextToken().trim();
|
||||||
|
result.add(new Field(name,value,Field.Store.YES,Field.Index.TOKENIZED));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* @see TestCase#setUp()
|
||||||
|
*/
|
||||||
|
protected void setUp() throws Exception {
|
||||||
|
super.setUp();
|
||||||
|
|
||||||
|
|
||||||
|
//Create an index
|
||||||
|
RAMDirectory dir=new RAMDirectory();
|
||||||
|
IndexWriter w=new IndexWriter(dir,analyzer,true);
|
||||||
|
for (int i = 0; i < docFieldValues.length; i++)
|
||||||
|
{
|
||||||
|
w.addDocument(getDocumentFromString(docFieldValues[i]));
|
||||||
|
}
|
||||||
|
w.optimize();
|
||||||
|
w.close();
|
||||||
|
searcher=new IndexSearcher(dir);
|
||||||
|
|
||||||
|
//initialize the parser
|
||||||
|
builder=new CorePlusExtensionsParser(analyzer,new QueryParser("artist", analyzer));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
protected void tearDown() throws Exception {
|
||||||
|
searcher.close();
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,32 @@
|
||||||
|
<?xml version="1.0" encoding="ISO-8859-1"?>
|
||||||
|
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
|
||||||
|
<xsl:template match="/Document">
|
||||||
|
<!--This template ANDs all fields together. Within a single field all terms are ORed.
|
||||||
|
The query fields are fed directly through an analyzer and so do not need to adhere to
|
||||||
|
traditional Lucene query syntax.
|
||||||
|
-->
|
||||||
|
<BooleanQuery>
|
||||||
|
<xsl:if test="count(artist)>0">
|
||||||
|
<Clause occurs="must">
|
||||||
|
<TermsQuery fieldName="artist"><xsl:value-of select="artist"/></TermsQuery>
|
||||||
|
</Clause>
|
||||||
|
</xsl:if>
|
||||||
|
<xsl:if test="count(album)>0">
|
||||||
|
<Clause occurs="must">
|
||||||
|
<TermsQuery fieldName="album"><xsl:value-of select="album"/></TermsQuery>
|
||||||
|
</Clause>
|
||||||
|
</xsl:if>
|
||||||
|
<xsl:if test="count(genre)>0">
|
||||||
|
<Clause occurs="must">
|
||||||
|
<TermsQuery fieldName="genre"><xsl:value-of select="genre"/></TermsQuery>
|
||||||
|
</Clause>
|
||||||
|
</xsl:if>
|
||||||
|
<xsl:if test="count(releaseDate)>0">
|
||||||
|
<Clause occurs="must">
|
||||||
|
<TermsQuery fieldName="releaseDate"><xsl:value-of select="releaseDate"/></TermsQuery>
|
||||||
|
</Clause>
|
||||||
|
</xsl:if>
|
||||||
|
</BooleanQuery>
|
||||||
|
|
||||||
|
</xsl:template>
|
||||||
|
</xsl:stylesheet>
|
|
@ -0,0 +1,38 @@
|
||||||
|
<?xml version="1.0" encoding="ISO-8859-1"?>
|
||||||
|
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
|
||||||
|
<xsl:template match="/Document">
|
||||||
|
<!-- This template uses an efficient, cached filter for the "genre" field".
|
||||||
|
Other query fields are fed directly through an analyzer and so do not need to adhere to
|
||||||
|
traditional Lucene query syntax. Terms within a field are ORed while different fields are ANDed
|
||||||
|
-->
|
||||||
|
<FilteredQuery>
|
||||||
|
<Query>
|
||||||
|
<BooleanQuery>
|
||||||
|
<xsl:if test="count(artist)>0">
|
||||||
|
<Clause occurs="must">
|
||||||
|
<TermsQuery fieldName="artist"><xsl:value-of select="artist"/></TermsQuery>
|
||||||
|
</Clause>
|
||||||
|
</xsl:if>
|
||||||
|
<xsl:if test="count(album)>0">
|
||||||
|
<Clause occurs="must">
|
||||||
|
<TermsQuery fieldName="album"><xsl:value-of select="album"/></TermsQuery>
|
||||||
|
</Clause>
|
||||||
|
</xsl:if>
|
||||||
|
<xsl:if test="count(releaseDate)>0">
|
||||||
|
<Clause occurs="must">
|
||||||
|
<TermsQuery fieldName="releaseDate"><xsl:value-of select="releaseDate"/></TermsQuery>
|
||||||
|
</Clause>
|
||||||
|
</xsl:if>
|
||||||
|
</BooleanQuery>
|
||||||
|
</Query>
|
||||||
|
<Filter>
|
||||||
|
<CachedFilter>
|
||||||
|
<!-- Example filter to be cached for fast, repeated use -->
|
||||||
|
<TermsFilter fieldName="genre">
|
||||||
|
<xsl:value-of select="genre"/>
|
||||||
|
</TermsFilter>
|
||||||
|
</CachedFilter>
|
||||||
|
</Filter>
|
||||||
|
</FilteredQuery>
|
||||||
|
</xsl:template>
|
||||||
|
</xsl:stylesheet>
|
|
@ -0,0 +1,13 @@
|
||||||
|
<?xml version="1.0" encoding="ISO-8859-1"?>
|
||||||
|
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
|
||||||
|
<xsl:template match="/Document">
|
||||||
|
<!-- This template is designed to work with a google-like search form - one edit box and
|
||||||
|
uses the traditional Lucene query syntax
|
||||||
|
-->
|
||||||
|
<BooleanQuery>
|
||||||
|
<Clause occurs="must">
|
||||||
|
<UserQuery><xsl:value-of select="queryString"/></UserQuery>
|
||||||
|
</Clause>
|
||||||
|
</BooleanQuery>
|
||||||
|
</xsl:template>
|
||||||
|
</xsl:stylesheet>
|
Loading…
Reference in New Issue