LUCENE-8291: Remove QueryTemplateManager utility class from XML queryparser

This commit is contained in:
Uwe Schindler 2018-05-15 23:34:59 +02:00
parent f959777995
commit 11c6a7ad88
6 changed files with 6 additions and 489 deletions

View File

@ -230,6 +230,12 @@ Other
* LUCENE-8122, LUCENE-8175: Upgrade analysis/icu to ICU 61.1.
(Robert Muir, Adrien Grand, Uwe Schindler)
* LUCENE-8291: Remove QueryTemplateManager utility class from XML queryparser.
This class is just a general XML transforming tool (using property files and
XSLT) and has nothing to do with query parsing. It can easily be implemented
using more sophisticated libraries or using XSL transformers from the JDK.
(Uwe Schindler)
Build
* LUCENE-7935: Publish .sha512 hash files with the release artifacts and stop

View File

@ -1,202 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queryparser.xml;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.xml.sax.SAXException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.*;
import javax.xml.transform.dom.DOMResult;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.StringWriter;
import java.io.IOException;
import java.io.InputStream;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Properties;
/**
 * Provides utilities for turning query form input (such as from a web page or Swing gui) into
 * Lucene XML queries by using XSL templates. This approach offers a convenient way of externalizing
 * and changing how user input is turned into Lucene queries.
 * Database applications often adopt similar practices by externalizing SQL in template files that can
 * be easily changed/optimized by a DBA.
 * The static methods can be used on their own or by creating an instance of this class you can store and
 * re-use compiled stylesheets for fast use (e.g. in a server environment).
 * <p>
 * Access to the shared static XML factories is serialized internally. The per-instance template
 * registry is a plain {@link HashMap} and is not synchronized, so templates should be registered
 * before an instance is shared between threads.
 */
public class QueryTemplateManager {
  static final DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
  static final TransformerFactory tFactory = TransformerFactory.newInstance();

  HashMap<String, Templates> compiledTemplatesCache = new HashMap<>();
  Templates defaultCompiledTemplates = null;

  /** Creates a manager with no default template and an empty template registry. */
  public QueryTemplateManager() {
  }

  /**
   * Creates a manager whose default template is compiled from the given stylesheet.
   *
   * @param xslIs stream providing the XSL stylesheet
   */
  public QueryTemplateManager(InputStream xslIs)
      throws TransformerConfigurationException, ParserConfigurationException, SAXException, IOException {
    addDefaultQueryTemplate(xslIs);
  }

  /**
   * Compiles the given stylesheet and installs it as the default template, replacing any previous one.
   *
   * @param xslIs stream providing the XSL stylesheet
   */
  public void addDefaultQueryTemplate(InputStream xslIs)
      throws TransformerConfigurationException, ParserConfigurationException, SAXException, IOException {
    defaultCompiledTemplates = getTemplates(xslIs);
  }

  /**
   * Compiles the given stylesheet and registers it under {@code name}, replacing any template
   * previously registered under that name.
   *
   * @param name  key used later to select this template
   * @param xslIs stream providing the XSL stylesheet
   */
  public void addQueryTemplate(String name, InputStream xslIs)
      throws TransformerConfigurationException, ParserConfigurationException, SAXException, IOException {
    compiledTemplatesCache.put(name, getTemplates(xslIs));
  }

  /**
   * Transforms the form properties into an XML query string using a registered template.
   *
   * @param formProperties    form fields; properties with empty values are skipped
   * @param queryTemplateName name passed earlier to {@link #addQueryTemplate(String, InputStream)}
   * @return the transformer output as a string
   * @throws NullPointerException if no template is registered under {@code queryTemplateName}
   */
  public String getQueryAsXmlString(Properties formProperties, String queryTemplateName)
      throws SAXException, IOException, ParserConfigurationException, TransformerException {
    return getQueryAsXmlString(formProperties, lookupTemplates(queryTemplateName));
  }

  /**
   * Transforms the form properties into a DOM using a registered template.
   *
   * @param formProperties    form fields; properties with empty values are skipped
   * @param queryTemplateName name passed earlier to {@link #addQueryTemplate(String, InputStream)}
   * @return the node produced by the transformation, cast to {@link Document}
   * @throws NullPointerException if no template is registered under {@code queryTemplateName}
   */
  public Document getQueryAsDOM(Properties formProperties, String queryTemplateName)
      throws SAXException, IOException, ParserConfigurationException, TransformerException {
    return getQueryAsDOM(formProperties, lookupTemplates(queryTemplateName));
  }

  /**
   * Transforms the form properties into an XML query string using the default template.
   *
   * @throws NullPointerException if no default template has been set
   */
  public String getQueryAsXmlString(Properties formProperties)
      throws SAXException, IOException, ParserConfigurationException, TransformerException {
    return getQueryAsXmlString(formProperties, requireDefaultTemplates());
  }

  /**
   * Transforms the form properties into a DOM using the default template.
   *
   * @throws NullPointerException if no default template has been set
   */
  public Document getQueryAsDOM(Properties formProperties)
      throws SAXException, IOException, ParserConfigurationException, TransformerException {
    return getQueryAsDOM(formProperties, requireDefaultTemplates());
  }

  /**
   * Fast means of constructing query using a precompiled stylesheet
   */
  public static String getQueryAsXmlString(Properties formProperties, Templates template)
      throws ParserConfigurationException, TransformerException {
    // TODO: Suppress XML header with encoding (as Strings have no encoding)
    StringWriter writer = new StringWriter();
    transformCriteria(formProperties, template, new StreamResult(writer));
    return writer.toString();
  }

  /**
   * Slow means of constructing query parsing a stylesheet from an input stream
   */
  public static String getQueryAsXmlString(Properties formProperties, InputStream xslIs)
      throws SAXException, IOException, ParserConfigurationException, TransformerException {
    // TODO: Suppress XML header with encoding (as Strings have no encoding)
    StringWriter writer = new StringWriter();
    transformCriteria(formProperties, xslIs, new StreamResult(writer));
    return writer.toString();
  }

  /**
   * Fast means of constructing query using a cached,precompiled stylesheet
   */
  public static Document getQueryAsDOM(Properties formProperties, Templates template)
      throws ParserConfigurationException, TransformerException {
    DOMResult result = new DOMResult();
    transformCriteria(formProperties, template, result);
    return (Document) result.getNode();
  }

  /**
   * Slow means of constructing query - parses stylesheet from input stream
   */
  public static Document getQueryAsDOM(Properties formProperties, InputStream xslIs)
      throws SAXException, IOException, ParserConfigurationException, TransformerException {
    DOMResult result = new DOMResult();
    transformCriteria(formProperties, xslIs, result);
    return (Document) result.getNode();
  }

  /**
   * Slower transformation using an uncompiled stylesheet (suitable for development environment)
   */
  public static void transformCriteria(Properties formProperties, InputStream xslIs, Result result)
      throws SAXException, IOException, ParserConfigurationException, TransformerException {
    DOMSource stylesheet = parseStylesheet(xslIs);
    Transformer transformer;
    synchronized (tFactory) {
      transformer = tFactory.newTransformer(stylesheet);
    }
    transformCriteria(formProperties, transformer, result);
  }

  /**
   * Fast transformation using a pre-compiled stylesheet (suitable for production environments)
   *
   * @throws NullPointerException if {@code template} is null
   */
  public static void transformCriteria(Properties formProperties, Templates template, Result result)
      throws ParserConfigurationException, TransformerException {
    // Fail with a descriptive message instead of a bare NPE from template.newTransformer().
    java.util.Objects.requireNonNull(template, "template must not be null");
    transformCriteria(formProperties, template.newTransformer(), result);
  }

  /**
   * Builds a DOM with a root element named {@code Document} holding one child per non-empty
   * form property, then runs the given transformer over it.
   *
   * @param formProperties form fields; properties whose value is null or empty are skipped
   * @param transformer    transformer applied to the generated document
   * @param result         receives the transformation output
   */
  public static void transformCriteria(Properties formProperties, Transformer transformer, Result result)
      throws ParserConfigurationException, TransformerException {
    // Create an XML document representing the form input.
    Document doc = newNamespaceAwareBuilder().newDocument();
    Element root = doc.createElement("Document");
    doc.appendChild(root);

    Enumeration<?> keysEnum = formProperties.propertyNames();
    while (keysEnum.hasMoreElements()) {
      String propName = keysEnum.nextElement().toString();
      String value = formProperties.getProperty(propName);
      if (value != null && !value.isEmpty()) {
        DOMUtils.insertChild(root, propName, value);
      }
    }

    // Use XSLT to transform into an XML query string using the queryTemplate
    transformer.transform(new DOMSource(doc), result);
  }

  /**
   * Parses a query stylesheet for repeated use
   */
  public static Templates getTemplates(InputStream xslIs)
      throws ParserConfigurationException, SAXException, IOException, TransformerConfigurationException {
    DOMSource stylesheet = parseStylesheet(xslIs);
    // Guard the shared factory the same way transformCriteria(Properties, InputStream, Result) does.
    synchronized (tFactory) {
      return tFactory.newTemplates(stylesheet);
    }
  }

  /** Returns the template registered under the given name, failing with a descriptive message if absent. */
  private Templates lookupTemplates(String queryTemplateName) {
    Templates ts = compiledTemplatesCache.get(queryTemplateName);
    if (ts == null) {
      // NullPointerException is kept as the exception type that an unknown name already produced.
      throw new NullPointerException("No query template registered under name '" + queryTemplateName + "'");
    }
    return ts;
  }

  /** Returns the default template, failing with a descriptive message if none has been set. */
  private Templates requireDefaultTemplates() {
    if (defaultCompiledTemplates == null) {
      throw new NullPointerException("No default query template has been set");
    }
    return defaultCompiledTemplates;
  }

  /** Creates a namespace-aware DocumentBuilder; configuring and using the shared factory happens under one lock. */
  private static DocumentBuilder newNamespaceAwareBuilder() throws ParserConfigurationException {
    synchronized (dbf) {
      dbf.setNamespaceAware(true);
      return dbf.newDocumentBuilder();
    }
  }

  /** Parses the stylesheet stream into a DOM and wraps it as a transformation source. */
  private static DOMSource parseStylesheet(InputStream xslIs)
      throws ParserConfigurationException, SAXException, IOException {
    Document xslDoc = newNamespaceAwareBuilder().parse(xslIs);
    return new DOMSource(xslDoc);
  }
}

View File

@ -1,163 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queryparser.xml;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Constants;
import org.apache.lucene.util.LuceneTestCase;
import org.w3c.dom.Document;
import org.xml.sax.SAXException;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;
import java.io.IOException;
import java.util.Properties;
import java.util.StringTokenizer;
/**
 * Demonstrates turning form input (such as from a web page or Swing gui) into Lucene queries,
 * selecting among several XSL templates that each produce a different style of query.
 */
public class TestQueryTemplateManager extends LuceneTestCase {

  private CoreParser builder;
  private Analyzer analyzer;
  private IndexSearcher searcher;
  private IndexReader reader;
  private Directory dir;

  // Field values of the documents indexed for the test, one entry per document.
  String[] docFieldValues = {
      "artist=Jeff Buckley \talbum=Grace \treleaseDate=1999 \tgenre=rock",
      "artist=Fugazi \talbum=Repeater \treleaseDate=1990 \tgenre=alternative",
      "artist=Fugazi \talbum=Red Medicine \treleaseDate=1995 \tgenre=alternative",
      "artist=Peeping Tom \talbum=Peeping Tom \treleaseDate=2006 \tgenre=rock",
      "artist=Red Snapper \talbum=Prince Blimey \treleaseDate=1996 \tgenre=electronic"
  };

  // Example query forms: name/value pairs for the form content, the template that should
  // render the query, and the number of hits the resulting query is expected to produce.
  String[] queryForms = {
      "artist=Fugazi \texpectedMatches=2 \ttemplate=albumBooleanQuery",
      "artist=Fugazi \treleaseDate=1990 \texpectedMatches=1 \ttemplate=albumBooleanQuery",
      "artist=Buckley \tgenre=rock \texpectedMatches=1 \ttemplate=albumFilteredQuery",
      "artist=Buckley \tgenre=electronic \texpectedMatches=0 \ttemplate=albumFilteredQuery",
      "queryString=artist:buckly~ NOT genre:electronic \texpectedMatches=1 \ttemplate=albumLuceneClassicQuery"
  };

  // Builds the index searched by the test and the XML query parser under test.
  @Override
  public void setUp() throws Exception {
    super.setUp();
    analyzer = new MockAnalyzer(random());

    // Index one document per entry of docFieldValues.
    dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(analyzer));
    for (int i = 0; i < docFieldValues.length; i++) {
      writer.addDocument(getDocumentFromString(docFieldValues[i]));
    }
    writer.forceMerge(1);
    writer.close();

    reader = DirectoryReader.open(dir);
    searcher = newSearcher(reader);

    // Parser that turns the generated XML into Lucene queries.
    builder = new CorePlusExtensionsParser("artist", analyzer);
  }

  @Override
  public void tearDown() throws Exception {
    reader.close();
    dir.close();
    super.tearDown();
  }

  public void testFormTransforms() throws SAXException, IOException, ParserConfigurationException, TransformerException, ParserException {
    assumeFalse("test temporarily disabled on J9, see https://issues.apache.org/jira/browse/LUCENE-6556",
        Constants.JAVA_VENDOR.startsWith("IBM"));

    // Pre-compile every template the query forms refer to.
    QueryTemplateManager templateManager = new QueryTemplateManager();
    templateManager.addQueryTemplate("albumBooleanQuery", getClass().getResourceAsStream("albumBooleanQuery.xsl"));
    templateManager.addQueryTemplate("albumFilteredQuery", getClass().getResourceAsStream("albumFilteredQuery.xsl"));
    templateManager.addQueryTemplate("albumLuceneClassicQuery", getClass().getResourceAsStream("albumLuceneClassicQuery.xsl"));

    for (int i = 0; i < queryForms.length; i++) {
      String form = queryForms[i];
      Properties formProps = getPropsFromString(form);

      // Render the form as a Lucene XML query with the template the form asks for.
      String templateName = formProps.getProperty("template");
      Document queryDom = templateManager.getQueryAsDOM(formProps, templateName);

      // Parse the XML into a query and run it.
      Query query = builder.getQuery(queryDom.getDocumentElement());
      long actualHits = searcher.search(query, 1000).totalHits;

      // Compare with the hit count declared in the form.
      int expectedHits = Integer.parseInt(formProps.getProperty("expectedMatches"));
      assertEquals("Number of results should match for query " + form, expectedHits, actualHits);
    }
  }

  // Parses tab/equals separated name=value pairs into Properties; a trailing name without a value is dropped.
  Properties getPropsFromString(String nameValuePairs) {
    Properties props = new Properties();
    StringTokenizer tokens = new StringTokenizer(nameValuePairs, "\t=");
    while (tokens.hasMoreTokens()) {
      String key = tokens.nextToken().trim();
      if (!tokens.hasMoreTokens()) {
        break;
      }
      props.setProperty(key, tokens.nextToken().trim());
    }
    return props;
  }

  // Parses tab/equals separated name=value pairs into a Lucene document with one stored text field per pair.
  org.apache.lucene.document.Document getDocumentFromString(String nameValuePairs) {
    org.apache.lucene.document.Document luceneDoc = new org.apache.lucene.document.Document();
    StringTokenizer tokens = new StringTokenizer(nameValuePairs, "\t=");
    while (tokens.hasMoreTokens()) {
      String fieldName = tokens.nextToken().trim();
      if (!tokens.hasMoreTokens()) {
        break;
      }
      String fieldValue = tokens.nextToken().trim();
      luceneDoc.add(newTextField(fieldName, fieldValue, Field.Store.YES));
    }
    return luceneDoc;
  }
}

View File

@ -1,48 +0,0 @@
<?xml version="1.0" encoding="ISO-8859-1"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:template match="/Document">
    <!--
      ANDs all supplied fields together; within a single field all terms are ORed.
      Field values are fed directly through an analyzer, so they do not need to follow
      traditional Lucene query syntax. A clause is emitted only for fields that are
      present in the input document.
    -->
    <BooleanQuery>
      <xsl:if test="artist">
        <Clause occurs="must">
          <TermsQuery fieldName="artist"><xsl:value-of select="artist"/></TermsQuery>
        </Clause>
      </xsl:if>
      <xsl:if test="album">
        <Clause occurs="must">
          <TermsQuery fieldName="album"><xsl:value-of select="album"/></TermsQuery>
        </Clause>
      </xsl:if>
      <xsl:if test="genre">
        <Clause occurs="must">
          <TermsQuery fieldName="genre"><xsl:value-of select="genre"/></TermsQuery>
        </Clause>
      </xsl:if>
      <xsl:if test="releaseDate">
        <Clause occurs="must">
          <TermsQuery fieldName="releaseDate"><xsl:value-of select="releaseDate"/></TermsQuery>
        </Clause>
      </xsl:if>
    </BooleanQuery>
  </xsl:template>
</xsl:stylesheet>

View File

@ -1,47 +0,0 @@
<?xml version="1.0" encoding="ISO-8859-1"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:template match="/Document">
    <!--
      Uses an efficient, cached filter for the "genre" field.
      The other query fields are fed directly through an analyzer, so they do not need to
      follow traditional Lucene query syntax. Terms within a field are ORed while different
      fields are ANDed.
      NOTE(review): unlike the other fields, the genre clause is not wrapped in xsl:if and is
      emitted even when the input has no genre element - presumably the form always supplies
      a genre; confirm.
    -->
    <BooleanQuery>
      <xsl:if test="artist">
        <Clause occurs="must">
          <TermsQuery fieldName="artist"><xsl:value-of select="artist"/></TermsQuery>
        </Clause>
      </xsl:if>
      <xsl:if test="album">
        <Clause occurs="must">
          <TermsQuery fieldName="album"><xsl:value-of select="album"/></TermsQuery>
        </Clause>
      </xsl:if>
      <xsl:if test="releaseDate">
        <Clause occurs="must">
          <TermsQuery fieldName="releaseDate"><xsl:value-of select="releaseDate"/></TermsQuery>
        </Clause>
      </xsl:if>
      <Clause occurs="filter">
        <TermsQuery fieldName="genre"><xsl:value-of select="genre"/></TermsQuery>
      </Clause>
    </BooleanQuery>
  </xsl:template>
</xsl:stylesheet>

View File

@ -1,29 +0,0 @@
<?xml version="1.0" encoding="ISO-8859-1"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:template match="/Document">
    <!--
      Designed for a google-like search form with a single edit box: the content of the
      queryString field is passed through as a query in traditional Lucene query syntax.
    -->
    <BooleanQuery>
      <Clause occurs="must">
        <UserQuery>
          <xsl:value-of select="queryString"/>
        </UserQuery>
      </Clause>
    </BooleanQuery>
  </xsl:template>
</xsl:stylesheet>