SOLR-2630: Added new XsltUpdateRequestHandler that works like XmlUpdateRequestHandler but allows to transform the POSTed XML document using XSLT

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1141999 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Uwe Schindler 2011-07-01 17:31:04 +00:00
parent a9ca8d5191
commit 592b402ab5
8 changed files with 376 additions and 1 deletions

View File

@ -282,6 +282,11 @@ New Features
checked only for documents that match the main query and all other filters. checked only for documents that match the main query and all other filters.
The "frange" query now implements the PostFilter interface. (yonik) The "frange" query now implements the PostFilter interface. (yonik)
* SOLR-2630: Added new XsltUpdateRequestHandler that works like
XmlUpdateRequestHandler but allows to transform the POSTed XML document
using XSLT. This allows to POST arbitrary XML documents to the update
handler, as long as you also provide a XSL to transform them to a valid
Solr input document. (Upayavira, Uwe Schindler)
Optimizations Optimizations
---------------------- ----------------------

View File

@ -911,6 +911,12 @@
</lst> </lst>
</requestHandler> </requestHandler>
<!-- XSLT Update Request Handler
Transforms incoming XML with stylesheet identified by tr=
-->
<requestHandler name="/update/xslt"
startup="lazy"
class="solr.XsltUpdateRequestHandler"/>
<!-- Field Analysis Request Handler <!-- Field Analysis Request Handler

View File

@ -48,7 +48,7 @@ import java.io.IOException;
**/ **/
class XMLLoader extends ContentStreamLoader { class XMLLoader extends ContentStreamLoader {
protected UpdateRequestProcessor processor; protected UpdateRequestProcessor processor;
private XMLInputFactory inputFactory; protected XMLInputFactory inputFactory;
public XMLLoader(UpdateRequestProcessor processor, XMLInputFactory inputFactory) { public XMLLoader(UpdateRequestProcessor processor, XMLInputFactory inputFactory) {
this.processor = processor; this.processor = processor;

View File

@ -0,0 +1,100 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.XMLErrorLogger;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.update.processor.UpdateRequestProcessor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.xml.stream.XMLInputFactory;
/**
* Add documents to solr using the STAX XML parser, transforming it with XSLT first
*/
public class XsltUpdateRequestHandler extends ContentStreamHandlerBase {
public static Logger log = LoggerFactory.getLogger(XsltUpdateRequestHandler.class);
public static final XMLErrorLogger xmllog = new XMLErrorLogger(log);
public static final String UPDATE_PROCESSOR = "update.processor";
public static final int XSLT_CACHE_DEFAULT = 60;
private static final String XSLT_CACHE_PARAM = "xsltCacheLifetimeSeconds";
XMLInputFactory inputFactory;
private Integer xsltCacheLifetimeSeconds;
@Override
public void init(NamedList args) {
super.init(args);
inputFactory = XMLInputFactory.newInstance();
try {
// The java 1.6 bundled stax parser (sjsxp) does not currently have a thread-safe
// XMLInputFactory, as that implementation tries to cache and reuse the
// XMLStreamReader. Setting the parser-specific "reuse-instance" property to false
// prevents this.
// All other known open-source stax parsers (and the bea ref impl)
// have thread-safe factories.
inputFactory.setProperty("reuse-instance", Boolean.FALSE);
}
catch (IllegalArgumentException ex) {
// Other implementations will likely throw this exception since "reuse-instance"
// isimplementation specific.
log.debug("Unable to set the 'reuse-instance' property for the input chain: " + inputFactory);
}
inputFactory.setXMLReporter(xmllog);
final SolrParams p = SolrParams.toSolrParams(args);
this.xsltCacheLifetimeSeconds = p.getInt(XSLT_CACHE_PARAM,XSLT_CACHE_DEFAULT);
log.info("xsltCacheLifetimeSeconds=" + xsltCacheLifetimeSeconds);
}
@Override
protected ContentStreamLoader newLoader(SolrQueryRequest req, UpdateRequestProcessor processor) {
return new XsltXMLLoader(processor, inputFactory, xsltCacheLifetimeSeconds);
}
//////////////////////// SolrInfoMBeans methods //////////////////////
@Override
public String getDescription() {
return "Add documents with XML, transforming with XSLT first";
}
@Override
public String getVersion() {
return "$Revision$";
}
@Override
public String getSourceId() {
return "$Id$";
}
@Override
public String getSource() {
return "$URL$";
}
}

View File

@ -0,0 +1,119 @@
package org.apache.solr.handler;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.solr.update.processor.UpdateRequestProcessor;
import org.apache.solr.util.xslt.TransformerProvider;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.ContentStream;
import org.apache.solr.common.util.ContentStreamBase;
import org.apache.solr.common.util.XMLErrorLogger;
import org.apache.solr.core.SolrConfig;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.commons.io.IOUtils;
import javax.xml.stream.XMLStreamReader;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLInputFactory;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.dom.DOMResult;
import javax.xml.transform.sax.SAXSource;
import org.xml.sax.InputSource;
import java.io.InputStream;
import java.io.IOException;
import java.util.Map;
/**
* Extends the XMLLoader by applying an XSLT transform before the
* XMLLoader actually loads the XML
*
**/
class XsltXMLLoader extends XMLLoader {
public static final String TRANSFORM_PARAM = "tr";
public static final String CONTEXT_TRANSFORMER_KEY = "xsltupdater.transformer";
private final Integer xsltCacheLifetimeSeconds;
public XsltXMLLoader(UpdateRequestProcessor processor, XMLInputFactory inputFactory, Integer xsltCacheLifetimeSeconds) {
super(processor, inputFactory);
this.xsltCacheLifetimeSeconds = xsltCacheLifetimeSeconds;
}
@Override
public void load(SolrQueryRequest req, SolrQueryResponse rsp, ContentStream stream) throws Exception {
final DOMResult result = new DOMResult();
final Transformer t = getTransformer(req);
InputStream is = null;
XMLStreamReader parser = null;
// first step: read XML and build DOM using Transformer (this is no overhead, as XSL always produces
// an internal result DOM tree, we just access it directly as input for StAX):
try {
is = stream.getStream();
final String charset = ContentStreamBase.getCharsetFromContentType(stream.getContentType());
final InputSource isrc = new InputSource(is);
isrc.setEncoding(charset);
final SAXSource source = new SAXSource(isrc);
t.transform(source, result);
} catch(TransformerException te) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, te.getMessage(), te);
} finally {
IOUtils.closeQuietly(is);
}
// second step feed the intermediate DOM tree into StAX parser:
try {
parser = inputFactory.createXMLStreamReader(new DOMSource(result.getNode()));
this.processUpdate(req, processor, parser);
} catch (XMLStreamException e) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e.getMessage(), e);
} finally {
if (parser != null) parser.close();
}
}
/** Get Transformer from request context, or from TransformerProvider.
* This allows either getContentType(...) or write(...) to instantiate the Transformer,
* depending on which one is called first, then the other one reuses the same Transformer
*/
protected Transformer getTransformer(SolrQueryRequest request) throws IOException {
final String xslt = request.getParams().get(TRANSFORM_PARAM,null);
if(xslt==null) {
throw new IOException("'" + TRANSFORM_PARAM + "' request parameter is required to use the XSLTResponseWriter");
}
// not the cleanest way to achieve this
SolrConfig solrConfig = request.getCore().getSolrConfig();
// no need to synchronize access to context, right?
// Nothing else happens with it at the same time
final Map<Object,Object> ctx = request.getContext();
Transformer result = (Transformer)ctx.get(CONTEXT_TRANSFORMER_KEY);
if(result==null) {
result = TransformerProvider.instance.getTransformer(solrConfig, xslt,xsltCacheLifetimeSeconds.intValue());
result.setErrorListener(XsltUpdateRequestHandler.xmllog);
ctx.put(CONTEXT_TRANSFORMER_KEY,result);
}
return result;
}
}

View File

@ -35,6 +35,7 @@ import javax.xml.transform.stream.StreamSource;
import org.apache.solr.core.SolrConfig; import org.apache.solr.core.SolrConfig;
import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.XMLErrorLogger;
import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.util.xslt.TransformerProvider; import org.apache.solr.util.xslt.TransformerProvider;
@ -53,6 +54,7 @@ public class XSLTResponseWriter implements QueryResponseWriter {
private static final String XSLT_CACHE_PARAM = "xsltCacheLifetimeSeconds"; private static final String XSLT_CACHE_PARAM = "xsltCacheLifetimeSeconds";
private static final Logger log = LoggerFactory.getLogger(XSLTResponseWriter.class); private static final Logger log = LoggerFactory.getLogger(XSLTResponseWriter.class);
private static final XMLErrorLogger xmllog = new XMLErrorLogger(log);
public void init(NamedList n) { public void init(NamedList n) {
final SolrParams p = SolrParams.toSolrParams(n); final SolrParams p = SolrParams.toSolrParams(n);
@ -126,6 +128,7 @@ public class XSLTResponseWriter implements QueryResponseWriter {
Transformer result = (Transformer)ctx.get(CONTEXT_TRANSFORMER_KEY); Transformer result = (Transformer)ctx.get(CONTEXT_TRANSFORMER_KEY);
if(result==null) { if(result==null) {
result = TransformerProvider.instance.getTransformer(solrConfig, xslt,xsltCacheLifetimeSeconds.intValue()); result = TransformerProvider.instance.getTransformer(solrConfig, xslt,xsltCacheLifetimeSeconds.intValue());
result.setErrorListener(xmllog);
ctx.put(CONTEXT_TRANSFORMER_KEY,result); ctx.put(CONTEXT_TRANSFORMER_KEY,result);
} }
return result; return result;

View File

@ -0,0 +1,49 @@
<?xml version='1.0' encoding='UTF-8'?>
<!--
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
-->
<!--
XSL transform used to test the XSLTUpdateRequestHandler.
Transforms a test XML into standard Solr <add><doc/></add> format.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<xsl:template match="/">
<add>
<xsl:apply-templates select="/random/document"/>
</add>
</xsl:template>
<xsl:template match="document">
<doc boost="5.5">
<xsl:apply-templates select="*"/>
</doc>
</xsl:template>
<xsl:template match="node">
<field name="{@name}">
<xsl:if test="@enhance!=''">
<xsl:attribute name="boost"><xsl:value-of select="@enhance"/></xsl:attribute>
</xsl:if>
<xsl:value-of select="@value"/>
</field>
</xsl:template>
</xsl:stylesheet>

View File

@ -0,0 +1,93 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler;
import org.apache.solr.SolrTestCaseJ4;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.common.util.ContentStream;
import org.apache.solr.common.util.ContentStreamBase;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.response.QueryResponseWriter;
import org.apache.solr.response.SolrQueryResponse;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
public class XsltUpdateRequestHandlerTest extends SolrTestCaseJ4 {
protected static XsltUpdateRequestHandler handler;
@BeforeClass
public static void beforeTests() throws Exception {
initCore("solrconfig.xml","schema.xml");
handler = new XsltUpdateRequestHandler();
}
@Override
@Before
public void setUp() throws Exception {
super.setUp();
clearIndex();
assertU(commit());
}
@Test
public void testUpdate() throws Exception
{
String xml =
"<random>" +
" <document>" +
" <node name=\"id\" enhance=\"2.2\" value=\"12345\"/>" +
" <node name=\"name\" value=\"kitten\"/>" +
" <node name=\"text\" enhance=\"3\" value=\"some other day\"/>" +
" <node name=\"title\" enhance=\"4\" value=\"A story\"/>" +
" <node name=\"timestamp\" enhance=\"5\" value=\"2011-07-01T10:31:57.140Z\"/>" +
" </document>" +
"</random>";
Map<String,String> args = new HashMap<String, String>();
args.put("tr", "xsl-update-handler-test.xsl");
SolrCore core = h.getCore();
LocalSolrQueryRequest req = new LocalSolrQueryRequest( core, new MapSolrParams( args) );
ArrayList<ContentStream> streams = new ArrayList<ContentStream>();
streams.add(new ContentStreamBase.StringStream(xml));
req.setContentStreams(streams);
SolrQueryResponse rsp = new SolrQueryResponse();
XsltUpdateRequestHandler handler = new XsltUpdateRequestHandler();
handler.init(new NamedList<String>());
handler.handleRequestBody(req, rsp);
StringWriter sw = new StringWriter(32000);
QueryResponseWriter responseWriter = core.getQueryResponseWriter(req);
responseWriter.write(sw,req,rsp);
req.close();
String response = sw.toString();
assertU(response);
assertU(commit());
assertQ("test document was correctly committed", req("q","*:*")
, "//result[@numFound='1']"
, "//int[@name='id'][.='12345']"
);
}
}