This commit is contained in:
lmds1 2014-09-25 11:21:07 -04:00
commit 86b5e4acf9
5 changed files with 249 additions and 49 deletions

View File

@ -79,7 +79,7 @@ public class ValidatorExamples {
}
// If we make it here with no exception, all the files validated!
//START SNIPPET: validateFiles
//END SNIPPET: validateFiles
}
}

View File

@ -59,6 +59,10 @@
Add phloc-commons dependency explicitly, which resolves an issue building HAPI from source on
some platforms. Thanks to Odysseas Pentakalos for the patch!
</action>
<action type="add">
HAPI now logs a single line indicating which StAX implementation is being used
the first time an XML parser or writer is created.
</action>
</release>
<release version="0.6" date="2014-Sep-08" description="This release brings a number of new features and bug fixes!">
<!--

View File

@ -21,14 +21,19 @@ package ca.uhn.fhir.util;
*/
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.jar.Attributes;
import java.util.jar.Manifest;
import javax.xml.stream.FactoryConfigurationError;
import javax.xml.stream.XMLEventReader;
@ -39,23 +44,37 @@ import javax.xml.stream.XMLResolver;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamWriter;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringEscapeUtils;
import org.codehaus.stax2.XMLOutputFactory2;
import org.codehaus.stax2.io.EscapingWriterFactory;
import com.ctc.wstx.api.WstxInputProperties;
import com.ctc.wstx.stax.WstxInputFactory;
import com.ctc.wstx.stax.WstxOutputFactory;
/**
* Utility methods for working with the StAX API.
*
* This class contains code adapted from the Apache Axiom project.
*/
public class XmlUtil {
private static final org.slf4j.Logger ourLog = org.slf4j.LoggerFactory.getLogger(XmlUtil.class);
private static volatile XMLOutputFactory ourOutputFactory;
private static volatile XMLInputFactory ourInputFactory;
private static volatile boolean ourHaveLoggedStaxImplementation;
private static final Map<String, Integer> VALID_ENTITY_NAMES;
private static final ExtendedEntityReplacingXmlResolver XML_RESOLVER = new ExtendedEntityReplacingXmlResolver();
public static void main(String[] args) {
private static final Attributes.Name IMPLEMENTATION_TITLE = new Attributes.Name("Implementation-Title");
System.out.println(Character.toString((char)167));
private static final Attributes.Name IMPLEMENTATION_VENDOR = new Attributes.Name("Implementation-Vendor");
}
private static final Attributes.Name IMPLEMENTATION_VERSION = new Attributes.Name("Implementation-Version");
private static final Attributes.Name BUNDLE_SYMBOLIC_NAME = new Attributes.Name("Bundle-SymbolicName");
private static final Attributes.Name BUNDLE_VENDOR = new Attributes.Name("Bundle-Vendor");
private static final Attributes.Name BUNDLE_VERSION = new Attributes.Name("Bundle-Version");
static {
HashMap<String, Integer> validEntityNames = new HashMap<String, Integer>();
@ -70,7 +89,8 @@ public class XmlUtil {
validEntityNames.put("uml", 168); // diaeresis = spacing diaeresis, U+00A8 ISOdia -->
validEntityNames.put("copy", 169); // copyright sign, U+00A9 ISOnum -->
validEntityNames.put("ordf", 170); // feminine ordinal indicator, U+00AA ISOnum -->
validEntityNames.put("laquo", 171); // left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum -->
validEntityNames.put("laquo", 171); // left-pointing double angle quotation mark = left pointing guillemet,
// U+00AB ISOnum -->
validEntityNames.put("not", 172); // not sign = angled dash, U+00AC ISOnum -->
validEntityNames.put("shy", 173); // soft hyphen = discretionary hyphen, U+00AD ISOnum -->
validEntityNames.put("reg", 174); // registered sign = registered trade mark sign, U+00AE ISOnum -->
@ -86,17 +106,21 @@ public class XmlUtil {
validEntityNames.put("cedil", 184); // cedilla = spacing cedilla, U+00B8 ISOdia -->
validEntityNames.put("sup1", 185); // superscript one = superscript digit one, U+00B9 ISOnum -->
validEntityNames.put("ordm", 186); // masculine ordinal indicator, U+00BA ISOnum -->
validEntityNames.put("raquo", 187); // right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum -->
validEntityNames.put("raquo", 187); // right-pointing double angle quotation mark = right pointing guillemet,
// U+00BB ISOnum -->
validEntityNames.put("frac14", 188); // vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum -->
validEntityNames.put("frac12", 189); // vulgar fraction one half = fraction one half, U+00BD ISOnum -->
validEntityNames.put("frac34", 190); // vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum -->
validEntityNames.put("frac34", 190); // vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum
// -->
validEntityNames.put("iquest", 191); // inverted question mark = turned question mark, U+00BF ISOnum -->
validEntityNames.put("Agrave", 192); // latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1 -->
validEntityNames.put("Agrave", 192); // latin capital letter A with grave = latin capital letter A grave, U+00C0
// ISOlat1 -->
validEntityNames.put("Aacute", 193); // latin capital letter A with acute, U+00C1 ISOlat1 -->
validEntityNames.put("Acirc", 194); // latin capital letter A with circumflex, U+00C2 ISOlat1 -->
validEntityNames.put("Atilde", 195); // latin capital letter A with tilde, U+00C3 ISOlat1 -->
validEntityNames.put("Auml", 196); // latin capital letter A with diaeresis, U+00C4 ISOlat1 -->
validEntityNames.put("Aring", 197); // latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1 -->
validEntityNames.put("Aring", 197); // latin capital letter A with ring above = latin capital letter A ring,
// U+00C5 ISOlat1 -->
validEntityNames.put("AElig", 198); // latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1 -->
validEntityNames.put("Ccedil", 199); // latin capital letter C with cedilla, U+00C7 ISOlat1 -->
validEntityNames.put("Egrave", 200); // latin capital letter E with grave, U+00C8 ISOlat1 -->
@ -115,7 +139,8 @@ public class XmlUtil {
validEntityNames.put("Otilde", 213); // latin capital letter O with tilde, U+00D5 ISOlat1 -->
validEntityNames.put("Ouml", 214); // latin capital letter O with diaeresis, U+00D6 ISOlat1 -->
validEntityNames.put("times", 215); // multiplication sign, U+00D7 ISOnum -->
validEntityNames.put("Oslash", 216); // latin capital letter O with stroke = latin capital letter O slash, U+00D8 ISOlat1 -->
validEntityNames.put("Oslash", 216); // latin capital letter O with stroke = latin capital letter O slash,
// U+00D8 ISOlat1 -->
validEntityNames.put("Ugrave", 217); // latin capital letter U with grave, U+00D9 ISOlat1 -->
validEntityNames.put("Uacute", 218); // latin capital letter U with acute, U+00DA ISOlat1 -->
validEntityNames.put("Ucirc", 219); // latin capital letter U with circumflex, U+00DB ISOlat1 -->
@ -123,12 +148,14 @@ public class XmlUtil {
validEntityNames.put("Yacute", 221); // latin capital letter Y with acute, U+00DD ISOlat1 -->
validEntityNames.put("THORN", 222); // latin capital letter THORN, U+00DE ISOlat1 -->
validEntityNames.put("szlig", 223); // latin small letter sharp s = ess-zed, U+00DF ISOlat1 -->
validEntityNames.put("agrave", 224); // latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1 -->
validEntityNames.put("agrave", 224); // latin small letter a with grave = latin small letter a grave, U+00E0
// ISOlat1 -->
validEntityNames.put("aacute", 225); // latin small letter a with acute, U+00E1 ISOlat1 -->
validEntityNames.put("acirc", 226); // latin small letter a with circumflex, U+00E2 ISOlat1 -->
validEntityNames.put("atilde", 227); // latin small letter a with tilde, U+00E3 ISOlat1 -->
validEntityNames.put("auml", 228); // latin small letter a with diaeresis, U+00E4 ISOlat1 -->
validEntityNames.put("aring", 229); // latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1 -->
validEntityNames.put("aring", 229); // latin small letter a with ring above = latin small letter a ring, U+00E5
// ISOlat1 -->
validEntityNames.put("aelig", 230); // latin small letter ae = latin small ligature ae, U+00E6 ISOlat1 -->
validEntityNames.put("ccedil", 231); // latin small letter c with cedilla, U+00E7 ISOlat1 -->
validEntityNames.put("egrave", 232); // latin small letter e with grave, U+00E8 ISOlat1 -->
@ -147,7 +174,8 @@ public class XmlUtil {
validEntityNames.put("otilde", 245); // latin small letter o with tilde, U+00F5 ISOlat1 -->
validEntityNames.put("ouml", 246); // latin small letter o with diaeresis, U+00F6 ISOlat1 -->
validEntityNames.put("divide", 247); // division sign, U+00F7 ISOnum -->
validEntityNames.put("oslash", 248); // latin small letter o with stroke, = latin small letter o slash, U+00F8 ISOlat1 -->
validEntityNames.put("oslash", 248); // latin small letter o with stroke, = latin small letter o slash, U+00F8
// ISOlat1 -->
validEntityNames.put("ugrave", 249); // latin small letter u with grave, U+00F9 ISOlat1 -->
validEntityNames.put("uacute", 250); // latin small letter u with acute, U+00FA ISOlat1 -->
validEntityNames.put("ucirc", 251); // latin small letter u with circumflex, U+00FB ISOlat1 -->
@ -160,47 +188,143 @@ public class XmlUtil {
}
public static XMLEventReader createXmlReader(Reader reader) throws FactoryConfigurationError, XMLStreamException {
XMLInputFactory inputFactory = XMLInputFactory.newInstance();
XMLResolver xmlResolver = new XMLResolver() {
@Override
public Object resolveEntity(String thePublicID, String theSystemID, String theBaseURI, String theNamespace) throws XMLStreamException {
if (thePublicID == null && theSystemID == null) {
if (theNamespace != null && VALID_ENTITY_NAMES.containsKey(theNamespace)) {
return new String(Character.toChars(VALID_ENTITY_NAMES.get(theNamespace)));
}
}
return null;
}
};
// In the following two lines, you can uncomment the first and comment the second to disable
// automatic parsing of extended entities, e.g. &sect;
// inputFactory.setProperty(WstxInputFactory.IS_REPLACING_ENTITY_REFERENCES, false);
inputFactory.setProperty(WstxInputProperties.P_UNDECLARED_ENTITY_RESOLVER, xmlResolver);
XMLInputFactory inputFactory = getOrCreateInputFactory();
// Now.. create the reader and return it
XMLEventReader er = inputFactory.createXMLEventReader(reader);
return er;
}
public static XMLEventWriter createXmlWriter(Writer theWriter) throws FactoryConfigurationError, XMLStreamException {
XMLOutputFactory newInstance = XMLOutputFactory.newInstance();
private static XMLInputFactory getOrCreateInputFactory() throws FactoryConfigurationError {
if (ourInputFactory == null) {
XMLInputFactory inputFactory = XMLInputFactory.newInstance();
newInstance.setProperty(XMLOutputFactory2.P_TEXT_ESCAPER, new MyEscaper());
if (!ourHaveLoggedStaxImplementation) {
logStaxImplementation(inputFactory.getClass());
}
XMLEventWriter ew = newInstance.createXMLEventWriter(theWriter);
return ew;
/*
* In the following few lines, you can uncomment the first and comment the second to disable automatic
* parsing of extended entities, e.g. &sect;
*
* Note that these properties are Woodstox specific and they cause a crash in environments where SJSXP is
* being used (e.g. glassfish) so we don't set them there.
*/
if (inputFactory instanceof com.ctc.wstx.stax.WstxInputFactory) {
// inputFactory.setProperty(WstxInputFactory.IS_REPLACING_ENTITY_REFERENCES, false);
inputFactory.setProperty(WstxInputProperties.P_UNDECLARED_ENTITY_RESOLVER, XML_RESOLVER);
}
ourInputFactory = inputFactory;
}
return ourInputFactory;
}
/**
 * Logs, on a best-effort basis, which StAX implementation provides the given factory class.
 * <p>
 * The implementation is identified by reading the <code>META-INF/MANIFEST.MF</code> found under
 * the root URL of the archive or directory that contains <code>theClass</code>. When debug logging
 * is enabled a multi-line description is logged at debug level; otherwise a single line with the
 * title and version is logged at info level.
 * <p>
 * This method never propagates a failure: any problem is logged and swallowed so that a
 * diagnostic message can never prevent XML processing. Whatever the outcome,
 * <code>ourHaveLoggedStaxImplementation</code> is set so the lookup is not attempted again.
 *
 * @param theClass
 *            the concrete StAX factory class whose origin should be reported
 */
private static void logStaxImplementation(Class<?> theClass) {
	try {
		URL rootUrl = getRootUrlForClass(theClass);
		if (rootUrl == null) {
			// Name the class we failed to locate so the message is actionable
			ourLog.info("Unable to determine location of StAX implementation containing class {}", theClass.getName());
			return;
		}

		Attributes attrs = readMainManifestAttributes(rootUrl);

		String title = attrs.getValue(IMPLEMENTATION_TITLE);

		// Bundle-SymbolicName may carry directives after a ';' - only the name itself is reported
		String symbolicName = attrs.getValue(BUNDLE_SYMBOLIC_NAME);
		if (symbolicName != null) {
			int i = symbolicName.indexOf(';');
			if (i != -1) {
				symbolicName = symbolicName.substring(0, i);
			}
		}

		// Fall back to the Bundle-* headers when the Implementation-* headers are absent
		String vendor = attrs.getValue(IMPLEMENTATION_VENDOR);
		if (vendor == null) {
			vendor = attrs.getValue(BUNDLE_VENDOR);
		}
		String version = attrs.getValue(IMPLEMENTATION_VERSION);
		if (version == null) {
			version = attrs.getValue(BUNDLE_VERSION);
		}

		if (ourLog.isDebugEnabled()) {
			ourLog.debug("FHIR XML procesing will use StAX implementation at {}\n Title: {}\n Symbolic name: {}\n Vendor: {}\n Version: {}", rootUrl, title, symbolicName, vendor, version);
		} else {
			ourLog.info("FHIR XML procesing will use StAX implementation '{}' version '{}'", title, version);
		}
	} catch (Throwable e) {
		// Deliberately broad: this is diagnostics only and must never break XML processing.
		// toString() includes the exception type, so message-less failures are not logged as "null".
		ourLog.info("Unable to determine StAX implementation: {}", e.toString());
		ourLog.debug("Failure while inspecting the StAX implementation", e);
	} finally {
		ourHaveLoggedStaxImplementation = true;
	}
}

/**
 * Reads <code>META-INF/MANIFEST.MF</code> relative to the given root URL and returns its main attributes.
 */
private static Attributes readMainManifestAttributes(URL theRootUrl) throws IOException {
	URL metaInfUrl = new URL(theRootUrl, "META-INF/MANIFEST.MF");
	InputStream is = metaInfUrl.openStream();
	try {
		return new Manifest(is).getMainAttributes();
	} finally {
		is.close();
	}
}
/**
 * Determines the root URL (archive or directory) from which the given class was loaded.
 * <p>
 * The class file is looked up as a resource through the class's own class loader, and the
 * class-file path is then stripped from the end of the resulting URL.
 *
 * @param cls
 *            the class to locate
 * @return the URL with the class-file path removed, or <code>null</code> if the class resource
 *         cannot be found, its URL does not end with the expected path, or the root URL cannot
 *         be constructed
 */
private static URL getRootUrlForClass(Class<?> cls) {
	String classFilePath = cls.getName().replace('.', '/') + ".class";

	ClassLoader loader = cls.getClassLoader();
	if (loader == null) {
		// No class loader means the class came from the bootstrap loader. The system class
		// loader is used instead, on the assumption that it delegates to its parent first.
		loader = ClassLoader.getSystemClassLoader();
	}

	URL classUrl = loader.getResource(classFilePath);
	if (classUrl == null) {
		return null;
	}

	String urlFile = classUrl.getFile();
	if (!urlFile.endsWith(classFilePath)) {
		return null;
	}

	// Everything before the class-file path is the root of the archive/directory
	String rootFile = urlFile.substring(0, urlFile.length() - classFilePath.length());
	try {
		return new URL(classUrl.getProtocol(), classUrl.getHost(), classUrl.getPort(), rootFile);
	} catch (MalformedURLException ex) {
		return null;
	}
}
public static XMLStreamWriter createXmlStreamWriter(Writer theWriter) throws FactoryConfigurationError, XMLStreamException {
XMLOutputFactory newInstance = XMLOutputFactory.newInstance();
newInstance.setProperty(XMLOutputFactory2.P_TEXT_ESCAPER, new MyEscaper());
XMLStreamWriter ew = newInstance.createXMLStreamWriter(theWriter);
return ew;
XMLOutputFactory outputFactory = getOrCreateOutputFactory();
XMLStreamWriter retVal = outputFactory.createXMLStreamWriter(theWriter);
return retVal;
}
/**
 * Creates a StAX event writer that writes to the given {@link Writer}.
 * <p>
 * The writer is produced by the output factory returned from {@link #getOrCreateOutputFactory()}.
 *
 * @param theWriter
 *            the destination for the XML output
 * @return a new {@link XMLEventWriter} wrapping <code>theWriter</code>
 * @throws FactoryConfigurationError
 *             if the output factory cannot be obtained
 * @throws XMLStreamException
 *             if the factory fails to create the writer
 */
public static XMLEventWriter createXmlWriter(Writer theWriter) throws FactoryConfigurationError, XMLStreamException {
	return getOrCreateOutputFactory().createXMLEventWriter(theWriter);
}
/**
 * Prints the character with code point 167 (U+00A7, the section sign) followed by a line break.
 *
 * @param args
 *            ignored
 */
public static void main(String[] args) {
	char sectionSign = (char) 167;
	System.out.println(String.valueOf(sectionSign));
}
/**
 * Returns the cached {@link XMLOutputFactory}, creating and configuring it on first use.
 * <p>
 * On creation, the StAX implementation is logged if that has not already happened, and the
 * custom text escaper is installed only when the factory is a Woodstox one. No lock is taken
 * here, so callers racing on the very first invocation may each build a factory before one of
 * them is stored.
 *
 * @return the output factory held in <code>ourOutputFactory</code>
 * @throws FactoryConfigurationError
 *             if no StAX output factory can be instantiated
 */
private static XMLOutputFactory getOrCreateOutputFactory() throws FactoryConfigurationError {
	if (ourOutputFactory != null) {
		return ourOutputFactory;
	}

	XMLOutputFactory newFactory = XMLOutputFactory.newInstance();

	if (!ourHaveLoggedStaxImplementation) {
		logStaxImplementation(newFactory.getClass());
	}

	// The escaper property is Woodstox specific. Setting it on another implementation
	// (e.g. SJSXP, as used in glassfish) causes a crash, so it is only applied to Woodstox.
	if (newFactory instanceof WstxOutputFactory) {
		newFactory.setProperty(XMLOutputFactory2.P_TEXT_ESCAPER, new MyEscaper());
	}

	ourOutputFactory = newFactory;
	return ourOutputFactory;
}
public static class MyEscaper implements EscapingWriterFactory {
@Override
@ -249,4 +373,17 @@ public class XmlUtil {
}
/**
 * {@link XMLResolver} that replaces entity references found in <code>VALID_ENTITY_NAMES</code>
 * with the character they stand for.
 */
private static final class ExtendedEntityReplacingXmlResolver implements XMLResolver {

	/**
	 * Resolves a reference that has neither a public nor a system ID by looking up
	 * <code>theNamespace</code> in <code>VALID_ENTITY_NAMES</code>.
	 *
	 * @return the replacement text as a {@link String}, or <code>null</code> if the reference has a
	 *         public or system ID, <code>theNamespace</code> is <code>null</code>, or the name is not in the table
	 */
	@Override
	public Object resolveEntity(String thePublicID, String theSystemID, String theBaseURI, String theNamespace) throws XMLStreamException {
		if (thePublicID != null || theSystemID != null) {
			return null;
		}
		// NOTE(review): despite the parameter name, the value is used as the entity name - confirm against the StAX implementation
		if (theNamespace == null || !VALID_ENTITY_NAMES.containsKey(theNamespace)) {
			return null;
		}
		int codePoint = VALID_ENTITY_NAMES.get(theNamespace);
		return new String(Character.toChars(codePoint));
	}

}
}

View File

@ -54,6 +54,39 @@
</section>
<section name="Dependencies">
<p>
The HAPI-FHIR library depends on other libraries to provide specific functionality.
Some of those libraries are listed here:
</p>
<subsection name="StAX / Woodstox">
<p>
XML processing (for resource marshalling and unmarshalling) uses the
Java StAX API, which is a fast and efficient API for XML processing.
HAPI bundles (for release archives) and depends on (for Maven builds)
the <a href="http://woodstox.codehaus.org/">Woodstox</a> library, which
is a good implementation of StAX.
</p>
<p>
The first time an XML parser or writer is created, HAPI will emit a log line
indicating which StAX implementation is being used, e.g.:
</p>
<source>08:01:32.044 [main] INFO ca.uhn.fhir.util.XmlUtil - FHIR XML procesing will use StAX implementation 'Woodstox XML-processor' version '4.4.0'</source>
<p>
If a different implementation is being used, you may want to consider using
Woodstox instead by setting the following system properties:
</p>
<source>System.setProperty("javax.xml.stream.XMLInputFactory", "com.ctc.wstx.stax.WstxInputFactory");
System.setProperty("javax.xml.stream.XMLOutputFactory", "com.ctc.wstx.stax.WstxOutputFactory");
System.setProperty("javax.xml.stream.XMLEventFactory", "com.ctc.wstx.stax.WstxEventFactory");</source>
</subsection>
</section>
</body>
</document>

View File

@ -0,0 +1,26 @@
package ca.uhn.fhir.util;
import java.io.StringReader;
import java.io.StringWriter;
import org.junit.Test;
/**
 * Smoke tests for {@link XmlUtil}: each test passes as long as the corresponding factory method
 * completes without throwing.
 */
public class XmlUtilTest {

	/** Creating an event reader over a minimal document must not throw. */
	@Test
	public void testCreateReader() throws Exception {
		StringReader input = new StringReader("<a/>");
		XmlUtil.createXmlReader(input);
	}

	/** Creating an event writer over an in-memory target must not throw. */
	@Test
	public void testCreateWriter() throws Exception {
		StringWriter output = new StringWriter();
		XmlUtil.createXmlWriter(output);
	}

	/** Creating a stream writer over an in-memory target must not throw. */
	@Test
	public void testCreateStreamWriter() throws Exception {
		StringWriter output = new StringWriter();
		XmlUtil.createXmlStreamWriter(output);
	}

}