HADOOP-14216. Improve Configuration XML Parsing Performance (jeagles)

This commit is contained in:
Jonathan Eagles 2017-03-29 10:12:02 -05:00
parent 84d787b9d5
commit 523f467d93
4 changed files with 263 additions and 130 deletions

View File

@ -313,6 +313,16 @@
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</dependency>
<dependency>
<groupId>org.codehaus.woodstox</groupId>
<artifactId>stax2-api</artifactId>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>com.fasterxml</groupId>
<artifactId>aalto-xml</artifactId>
<scope>compile</scope>
</dependency>
</dependencies>
<build>

View File

@ -18,6 +18,7 @@
package org.apache.hadoop.conf;
import com.fasterxml.aalto.stax.InputFactoryImpl;
import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerator;
import com.google.common.annotations.VisibleForTesting;
@ -65,9 +66,11 @@
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
@ -93,14 +96,10 @@
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.StringInterner;
import org.apache.hadoop.util.StringUtils;
import org.w3c.dom.Attr;
import org.w3c.dom.DOMException;
import org.codehaus.stax2.XMLInputFactory2;
import org.codehaus.stax2.XMLStreamReader2;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
import org.xml.sax.SAXException;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
@ -280,7 +279,13 @@ public String toString() {
* the key most recently
*/
private Map<String, String[]> updatingResource;
/**
* Specify exact input factory to avoid time finding correct one.
* Factory is reusable across un-synchronized threads once initialized
*/
private static final XMLInputFactory2 factory = new InputFactoryImpl();
/**
* Class to keep the information about the keys which replace the deprecated
* ones.
@ -689,7 +694,7 @@ private void handleDeprecation() {
addDefaultResource("hadoop-site.xml");
}
}
private Properties properties;
private Properties overlay;
private ClassLoader classLoader;
@ -2613,8 +2618,8 @@ public Map<String, String> getPropsWithPrefix(String confPrefix) {
return configMap;
}
private Document parse(DocumentBuilder builder, URL url)
throws IOException, SAXException {
private XMLStreamReader parse(URL url)
throws IOException, XMLStreamException {
if (!quietmode) {
if (LOG.isDebugEnabled()) {
LOG.debug("parsing URL " + url);
@ -2630,23 +2635,18 @@ private Document parse(DocumentBuilder builder, URL url)
// with other users.
connection.setUseCaches(false);
}
return parse(builder, connection.getInputStream(), url.toString());
return parse(connection.getInputStream(), url.toString());
}
private Document parse(DocumentBuilder builder, InputStream is,
String systemId) throws IOException, SAXException {
private XMLStreamReader parse(InputStream is,
String systemId) throws IOException, XMLStreamException {
if (!quietmode) {
LOG.debug("parsing input stream " + is);
}
if (is == null) {
return null;
}
try {
return (systemId == null) ? builder.parse(is) : builder.parse(is,
systemId);
} finally {
is.close();
}
return factory.createXMLStreamReader(systemId, is);
}
private void loadResources(Properties properties,
@ -2666,37 +2666,20 @@ private void loadResources(Properties properties,
}
}
private Resource loadResource(Properties properties, Resource wrapper, boolean quiet) {
private Resource loadResource(Properties properties,
Resource wrapper, boolean quiet) {
String name = UNKNOWN_RESOURCE;
try {
Object resource = wrapper.getResource();
name = wrapper.getName();
DocumentBuilderFactory docBuilderFactory
= DocumentBuilderFactory.newInstance();
//ignore all comments inside the xml file
docBuilderFactory.setIgnoringComments(true);
//allow includes in the xml file
docBuilderFactory.setNamespaceAware(true);
try {
docBuilderFactory.setXIncludeAware(true);
} catch (UnsupportedOperationException e) {
LOG.error("Failed to set setXIncludeAware(true) for parser "
+ docBuilderFactory
+ ":" + e,
e);
}
DocumentBuilder builder = docBuilderFactory.newDocumentBuilder();
Document doc = null;
Element root = null;
XMLStreamReader2 reader = null;
boolean returnCachedProperties = false;
if (resource instanceof URL) { // an URL resource
doc = parse(builder, (URL)resource);
reader = (XMLStreamReader2)parse((URL)resource);
} else if (resource instanceof String) { // a CLASSPATH resource
URL url = getResource((String)resource);
doc = parse(builder, url);
reader = (XMLStreamReader2)parse(url);
} else if (resource instanceof Path) { // a file resource
// Can't use FileSystem API or we get an infinite loop
// since FileSystem uses Configuration API. Use java.io.File instead.
@ -2706,104 +2689,188 @@ private Resource loadResource(Properties properties, Resource wrapper, boolean q
if (!quiet) {
LOG.debug("parsing File " + file);
}
doc = parse(builder, new BufferedInputStream(
reader = (XMLStreamReader2)parse(new BufferedInputStream(
new FileInputStream(file)), ((Path)resource).toString());
}
} else if (resource instanceof InputStream) {
doc = parse(builder, (InputStream) resource, null);
reader = (XMLStreamReader2)parse((InputStream)resource, null);
returnCachedProperties = true;
} else if (resource instanceof Properties) {
overlay(properties, (Properties)resource);
} else if (resource instanceof Element) {
root = (Element)resource;
}
if (root == null) {
if (doc == null) {
if (quiet) {
return null;
}
throw new RuntimeException(resource + " not found");
if (reader == null) {
if (quiet) {
return null;
}
root = doc.getDocumentElement();
throw new RuntimeException(resource + " not found");
}
Properties toAddTo = properties;
if(returnCachedProperties) {
toAddTo = new Properties();
}
if (!"configuration".equals(root.getTagName()))
LOG.fatal("bad conf file: top-level element not <configuration>");
NodeList props = root.getChildNodes();
DeprecationContext deprecations = deprecationContext.get();
for (int i = 0; i < props.getLength(); i++) {
Node propNode = props.item(i);
if (!(propNode instanceof Element))
continue;
Element prop = (Element)propNode;
if ("configuration".equals(prop.getTagName())) {
loadResource(toAddTo, new Resource(prop, name), quiet);
continue;
}
if (!"property".equals(prop.getTagName()))
LOG.warn("bad conf file: element not <property>");
String attr = null;
String value = null;
boolean finalParameter = false;
LinkedList<String> source = new LinkedList<String>();
StringBuilder token = new StringBuilder();
String confName = null;
String confValue = null;
boolean confFinal = false;
boolean fallbackAllowed = false;
boolean fallbackEntered = false;
boolean parseToken = false;
LinkedList<String> confSource = new LinkedList<String>();
Attr propAttr = prop.getAttributeNode("name");
if (propAttr != null)
attr = StringInterner.weakIntern(propAttr.getValue());
propAttr = prop.getAttributeNode("value");
if (propAttr != null)
value = StringInterner.weakIntern(propAttr.getValue());
propAttr = prop.getAttributeNode("final");
if (propAttr != null)
finalParameter = "true".equals(propAttr.getValue());
propAttr = prop.getAttributeNode("source");
if (propAttr != null)
source.add(StringInterner.weakIntern(propAttr.getValue()));
while (reader.hasNext()) {
switch (reader.next()) {
case XMLStreamConstants.START_ELEMENT:
switch (reader.getLocalName()) {
case "property":
confName = null;
confValue = null;
confFinal = false;
confSource.clear();
NodeList fields = prop.getChildNodes();
for (int j = 0; j < fields.getLength(); j++) {
Node fieldNode = fields.item(j);
if (!(fieldNode instanceof Element))
continue;
Element field = (Element)fieldNode;
if ("name".equals(field.getTagName()) && field.hasChildNodes())
attr = StringInterner.weakIntern(
((Text)field.getFirstChild()).getData().trim());
if ("value".equals(field.getTagName()) && field.hasChildNodes())
value = StringInterner.weakIntern(
((Text)field.getFirstChild()).getData());
if ("final".equals(field.getTagName()) && field.hasChildNodes())
finalParameter = "true".equals(((Text)field.getFirstChild()).getData());
if ("source".equals(field.getTagName()) && field.hasChildNodes())
source.add(StringInterner.weakIntern(
((Text)field.getFirstChild()).getData()));
}
source.add(name);
// Ignore this parameter if it has already been marked as 'final'
if (attr != null) {
if (deprecations.getDeprecatedKeyMap().containsKey(attr)) {
DeprecatedKeyInfo keyInfo =
deprecations.getDeprecatedKeyMap().get(attr);
keyInfo.clearAccessed();
for (String key:keyInfo.newKeys) {
// update new keys with deprecated key's value
loadProperty(toAddTo, name, key, value, finalParameter,
source.toArray(new String[source.size()]));
// First test for short format configuration
int attrCount = reader.getAttributeCount();
for (int i = 0; i < attrCount; i++) {
String propertyAttr = reader.getAttributeLocalName(i);
if ("name".equals(propertyAttr)) {
confName = StringInterner.weakIntern(
reader.getAttributeValue(i));
} else if ("value".equals(propertyAttr)) {
confValue = StringInterner.weakIntern(
reader.getAttributeValue(i));
} else if ("final".equals(propertyAttr)) {
confFinal = "true".equals(reader.getAttributeValue(i));
} else if ("source".equals(propertyAttr)) {
confSource.add(StringInterner.weakIntern(
reader.getAttributeValue(i)));
}
}
break;
case "name":
case "value":
case "final":
case "source":
parseToken = true;
token.setLength(0);
break;
case "include":
if (!"xi".equals(reader.getPrefix())) {
break;
}
// Determine href for xi:include
String confInclude = null;
attrCount = reader.getAttributeCount();
for (int i = 0; i < attrCount; i++) {
String attrName = reader.getAttributeLocalName(i);
if ("href".equals(attrName)) {
confInclude = reader.getAttributeValue(i);
}
}
if (confInclude == null) {
break;
}
// Determine if the included resource is a classpath resource
// otherwise fallback to a file resource
// xi:include are treated as inline and retain current source
URL include = getResource(confInclude);
if (include != null) {
Resource classpathResource = new Resource(include, name);
loadResource(properties, classpathResource, quiet);
} else {
File href = new File(confInclude);
if (!href.isAbsolute()) {
// Included resources are relative to the current resource
File baseFile = new File(name).getParentFile();
href = new File(baseFile, href.getPath());
}
if (!href.exists()) {
// Resource errors are non-fatal iff there is 1 xi:fallback
fallbackAllowed = true;
break;
}
Resource uriResource = new Resource(href.toURI().toURL(), name);
loadResource(properties, uriResource, quiet);
}
break;
case "fallback":
if (!"xi".equals(reader.getPrefix())) {
break;
}
fallbackEntered = true;
break;
case "configuration":
break;
default:
break;
}
else {
loadProperty(toAddTo, name, attr, value, finalParameter,
source.toArray(new String[source.size()]));
break;
case XMLStreamConstants.CHARACTERS:
if (parseToken) {
char[] text = reader.getTextCharacters();
token.append(text, reader.getTextStart(), reader.getTextLength());
}
break;
case XMLStreamConstants.END_ELEMENT:
switch (reader.getLocalName()) {
case "name":
if (token.length() > 0) {
confName = StringInterner.weakIntern(token.toString().trim());
}
break;
case "value":
if (token.length() > 0) {
confValue = StringInterner.weakIntern(token.toString());
}
break;
case "final":
confFinal = "true".equals(token.toString());
break;
case "source":
confSource.add(StringInterner.weakIntern(token.toString()));
break;
case "include":
if (!"xi".equals(reader.getPrefix())) {
break;
}
if (fallbackAllowed && !fallbackEntered) {
throw new IOException("Fetch fail on include with no "
+ "fallback while loading '" + name + "'");
}
fallbackAllowed = false;
fallbackEntered = false;
break;
case "property":
if (confName == null || (!fallbackAllowed && fallbackEntered)) {
break;
}
confSource.add(name);
DeprecatedKeyInfo keyInfo =
deprecations.getDeprecatedKeyMap().get(confName);
if (keyInfo != null) {
keyInfo.clearAccessed();
for (String key : keyInfo.newKeys) {
// update new keys with deprecated key's value
loadProperty(toAddTo, name, key, confValue, confFinal,
confSource.toArray(new String[confSource.size()]));
}
} else {
loadProperty(toAddTo, name, confName, confValue, confFinal,
confSource.toArray(new String[confSource.size()]));
}
break;
default:
break;
}
default:
break;
}
}
reader.close();
if (returnCachedProperties) {
overlay(properties, toAddTo);
return new Resource(toAddTo, name);
@ -2812,15 +2879,9 @@ private Resource loadResource(Properties properties, Resource wrapper, boolean q
} catch (IOException e) {
LOG.fatal("error parsing conf " + name, e);
throw new RuntimeException(e);
} catch (DOMException e) {
} catch (XMLStreamException e) {
LOG.fatal("error parsing conf " + name, e);
throw new RuntimeException(e);
} catch (SAXException e) {
LOG.fatal("error parsing conf " + name, e);
throw new RuntimeException(e);
} catch (ParserConfigurationException e) {
LOG.fatal("error parsing conf " + name , e);
throw new RuntimeException(e);
}
}

View File

@ -99,10 +99,22 @@ private void endConfig() throws IOException{
out.close();
}
private void addInclude(String filename) throws IOException{
out.write("<xi:include href=\"" + filename + "\" xmlns:xi=\"http://www.w3.org/2001/XInclude\" />\n ");
private void startInclude(String filename) throws IOException {
out.write("<xi:include href=\"" + filename + "\" xmlns:xi=\"http://www.w3.org/2001/XInclude\" >\n ");
}
private void endInclude() throws IOException{
out.write("</xi:include>\n ");
}
private void startFallback() throws IOException {
out.write("<xi:fallback>\n ");
}
private void endFallback() throws IOException {
out.write("</xi:fallback>\n ");
}
public void testInputStreamResource() throws Exception {
StringWriter writer = new StringWriter();
out = new BufferedWriter(writer);
@ -507,7 +519,8 @@ public void testIncludes() throws Exception {
out=new BufferedWriter(new FileWriter(CONFIG));
startConfig();
addInclude(CONFIG2);
startInclude(CONFIG2);
endInclude();
appendProperty("e","f");
appendProperty("g","h");
endConfig();
@ -522,6 +535,44 @@ public void testIncludes() throws Exception {
tearDown();
}
public void testIncludesWithFallback() throws Exception {
tearDown();
out=new BufferedWriter(new FileWriter(CONFIG2));
startConfig();
appendProperty("a","b");
appendProperty("c","d");
endConfig();
out=new BufferedWriter(new FileWriter(CONFIG));
startConfig();
startInclude(CONFIG2);
startFallback();
appendProperty("a", "b.fallback");
appendProperty("c", "d.fallback", true);
endFallback();
endInclude();
appendProperty("e","f");
appendProperty("g","h");
startInclude("MissingConfig.xml");
startFallback();
appendProperty("i", "j.fallback");
appendProperty("k", "l.fallback", true);
endFallback();
endInclude();
endConfig();
// verify that the includes file contains all properties
Path fileResource = new Path(CONFIG);
conf.addResource(fileResource);
assertEquals("b", conf.get("a"));
assertEquals("d", conf.get("c"));
assertEquals("f", conf.get("e"));
assertEquals("h", conf.get("g"));
assertEquals("j.fallback", conf.get("i"));
assertEquals("l.fallback", conf.get("k"));
tearDown();
}
public void testRelativeIncludes() throws Exception {
tearDown();
String relConfig = new File("./tmp/test-config.xml").getAbsolutePath();
@ -536,7 +587,8 @@ public void testRelativeIncludes() throws Exception {
out = new BufferedWriter(new FileWriter(relConfig));
startConfig();
// Add the relative path instead of the absolute one.
addInclude(new File(relConfig2).getName());
startInclude(new File(relConfig2).getName());
endInclude();
appendProperty("c", "d");
endConfig();

View File

@ -869,6 +869,16 @@
<artifactId>core</artifactId>
<version>3.1.1</version>
</dependency>
<dependency>
<groupId>org.codehaus.woodstox</groupId>
<artifactId>stax2-api</artifactId>
<version>3.1.4</version>
</dependency>
<dependency>
<groupId>com.fasterxml</groupId>
<artifactId>aalto-xml</artifactId>
<version>1.0.0</version>
</dependency>
<dependency>
<groupId>org.codehaus.jackson</groupId>
<artifactId>jackson-mapper-asl</artifactId>